75 files changed, 18689 insertions, 0 deletions
diff --git a/usr/src/libm/src/m9x/__fenv_amd64.il b/usr/src/libm/src/m9x/__fenv_amd64.il
new file mode 100644
index 0000000..bae2414
--- /dev/null
+++ b/usr/src/libm/src/m9x/__fenv_amd64.il
@@ -0,0 +1,349 @@
+/
+/ CDDL HEADER START
+/
+/ The contents of this file are subject to the terms of the
+/ Common Development and Distribution License (the "License").
+/ You may not use this file except in compliance with the License.
+/
+/ You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+/ or http://www.opensolaris.org/os/licensing.
+/ See the License for the specific language governing permissions
+/ and limitations under the License.
+/
+/ When distributing Covered Code, include this CDDL HEADER in each
+/ file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+/ If applicable, add the following below this CDDL HEADER, with the
+/ fields enclosed by brackets "[]" replaced with your own identifying
+/ information: Portions Copyright [yyyy] [name of copyright owner]
+/
+/ CDDL HEADER END
+/
+/ Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+/ Use is subject to license terms.
+/
+/ @(#)__fenv_amd64.il	1.5	06/01/31 SMI
+/
+/ __fenv_getcwsw: %rdi points to a 4-byte buffer.  Stores the x87 status
+/ word at offset 0 and the x87 control word at offset 2.
+	.inline	__fenv_getcwsw,1
+	fstsw	(%rdi)
+	fstcw	2(%rdi)
+	.end
+
+/ __fenv_setcwsw: %rdi points to a 4-byte buffer holding the status word at
+/ offset 0 and the control word at offset 2.  The current x87 environment is
+/ saved to a 32-byte scratch area carved out below %rsp, the control word
+/ (image offset 0) and status word (image offset 4) are overwritten, and the
+/ image is reloaded.  The trailing fwait makes any exception that the new
+/ status/control pair leaves pending and unmasked get reported here.
+/ Clobbers %dx and %cx.
+/ NOTE(review): the scratch area lies in what the amd64 ABI treats as the
+/ red zone -- confirm the inlining compiler keeps no live data there.
+	.inline	__fenv_setcwsw,1
+	movw	(%rdi),%dx
+	movw	2(%rdi),%cx
+	subq	$32,%rsp
+	fstenv	(%rsp)
+	movw	%cx,(%rsp)
+	movw	%dx,4(%rsp)
+	fldenv	(%rsp)
+	fwait
+	addq	$32,%rsp
+	.end
+
+/ __fenv_getmxcsr: stores the 32-bit SSE MXCSR register at (%rdi).
+	.inline	__fenv_getmxcsr,1
+	stmxcsr	(%rdi)
+	.end
+
+/ __fenv_setmxcsr: loads the SSE MXCSR register from the 32-bit value at
+/ (%rdi).
+	.inline	__fenv_setmxcsr,1
+	ldmxcsr	(%rdi)
+	.end
+
+/ f2xm1: pushes the 80-bit value at (%rsp) and replaces it with 2^x - 1.
+/ Result is left in %st(0).
+	.inline	f2xm1,1
+	fldt	(%rsp)
+	f2xm1
+	.end
+
+/ fyl2x: pushes the 80-bit values at (%rsp) then 16(%rsp), so %st(1) is the
+/ first and %st(0) the second.  Leaves first * log2(second) in %st(0)
+/ (the instruction pops one entry).
+	.inline	fyl2x,2
+	fldt	(%rsp)
+	fldt	16(%rsp)
+	fyl2x
+	.end
+
+/ fptan: pushes the 80-bit value at (%rsp) and takes its tangent.  fptan
+/ also pushes the constant 1.0; the fstpt pops that constant into the
+/ argument slot at (%rsp), leaving only the tangent in %st(0).
+	.inline	fptan,1
+	fldt	(%rsp)
+	fptan
+	fstpt	(%rsp)
+	.end
+
+/ fpatan: pushes the 80-bit values at (%rsp) then 16(%rsp), so %st(1) is the
+/ first and %st(0) the second.  Leaves atan(first / second) in %st(0)
+/ (the instruction pops one entry).
+	.inline	fpatan,2
+	fldt	(%rsp)
+	fldt	16(%rsp)
+	fpatan
+	.end
+
+/ fxtract: pushes the 80-bit value at (%rsp) and splits it: the significand
+/ ends up in %st(0) and the exponent in %st(1).
+/ NOTE(review): two x87 stack entries remain after this template; confirm
+/ callers account for the extra entry.
+	.inline	fxtract,1
+	fldt	(%rsp)
+	fxtract
+	.end
+
+/ fprem1: pushes the 80-bit values at (%rsp) then 16(%rsp), so %st(0) is the
+/ second and %st(1) the first.  fprem1 puts the IEEE (round-to-nearest)
+/ partial remainder of second / first in %st(0); the fstp then discards the
+/ divisor so only the remainder is left in %st(0).
+	.inline	fprem1,2
+	fldt	(%rsp)
+	fldt	16(%rsp)
+	fprem1
+	fstp	%st(1)
+	.end
+
+/ fprem: pushes the 80-bit values at (%rsp) then 16(%rsp), so %st(0) is the
+/ second and %st(1) the first.  fprem puts the truncating partial remainder
+/ of second / first in %st(0); the fstp then discards the divisor so only
+/ the remainder is left in %st(0).
+	.inline	fprem,2
+	fldt	(%rsp)
+	fldt	16(%rsp)
+	fprem
+	fstp	%st(1)
+	.end
+
+/ fyl2xp1: pushes the 80-bit values at (%rsp) then 16(%rsp), so %st(1) is
+/ the first and %st(0) the second.  Leaves first * log2(second + 1) in
+/ %st(0) (the instruction pops one entry).
+	.inline	fyl2xp1,2
+	fldt	(%rsp)
+	fldt	16(%rsp)
+	fyl2xp1
+	.end
+
+/ fsqrt: pushes the 80-bit value at (%rsp) and replaces it with its square
+/ root.  Result is left in %st(0).
+	.inline	fsqrt,1
+	fldt	(%rsp)
+	fsqrt
+	.end
+
+/ fsincos: pushes the 80-bit value at (%rsp) and computes both its sine and
+/ cosine: the cosine ends up in %st(0) and the sine in %st(1).
+/ NOTE(review): two x87 stack entries remain after this template; confirm
+/ callers account for the extra entry.
+	.inline	fsincos,1
+	fldt	(%rsp)
+	fsincos
+	.end
+
+/ frndint: pushes the 80-bit value at (%rsp) and rounds it to an integral
+/ value using the rounding mode in the x87 control word.  Result is left in
+/ %st(0).
+	.inline	frndint,1
+	fldt	(%rsp)
+	frndint
+	.end
+
+/ fscale: pushes the 80-bit values at (%rsp) then 16(%rsp), so %st(0) is the
+/ second and %st(1) the first.  fscale multiplies %st(0) by 2 raised to the
+/ truncated value of %st(1); the fstp then discards the scale operand so
+/ only the scaled result is left in %st(0).
+	.inline	fscale,2
+	fldt	(%rsp)
+	fldt	16(%rsp)
+	fscale
+	fstp	%st(1)
+	.end
+
+/ fsin: pushes the 80-bit value at (%rsp) and replaces it with its sine.
+/ Result is left in %st(0).
+	.inline	fsin,1
+	fldt	(%rsp)
+	fsin
+	.end
+
+/ fcos: pushes the 80-bit value at (%rsp) and replaces it with its cosine.
+/ Result is left in %st(0).
+	.inline	fcos,1
+	fldt	(%rsp)
+	fcos
+	.end
+
+/ sse_cmpeqss: %rdi, %rsi -> single-precision operands, %rdx -> 4-byte
+/ result.  Stores the cmpeqss mask: all ones if (%rdi) == (%rsi), else 0.
+/ Clobbers %xmm0.
+	.inline	sse_cmpeqss,3
+	movss	(%rdi),%xmm0
+	cmpeqss	(%rsi),%xmm0
+	movss	%xmm0,(%rdx)
+	.end
+
+/ sse_cmpltss: %rdi, %rsi -> single-precision operands, %rdx -> 4-byte
+/ result.  Stores the cmpltss mask: all ones if (%rdi) < (%rsi), else 0.
+/ Clobbers %xmm0.
+	.inline	sse_cmpltss,3
+	movss	(%rdi),%xmm0
+	cmpltss	(%rsi),%xmm0
+	movss	%xmm0,(%rdx)
+	.end
+
+/ sse_cmpless: %rdi, %rsi -> single-precision operands, %rdx -> 4-byte
+/ result.  Stores the cmpless mask: all ones if (%rdi) <= (%rsi), else 0.
+/ Clobbers %xmm0.
+	.inline	sse_cmpless,3
+	movss	(%rdi),%xmm0
+	cmpless	(%rsi),%xmm0
+	movss	%xmm0,(%rdx)
+	.end
+
+/ sse_cmpunordss: %rdi, %rsi -> single-precision operands, %rdx -> 4-byte
+/ result.  Stores the cmpunordss mask: all ones if either operand is a NaN,
+/ else 0.  Clobbers %xmm0.
+	.inline	sse_cmpunordss,3
+	movss	(%rdi),%xmm0
+	cmpunordss	(%rsi),%xmm0
+	movss	%xmm0,(%rdx)
+	.end
+
+/ sse_minss: %rdi, %rsi -> single-precision operands, %rdx -> 4-byte
+/ result.  Stores minss of (%rdi) and (%rsi).  Clobbers %xmm0.
+	.inline	sse_minss,3
+	movss	(%rdi),%xmm0
+	minss	(%rsi),%xmm0
+	movss	%xmm0,(%rdx)
+	.end
+
+/ sse_maxss: %rdi, %rsi -> single-precision operands, %rdx -> 4-byte
+/ result.  Stores maxss of (%rdi) and (%rsi).  Clobbers %xmm0.
+	.inline	sse_maxss,3
+	movss	(%rdi),%xmm0
+	maxss	(%rsi),%xmm0
+	movss	%xmm0,(%rdx)
+	.end
+
+/ sse_addss: %rdi, %rsi -> single-precision operands, %rdx -> 4-byte
+/ result.  Stores (%rdi) + (%rsi).  Clobbers %xmm0.
+	.inline	sse_addss,3
+	movss	(%rdi),%xmm0
+	addss	(%rsi),%xmm0
+	movss	%xmm0,(%rdx)
+	.end
+
+/ sse_subss: %rdi, %rsi -> single-precision operands, %rdx -> 4-byte
+/ result.  Stores (%rdi) - (%rsi).  Clobbers %xmm0.
+	.inline	sse_subss,3
+	movss	(%rdi),%xmm0
+	subss	(%rsi),%xmm0
+	movss	%xmm0,(%rdx)
+	.end
+
+/ sse_mulss: %rdi, %rsi -> single-precision operands, %rdx -> 4-byte
+/ result.  Stores (%rdi) * (%rsi).  Clobbers %xmm0.
+	.inline	sse_mulss,3
+	movss	(%rdi),%xmm0
+	mulss	(%rsi),%xmm0
+	movss	%xmm0,(%rdx)
+	.end
+
+/ sse_divss: %rdi, %rsi -> single-precision operands, %rdx -> 4-byte
+/ result.  Stores (%rdi) / (%rsi).  Clobbers %xmm0.
+	.inline	sse_divss,3
+	movss	(%rdi),%xmm0
+	divss	(%rsi),%xmm0
+	movss	%xmm0,(%rdx)
+	.end
+
+/ sse_sqrtss: stores the single-precision square root of (%rdi) at (%rsi).
+/ Clobbers %xmm0.
+	.inline	sse_sqrtss,2
+	sqrtss	(%rdi),%xmm0
+	movss	%xmm0,(%rsi)
+	.end
+
+/ sse_ucomiss: unordered compare of the single-precision values at (%rdi)
+/ and (%rsi).  Nothing is stored; only EFLAGS and the MXCSR exception flags
+/ are affected (ucomiss signals invalid only for signaling NaNs).
+/ Clobbers %xmm0.
+	.inline	sse_ucomiss,2
+	movss	(%rdi),%xmm0
+	ucomiss	(%rsi),%xmm0
+	.end
+
+/ sse_comiss: ordered compare of the single-precision values at (%rdi) and
+/ (%rsi).  Nothing is stored; only EFLAGS and the MXCSR exception flags are
+/ affected (comiss signals invalid for any NaN operand).  Clobbers %xmm0.
+	.inline	sse_comiss,2
+	movss	(%rdi),%xmm0
+	comiss	(%rsi),%xmm0
+	.end
+
+/ sse_cvtss2sd: converts the single-precision value at (%rdi) to double
+/ precision and stores the 8-byte result at (%rsi).  Clobbers %xmm0.
+	.inline	sse_cvtss2sd,2
+	cvtss2sd	(%rdi),%xmm0
+	movsd	%xmm0,(%rsi)
+	.end
+
+/ sse_cvtsi2ss: converts the 32-bit integer at (%rdi) to single precision
+/ and stores the 4-byte result at (%rsi).  Clobbers %xmm0.
+	.inline	sse_cvtsi2ss,2
+	cvtsi2ss	(%rdi),%xmm0
+	movss	%xmm0,(%rsi)
+	.end
+
+/ sse_cvttss2si: converts the single-precision value at (%rdi) to a 32-bit
+/ integer with truncation and stores the full 4-byte result at (%rsi).
+/ The store must be movl: the result lives in the 32-bit register %ecx.
+/ Clobbers %ecx.
+	.inline	sse_cvttss2si,2
+	cvttss2si	(%rdi),%ecx
+	movl	%ecx,(%rsi)
+	.end
+
+/ sse_cvtss2si: converts the single-precision value at (%rdi) to a 32-bit
+/ integer using the MXCSR rounding mode and stores the full 4-byte result
+/ at (%rsi).  The store must be movl: the result lives in the 32-bit
+/ register %ecx.  Clobbers %ecx.
+	.inline	sse_cvtss2si,2
+	cvtss2si	(%rdi),%ecx
+	movl	%ecx,(%rsi)
+	.end
+
+/ sse_cvtsi2ssq: converts the 64-bit integer at (%rdi) to single precision
+/ and stores the 4-byte result at (%rsi).  Clobbers %xmm0.
+	.inline	sse_cvtsi2ssq,2
+	cvtsi2ssq	(%rdi),%xmm0
+	movss	%xmm0,(%rsi)
+	.end
+
+/ sse_cvttss2siq: converts the single-precision value at (%rdi) to a 64-bit
+/ integer with truncation and stores the 8-byte result at (%rsi).
+/ Clobbers %rcx.
+	.inline	sse_cvttss2siq,2
+	cvttss2siq	(%rdi),%rcx
+	movq	%rcx,(%rsi)
+	.end
+
+/ sse_cvtss2siq: converts the single-precision value at (%rdi) to a 64-bit
+/ integer using the MXCSR rounding mode and stores the 8-byte result at
+/ (%rsi).  Clobbers %rcx.
+	.inline	sse_cvtss2siq,2
+	cvtss2siq	(%rdi),%rcx
+	movq	%rcx,(%rsi)
+	.end
+
+/ sse_cmpeqsd: %rdi, %rsi -> double-precision operands, %rdx -> 8-byte
+/ result.  Stores the cmpeqsd mask: all ones if (%rdi) == (%rsi), else 0.
+/ Clobbers %xmm0.
+	.inline	sse_cmpeqsd,3
+	movsd	(%rdi),%xmm0
+	cmpeqsd	(%rsi),%xmm0
+	movsd	%xmm0,(%rdx)
+	.end
+
+/ sse_cmpltsd: %rdi, %rsi -> double-precision operands, %rdx -> 8-byte
+/ result.  Stores the cmpltsd mask: all ones if (%rdi) < (%rsi), else 0.
+/ Clobbers %xmm0.
+	.inline	sse_cmpltsd,3
+	movsd	(%rdi),%xmm0
+	cmpltsd	(%rsi),%xmm0
+	movsd	%xmm0,(%rdx)
+	.end
+
+/ sse_cmplesd: %rdi, %rsi -> double-precision operands, %rdx -> 8-byte
+/ result.  Stores the cmplesd mask: all ones if (%rdi) <= (%rsi), else 0.
+/ Clobbers %xmm0.
+	.inline	sse_cmplesd,3
+	movsd	(%rdi),%xmm0
+	cmplesd	(%rsi),%xmm0
+	movsd	%xmm0,(%rdx)
+	.end
+
+/ sse_cmpunordsd: %rdi, %rsi -> double-precision operands, %rdx -> 8-byte
+/ result.  Stores the cmpunordsd mask: all ones if either operand is a NaN,
+/ else 0.  Clobbers %xmm0.
+	.inline	sse_cmpunordsd,3
+	movsd	(%rdi),%xmm0
+	cmpunordsd	(%rsi),%xmm0
+	movsd	%xmm0,(%rdx)
+	.end
+
+/ sse_minsd: %rdi, %rsi -> double-precision operands, %rdx -> 8-byte
+/ result.  Stores minsd of (%rdi) and (%rsi).  Clobbers %xmm0.
+	.inline	sse_minsd,3
+	movsd	(%rdi),%xmm0
+	minsd	(%rsi),%xmm0
+	movsd	%xmm0,(%rdx)
+	.end
+
+/ sse_maxsd: %rdi, %rsi -> double-precision operands, %rdx -> 8-byte
+/ result.  Stores maxsd of (%rdi) and (%rsi).  Clobbers %xmm0.
+	.inline	sse_maxsd,3
+	movsd	(%rdi),%xmm0
+	maxsd	(%rsi),%xmm0
+	movsd	%xmm0,(%rdx)
+	.end
+
+/ sse_addsd: %rdi, %rsi -> double-precision operands, %rdx -> 8-byte
+/ result.  Stores (%rdi) + (%rsi).  Clobbers %xmm0.
+	.inline	sse_addsd,3
+	movsd	(%rdi),%xmm0
+	addsd	(%rsi),%xmm0
+	movsd	%xmm0,(%rdx)
+	.end
+
+/ sse_subsd: %rdi, %rsi -> double-precision operands, %rdx -> 8-byte
+/ result.  Stores (%rdi) - (%rsi).  Clobbers %xmm0.
+	.inline	sse_subsd,3
+	movsd	(%rdi),%xmm0
+	subsd	(%rsi),%xmm0
+	movsd	%xmm0,(%rdx)
+	.end
+
+/ sse_mulsd: %rdi, %rsi -> double-precision operands, %rdx -> 8-byte
+/ result.  Stores (%rdi) * (%rsi).  Clobbers %xmm0.
+	.inline	sse_mulsd,3
+	movsd	(%rdi),%xmm0
+	mulsd	(%rsi),%xmm0
+	movsd	%xmm0,(%rdx)
+	.end
+
+/ sse_divsd: %rdi, %rsi -> double-precision operands, %rdx -> 8-byte
+/ result.  Stores (%rdi) / (%rsi).  Clobbers %xmm0.
+	.inline	sse_divsd,3
+	movsd	(%rdi),%xmm0
+	divsd	(%rsi),%xmm0
+	movsd	%xmm0,(%rdx)
+	.end
+
+/ sse_sqrtsd: stores the double-precision square root of (%rdi) at (%rsi).
+/ Clobbers %xmm0.
+	.inline	sse_sqrtsd,2
+	sqrtsd	(%rdi),%xmm0
+	movsd	%xmm0,(%rsi)
+	.end
+
+/ sse_ucomisd: unordered compare of the double-precision values at (%rdi)
+/ and (%rsi).  Nothing is stored; only EFLAGS and the MXCSR exception flags
+/ are affected (ucomisd signals invalid only for signaling NaNs).
+/ Clobbers %xmm0.
+	.inline	sse_ucomisd,2
+	movsd	(%rdi),%xmm0
+	ucomisd	(%rsi),%xmm0
+	.end
+
+/ sse_comisd: ordered compare of the double-precision values at (%rdi) and
+/ (%rsi).  Nothing is stored; only EFLAGS and the MXCSR exception flags are
+/ affected (comisd signals invalid for any NaN operand).  Clobbers %xmm0.
+	.inline	sse_comisd,2
+	movsd	(%rdi),%xmm0
+	comisd	(%rsi),%xmm0
+	.end
+
+/ sse_cvtsd2ss: converts the double-precision value at (%rdi) to single
+/ precision and stores the 4-byte result at (%rsi).  Clobbers %xmm0.
+	.inline	sse_cvtsd2ss,2
+	cvtsd2ss	(%rdi),%xmm0
+	movss	%xmm0,(%rsi)
+	.end
+
+/ sse_cvtsi2sd: converts the 32-bit integer at (%rdi) to double precision
+/ and stores the 8-byte result at (%rsi).  Clobbers %xmm0.
+	.inline	sse_cvtsi2sd,2
+	cvtsi2sd	(%rdi),%xmm0
+	movsd	%xmm0,(%rsi)
+	.end
+
+/ sse_cvttsd2si: converts the double-precision value at (%rdi) to a 32-bit
+/ integer with truncation and stores the full 4-byte result at (%rsi).
+/ The store must be movl: the result lives in the 32-bit register %ecx.
+/ Clobbers %ecx.
+	.inline	sse_cvttsd2si,2
+	cvttsd2si	(%rdi),%ecx
+	movl	%ecx,(%rsi)
+	.end
+
+/ sse_cvtsd2si: converts the double-precision value at (%rdi) to a 32-bit
+/ integer using the MXCSR rounding mode and stores the full 4-byte result
+/ at (%rsi).  The store must be movl: the result lives in the 32-bit
+/ register %ecx.  Clobbers %ecx.
+	.inline	sse_cvtsd2si,2
+	cvtsd2si	(%rdi),%ecx
+	movl	%ecx,(%rsi)
+	.end
+
+/ sse_cvtsi2sdq: converts the 64-bit integer at (%rdi) to double precision
+/ and stores the 8-byte result at (%rsi).  Clobbers %xmm0.
+	.inline	sse_cvtsi2sdq,2
+	cvtsi2sdq	(%rdi),%xmm0
+	movsd	%xmm0,(%rsi)
+	.end
+
+/ sse_cvttsd2siq: converts the double-precision value at (%rdi) to a 64-bit
+/ integer with truncation and stores the 8-byte result at (%rsi).
+/ Clobbers %rcx.
+	.inline	sse_cvttsd2siq,2
+	cvttsd2siq	(%rdi),%rcx
+	movq	%rcx,(%rsi)
+	.end
+
+/ sse_cvtsd2siq: converts the double-precision value at (%rdi) to a 64-bit
+/ integer using the MXCSR rounding mode and stores the 8-byte result at
+/ (%rsi).  Clobbers %rcx.
+	.inline	sse_cvtsd2siq,2
+	cvtsd2siq	(%rdi),%rcx
+	movq	%rcx,(%rsi)
+	.end
diff --git a/usr/src/libm/src/m9x/__fenv_i386.il b/usr/src/libm/src/m9x/__fenv_i386.il
new file mode 100644
index 0000000..253d4db
--- /dev/null
+++ b/usr/src/libm/src/m9x/__fenv_i386.il
@@ -0,0 +1,411 @@
+/
+/ CDDL HEADER START
+/
+/ The contents of this file are subject to the terms of the
+/ Common Development and Distribution License (the "License").
+/ You may not use this file except in compliance with the License.
+/
+/ You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+/ or http://www.opensolaris.org/os/licensing.
+/ See the License for the specific language governing permissions
+/ and limitations under the License.
+/
+/ When distributing Covered Code, include this CDDL HEADER in each
+/ file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+/ If applicable, add the following below this CDDL HEADER, with the
+/ fields enclosed by brackets "[]" replaced with your own identifying
+/ information: Portions Copyright [yyyy] [name of copyright owner]
+/
+/ CDDL HEADER END
+/
+/ Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+/ Use is subject to license terms.
+/
+/ @(#)__fenv_i386.il	1.8	06/01/31 SMI
+/
+/ __fenv_getcwsw: (%esp) holds a pointer to a 4-byte buffer.  Stores the
+/ x87 status word at offset 0 and the x87 control word at offset 2.
+/ Clobbers %eax.
+	.inline	__fenv_getcwsw,1
+	movl	(%esp),%eax
+	fstsw	(%eax)
+	fstcw	2(%eax)
+	.end
+
+/ __fenv_setcwsw: (%esp) holds a pointer to a 4-byte buffer with the status
+/ word at offset 0 and the control word at offset 2.  The current x87
+/ environment is saved to a 28-byte scratch area pushed on the stack, the
+/ control word (image offset 0) and status word (image offset 4) are
+/ overwritten, and the image is reloaded.  The trailing fwait makes any
+/ exception that the new status/control pair leaves pending and unmasked
+/ get reported here.  Clobbers %eax, %dx and %cx.
+	.inline	__fenv_setcwsw,1
+	movl	(%esp),%eax
+	movw	(%eax),%dx
+	movw	2(%eax),%cx
+	subl	$28,%esp
+	fstenv	(%esp)
+	movw	%cx,(%esp)
+	movw	%dx,4(%esp)
+	fldenv	(%esp)
+	fwait
+	addl	$28,%esp
+	.end
+
+/ __fenv_getmxcsr: stores the 32-bit SSE MXCSR register through the pointer
+/ held at (%esp).  Clobbers %eax.
+	.inline	__fenv_getmxcsr,1
+	movl	(%esp),%eax
+	stmxcsr	(%eax)
+	.end
+
+/ __fenv_setmxcsr: loads the SSE MXCSR register from the 32-bit value
+/ addressed by the pointer held at (%esp).  Clobbers %eax.
+	.inline	__fenv_setmxcsr,1
+	movl	(%esp),%eax
+	ldmxcsr	(%eax)
+	.end
+
+/ f2xm1: pushes the 80-bit value at (%esp) and replaces it with 2^x - 1.
+/ Result is left in %st(0).
+	.inline	f2xm1,1
+	fldt	(%esp)
+	f2xm1
+	.end
+
+/ fyl2x: pushes the 80-bit values at (%esp) then 12(%esp), so %st(1) is the
+/ first and %st(0) the second.  Leaves first * log2(second) in %st(0)
+/ (the instruction pops one entry).
+	.inline	fyl2x,2
+	fldt	(%esp)
+	fldt	12(%esp)
+	fyl2x
+	.end
+
+/ fptan: pushes the 80-bit value at (%esp) and takes its tangent.  fptan
+/ also pushes the constant 1.0; the fstpt pops that constant into the
+/ argument slot at (%esp), leaving only the tangent in %st(0).
+	.inline	fptan,1
+	fldt	(%esp)
+	fptan
+	fstpt	(%esp)
+	.end
+
+/ fpatan: pushes the 80-bit values at (%esp) then 12(%esp), so %st(1) is the
+/ first and %st(0) the second.  Leaves atan(first / second) in %st(0)
+/ (the instruction pops one entry).
+	.inline	fpatan,2
+	fldt	(%esp)
+	fldt	12(%esp)
+	fpatan
+	.end
+
+/ fxtract: pushes the 80-bit value at (%esp) and splits it: the significand
+/ ends up in %st(0) and the exponent in %st(1).
+/ NOTE(review): two x87 stack entries remain after this template; confirm
+/ callers account for the extra entry.
+	.inline	fxtract,1
+	fldt	(%esp)
+	fxtract
+	.end
+
+/ fprem1: pushes the 80-bit values at (%esp) then 12(%esp), so %st(0) is the
+/ second and %st(1) the first.  fprem1 puts the IEEE (round-to-nearest)
+/ partial remainder of second / first in %st(0); the fstp then discards the
+/ divisor so only the remainder is left in %st(0).
+	.inline	fprem1,2
+	fldt	(%esp)
+	fldt	12(%esp)
+	fprem1
+	fstp	%st(1)
+	.end
+
+/ fprem: pushes the 80-bit values at (%esp) then 12(%esp), so %st(0) is the
+/ second and %st(1) the first.  fprem puts the truncating partial remainder
+/ of second / first in %st(0); the fstp then discards the divisor so only
+/ the remainder is left in %st(0).
+	.inline	fprem,2
+	fldt	(%esp)
+	fldt	12(%esp)
+	fprem
+	fstp	%st(1)
+	.end
+
+/ fyl2xp1: pushes the 80-bit values at (%esp) then 12(%esp), so %st(1) is
+/ the first and %st(0) the second.  Leaves first * log2(second + 1) in
+/ %st(0) (the instruction pops one entry).
+	.inline	fyl2xp1,2
+	fldt	(%esp)
+	fldt	12(%esp)
+	fyl2xp1
+	.end
+
+/ fsqrt: pushes the 80-bit value at (%esp) and replaces it with its square
+/ root.  Result is left in %st(0).
+	.inline	fsqrt,1
+	fldt	(%esp)
+	fsqrt
+	.end
+
+/ fsincos: pushes the 80-bit value at (%esp) and computes both its sine and
+/ cosine: the cosine ends up in %st(0) and the sine in %st(1).
+/ NOTE(review): two x87 stack entries remain after this template; confirm
+/ callers account for the extra entry.
+	.inline	fsincos,1
+	fldt	(%esp)
+	fsincos
+	.end
+
+/ frndint: pushes the 80-bit value at (%esp) and rounds it to an integral
+/ value using the rounding mode in the x87 control word.  Result is left in
+/ %st(0).
+	.inline	frndint,1
+	fldt	(%esp)
+	frndint
+	.end
+
+/ fscale: pushes the 80-bit values at (%esp) then 12(%esp), so %st(0) is the
+/ second and %st(1) the first.  fscale multiplies %st(0) by 2 raised to the
+/ truncated value of %st(1); the fstp then discards the scale operand so
+/ only the scaled result is left in %st(0).
+	.inline	fscale,2
+	fldt	(%esp)
+	fldt	12(%esp)
+	fscale
+	fstp	%st(1)
+	.end
+
+/ fsin: pushes the 80-bit value at (%esp) and replaces it with its sine.
+/ Result is left in %st(0).
+	.inline	fsin,1
+	fldt	(%esp)
+	fsin
+	.end
+
+/ fcos: pushes the 80-bit value at (%esp) and replaces it with its cosine.
+/ Result is left in %st(0).
+	.inline	fcos,1
+	fldt	(%esp)
+	fcos
+	.end
+
+/ sse_cmpeqss: pointers at (%esp), 4(%esp), 8(%esp) address two
+/ single-precision operands and a 4-byte result.  Stores the cmpeqss mask:
+/ all ones if first == second, else 0.  Clobbers %eax, %edx, %ecx, %xmm0.
+	.inline	sse_cmpeqss,3
+	movl	(%esp),%eax
+	movl	4(%esp),%edx
+	movl	8(%esp),%ecx
+	movss	(%eax),%xmm0
+	cmpeqss	(%edx),%xmm0
+	movss	%xmm0,(%ecx)
+	.end
+
+/ sse_cmpltss: pointers at (%esp), 4(%esp), 8(%esp) address two
+/ single-precision operands and a 4-byte result.  Stores the cmpltss mask:
+/ all ones if first < second, else 0.  Clobbers %eax, %edx, %ecx, %xmm0.
+	.inline	sse_cmpltss,3
+	movl	(%esp),%eax
+	movl	4(%esp),%edx
+	movl	8(%esp),%ecx
+	movss	(%eax),%xmm0
+	cmpltss	(%edx),%xmm0
+	movss	%xmm0,(%ecx)
+	.end
+
+/ sse_cmpless: pointers at (%esp), 4(%esp), 8(%esp) address two
+/ single-precision operands and a 4-byte result.  Stores the cmpless mask:
+/ all ones if first <= second, else 0.  Clobbers %eax, %edx, %ecx, %xmm0.
+	.inline	sse_cmpless,3
+	movl	(%esp),%eax
+	movl	4(%esp),%edx
+	movl	8(%esp),%ecx
+	movss	(%eax),%xmm0
+	cmpless	(%edx),%xmm0
+	movss	%xmm0,(%ecx)
+	.end
+
+/ sse_cmpunordss: pointers at (%esp), 4(%esp), 8(%esp) address two
+/ single-precision operands and a 4-byte result.  Stores the cmpunordss
+/ mask: all ones if either operand is a NaN, else 0.
+/ Clobbers %eax, %edx, %ecx, %xmm0.
+	.inline	sse_cmpunordss,3
+	movl	(%esp),%eax
+	movl	4(%esp),%edx
+	movl	8(%esp),%ecx
+	movss	(%eax),%xmm0
+	cmpunordss	(%edx),%xmm0
+	movss	%xmm0,(%ecx)
+	.end
+
+/ sse_minss: pointers at (%esp), 4(%esp), 8(%esp) address two
+/ single-precision operands and a 4-byte result.  Stores minss of the two
+/ operands.  Clobbers %eax, %edx, %ecx, %xmm0.
+	.inline	sse_minss,3
+	movl	(%esp),%eax
+	movl	4(%esp),%edx
+	movl	8(%esp),%ecx
+	movss	(%eax),%xmm0
+	minss	(%edx),%xmm0
+	movss	%xmm0,(%ecx)
+	.end
+
+/ sse_maxss: pointers at (%esp), 4(%esp), 8(%esp) address two
+/ single-precision operands and a 4-byte result.  Stores maxss of the two
+/ operands.  Clobbers %eax, %edx, %ecx, %xmm0.
+	.inline	sse_maxss,3
+	movl	(%esp),%eax
+	movl	4(%esp),%edx
+	movl	8(%esp),%ecx
+	movss	(%eax),%xmm0
+	maxss	(%edx),%xmm0
+	movss	%xmm0,(%ecx)
+	.end
+
+/ sse_addss: pointers at (%esp), 4(%esp), 8(%esp) address two
+/ single-precision operands and a 4-byte result.  Stores first + second.
+/ Clobbers %eax, %edx, %ecx, %xmm0.
+	.inline	sse_addss,3
+	movl	(%esp),%eax
+	movl	4(%esp),%edx
+	movl	8(%esp),%ecx
+	movss	(%eax),%xmm0
+	addss	(%edx),%xmm0
+	movss	%xmm0,(%ecx)
+	.end
+
+/ sse_subss: pointers at (%esp), 4(%esp), 8(%esp) address two
+/ single-precision operands and a 4-byte result.  Stores first - second.
+/ Clobbers %eax, %edx, %ecx, %xmm0.
+	.inline	sse_subss,3
+	movl	(%esp),%eax
+	movl	4(%esp),%edx
+	movl	8(%esp),%ecx
+	movss	(%eax),%xmm0
+	subss	(%edx),%xmm0
+	movss	%xmm0,(%ecx)
+	.end
+
+/ sse_mulss: pointers at (%esp), 4(%esp), 8(%esp) address two
+/ single-precision operands and a 4-byte result.  Stores first * second.
+/ Clobbers %eax, %edx, %ecx, %xmm0.
+	.inline	sse_mulss,3
+	movl	(%esp),%eax
+	movl	4(%esp),%edx
+	movl	8(%esp),%ecx
+	movss	(%eax),%xmm0
+	mulss	(%edx),%xmm0
+	movss	%xmm0,(%ecx)
+	.end
+
+/ sse_divss: pointers at (%esp), 4(%esp), 8(%esp) address two
+/ single-precision operands and a 4-byte result.  Stores first / second.
+/ Clobbers %eax, %edx, %ecx, %xmm0.
+	.inline	sse_divss,3
+	movl	(%esp),%eax
+	movl	4(%esp),%edx
+	movl	8(%esp),%ecx
+	movss	(%eax),%xmm0
+	divss	(%edx),%xmm0
+	movss	%xmm0,(%ecx)
+	.end
+
+/ sse_sqrtss: pointers at (%esp) and 4(%esp) address a single-precision
+/ operand and a 4-byte result.  Stores the square root of the operand.
+/ Clobbers %eax, %edx, %xmm0.
+	.inline	sse_sqrtss,2
+	movl	(%esp),%eax
+	movl	4(%esp),%edx
+	sqrtss	(%eax),%xmm0
+	movss	%xmm0,(%edx)
+	.end
+
+/ sse_ucomiss: unordered compare of the single-precision values addressed
+/ by the pointers at (%esp) and 4(%esp).  Nothing is stored; only EFLAGS and
+/ the MXCSR exception flags are affected (ucomiss signals invalid only for
+/ signaling NaNs).  Clobbers %eax, %edx, %xmm0.
+	.inline	sse_ucomiss,2
+	movl	(%esp),%eax
+	movl	4(%esp),%edx
+	movss	(%eax),%xmm0
+	ucomiss	(%edx),%xmm0
+	.end
+
+/ sse_comiss: ordered compare of the single-precision values addressed by
+/ the pointers at (%esp) and 4(%esp).  Nothing is stored; only EFLAGS and
+/ the MXCSR exception flags are affected (comiss signals invalid for any
+/ NaN operand).  Clobbers %eax, %edx, %xmm0.
+	.inline	sse_comiss,2
+	movl	(%esp),%eax
+	movl	4(%esp),%edx
+	movss	(%eax),%xmm0
+	comiss	(%edx),%xmm0
+	.end
+
+/ sse_cvtss2sd: converts the single-precision value addressed by the
+/ pointer at (%esp) to double precision and stores the 8-byte result
+/ through the pointer at 4(%esp).  Clobbers %eax, %edx, %xmm0.
+	.inline	sse_cvtss2sd,2
+	movl	(%esp),%eax
+	movl	4(%esp),%edx
+	cvtss2sd	(%eax),%xmm0
+	movsd	%xmm0,(%edx)
+	.end
+
+/ sse_cvtsi2ss: converts the 32-bit integer addressed by the pointer at
+/ (%esp) to single precision and stores the 4-byte result through the
+/ pointer at 4(%esp).  Clobbers %eax, %edx, %xmm0.
+	.inline	sse_cvtsi2ss,2
+	movl	(%esp),%eax
+	movl	4(%esp),%edx
+	cvtsi2ss	(%eax),%xmm0
+	movss	%xmm0,(%edx)
+	.end
+
+/ sse_cvttss2si: converts the single-precision value addressed by the
+/ pointer at (%esp) to a 32-bit integer with truncation and stores the full
+/ 4-byte result through the pointer at 4(%esp).  The store must be movl:
+/ the result lives in the 32-bit register %ecx.
+/ Clobbers %eax, %edx, %ecx.
+	.inline	sse_cvttss2si,2
+	movl	(%esp),%eax
+	movl	4(%esp),%edx
+	cvttss2si	(%eax),%ecx
+	movl	%ecx,(%edx)
+	.end
+
+/ sse_cvtss2si: converts the single-precision value addressed by the
+/ pointer at (%esp) to a 32-bit integer using the MXCSR rounding mode and
+/ stores the full 4-byte result through the pointer at 4(%esp).  The store
+/ must be movl: the result lives in the 32-bit register %ecx.
+/ Clobbers %eax, %edx, %ecx.
+	.inline	sse_cvtss2si,2
+	movl	(%esp),%eax
+	movl	4(%esp),%edx
+	cvtss2si	(%eax),%ecx
+	movl	%ecx,(%edx)
+	.end
+
+/ sse_cmpeqsd: pointers at (%esp), 4(%esp), 8(%esp) address two
+/ double-precision operands and an 8-byte result.  Stores the cmpeqsd mask:
+/ all ones if first == second, else 0.  Clobbers %eax, %edx, %ecx, %xmm0.
+	.inline	sse_cmpeqsd,3
+	movl	(%esp),%eax
+	movl	4(%esp),%edx
+	movl	8(%esp),%ecx
+	movsd	(%eax),%xmm0
+	cmpeqsd	(%edx),%xmm0
+	movsd	%xmm0,(%ecx)
+	.end
+
+/ sse_cmpltsd: pointers at (%esp), 4(%esp), 8(%esp) address two
+/ double-precision operands and an 8-byte result.  Stores the cmpltsd mask:
+/ all ones if first < second, else 0.  Clobbers %eax, %edx, %ecx, %xmm0.
+	.inline	sse_cmpltsd,3
+	movl	(%esp),%eax
+	movl	4(%esp),%edx
+	movl	8(%esp),%ecx
+	movsd	(%eax),%xmm0
+	cmpltsd	(%edx),%xmm0
+	movsd	%xmm0,(%ecx)
+	.end
+
+/ sse_cmplesd: pointers at (%esp), 4(%esp), 8(%esp) address two
+/ double-precision operands and an 8-byte result.  Stores the cmplesd mask:
+/ all ones if first <= second, else 0.  Clobbers %eax, %edx, %ecx, %xmm0.
+	.inline	sse_cmplesd,3
+	movl	(%esp),%eax
+	movl	4(%esp),%edx
+	movl	8(%esp),%ecx
+	movsd	(%eax),%xmm0
+	cmplesd	(%edx),%xmm0
+	movsd	%xmm0,(%ecx)
+	.end
+
+/ sse_cmpunordsd: pointers at (%esp), 4(%esp), 8(%esp) address two
+/ double-precision operands and an 8-byte result.  Stores the cmpunordsd
+/ mask: all ones if either operand is a NaN, else 0.
+/ Clobbers %eax, %edx, %ecx, %xmm0.
+	.inline	sse_cmpunordsd,3
+	movl	(%esp),%eax
+	movl	4(%esp),%edx
+	movl	8(%esp),%ecx
+	movsd	(%eax),%xmm0
+	cmpunordsd	(%edx),%xmm0
+	movsd	%xmm0,(%ecx)
+	.end
+
+/ sse_minsd: pointers at (%esp), 4(%esp), 8(%esp) address two
+/ double-precision operands and an 8-byte result.  Stores minsd of the two
+/ operands.  Clobbers %eax, %edx, %ecx, %xmm0.
+	.inline	sse_minsd,3
+	movl	(%esp),%eax
+	movl	4(%esp),%edx
+	movl	8(%esp),%ecx
+	movsd	(%eax),%xmm0
+	minsd	(%edx),%xmm0
+	movsd	%xmm0,(%ecx)
+	.end
+
+/ sse_maxsd: pointers at (%esp), 4(%esp), 8(%esp) address two
+/ double-precision operands and an 8-byte result.  Stores maxsd of the two
+/ operands.  Clobbers %eax, %edx, %ecx, %xmm0.
+	.inline	sse_maxsd,3
+	movl	(%esp),%eax
+	movl	4(%esp),%edx
+	movl	8(%esp),%ecx
+	movsd	(%eax),%xmm0
+	maxsd	(%edx),%xmm0
+	movsd	%xmm0,(%ecx)
+	.end
+
+/ sse_addsd: pointers at (%esp), 4(%esp), 8(%esp) address two
+/ double-precision operands and an 8-byte result.  Stores first + second.
+/ Clobbers %eax, %edx, %ecx, %xmm0.
+	.inline	sse_addsd,3
+	movl	(%esp),%eax
+	movl	4(%esp),%edx
+	movl	8(%esp),%ecx
+	movsd	(%eax),%xmm0
+	addsd	(%edx),%xmm0
+	movsd	%xmm0,(%ecx)
+	.end
+
+/ sse_subsd: pointers at (%esp), 4(%esp), 8(%esp) address two
+/ double-precision operands and an 8-byte result.  Stores first - second.
+/ Clobbers %eax, %edx, %ecx, %xmm0.
+	.inline	sse_subsd,3
+	movl	(%esp),%eax
+	movl	4(%esp),%edx
+	movl	8(%esp),%ecx
+	movsd	(%eax),%xmm0
+	subsd	(%edx),%xmm0
+	movsd	%xmm0,(%ecx)
+	.end
+
+/ sse_mulsd: pointers at (%esp), 4(%esp), 8(%esp) address two
+/ double-precision operands and an 8-byte result.  Stores first * second.
+/ Clobbers %eax, %edx, %ecx, %xmm0.
+	.inline	sse_mulsd,3
+	movl	(%esp),%eax
+	movl	4(%esp),%edx
+	movl	8(%esp),%ecx
+	movsd	(%eax),%xmm0
+	mulsd	(%edx),%xmm0
+	movsd	%xmm0,(%ecx)
+	.end
+
+/ sse_divsd: pointers at (%esp), 4(%esp), 8(%esp) address two
+/ double-precision operands and an 8-byte result.  Stores first / second.
+/ Clobbers %eax, %edx, %ecx, %xmm0.
+	.inline	sse_divsd,3
+	movl	(%esp),%eax
+	movl	4(%esp),%edx
+	movl	8(%esp),%ecx
+	movsd	(%eax),%xmm0
+	divsd	(%edx),%xmm0
+	movsd	%xmm0,(%ecx)
+	.end
+
+/ sse_sqrtsd: pointers at (%esp) and 4(%esp) address a double-precision
+/ operand and an 8-byte result.  Stores the square root of the operand.
+/ Clobbers %eax, %edx, %xmm0.
+	.inline	sse_sqrtsd,2
+	movl	(%esp),%eax
+	movl	4(%esp),%edx
+	sqrtsd	(%eax),%xmm0
+	movsd	%xmm0,(%edx)
+	.end
+
+/ sse_ucomisd: unordered compare of the double-precision values addressed
+/ by the pointers at (%esp) and 4(%esp).  Nothing is stored; only EFLAGS and
+/ the MXCSR exception flags are affected (ucomisd signals invalid only for
+/ signaling NaNs).  Clobbers %eax, %edx, %xmm0.
+	.inline	sse_ucomisd,2
+	movl	(%esp),%eax
+	movl	4(%esp),%edx
+	movsd	(%eax),%xmm0
+	ucomisd	(%edx),%xmm0
+	.end
+
+/ sse_comisd: ordered compare of the double-precision values addressed by
+/ the pointers at (%esp) and 4(%esp).  Nothing is stored; only EFLAGS and
+/ the MXCSR exception flags are affected (comisd signals invalid for any
+/ NaN operand).  Clobbers %eax, %edx, %xmm0.
+	.inline	sse_comisd,2
+	movl	(%esp),%eax
+	movl	4(%esp),%edx
+	movsd	(%eax),%xmm0
+	comisd	(%edx),%xmm0
+	.end
+
+/ sse_cvtsd2ss: converts the double-precision value addressed by the
+/ pointer at (%esp) to single precision and stores the 4-byte result
+/ through the pointer at 4(%esp).  Clobbers %eax, %edx, %xmm0.
+	.inline	sse_cvtsd2ss,2
+	movl	(%esp),%eax
+	movl	4(%esp),%edx
+	cvtsd2ss	(%eax),%xmm0
+	movss	%xmm0,(%edx)
+	.end
+
+/ sse_cvtsi2sd: converts the 32-bit integer addressed by the pointer at
+/ (%esp) to double precision and stores the 8-byte result through the
+/ pointer at 4(%esp).  Clobbers %eax, %edx, %xmm0.
+	.inline	sse_cvtsi2sd,2
+	movl	(%esp),%eax
+	movl	4(%esp),%edx
+	cvtsi2sd	(%eax),%xmm0
+	movsd	%xmm0,(%edx)
+	.end
+
+/ sse_cvttsd2si: converts the double-precision value addressed by the
+/ pointer at (%esp) to a 32-bit integer with truncation and stores the full
+/ 4-byte result through the pointer at 4(%esp).  The store must be movl:
+/ the result lives in the 32-bit register %ecx.
+/ Clobbers %eax, %edx, %ecx.
+	.inline	sse_cvttsd2si,2
+	movl	(%esp),%eax
+	movl	4(%esp),%edx
+	cvttsd2si	(%eax),%ecx
+	movl	%ecx,(%edx)
+	.end
+
+/ sse_cvtsd2si: converts the double-precision value addressed by the
+/ pointer at (%esp) to a 32-bit integer using the MXCSR rounding mode and
+/ stores the full 4-byte result through the pointer at 4(%esp).  The store
+/ must be movl: the result lives in the 32-bit register %ecx.
+/ Clobbers %eax, %edx, %ecx.
+	.inline	sse_cvtsd2si,2
+	movl	(%esp),%eax
+	movl	4(%esp),%edx
+	cvtsd2si	(%eax),%ecx
+	movl	%ecx,(%edx)
+	.end
diff --git a/usr/src/libm/src/m9x/__fenv_sparc.il b/usr/src/libm/src/m9x/__fenv_sparc.il
new file mode 100644
index 0000000..d942a33
--- /dev/null
+++ b/usr/src/libm/src/m9x/__fenv_sparc.il
@@ -0,0 +1,40 @@
+!
+! CDDL HEADER START
+!
+! The contents of this file are subject to the terms of the
+! Common Development and Distribution License (the "License").
+! You may not use this file except in compliance with the License.
+!
+! You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+! or http://www.opensolaris.org/os/licensing.
+! See the License for the specific language governing permissions
+! and limitations under the License.
+!
+! When distributing Covered Code, include this CDDL HEADER in each
+! file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+! If applicable, add the following below this CDDL HEADER, with the
+! fields enclosed by brackets "[]" replaced with your own identifying
+! information: Portions Copyright [yyyy] [name of copyright owner]
+!
+! CDDL HEADER END
+!
+! Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+! Use is subject to license terms.
+!
+! @(#)__fenv_sparc.il	1.5	06/01/31 SMI
+!
+! __fenv_getfsr: stores the 32-bit view of the floating-point state
+! register at the address in %o0.
+	.inline	__fenv_getfsr,1
+	st	%fsr,[%o0]
+	.end
+
+! __fenv_setfsr: loads the floating-point state register from the 32-bit
+! word at the address in %o0.
+	.inline	__fenv_setfsr,1
+	ld	[%o0],%fsr
+	.end
+
+! __fenv_getfsrx: stores the full 64-bit floating-point state register at
+! the address in %o0.
+	.inline	__fenv_getfsrx,1
+	stx	%fsr,[%o0]
+	.end
+
+! __fenv_setfsrx: loads the full 64-bit floating-point state register from
+! the doubleword at the address in %o0.
+	.inline	__fenv_setfsrx,1
+	ldx	[%o0],%fsr
+	.end
diff --git a/usr/src/libm/src/m9x/__fex_hdlr.c b/usr/src/libm/src/m9x/__fex_hdlr.c
new file mode 100644
index 0000000..4b87e3f
--- /dev/null
+++ b/usr/src/libm/src/m9x/__fex_hdlr.c
@@ -0,0 +1,850 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"@(#)__fex_hdlr.c	1.12	06/01/31 SMI"
+
+#include "fenv_synonyms.h"
+#undef lint
+#include <signal.h>
+#include <siginfo.h>
+#if defined(__i386) && !defined(__amd64)
+/* for now, pick up local copy of Solaris 10 sys/regset.h; we can get rid
+   of this once we no longer need to build on Solaris 8 */
+#include "regset.h"
+#endif
+#include <ucontext.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <thread.h>
+#include <math.h>
+#include <sunmath.h>
+#include <fenv.h>
+#include "fex_handler.h"
+
+#if defined(__sparc) && !defined(__sparcv9)
+#include <sys/procfs.h>
+#endif
+
+/* 2.x signal.h doesn't declare sigemptyset or sigismember
+   if they're #defined (see sys/signal.h) */
+extern int sigemptyset(sigset_t *);
+extern int sigismember(const sigset_t *, int);
+
+/* external globals */
+void (*__mt_fex_sync)() = NULL; /* for synchronization with libmtsk */
+#pragma weak __mt_fex_sync
+
+#ifdef LIBM_MT_FEX_SYNC
+void (*__libm_mt_fex_sync)() = NULL; /* new, improved version of above */
+#pragma weak __libm_mt_fex_sync
+#endif
+
+/* private variables */
+static fex_handler_t main_handlers;
+static int handlers_initialized = 0;
+static thread_key_t handlers_key;
+static mutex_t handlers_key_lock = DEFAULTMUTEX;
+
+static struct sigaction oact = { 0, SIG_DFL };
+static mutex_t hdlr_lock = DEFAULTMUTEX;
+static int hdlr_installed = 0;
+
+/* private const data */
+/*
+*  Trap-enable bit for each of the FEX_NUM_EXC exceptions, indexed by
+*  exception number (the handlers below use te_bit[(int)e]).  The first
+*  four entries map to distinct traps; every remaining entry shares the
+*  invalid-operation trap.
+*/
+static const int te_bit[FEX_NUM_EXC] = {
+	1 << fp_trap_inexact,
+	1 << fp_trap_division,
+	1 << fp_trap_underflow,
+	1 << fp_trap_overflow,
+	1 << fp_trap_invalid,
+	1 << fp_trap_invalid,
+	1 << fp_trap_invalid,
+	1 << fp_trap_invalid,
+	1 << fp_trap_invalid,
+	1 << fp_trap_invalid,
+	1 << fp_trap_invalid,
+	1 << fp_trap_invalid
+};
+
+/*
+*  Compute the set of trap-enable bits required right now.
+*
+*  thr_handlers - per-exception handling modes for the current thread
+*  fsr          - status value from which the raised flags are extracted
+*
+*  A trap is needed for every exception whose mode is not FEX_NONSTOP.
+*  When retrospective-diagnostic logging is active, a trap is also needed
+*  for every exception whose flag is not yet raised in fsr.
+*/
+static int
+__fex_te_needed(struct fex_handler_data *thr_handlers, unsigned long fsr)
+{
+	int		te = 0;
+	int		idx, raised;
+
+	/* traps demanded by the handling modes */
+	for (idx = 0; idx < FEX_NUM_EXC; idx++) {
+		if (thr_handlers[idx].__mode == FEX_NONSTOP)
+			continue;
+		te |= te_bit[idx];
+	}
+
+	/* nothing more to add unless logging is on */
+	if (!fex_get_log())
+		return te;
+
+	/* logging: trap on each exception whose flag is still clear */
+	raised = (int)__fenv_get_ex(fsr);
+	if ((raised & FE_INEXACT) == 0)
+		te |= (1 << fp_trap_inexact);
+	if ((raised & FE_UNDERFLOW) == 0)
+		te |= (1 << fp_trap_underflow);
+	if ((raised & FE_OVERFLOW) == 0)
+		te |= (1 << fp_trap_overflow);
+	if ((raised & FE_DIVBYZERO) == 0)
+		te |= (1 << fp_trap_division);
+	if ((raised & FE_INVALID) == 0)
+		te |= (1 << fp_trap_invalid);
+
+	return te;
+}
+
+/*
+*  Synchronization hook for libmtsk (SPARC only, for now).
+*
+*  begin  - nonzero at the start of a region, zero at its end
+*  master - nonzero when called on the master thread
+*
+*  At the start, the master records its floating-point environment and
+*  every other thread adopts the recorded one (if any has been recorded).
+*  At the end, the master refreshes the trap enables when logging is on.
+*/
+static void
+__fex_sync_with_libmtsk(int begin, int master)
+{
+	static fenv_t master_env;
+	static int env_initialized = 0;
+	static mutex_t env_lock = DEFAULTMUTEX;
+
+	if (!begin) {
+		/* end of region: only the master has anything to do */
+		if (master && fex_get_log())
+			__fex_update_te();
+		return;
+	}
+
+	/* start of region: publish or adopt the master's environment */
+	mutex_lock(&env_lock);
+	if (master) {
+		(void) fegetenv(&master_env);
+		env_initialized = 1;
+	} else if (env_initialized) {
+		(void) fesetenv(&master_env);
+	}
+	mutex_unlock(&env_lock);
+}
+
+#ifdef LIBM_MT_FEX_SYNC
+/*
+*  The following function may be used for synchronization with any
+*  internal project that manages multiple threads
+*/
+/* actions accepted by __fex_sync_with_threads */
+enum __libm_mt_fex_sync_actions {
+	__libm_mt_fex_start_master = 0,	/* record the caller's environment */
+	__libm_mt_fex_start_slave,	/* adopt the recorded environment */
+	__libm_mt_fex_finish_master,	/* refresh the trap enables */
+	__libm_mt_fex_finish_slave	/* on __i386: clear the trap enables */
+};
+
+/* shared state passed to __fex_sync_with_threads */
+struct __libm_mt_fex_sync_data {
+	fenv_t	master_env;	/* environment saved by the start_master action */
+	int		initialized;	/* set to 1 once master_env has been saved */
+	mutex_t	lock;		/* held while master_env/initialized are accessed */
+};
+
+/*
+*  Perform one synchronization action on behalf of a thread manager.
+*
+*  action  - which step to perform
+*  thr_env - shared environment record used by the two start actions
+*
+*  Any action value other than the four enumerators is ignored.
+*/
+static void
+__fex_sync_with_threads(enum __libm_mt_fex_sync_actions action,
+	struct __libm_mt_fex_sync_data *thr_env)
+{
+	if (action == __libm_mt_fex_start_master) {
+		/* publish the master's environment */
+		mutex_lock(&thr_env->lock);
+		(void) fegetenv(&thr_env->master_env);
+		thr_env->initialized = 1;
+		mutex_unlock(&thr_env->lock);
+	} else if (action == __libm_mt_fex_start_slave) {
+		/* adopt the published environment, if there is one */
+		mutex_lock(&thr_env->lock);
+		if (thr_env->initialized)
+			(void) fesetenv(&thr_env->master_env);
+		mutex_unlock(&thr_env->lock);
+	} else if (action == __libm_mt_fex_finish_master) {
+#ifdef __i386
+		__fex_update_te();
+#else
+		if (fex_get_log())
+			__fex_update_te();
+#endif
+	} else if (action == __libm_mt_fex_finish_slave) {
+#ifdef __i386
+		/* clear traps, making all accrued flags visible in status word */
+		unsigned long   fsr;
+
+		__fenv_getfsr(&fsr);
+		__fenv_set_te(fsr, 0);
+		__fenv_setfsr(&fsr);
+#endif
+	}
+}
+#endif
+
+#if defined(__sparc)
+
+/*
+*  Code for setting or clearing interval mode on US-III and above.
+*  This is embedded as data so we don't have to mark the library
+*  as a v8plusb/v9b object.  (I could have just used one entry and
+*  modified the second word to set the bits I want, but that would
+*  have required another mutex.)
+*
+*  Each row is a two-instruction routine that the handler casts to a
+*  function pointer and calls.  Row 0 is called to clear interval mode;
+*  row 1 + (gsr & 3) is called to restore it with the saved rounding
+*  direction.
+*/
+static const unsigned int siam[][2] = {
+	{ 0x81c3e008, 0x81b01020 }, /* retl, siam 0 */
+	{ 0x81c3e008, 0x81b01024 }, /* retl, siam 4 */
+	{ 0x81c3e008, 0x81b01025 }, /* retl, siam 5 */
+	{ 0x81c3e008, 0x81b01026 }, /* retl, siam 6 */
+	{ 0x81c3e008, 0x81b01027 }  /* retl, siam 7 */
+};
+
+/*
+*  SIGFPE handler (SPARC).  If a handling mode is in effect for the
+*  exception that occurred, apply it; otherwise invoke the saved handler.
+*
+*  sig - signal number; passed through to FEX_SIGNAL and saved handlers
+*  sip - siginfo for the trap; si_code selects the exception and si_addr
+*        is logged and compared against feraiseexcept/fetestexcept
+*  uap - interrupted context; its saved fsr is read for the log and is
+*        rewritten with the final flags and trap enables before returning
+*/
+static void
+__fex_hdlr(int sig, siginfo_t *sip, ucontext_t *uap)
+{
+	struct fex_handler_data	*thr_handlers;
+	struct sigaction	act;
+	void			(*handler)(), (*siamp)();
+	int			mode, i;
+	enum fex_exception	e;
+	fex_info_t		info;
+	unsigned long		fsr, tmpfsr, addr;
+	unsigned int		gsr;
+
+	/* determine which exception occurred */
+	switch (sip->si_code) {
+	case FPE_FLTDIV:
+		e = fex_division;
+		break;
+	case FPE_FLTOVF:
+		e = fex_overflow;
+		break;
+	case FPE_FLTUND:
+		e = fex_underflow;
+		break;
+	case FPE_FLTRES:
+		e = fex_inexact;
+		break;
+	case FPE_FLTINV:
+		/* a negative result means the invalid type was not recognized */
+		if ((int)(e = __fex_get_invalid_type(sip, uap)) < 0)
+			goto not_ieee;
+		break;
+	default:
+		/* not an IEEE exception */
+		goto not_ieee;
+	}
+
+	/* get the handling mode */
+	mode = FEX_NOHANDLER;
+	handler = oact.sa_handler; /* for log; just looking, no need to lock */
+	thr_handlers = __fex_get_thr_handlers();
+	if (thr_handlers && thr_handlers[(int)e].__mode != FEX_NOHANDLER) {
+		mode = thr_handlers[(int)e].__mode;
+		handler = thr_handlers[(int)e].__handler;
+	}
+
+	/* make an entry in the log of retro. diag. if need be */
+	/* i = the 5-bit field at bits 5..9 of the saved fsr */
+	i = ((int)uap->uc_mcontext.fpregs.fpu_fsr >> 5) & 0x1f;
+	__fex_mklog(uap, (char *)sip->si_addr, i, e, mode, (void *)handler);
+
+	/* handle the exception based on the mode */
+	if (mode == FEX_NOHANDLER)
+		goto not_ieee;
+	else if (mode == FEX_ABORT)
+		abort();
+	else if (mode == FEX_SIGNAL) {
+		/* hand off to the user's handler with the original arguments */
+		handler(sig, sip, uap);
+		return;
+	}
+
+	/* custom or nonstop mode; disable traps and clear flags */
+	__fenv_getfsr(&fsr);
+	__fenv_set_te(fsr, 0);
+	__fenv_set_ex(fsr, 0);
+
+	/* if interval mode was set, clear it, then substitute the
+	   interval rounding direction and clear ns mode in the fsr */
+#ifdef __sparcv9
+	gsr = uap->uc_mcontext.asrs[3];
+#else
+	gsr = 0;
+	if (uap->uc_mcontext.xrs.xrs_id == XRS_ID)
+		gsr = (*(unsigned long long*)((prxregset_t*)uap->uc_mcontext.
+		    xrs.xrs_ptr)->pr_un.pr_v8p.pr_filler);
+#endif
+	/* keep bits 25..27 only: bit 2 of the result flags interval mode,
+	   bits 0..1 carry the interval rounding direction */
+	gsr = (gsr >> 25) & 7;
+	if (gsr & 4) {
+		siamp = (void (*)()) siam[0];
+		siamp();
+		/* tmpfsr keeps the fsr as it was before the substitution so
+		   it can be put back around the custom handler call */
+		tmpfsr = fsr;
+		fsr = (fsr & ~0xc0400000ul) | ((gsr & 3) << 30);
+	}
+	__fenv_setfsr(&fsr);
+
+	/* decode the operation */
+	__fex_get_op(sip, uap, &info);
+
+	/* if a custom mode handler is installed, invoke it */
+	if (mode == FEX_CUSTOM) {
+		/* if we got here from feraiseexcept, pass dummy info */
+		addr = (unsigned long)sip->si_addr;
+		if (addr >= (unsigned long)feraiseexcept &&
+		    addr < (unsigned long)fetestexcept ) {
+			info.op = fex_other;
+			info.op1.type = info.op2.type = info.res.type =
+			    fex_nodata;
+		}
+
+		/* restore interval mode if it was set, and put the original
+		   rounding direction and ns mode back in the fsr */
+		if (gsr & 4) {
+			__fenv_setfsr(&tmpfsr);
+			siamp = (void (*)()) siam[1 + (gsr & 3)];
+			siamp();
+		}
+
+		/* custom handlers get a one-bit mask for the exception plus
+		   the decoded operation info */
+		handler(1 << (int)e, &info);
+
+		/* restore modes in case the user's handler changed them */
+		if (gsr & 4) {
+			siamp = (void (*)()) siam[0];
+			siamp();
+		}
+		__fenv_setfsr(&fsr);
+	}
+
+	/* stuff the result */
+	__fex_st_result(sip, uap, &info);
+
+	/* "or" in any exception flags and update traps */
+	fsr = uap->uc_mcontext.fpregs.fpu_fsr;
+	fsr |= ((info.flags & 0x1f) << 5);
+	i = __fex_te_needed(thr_handlers, fsr);
+	__fenv_set_te(fsr, i);
+	uap->uc_mcontext.fpregs.fpu_fsr = fsr;
+	return;
+
+not_ieee:
+	/* revert to the saved handler (if any) */
+	/* copy the saved action under the lock, then act on the copy */
+	mutex_lock(&hdlr_lock);
+	act = oact;
+	mutex_unlock(&hdlr_lock);
+	switch ((unsigned long)act.sa_handler) {
+	case (unsigned long)SIG_DFL:
+		/* simulate trap with no handler installed */
+		sigaction(SIGFPE, &act, NULL);
+		kill(getpid(), SIGFPE);
+		break;
+#if !defined(__lint)
+	case (unsigned long)SIG_IGN:
+		/* the saved disposition ignores SIGFPE, so do nothing */
+		break;
+#endif
+	default:
+		/* a real handler was saved: forward the signal to it */
+		act.sa_handler(sig, sip, uap);
+	}
+}
+
+#elif defined(__i386)
+
+/*
+ * test_sse_hw: condition that guards the handler's accesses to the SSE
+ * mxcsr state.  On amd64 it is the constant 1; otherwise it is true only
+ * when the address of _sse_hw is non-null and its value is nonzero.
+ * NOTE(review): the expansion is not parenthesized, so it is only safe as
+ * the whole controlling expression of an if, which is how it is used in
+ * this file's visible code.
+ */
+#if defined(__amd64)
+#define test_sse_hw	1
+#else
+extern int _sse_hw;
+#define test_sse_hw	&_sse_hw && _sse_hw
+#endif
+
+#if !defined(REG_PC)
+#define REG_PC	EIP
+#endif
+
+/*
+*  If a handling mode is in effect, apply it; otherwise invoke the
+*  saved handler
+*/
+static void
+__fex_hdlr(int sig, siginfo_t *sip, ucontext_t *uap)
+{
+	struct fex_handler_data	*thr_handlers;
+	struct sigaction	act;
+	void			(*handler)(), (*simd_handler[4])();
+	int			mode, simd_mode[4], i, len, accrued, *ap;
+	unsigned int		cwsw, oldcwsw, mxcsr, oldmxcsr;
+	enum fex_exception	e, simd_e[4];
+	fex_info_t		info, simd_info[4];
+	unsigned long		addr;
+	siginfo_t		osip = *sip;
+	sseinst_t		inst;
+
+	/* check for an exception caused by an SSE instruction */
+	if (!(uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.status & 0x80)) {
+		len = __fex_parse_sse(uap, &inst);
+		if (len == 0)
+			goto not_ieee;
+
+		/* disable all traps and clear flags */
+		__fenv_getcwsw(&oldcwsw);
+		cwsw = (oldcwsw & ~0x3f) | 0x003f0000;
+		__fenv_setcwsw(&cwsw);
+		__fenv_getmxcsr(&oldmxcsr);
+		mxcsr = (oldmxcsr & ~0x3f) | 0x1f80;
+		__fenv_setmxcsr(&mxcsr);
+
+		if ((int)inst.op & SIMD) {
+			__fex_get_simd_op(uap, &inst, simd_e, simd_info);
+
+			thr_handlers = __fex_get_thr_handlers();
+			addr = (unsigned long)uap->uc_mcontext.gregs[REG_PC];
+			accrued = uap->uc_mcontext.fpregs.fp_reg_set.
+			    fpchip_state.mxcsr;
+
+			e = (enum fex_exception)-1;
+			mode = FEX_NONSTOP;
+			for (i = 0; i < 4; i++) {
+				if ((int)simd_e[i] < 0)
+					continue;
+
+				e = simd_e[i];
+				simd_mode[i] = FEX_NOHANDLER;
+				simd_handler[i] = oact.sa_handler;
+				if (thr_handlers &&
+				    thr_handlers[(int)e].__mode !=
+				    FEX_NOHANDLER) {
+					simd_mode[i] =
+					    thr_handlers[(int)e].__mode;
+					simd_handler[i] =
+					    thr_handlers[(int)e].__handler;
+				}
+				accrued &= ~te_bit[(int)e];
+				switch (simd_mode[i]) {
+				case FEX_ABORT:
+					mode = FEX_ABORT;
+					break;
+				case FEX_SIGNAL:
+					if (mode != FEX_ABORT)
+						mode = FEX_SIGNAL;
+					handler = simd_handler[i];
+					break;
+				case FEX_NOHANDLER:
+					if (mode != FEX_ABORT && mode !=
+					    FEX_SIGNAL)
+						mode = FEX_NOHANDLER;
+					break;
+				}
+			}
+			if (e == (enum fex_exception)-1) {
+				__fenv_setcwsw(&oldcwsw);
+				__fenv_setmxcsr(&oldmxcsr);
+				goto not_ieee;
+			}
+			accrued |= uap->uc_mcontext.fpregs.fp_reg_set.
+			    fpchip_state.status;
+			ap = __fex_accrued();
+			accrued |= *ap;
+			accrued &= 0x3d;
+
+			for (i = 0; i < 4; i++) {
+				if ((int)simd_e[i] < 0)
+					continue;
+
+				__fex_mklog(uap, (char *)addr, accrued,
+				    simd_e[i], simd_mode[i],
+				    (void *)simd_handler[i]);
+			}
+
+			if (mode == FEX_NOHANDLER) {
+				__fenv_setcwsw(&oldcwsw);
+				__fenv_setmxcsr(&oldmxcsr);
+				goto not_ieee;
+			} else if (mode == FEX_ABORT) {
+				abort();
+			} else if (mode == FEX_SIGNAL) {
+				__fenv_setcwsw(&oldcwsw);
+				__fenv_setmxcsr(&oldmxcsr);
+				handler(sig, &osip, uap);
+				return;
+			}
+
+			*ap = 0;
+			for (i = 0; i < 4; i++) {
+				if ((int)simd_e[i] < 0)
+					continue;
+
+				if (simd_mode[i] == FEX_CUSTOM) {
+					handler(1 << (int)simd_e[i],
+					    &simd_info[i]);
+					__fenv_setcwsw(&cwsw);
+					__fenv_setmxcsr(&mxcsr);
+				}
+			}
+
+			__fex_st_simd_result(uap, &inst, simd_e, simd_info);
+			for (i = 0; i < 4; i++) {
+				if ((int)simd_e[i] < 0)
+					continue;
+
+				accrued |= simd_info[i].flags;
+			}
+
+			if ((int)inst.op & INTREG) {
+				/* set MMX mode */
+#if defined(__amd64)
+				uap->uc_mcontext.fpregs.fp_reg_set.
+				    fpchip_state.sw &= ~0x3800;
+				uap->uc_mcontext.fpregs.fp_reg_set.
+				    fpchip_state.fctw = 0;
+#else
+				uap->uc_mcontext.fpregs.fp_reg_set.
+				    fpchip_state.state[1] &= ~0x3800;
+				uap->uc_mcontext.fpregs.fp_reg_set.
+				    fpchip_state.state[2] = 0;
+#endif
+			}
+		} else {
+			e = __fex_get_sse_op(uap, &inst, &info);
+			if ((int)e < 0) {
+				__fenv_setcwsw(&oldcwsw);
+				__fenv_setmxcsr(&oldmxcsr);
+				goto not_ieee;
+			}
+
+			mode = FEX_NOHANDLER;
+			handler = oact.sa_handler;
+			thr_handlers = __fex_get_thr_handlers();
+			if (thr_handlers && thr_handlers[(int)e].__mode !=
+			    FEX_NOHANDLER) {
+				mode = thr_handlers[(int)e].__mode;
+				handler = thr_handlers[(int)e].__handler;
+			}
+
+			addr = (unsigned long)uap->uc_mcontext.gregs[REG_PC];
+			accrued = uap->uc_mcontext.fpregs.fp_reg_set.
+			    fpchip_state.mxcsr & ~te_bit[(int)e];
+			accrued |= uap->uc_mcontext.fpregs.fp_reg_set.
+			    fpchip_state.status;
+			ap = __fex_accrued();
+			accrued |= *ap;
+			accrued &= 0x3d;
+			__fex_mklog(uap, (char *)addr, accrued, e, mode,
+			    (void *)handler);
+
+			if (mode == FEX_NOHANDLER) {
+				__fenv_setcwsw(&oldcwsw);
+				__fenv_setmxcsr(&oldmxcsr);
+				goto not_ieee;
+			} else if (mode == FEX_ABORT) {
+				abort();
+			} else if (mode == FEX_SIGNAL) {
+				__fenv_setcwsw(&oldcwsw);
+				__fenv_setmxcsr(&oldmxcsr);
+				handler(sig, &osip, uap);
+				return;
+			} else if (mode == FEX_CUSTOM) {
+				*ap = 0;
+				if (addr >= (unsigned long)feraiseexcept &&
+				    addr < (unsigned long)fetestexcept ) {
+					info.op = fex_other;
+					info.op1.type = info.op2.type =
+					    info.res.type = fex_nodata;
+				}
+				handler(1 << (int)e, &info);
+				__fenv_setcwsw(&cwsw);
+				__fenv_setmxcsr(&mxcsr);
+			}
+
+			__fex_st_sse_result(uap, &inst, e, &info);
+			accrued |= info.flags;
+
+#ifdef __amd64
+			/*
+			 * In 64-bit mode, the 32-bit convert-to-integer
+			 * instructions zero the upper 32 bits of the
+			 * destination.  (We do this here and not in
+			 * __fex_st_sse_result because __fex_st_sse_result
+			 * can be called from __fex_st_simd_result, too.)
+			 */
+			if (inst.op == cvtss2si || inst.op == cvttss2si ||
+			    inst.op == cvtsd2si || inst.op == cvttsd2si)
+				inst.op1->i[1] = 0;
+#endif
+		}
+
+		/* advance the pc past the SSE instruction */
+		uap->uc_mcontext.gregs[REG_PC] += len;
+		goto update_state;
+	}
+
+	/* determine which exception occurred */
+	__fex_get_x86_exc(sip, uap);
+	switch (sip->si_code) {
+	case FPE_FLTDIV:
+		e = fex_division;
+		break;
+	case FPE_FLTOVF:
+		e = fex_overflow;
+		break;
+	case FPE_FLTUND:
+		e = fex_underflow;
+		break;
+	case FPE_FLTRES:
+		e = fex_inexact;
+		break;
+	case FPE_FLTINV:
+		if ((int)(e = __fex_get_invalid_type(sip, uap)) < 0)
+			goto not_ieee;
+		break;
+	default:
+		/* not an IEEE exception */
+		goto not_ieee;
+	}
+
+	/* get the handling mode */
+	mode = FEX_NOHANDLER;
+	handler = oact.sa_handler; /* for log; just looking, no need to lock */
+	thr_handlers = __fex_get_thr_handlers();
+	if (thr_handlers && thr_handlers[(int)e].__mode != FEX_NOHANDLER) {
+		mode = thr_handlers[(int)e].__mode;
+		handler = thr_handlers[(int)e].__handler;
+	}
+
+	/* make an entry in the log of retro. diag. if need be */
+#if defined(__amd64)
+	addr = (unsigned long)uap->uc_mcontext.fpregs.fp_reg_set.
+	    fpchip_state.rip;
+#else
+	addr = (unsigned long)uap->uc_mcontext.fpregs.fp_reg_set.
+	    fpchip_state.state[3];
+#endif
+	accrued = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.status & 
+	    ~te_bit[(int)e];
+	if (test_sse_hw)
+		accrued |= uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.
+		    mxcsr;
+	ap = __fex_accrued();
+	accrued |= *ap;
+	accrued &= 0x3d;
+	__fex_mklog(uap, (char *)addr, accrued, e, mode, (void *)handler);
+
+	/* handle the exception based on the mode */
+	if (mode == FEX_NOHANDLER)
+		goto not_ieee;
+	else if (mode == FEX_ABORT)
+		abort();
+	else if (mode == FEX_SIGNAL) {
+		handler(sig, &osip, uap);
+		return;
+	}
+
+	/* disable all traps and clear flags */
+	__fenv_getcwsw(&cwsw);
+	cwsw = (cwsw & ~0x3f) | 0x003f0000;
+	__fenv_setcwsw(&cwsw);
+	if (test_sse_hw) {
+		__fenv_getmxcsr(&mxcsr);
+		mxcsr = (mxcsr & ~0x3f) | 0x1f80;
+		__fenv_setmxcsr(&mxcsr);
+	}
+	*ap = 0;
+
+	/* decode the operation */
+	__fex_get_op(sip, uap, &info);
+
+	/* if a custom mode handler is installed, invoke it */
+	if (mode == FEX_CUSTOM) {
+		/* if we got here from feraiseexcept, pass dummy info */
+		if (addr >= (unsigned long)feraiseexcept &&
+		    addr < (unsigned long)fetestexcept ) {
+			info.op = fex_other;
+			info.op1.type = info.op2.type = info.res.type =
+			    fex_nodata;
+		}
+
+		handler(1 << (int)e, &info);
+
+		/* restore modes in case the user's handler changed them */
+		__fenv_setcwsw(&cwsw);
+		if (test_sse_hw)
+			__fenv_setmxcsr(&mxcsr);
+	}
+
+	/* stuff the result */
+	__fex_st_result(sip, uap, &info);
+	accrued |= info.flags;
+
+update_state:
+	accrued &= 0x3d;
+	i = __fex_te_needed(thr_handlers, accrued);
+	*ap = accrued & i;
+#if defined(__amd64)
+	uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.sw &= ~0x3d;
+	uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.sw |= (accrued & ~i);
+	uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.cw |= 0x3d;
+	uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.cw &= ~i;
+#else
+	uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[1] &= ~0x3d;
+	uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[1] |=
+	    (accrued & ~i);
+	uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[0] |= 0x3d;
+	uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[0] &= ~i;
+#endif
+	if (test_sse_hw) {
+		uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.mxcsr &= ~0x3d;
+		uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.mxcsr |=
+		    0x1e80 | (accrued & ~i);
+		uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.mxcsr &=
+		    ~(i << 7);
+	}
+	return;
+
+not_ieee:
+	/* revert to the saved handler (if any) */
+	mutex_lock(&hdlr_lock);
+	act = oact;
+	mutex_unlock(&hdlr_lock);
+	switch ((unsigned long)act.sa_handler) {
+	case (unsigned long)SIG_DFL:
+		/* simulate trap with no handler installed */
+		sigaction(SIGFPE, &act, NULL);
+		kill(getpid(), SIGFPE);
+		break;
+#if !defined(__lint)
+	case (unsigned long)SIG_IGN:
+		break;
+#endif
+	default:
+		act.sa_handler(sig, &osip, uap);
+	}
+}
+
+#else
+#error Unknown architecture
+#endif
+
+/*  Set each mode in tbl to FEX_NOHANDLER if the corresponding trap is
+    currently enabled, FEX_NONSTOP if it is disabled  */
+static void
+init_handler_modes(struct fex_handler_data *tbl)
+{
+	unsigned long	fsr;
+	int				i, te;
+
+	__fenv_getfsr(&fsr);
+	te = (int)__fenv_get_te(fsr);
+	for (i = 0; i < FEX_NUM_EXC; i++)
+		tbl[i].__mode = ((te & te_bit[i])? FEX_NOHANDLER : FEX_NONSTOP);
+}
+
+/*
+*  Return a pointer to the calling thread's handler data, initializing it
+*  if necessary: main_handlers for the main thread, else a heap table
+*  registered under handlers_key.  Returns NULL if that cannot be set up.
+*/
+struct fex_handler_data *
+__fex_get_thr_handlers()
+{
+	struct fex_handler_data	*ptr = NULL;
+
+	if (thr_main()) {
+		if (!handlers_initialized) {
+			init_handler_modes(main_handlers);
+			handlers_initialized = 1;
+		}
+		return main_handlers;
+	}
+
+	mutex_lock(&handlers_key_lock);
+	if (thr_getspecific(handlers_key, (void **)&ptr) != 0 &&
+		thr_keycreate(&handlers_key, free) != 0) {
+		mutex_unlock(&handlers_key_lock);
+		return NULL;
+	}
+	mutex_unlock(&handlers_key_lock);
+	if (!ptr) {
+		/* zero-filled so fields other than __mode don't start as garbage */
+		ptr = (struct fex_handler_data *)calloc(1, sizeof(fex_handler_t));
+		if (ptr == NULL)
+			return NULL;
+		if (thr_setspecific(handlers_key, (void *)ptr) != 0) {
+			(void)free(ptr);
+			return NULL;
+		}
+		init_handler_modes(ptr);
+	}
+	return ptr;
+}
+
+/*  Update the trap enable bits to match the handling modes selected for
+    the calling thread, installing __fex_hdlr on SIGFPE if any trap is
+    needed  */
+void
+__fex_update_te()
+{
+	struct fex_handler_data	*modes;
+	struct sigaction		newact, prevact;
+	sigset_t				sigmask;
+	unsigned long			fsr;
+	int						needed;
+
+	/* work out which traps the current modes call for */
+	modes = __fex_get_thr_handlers();
+	__fenv_getfsr(&fsr);
+	needed = __fex_te_needed(modes, fsr);
+
+	/* hook SIGFPE once a trap is needed, saving any displaced handler */
+	if (needed && !hdlr_installed) {
+		newact.sa_handler = __fex_hdlr;
+		sigemptyset(&newact.sa_mask);
+		newact.sa_flags = SA_SIGINFO;
+		sigaction(SIGFPE, &newact, &prevact);
+		if (prevact.sa_handler != __fex_hdlr) {
+			mutex_lock(&hdlr_lock);
+			oact = prevact;
+			mutex_unlock(&hdlr_lock);
+		}
+		hdlr_installed = 1;
+	}
+
+	/* load the new trap enables, but only when SIGFPE is known unblocked */
+	if (sigprocmask(0, NULL, &sigmask) == 0) {
+		if (!sigismember(&sigmask, SIGFPE)) {
+			__fenv_set_te(fsr, needed);
+			__fenv_setfsr(&fsr);
+		}
+	}
+
+	/* synchronize with libmtsk */
+	__mt_fex_sync = __fex_sync_with_libmtsk;
+
+#ifdef LIBM_MT_FEX_SYNC
+	/* synchronize with other projects */
+	__libm_mt_fex_sync = __fex_sync_with_threads;
+#endif
+}
diff --git a/usr/src/libm/src/m9x/__fex_i386.c b/usr/src/libm/src/m9x/__fex_i386.c
new file mode 100644
index 0000000..62ec4ba
--- /dev/null
+++ b/usr/src/libm/src/m9x/__fex_i386.c
@@ -0,0 +1,1671 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"@(#)__fex_i386.c	1.15	06/01/31 SMI"
+
+#include "fenv_synonyms.h"
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <signal.h>
+#include <siginfo.h>
+#include <ucontext.h>
+#include <thread.h>
+#include <math.h>
+#include <sunmath.h>
+#include <fenv.h>
+#include "fex_handler.h"
+
+#if defined(__amd64)
+#define test_sse_hw	1
+#else
+/*
+ * The following variable lives in libc on Solaris 10, where it
+ * gets set to a nonzero value at startup time on systems with SSE.
+ */
+int _sse_hw = 0;
+#pragma weak _sse_hw
+#define test_sse_hw	&_sse_hw && _sse_hw
+#endif
+
+static int accrued = 0;
+static thread_key_t accrued_key;
+static mutex_t accrued_key_lock = DEFAULTMUTEX;
+
+/*  Return a pointer to the calling thread's saved accrued-flags word (the
+    static accrued for the main thread), or NULL if it can't be set up  */
+int *
+__fex_accrued()
+{
+	int		*flagsp = NULL;
+	int		failed;
+
+	if (thr_main())
+		return &accrued;
+	/* fetch this thread's slot; create the key if the lookup fails */
+	mutex_lock(&accrued_key_lock);
+	failed = (thr_getspecific(accrued_key, (void **)&flagsp) != 0 &&
+		thr_keycreate(&accrued_key, free) != 0);
+	mutex_unlock(&accrued_key_lock);
+	if (failed)
+		return NULL;
+	if (flagsp == NULL) {
+		if ((flagsp = (int *)malloc(sizeof(int))) == NULL)
+			return NULL;
+		if (thr_setspecific(accrued_key, (void *)flagsp) != 0) {
+			(void)free(flagsp);
+			return NULL;
+		}
+		*flagsp = 0;
+	}
+	return flagsp;
+}
+
+/*  Assemble *fsr from the x87 cw/sw, the mxcsr flags, and the saved flags  */
+void
+__fenv_getfsr(unsigned long *fsr)
+{
+	unsigned int	cwsw, mxcsr;
+	int				*ap;
+
+	__fenv_getcwsw(&cwsw);
+	cwsw &= ~0xe0c00000u;	/* clear reserved bits */
+	if (test_sse_hw) {
+		__fenv_getmxcsr(&mxcsr);
+		cwsw |= (mxcsr & 0x3d);	/* flags, minus denormal operand */
+	}
+	if ((ap = __fex_accrued()) != NULL)	/* NULL: no saved flags */
+		cwsw |= *ap;
+	*fsr = cwsw ^ 0x003f0000u;
+}
+
+/*  Load the environment described by *fsr into the x87 unit and mxcsr  */
+void
+__fenv_setfsr(const unsigned long *fsr)
+{
+	unsigned int	cwsw = (unsigned int)*fsr, mxcsr;
+	int				te, *ap;
+
+	/* save accrued flags of enabled exceptions (if there's a place to) */
+	te = __fenv_get_te(cwsw);
+	if ((ap = __fex_accrued()) != NULL)
+		*ap = cwsw & te;
+	cwsw = (cwsw & ~te) ^ 0x003f0000;	/* and drop them from the word */
+	if (test_sse_hw) {
+		/* copy rounding, masks, flags (no denormal bits) to mxcsr */
+		__fenv_getmxcsr(&mxcsr);
+		mxcsr = (mxcsr & ~0x7ebd) | ((cwsw >> 13) & 0x6000) |
+			((cwsw >> 9) & 0x1e80) | (cwsw & 0x3d);
+		__fenv_setmxcsr(&mxcsr);
+	}
+	__fenv_setcwsw(&cwsw);
+}
+
+/* Offsets into the fp environment save area (assumes 32-bit protected mode) */
+#define CW	0	/* control word */
+#define SW	1	/* status word */
+#define TW	2	/* tag word */
+#define IP	3	/* instruction pointer */
+#define OP	4	/* opcode */
+#define EA	5	/* operand address */
+
+/* macro for accessing fp registers in the save area */
+#if defined(__amd64)
+#define fpreg(u,x)	*(long double *)(10*(x)+(char*)&(u)->uc_mcontext.fpregs.fp_reg_set.fpchip_state.st)
+#else
+#define fpreg(u,x)	*(long double *)(10*(x)+(char*)&(u)->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[7])
+#endif
+
+/*
+*  Recompute sip->si_code from the saved status and control words; the
+*  Solaris x86 kernel can get it wrong
+*/
+void
+__fex_get_x86_exc(siginfo_t *sip, ucontext_t *uap)
+{
+	unsigned	st, ctl;
+	int			code = 0;	/* stays 0 when nothing below matches */
+
+	st = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.status;
+#if defined(__amd64)
+	ctl = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.cw;
+#else
+	ctl = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[CW];
+#endif
+	if ((st & FE_INVALID) && !(ctl & (1 << fp_trap_invalid)))
+		code = ((st & 0x40)? 0 : FPE_FLTINV);	/* 0 for stack fault */
+	else if ((st & FE_DIVBYZERO) && !(ctl & (1 << fp_trap_division)))
+		code = FPE_FLTDIV;
+	else if ((st & FE_OVERFLOW) && !(ctl & (1 << fp_trap_overflow)))
+		code = FPE_FLTOVF;
+	else if ((st & FE_UNDERFLOW) && !(ctl & (1 << fp_trap_underflow)))
+		code = FPE_FLTUND;
+	else if ((st & FE_INEXACT) && !(ctl & (1 << fp_trap_inexact)))
+		code = FPE_FLTRES;
+	sip->si_code = code;
+}
+
+/* classify the single precision value at x from its bit pattern */
+static enum fp_class_type
+my_fp_classf(float *x)
+{
+	int		mag = *(int*)x & 0x7fffffff;	/* sign bit removed */
+
+	if (mag > 0x7f800000)
+		/* nan: bit 22 separates quiet from signaling */
+		return ((mag & 0x400000)? fp_quiet : fp_signaling);
+	if (mag == 0x7f800000)
+		return fp_infinity;
+	if (mag >= 0x00800000)
+		return fp_normal;
+	if (mag != 0)
+		return fp_subnormal;
+	return fp_zero;
+}
+
+/* classify the double at x from its two 32-bit words */
+static enum fp_class_type
+my_fp_class(double *x)
+{
+	int		lo = *(int*)x;
+	int		hi = *(1+(int*)x) & 0x7fffffff;	/* sign bit removed */
+
+	/* below the smallest normal: zero only if every remaining bit is 0 */
+	if (hi < 0x00100000)
+		return (((hi | lo) == 0)? fp_zero : fp_subnormal);
+	if (hi < 0x7ff00000)
+		return fp_normal;
+	if (hi == 0x7ff00000 && lo == 0)
+		return fp_infinity;
+	/* nan: bit 19 of the high word separates quiet from signaling */
+	return ((hi & 0x80000)? fp_quiet : fp_signaling);
+}
+
+/* classify the extended value at x; -1 marks an unsupported format */
+static enum fp_class_type
+my_fp_classl(long double *x)
+{
+	unsigned	lo = *(unsigned*)x;
+	unsigned	hi = *(1+(unsigned*)x);
+	int			expo = *(2+(int*)x) & 0x7fff;
+	int			ibit = ((hi & 0x80000000) != 0);	/* top significand bit */
+
+	if (expo == 0) {
+		if (ibit)
+			return fp_normal; /* pseudo-denormal */
+		return (((hi | lo) == 0)? fp_zero : fp_subnormal);
+	}
+	if (expo < 0x7fff)
+		return (ibit? fp_normal : (enum fp_class_type) -1); /* unsupported */
+	if (hi == 0x80000000 && lo == 0)
+		return fp_infinity;
+	if (hi >= 0xc0000000)
+		return fp_quiet;
+	return (ibit? fp_signaling :
+		(enum fp_class_type) -1); /* unsupported format */
+}
+
+/*  Determine which type of invalid operation exception occurred; returns
+    (enum fex_exception) -1 when the cause cannot be identified (operand in
+    an unsupported format, no operand address, or no matching case)  */
+enum fex_exception
+__fex_get_invalid_type(siginfo_t *sip, ucontext_t *uap)
+{
+	unsigned			op;	/* saved x87 opcode field, shifted down 16 */
+	unsigned long			ea;	/* saved operand address; 0 means none */
+	enum fp_class_type	t1, t2;	/* classes of st and of the other operand */
+
+	/* get the opcode and data address */
+#if defined(__amd64)
+	op = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.fop >> 16;
+	ea = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.rdp;
+#else
+	op = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[OP] >> 16;
+	ea = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[EA];
+#endif
+
+	/* if the instruction is fld, the source must be snan (it can't be
+	   an unsupported format, since fldt doesn't raise any exceptions) */
+	switch (op & 0x7f8) {
+	case 0x100:
+	case 0x140:
+	case 0x180:
+	case 0x500:
+	case 0x540:
+	case 0x580:
+		return fex_inv_snan;
+	}
+
+	/* otherwise st is one of the operands; see if it's snan */
+	t1 = my_fp_classl(&fpreg(uap, 0));
+	if (t1 == fp_signaling)
+		return fex_inv_snan;
+	else if (t1 == (enum fp_class_type) -1)
+		return (enum fex_exception) -1;
+
+	/* determine the class of the second operand if there is one */
+	t2 = fp_normal;	/* stands when no case below reclassifies it */
+	switch (op & 0x7e0) {
+	case 0x600:
+	case 0x620:
+	case 0x640:
+	case 0x660:
+	case 0x680:
+	case 0x6a0:
+		/* short memory operand */
+		if (!ea)
+			return (enum fex_exception) -1;
+		if (*(short *)ea == 0)
+			t2 = fp_zero;
+		break;
+
+	case 0x200:
+	case 0x220:
+	case 0x240:
+	case 0x260:
+	case 0x280:
+	case 0x2a0:
+		/* int memory operand */
+		if (!ea)
+			return (enum fex_exception) -1;
+		if (*(int *)ea == 0)
+			t2 = fp_zero;
+		break;
+
+	case 0x000:
+	case 0x020:
+	case 0x040:
+	case 0x060:
+	case 0x080:
+	case 0x0a0:
+		/* single precision memory operand */
+		if (!ea)
+			return (enum fex_exception) -1;
+		t2 = my_fp_classf((float *)ea);
+		break;
+
+	case 0x400:
+	case 0x420:
+	case 0x440:
+	case 0x460:
+	case 0x480:
+	case 0x4a0:
+		/* double precision memory operand */
+		if (!ea)
+			return (enum fex_exception) -1;
+		t2 = my_fp_class((double *)ea);
+		break;
+
+	case 0x0c0:
+	case 0x0e0:
+	case 0x3e0:
+	case 0x4c0:
+	case 0x4e0:
+	case 0x5e0:
+	case 0x6c0:
+	case 0x6e0:
+	case 0x7e0:
+		/* register operand determined by opcode */
+		switch (op & 0x7f8) {
+		case 0x3e0:
+		case 0x3f8:
+		case 0x5f0:
+		case 0x5f8:
+		case 0x7e0:
+		case 0x7f8:
+			/* weed out nonexistent opcodes */
+			break;
+
+		default:
+			t2 = my_fp_classl(&fpreg(uap, op & 7));
+		}
+		break;
+
+	case 0x1e0:
+	case 0x2e0:
+		/* special forms */
+		switch (op) {
+		case 0x1f1: /* fyl2x */
+		case 0x1f3: /* fpatan */
+		case 0x1f5: /* fprem1 */
+		case 0x1f8: /* fprem */
+		case 0x1f9: /* fyl2xp1 */
+		case 0x1fd: /* fscale */
+		case 0x2e9: /* fucompp */
+			t2 = my_fp_classl(&fpreg(uap, 1));
+			break;
+		}
+		break;
+	}
+
+	/* see if the second op is snan */
+	if (t2 == fp_signaling)
+		return fex_inv_snan;
+	else if (t2 == (enum fp_class_type) -1)
+		return (enum fex_exception) -1;
+
+	/* determine the type of operation */
+	switch (op & 0x7f8) {
+	case 0x000:
+	case 0x020:
+	case 0x028:
+	case 0x040:
+	case 0x060:
+	case 0x068:
+	case 0x080:
+	case 0x0a0:
+	case 0x0a8:
+	case 0x0c0:
+	case 0x0e0:
+	case 0x0e8:
+	case 0x400:
+	case 0x420:
+	case 0x428:
+	case 0x440:
+	case 0x460:
+	case 0x468:
+	case 0x480:
+	case 0x4a0:
+	case 0x4a8:
+	case 0x4c0:
+	case 0x4e0:
+	case 0x4e8:
+	case 0x6c0:
+	case 0x6e0:
+	case 0x6e8:
+		/* fadd, fsub, fsubr */
+		if (t1 == fp_infinity && t2 == fp_infinity)
+			return fex_inv_isi;
+		break;
+
+	case 0x008:
+	case 0x048:
+	case 0x088:
+	case 0x0c8:
+	case 0x208:
+	case 0x248:
+	case 0x288:
+	case 0x408:
+	case 0x448:
+	case 0x488:
+	case 0x4c8:
+	case 0x608:
+	case 0x648:
+	case 0x688:
+	case 0x6c8:
+		/* fmul */
+		if ((t1 == fp_zero && t2 == fp_infinity) || (t2 == fp_zero &&
+		  t1 == fp_infinity))
+			return fex_inv_zmi;
+		break;
+
+	case 0x030:
+	case 0x038:
+	case 0x070:
+	case 0x078:
+	case 0x0b0:
+	case 0x0b8:
+	case 0x0f0:
+	case 0x0f8:
+	case 0x230:
+	case 0x238:
+	case 0x270:
+	case 0x278:
+	case 0x2b0:
+	case 0x2b8:
+	case 0x430:
+	case 0x438:
+	case 0x470:
+	case 0x478:
+	case 0x4b0:
+	case 0x4b8:
+	case 0x4f0:
+	case 0x4f8:
+	case 0x630:
+	case 0x638:
+	case 0x670:
+	case 0x678:
+	case 0x6b0:
+	case 0x6b8:
+	case 0x6f0:
+	case 0x6f8:
+		/* fdiv */
+		if (t1 == fp_zero && t2 == fp_zero)
+			return fex_inv_zdz;
+		else if (t1 == fp_infinity && t2 == fp_infinity)
+			return fex_inv_idi;
+		break;
+
+	case 0x1f0:
+	case 0x1f8:
+		/* fsqrt, other special ops */
+		return fex_inv_sqrt;
+
+	case 0x010:
+	case 0x018:
+	case 0x050:
+	case 0x058:
+	case 0x090:
+	case 0x098:
+	case 0x0d0:
+	case 0x0d8:
+	case 0x210:
+	case 0x218:
+	case 0x250:
+	case 0x258:
+	case 0x290:
+	case 0x298:
+	case 0x2e8:
+	case 0x3f0:
+	case 0x410:
+	case 0x418:
+	case 0x450:
+	case 0x458:
+	case 0x490:
+	case 0x498:
+	case 0x4d0:
+	case 0x4d8:
+	case 0x5e0:
+	case 0x5e8:
+	case 0x610:
+	case 0x618:
+	case 0x650:
+	case 0x658:
+	case 0x690:
+	case 0x698:
+	case 0x6d0:
+	case 0x6d8:
+	case 0x7f0:
+		/* fcom */
+		if (t1 == fp_quiet || t2 == fp_quiet)
+			return fex_inv_cmp;
+		break;
+
+	case 0x1e0:
+		/* ftst */
+		if (op == 0x1e4 && t1 == fp_quiet)
+			return fex_inv_cmp;
+		break;
+
+	case 0x310:
+	case 0x318:
+	case 0x350:
+	case 0x358:
+	case 0x390:
+	case 0x398:
+	case 0x710:
+	case 0x718:
+	case 0x730:
+	case 0x738:
+	case 0x750:
+	case 0x758:
+	case 0x770:
+	case 0x778:
+	case 0x790:
+	case 0x798:
+	case 0x7b0:
+	case 0x7b8:
+		/* fist, fbst */
+		return fex_inv_int;
+	}
+
+	return (enum fex_exception) -1;	/* operands didn't explain the trap */
+}
+
+/* scale factors for exponent unwrapping */
+static const long double
+	two12288 = 1.139165225263043370845938579315932009e+3699l,	/* 2^12288 */
+	twom12288 = 8.778357852076208839765066529179033145e-3700l,	/* 2^-12288 */
+	twom12288mulp = 8.778357852076208839289190796475222545e-3700l;
+		/* 2^-12288 * (1 - 2^-64) */
+
+/* inline templates */
+extern long double f2xm1(long double);
+extern long double fyl2x(long double, long double);
+extern long double fptan(long double);
+extern long double fpatan(long double, long double);
+extern long double fxtract(long double);
+extern long double fprem1(long double, long double);
+extern long double fprem(long double, long double);
+extern long double fyl2xp1(long double, long double);
+extern long double fsqrt(long double);
+extern long double fsincos(long double);
+extern long double frndint(long double);
+extern long double fscale(long double, long double);
+extern long double fsin(long double);
+extern long double fcos(long double);
+
+/*  Get the operands, generate the default untrapped result with exceptions,
+    and set a code indicating the type of operation.  Everything is returned
+    through *info; when the instruction can't be handled, info->op is
+    fex_other, all three operand types are fex_nodata, and flags is 0  */
+void
+__fex_get_op(siginfo_t *sip, ucontext_t *uap, fex_info_t *info)
+{
+	fex_numeric_t			t;
+	long double			op2v, x;
+	unsigned int			cwsw, ex, sw, op;
+	unsigned long			ea;
+	volatile int			c;	/* only written; presumably keeps compares */
+
+	/* get the exception type, status word, opcode, and data address */
+	ex = sip->si_code;
+	sw = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.status;
+#if defined(__amd64)
+	op = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.fop >> 16;
+	ea = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.rdp;
+#else
+	op = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[OP] >> 16;
+	ea = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[EA];
+#endif
+
+	/* initialize res to the default untrapped result and ex to the
+	   corresponding flags (assume trapping is disabled and flags
+	   are clear) */
+
+	/* single operand instructions */
+	info->op = fex_cnvt;
+	info->op2.type = fex_nodata;
+	switch (op & 0x7f8) {
+	/* load instructions */
+	case 0x100:
+	case 0x140:
+	case 0x180:
+		if (!ea) {
+			info->op = fex_other;
+			info->op1.type = info->op2.type = info->res.type = fex_nodata;
+			info->flags = 0;
+			return;
+		}
+		info->op1.type = fex_float;
+		info->op1.val.f = *(float *)ea;
+		info->res.type = fex_ldouble;
+		info->res.val.q = (long double) info->op1.val.f;
+		goto done;
+
+	case 0x500:
+	case 0x540:
+	case 0x580:
+		if (!ea) {
+			info->op = fex_other;
+			info->op1.type = info->op2.type = info->res.type = fex_nodata;
+			info->flags = 0;
+			return;
+		}
+		info->op1.type = fex_double;
+		info->op1.val.d = *(double *)ea;
+		info->res.type = fex_ldouble;
+		info->res.val.q = (long double) info->op1.val.d;
+		goto done;
+
+	/* store instructions */
+	case 0x110:
+	case 0x118:
+	case 0x150:
+	case 0x158:
+	case 0x190:
+	case 0x198:
+		info->res.type = fex_float;
+		if (ex == FPE_FLTRES && (op & 8) != 0) {
+			/* inexact, stack popped */
+			if (!ea) {
+				info->op = fex_other;
+				info->op1.type = info->op2.type = info->res.type = fex_nodata;
+				info->flags = 0;
+				return;
+			}
+			info->op1.type = fex_nodata;
+			info->res.val.f = *(float *)ea;
+			info->flags = FE_INEXACT;
+			return;
+		}
+		info->op1.type = fex_ldouble;
+		info->op1.val.q = fpreg(uap, 0);
+		info->res.val.f = (float) info->op1.val.q;
+		goto done;
+
+	case 0x310:
+	case 0x318:
+	case 0x350:
+	case 0x358:
+	case 0x390:
+	case 0x398:
+		info->res.type = fex_int;
+		if (ex == FPE_FLTRES && (op & 8) != 0) {
+			/* inexact, stack popped */
+			if (!ea) {
+				info->op = fex_other;
+				info->op1.type = info->op2.type = info->res.type = fex_nodata;
+				info->flags = 0;
+				return;
+			}
+			info->op1.type = fex_nodata;
+			info->res.val.i = *(int *)ea;
+			info->flags = FE_INEXACT;
+			return;
+		}
+		info->op1.type = fex_ldouble;
+		info->op1.val.q = fpreg(uap, 0);
+		info->res.val.i = (int) info->op1.val.q;
+		goto done;
+
+	case 0x510:
+	case 0x518:
+	case 0x550:
+	case 0x558:
+	case 0x590:
+	case 0x598:
+		info->res.type = fex_double;
+		if (ex == FPE_FLTRES && (op & 8) != 0) {
+			/* inexact, stack popped */
+			if (!ea) {
+				info->op = fex_other;
+				info->op1.type = info->op2.type = info->res.type = fex_nodata;
+				info->flags = 0;
+				return;
+			}
+			info->op1.type = fex_nodata;
+			info->res.val.d = *(double *)ea;
+			info->flags = FE_INEXACT;
+			return;
+		}
+		info->op1.type = fex_ldouble;
+		info->op1.val.q = fpreg(uap, 0);
+		info->res.val.d = (double) info->op1.val.q;
+		goto done;
+
+	case 0x710:
+	case 0x718:
+	case 0x750:
+	case 0x758:
+	case 0x790:
+	case 0x798:
+		info->res.type = fex_int;
+		if (ex == FPE_FLTRES && (op & 8) != 0) {
+			/* inexact, stack popped */
+			if (!ea) {
+				info->op = fex_other;
+				info->op1.type = info->op2.type = info->res.type = fex_nodata;
+				info->flags = 0;
+				return;
+			}
+			info->op1.type = fex_nodata;
+			info->res.val.i = *(short *)ea;
+			info->flags = FE_INEXACT;
+			return;
+		}
+		info->op1.type = fex_ldouble;
+		info->op1.val.q = fpreg(uap, 0);
+		info->res.val.i = (short) info->op1.val.q;
+		goto done;
+
+	case 0x730:
+	case 0x770:
+	case 0x7b0:
+		/* fbstp; don't bother */
+		info->op = fex_other;
+		info->op1.type = info->res.type = fex_nodata;
+		info->flags = 0;
+		return;
+
+	case 0x738:
+	case 0x778:
+	case 0x7b8:
+		info->res.type = fex_llong;
+		if (ex == FPE_FLTRES) {
+			/* inexact, stack popped */
+			if (!ea) {
+				info->op = fex_other;
+				info->op1.type = info->op2.type = info->res.type = fex_nodata;
+				info->flags = 0;
+				return;
+			}
+			info->op1.type = fex_nodata;
+			info->res.val.l = *(long long *)ea;
+			info->flags = FE_INEXACT;
+			return;
+		}
+		info->op1.type = fex_ldouble;
+		info->op1.val.q = fpreg(uap, 0);
+		info->res.val.l = (long long) info->op1.val.q;
+		goto done;
+	}
+
+	/* all other ops (except compares) have destinations on the stack
+	   so overflow, underflow, and inexact will stomp their operands */
+	if (ex == FPE_FLTOVF || ex == FPE_FLTUND || ex == FPE_FLTRES) {
+		/* find the trapped result */
+		info->op1.type = info->op2.type = fex_nodata;
+		info->res.type = fex_ldouble;
+		switch (op & 0x7f8) {
+		case 0x1f0:
+			/* fptan pushes 1.0 afterward, so result is in st(1) */
+			info->res.val.q = ((op == 0x1f2)? fpreg(uap, 1) :
+				fpreg(uap, 0));
+			break;
+
+		case 0x4c0:
+		case 0x4c8:
+		case 0x4e0:
+		case 0x4e8:
+		case 0x4f0:
+		case 0x4f8:
+			info->res.val.q = fpreg(uap, op & 7);
+			break;
+
+		case 0x6c0:
+		case 0x6c8:
+		case 0x6e0:
+		case 0x6e8:
+		case 0x6f0:
+		case 0x6f8:
+			/* stack was popped afterward */
+			info->res.val.q = fpreg(uap, (op - 1) & 7);
+			break;
+
+		default:
+			info->res.val.q = fpreg(uap, 0);
+		}
+
+		/* reconstruct default untrapped result */
+		if (ex == FPE_FLTOVF) {
+			/* generate an overflow with the sign of the result */
+			x = two12288;
+			*(4+(short*)&x) |= (*(4+(short*)&info->res.val.q) & 0x8000);
+			info->res.val.q = x * two12288;
+			info->flags = FE_OVERFLOW | FE_INEXACT;
+			__fenv_getcwsw(&cwsw);
+			cwsw &= ~FE_ALL_EXCEPT;
+			__fenv_setcwsw(&cwsw);
+		}
+		else if (ex == FPE_FLTUND) {
+			/* undo the scaling; we can't distinguish a chopped result
+			   from an exact one without futzing around to trap all in-
+			   exact exceptions so as to keep the flag clear, so we just
+			   punt */
+			if (sw & 0x200) /* result was rounded up */
+				info->res.val.q = (info->res.val.q * twom12288) * twom12288mulp;
+			else
+				info->res.val.q = (info->res.val.q * twom12288) * twom12288;
+			__fenv_getcwsw(&cwsw);
+			info->flags = (cwsw & FE_INEXACT) | FE_UNDERFLOW;
+			cwsw &= ~FE_ALL_EXCEPT;
+			__fenv_setcwsw(&cwsw);
+		}
+		else
+			info->flags = FE_INEXACT;
+
+		/* determine the operation code */
+		switch (op) {
+		case 0x1f0: /* f2xm1 */
+		case 0x1f1: /* fyl2x */
+		case 0x1f2: /* fptan */
+		case 0x1f3: /* fpatan */
+		case 0x1f5: /* fprem1 */
+		case 0x1f8: /* fprem */
+		case 0x1f9: /* fyl2xp1 */
+		case 0x1fb: /* fsincos */
+		case 0x1fc: /* frndint */
+		case 0x1fd: /* fscale */
+		case 0x1fe: /* fsin */
+		case 0x1ff: /* fcos */
+			info->op = fex_other;
+			return;
+
+		case 0x1fa: /* fsqrt */
+			info->op = fex_sqrt;
+			return;
+		}
+
+		info->op = fex_other;
+		switch (op & 0x7c0) {
+		case 0x000:
+		case 0x040:
+		case 0x080:
+		case 0x0c0:
+		case 0x200:
+		case 0x240:
+		case 0x280:
+		case 0x400:
+		case 0x440:
+		case 0x480:
+		case 0x4c0:
+		case 0x600:
+		case 0x640:
+		case 0x680:
+		case 0x6c0:
+			switch (op & 0x38) {
+			case 0x00:
+				info->op = fex_add;
+				break;
+
+			case 0x08:
+				info->op = fex_mul;
+				break;
+
+			case 0x20:
+			case 0x28:
+				info->op = fex_sub;
+				break;
+
+			case 0x30:
+			case 0x38:
+				info->op = fex_div;
+				break;
+			}
+		}
+		return;
+	}
+
+	/* for other exceptions, the operands are preserved, so we can
+	   just emulate the operation with traps disabled */
+
+	/* one operand is always in st */
+	info->op1.type = fex_ldouble;
+	info->op1.val.q = fpreg(uap, 0);
+
+	/* oddball instructions */
+	info->op = fex_other;
+	switch (op) {
+	case 0x1e4: /* ftst */
+		info->op = fex_cmp;
+		info->op2.type = fex_ldouble;
+		info->op2.val.q = 0.0l;
+		info->res.type = fex_nodata;
+		c = (info->op1.val.q < info->op2.val.q);
+		goto done;
+
+	case 0x1f0: /* f2xm1 */
+		info->res.type = fex_ldouble;
+		info->res.val.q = f2xm1(info->op1.val.q);
+		goto done;
+
+	case 0x1f1: /* fyl2x */
+		info->op2.type = fex_ldouble;
+		info->op2.val.q = fpreg(uap, 1);
+		info->res.type = fex_ldouble;
+		info->res.val.q = fyl2x(info->op1.val.q, info->op2.val.q);
+		goto done;
+
+	case 0x1f2: /* fptan */
+		info->res.type = fex_ldouble;
+		info->res.val.q = fptan(info->op1.val.q);
+		goto done;
+
+	case 0x1f3: /* fpatan */
+		info->op2.type = fex_ldouble;
+		info->op2.val.q = fpreg(uap, 1);
+		info->res.type = fex_ldouble;
+		info->res.val.q = fpatan(info->op1.val.q, info->op2.val.q);
+		goto done;
+
+	case 0x1f4: /* fxtract */
+		info->res.type = fex_ldouble;
+		info->res.val.q = fxtract(info->op1.val.q);
+		goto done;
+
+	case 0x1f5: /* fprem1 */
+		info->op2.type = fex_ldouble;
+		info->op2.val.q = fpreg(uap, 1);
+		info->res.type = fex_ldouble;
+		info->res.val.q = fprem1(info->op1.val.q, info->op2.val.q);
+		goto done;
+
+	case 0x1f8: /* fprem */
+		info->op2.type = fex_ldouble;
+		info->op2.val.q = fpreg(uap, 1);
+		info->res.type = fex_ldouble;
+		info->res.val.q = fprem(info->op1.val.q, info->op2.val.q);
+		goto done;
+
+	case 0x1f9: /* fyl2xp1 */
+		info->op2.type = fex_ldouble;
+		info->op2.val.q = fpreg(uap, 1);
+		info->res.type = fex_ldouble;
+		info->res.val.q = fyl2xp1(info->op1.val.q, info->op2.val.q);
+		goto done;
+
+	case 0x1fa: /* fsqrt */
+		info->op = fex_sqrt;
+		info->res.type = fex_ldouble;
+		info->res.val.q = fsqrt(info->op1.val.q);
+		goto done;
+
+	case 0x1fb: /* fsincos */
+		info->res.type = fex_ldouble;
+		info->res.val.q = fsincos(info->op1.val.q);
+		goto done;
+
+	case 0x1fc: /* frndint */
+		info->res.type = fex_ldouble;
+		info->res.val.q = frndint(info->op1.val.q);
+		goto done;
+
+	case 0x1fd: /* fscale */
+		info->op2.type = fex_ldouble;
+		info->op2.val.q = fpreg(uap, 1);
+		info->res.type = fex_ldouble;
+		info->res.val.q = fscale(info->op1.val.q, info->op2.val.q);
+		goto done;
+
+	case 0x1fe: /* fsin */
+		info->res.type = fex_ldouble;
+		info->res.val.q = fsin(info->op1.val.q);
+		goto done;
+
+	case 0x1ff: /* fcos */
+		info->res.type = fex_ldouble;
+		info->res.val.q = fcos(info->op1.val.q);
+		goto done;
+
+	case 0x2e9: /* fucompp */
+		info->op = fex_cmp;
+		info->op2.type = fex_ldouble;
+		info->op2.val.q = fpreg(uap, 1);
+		info->res.type = fex_nodata;
+		c = (info->op1.val.q == info->op2.val.q);
+		goto done;
+	}
+
+	/* fucom[p], fcomi[p], fucomi[p] */
+	switch (op & 0x7f8) {
+	case 0x3e8:
+	case 0x5e0:
+	case 0x5e8:
+	case 0x7e8: /* unordered compares */
+		info->op = fex_cmp;
+		info->op2.type = fex_ldouble;
+		info->op2.val.q = fpreg(uap, op & 7);
+		info->res.type = fex_nodata;
+		c = (info->op1.val.q == info->op2.val.q);
+		goto done;
+
+	case 0x3f0:
+	case 0x7f0: /* ordered compares */
+		info->op = fex_cmp;
+		info->op2.type = fex_ldouble;
+		info->op2.val.q = fpreg(uap, op & 7);
+		info->res.type = fex_nodata;
+		c = (info->op1.val.q < info->op2.val.q);
+		goto done;
+	}
+
+	/* all other instructions come in groups of the form
+	   fadd, fmul, fcom, fcomp, fsub, fsubr, fdiv, fdivr */
+
+	/* get the second operand */
+	switch (op & 0x7c0) {
+	case 0x000:
+	case 0x040:
+	case 0x080:
+		if (!ea) {
+			info->op = fex_other;
+			info->op1.type = info->op2.type = info->res.type = fex_nodata;
+			info->flags = 0;
+			return;
+		}
+		info->op2.type = fex_float;
+		info->op2.val.f = *(float *)ea;
+		op2v = (long double) info->op2.val.f;
+		break;
+
+	case 0x0c0:
+		info->op2.type = fex_ldouble;
+		op2v = info->op2.val.q = fpreg(uap, op & 7);
+		break;
+
+	case 0x200:
+	case 0x240:
+	case 0x280:
+		if (!ea) {
+			info->op = fex_other;
+			info->op1.type = info->op2.type = info->res.type = fex_nodata;
+			info->flags = 0;
+			return;
+		}
+		info->op2.type = fex_int;
+		info->op2.val.i = *(int *)ea;
+		op2v = (long double) info->op2.val.i;
+		break;
+
+	case 0x400:
+	case 0x440:
+	case 0x480:
+		if (!ea) {
+			info->op = fex_other;
+			info->op1.type = info->op2.type = info->res.type = fex_nodata;
+			info->flags = 0;
+			return;
+		}
+		info->op2.type = fex_double;
+		info->op2.val.d = *(double *)ea;
+		op2v = (long double) info->op2.val.d;
+		break;
+
+	case 0x4c0:
+	case 0x6c0:
+		info->op2.type = fex_ldouble;
+		info->op2.val.q = fpreg(uap, op & 7);
+		t = info->op1;
+		info->op1 = info->op2;
+		info->op2 = t;
+		op2v = info->op2.val.q;
+		break;
+
+	case 0x600:
+	case 0x640:
+	case 0x680:
+		if (!ea) {
+			info->op = fex_other;
+			info->op1.type = info->op2.type = info->res.type = fex_nodata;
+			info->flags = 0;
+			return;
+		}
+		info->op2.type = fex_int;
+		info->op2.val.i = *(short *)ea;
+		op2v = (long double) info->op2.val.i;
+		break;
+
+	default:
+		info->op = fex_other;
+		info->op1.type = info->op2.type = info->res.type = fex_nodata;
+		info->flags = 0;
+		return;
+	}
+
+	/* distinguish different operations in the group */
+	info->res.type = fex_ldouble;
+	switch (op & 0x38) {
+	case 0x00:
+		info->op = fex_add;
+		info->res.val.q = info->op1.val.q + op2v;
+		break;
+
+	case 0x08:
+		info->op = fex_mul;
+		info->res.val.q = info->op1.val.q * op2v;
+		break;
+
+	case 0x10:
+	case 0x18:
+		info->op = fex_cmp;
+		info->res.type = fex_nodata;
+		c = (info->op1.val.q < op2v);
+		break;
+
+	case 0x20:
+		info->op = fex_sub;
+		info->res.val.q = info->op1.val.q - op2v;
+		break;
+
+	case 0x28:
+		info->op = fex_sub;
+		info->res.val.q = op2v - info->op1.val.q;
+		t = info->op1;
+		info->op1 = info->op2;
+		info->op2 = t;
+		break;
+
+	case 0x30:
+		info->op = fex_div;
+		info->res.val.q = info->op1.val.q / op2v;
+		break;
+
+	case 0x38:
+		info->op = fex_div;
+		info->res.val.q = op2v / info->op1.val.q;
+		t = info->op1;
+		info->op1 = info->op2;
+		info->op2 = t;
+		break;
+
+	default:
+		info->op = fex_other;
+		info->op1.type = info->op2.type = info->res.type = fex_nodata;
+		info->flags = 0;
+		return;
+	}
+
+done:	/* report the flags the emulation raised, then clear them */
+	__fenv_getcwsw(&cwsw);
+	info->flags = cwsw & FE_ALL_EXCEPT;
+	cwsw &= ~FE_ALL_EXCEPT;
+	__fenv_setcwsw(&cwsw);
+}
+
+/*
+*  Emulate an x87 pop on the FPU state saved in *uap: move st(1)..st(7)
+*  down one slot (st(7) keeps its old contents), set both tag-word bits
+*  of the old top-of-stack register, and step the TOP field of the saved
+*  status word forward by one.
+*/
+static void pop(ucontext_t *uap)
+{
+	unsigned	sw, tos2;	/* tos2 is 2 * TOP, used as the tag-word bit offset */
+	int		i;
+
+	for (i = 0; i < 7; i++)
+		fpreg(uap, i) = fpreg(uap, (i + 1));
+
+#if defined(__amd64)
+	sw = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.sw;
+	tos2 = (sw >> 10) & 0xe;
+	uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.fctw |= (3 << tos2);
+	tos2 = (tos2 + 2) & 0xe;
+	uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.sw =
+		(sw & ~0x3800) | (tos2 << 10);
+#else
+	sw = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[SW];
+	tos2 = (sw >> 10) & 0xe;
+	uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[TW] |= (3 << tos2);
+	tos2 = (tos2 + 2) & 0xe;
+	uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[SW] =
+		(sw & ~0x3800) | (tos2 << 10);
+#endif
+}
+
+/*
+*  Emulate an x87 push of x on the FPU state saved in *uap: move
+*  st(0)..st(6) up one slot (the old st(7) is overwritten), put x in
+*  st(0), step the TOP field of the saved status word back by one, and
+*  clear both tag-word bits of the new top-of-stack register.
+*/
+static void push(long double x, ucontext_t *uap)
+{
+	unsigned	sw, tos2;	/* tos2 is 2 * TOP, used as the tag-word bit offset */
+	int		i;
+
+	for (i = 7; i > 0; i--)
+		fpreg(uap, i) = fpreg(uap, (i - 1));
+	fpreg(uap, 0) = x;
+
+#if defined(__amd64)
+	sw = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.sw;
+	tos2 = (sw >> 10) & 0xe;
+	tos2 = (tos2 - 2) & 0xe;
+	uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.fctw &= ~(3 << tos2);
+	uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.sw =
+		(sw & ~0x3800) | (tos2 << 10);
+#else
+	sw = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[SW];
+	tos2 = (sw >> 10) & 0xe;
+	tos2 = (tos2 - 2) & 0xe;
+	uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[TW] &= ~(3 << tos2);
+	uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[SW] =
+		(sw & ~0x3800) | (tos2 << 10);
+#endif
+}
+
+/* scale factors for exponent wrapping; __fex_st_result multiplies the value being stored by the chosen factor twice */
+static const float
+	fun = 7.922816251e+28f,	/* 2^96; applied to float stores when the exception is underflow */
+	fov = 1.262177448e-29f;	/* 2^-96; applied to float stores when the exception is overflow */
+static const double
+	dun = 1.552518092300708935e+231,	/* 2^768; applied to double stores on underflow */
+	dov = 6.441148769597133308e-232;	/* 2^-768; applied to double stores on overflow */
+
+/*
+*  Store the specified result (info->res) where the trapping x87 instruction, decoded from the FPU state saved in *uap, would have put it, and fix up the saved stack;
+*  if no result is given but the exception (sip->si_code) is underflow or overflow, store the default trapped (exponent-wrapped) result.  Compares only get "unordered" codes.
+*/
+void
+__fex_st_result(siginfo_t *sip, ucontext_t *uap, fex_info_t *info)
+{
+	fex_numeric_t	r;
+	unsigned long	ex, op, ea, stack;	/* ea is cast to a pointer below, so it must be pointer-sized (64 bits on amd64) */
+
+	/* get the exception type, opcode, and data address */
+	ex = sip->si_code;
+#if defined(__amd64)
+	op = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.fop >> 16; /* NOTE(review): confirm fop carries the opcode in its upper 16 bits as state[OP] does */
+	ea = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.rdp; /*???*/
+#else
+	op = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[OP] >> 16;
+	ea = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[EA];
+#endif
+
+	/* if the instruction is a compare, set the condition codes
+	   to unordered and update the stack */
+	switch (op & 0x7f8) {
+	case 0x010:
+	case 0x050:
+	case 0x090:
+	case 0x0d0:
+	case 0x210:
+	case 0x250:
+	case 0x290:
+	case 0x410:
+	case 0x450:
+	case 0x490:
+	case 0x4d0:
+	case 0x5e0:
+	case 0x610:
+	case 0x650:
+	case 0x690:
+		/* f[u]com */
+#if defined(__amd64)
+		uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.sw |= 0x4500;
+#else
+		uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[SW] |= 0x4500;
+#endif
+		return;
+
+	case 0x018:
+	case 0x058:
+	case 0x098:
+	case 0x0d8:
+	case 0x218:
+	case 0x258:
+	case 0x298:
+	case 0x418:
+	case 0x458:
+	case 0x498:
+	case 0x4d8:
+	case 0x5e8:
+	case 0x618:
+	case 0x658:
+	case 0x698:
+	case 0x6d0:
+		/* f[u]comp */
+#if defined(__amd64)
+		uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.sw |= 0x4500;
+#else
+		uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[SW] |= 0x4500;
+#endif
+		pop(uap);
+		return;
+
+	case 0x2e8:
+	case 0x6d8:
+		/* f[u]compp */
+#if defined(__amd64)
+		uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.sw |= 0x4500;
+#else
+		uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[SW] |= 0x4500;
+#endif
+		pop(uap);
+		pop(uap);
+		return;
+
+	case 0x1e0:
+		if (op == 0x1e4) { /* ftst */
+#if defined(__amd64)
+			uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.sw |= 0x4500;
+#else
+			uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[SW] |= 0x4500;
+#endif
+			return;
+		}
+		break;
+
+	case 0x3e8:
+	case 0x3f0:
+		/* f[u]comi: the result goes to the saved flags register instead of the status word */
+#if defined(__amd64)
+		uap->uc_mcontext.gregs[REG_PS] |= 0x45;
+#else
+		uap->uc_mcontext.gregs[EFL] |= 0x45;
+#endif
+		return;
+
+	case 0x7e8:
+	case 0x7f0:
+		/* f[u]comip */
+#if defined(__amd64)
+		uap->uc_mcontext.gregs[REG_PS] |= 0x45;
+#else
+		uap->uc_mcontext.gregs[EFL] |= 0x45;
+#endif
+		pop(uap);
+		return;
+	}
+
+	/* if there is no result available and the exception is overflow
+	   or underflow, use the wrapped result */
+	r = info->res;
+	if (r.type == fex_nodata) {
+		if (ex == FPE_FLTOVF || ex == FPE_FLTUND) {
+			/* for store instructions, do the scaling and store */
+			switch (op & 0x7f8) {
+			case 0x110:
+			case 0x118:
+			case 0x150:
+			case 0x158:
+			case 0x190:
+			case 0x198:
+				if (!ea)
+					return;
+				if (ex == FPE_FLTOVF)
+					*(float *)ea = (fpreg(uap, 0) * fov) * fov;
+				else
+					*(float *)ea = (fpreg(uap, 0) * fun) * fun;
+				if ((op & 8) != 0)	/* popping form of the store */
+					pop(uap);
+				break;
+
+			case 0x510:
+			case 0x518:
+			case 0x550:
+			case 0x558:
+			case 0x590:
+			case 0x598:
+				if (!ea)
+					return;
+				if (ex == FPE_FLTOVF)
+					*(double *)ea = (fpreg(uap, 0) * dov) * dov;
+				else
+					*(double *)ea = (fpreg(uap, 0) * dun) * dun;
+				if ((op & 8) != 0)	/* popping form of the store */
+					pop(uap);
+				break;
+			}
+		}
+#ifdef DEBUG
+		else if (ex != FPE_FLTRES)
+			printf( "No result supplied, stack may be hosed\n" );
+#endif
+		return;
+	}
+
+	/* otherwise convert the supplied result to the correct type,
+	   put it in the destination, and update the stack as need be */
+
+	/* store instructions */
+	switch (op & 0x7f8) {
+	case 0x110:
+	case 0x118:
+	case 0x150:
+	case 0x158:
+	case 0x190:
+	case 0x198:
+		if (!ea)
+			return;
+		switch (r.type) {
+		case fex_int:
+			*(float *)ea = (float) r.val.i;
+			break;
+
+		case fex_llong:
+			*(float *)ea = (float) r.val.l;
+			break;
+
+		case fex_float:
+			*(float *)ea = r.val.f;
+			break;
+
+		case fex_double:
+			*(float *)ea = (float) r.val.d;
+			break;
+
+		case fex_ldouble:
+			*(float *)ea = (float) r.val.q;
+			break;
+		}
+		if (ex != FPE_FLTRES && (op & 8) != 0)
+			pop(uap);
+		return;
+
+	case 0x310:
+	case 0x318:
+	case 0x350:
+	case 0x358:
+	case 0x390:
+	case 0x398:
+		if (!ea)
+			return;
+		switch (r.type) {
+		case fex_int:
+			*(int *)ea = r.val.i;
+			break;
+
+		case fex_llong:
+			*(int *)ea = (int) r.val.l;
+			break;
+
+		case fex_float:
+			*(int *)ea = (int) r.val.f;
+			break;
+
+		case fex_double:
+			*(int *)ea = (int) r.val.d;
+			break;
+
+		case fex_ldouble:
+			*(int *)ea = (int) r.val.q;
+			break;
+		}
+		if (ex != FPE_FLTRES && (op & 8) != 0)
+			pop(uap);
+		return;
+
+	case 0x510:
+	case 0x518:
+	case 0x550:
+	case 0x558:
+	case 0x590:
+	case 0x598:
+		if (!ea)
+			return;
+		switch (r.type) {
+		case fex_int:
+			*(double *)ea = (double) r.val.i;
+			break;
+
+		case fex_llong:
+			*(double *)ea = (double) r.val.l;
+			break;
+
+		case fex_float:
+			*(double *)ea = (double) r.val.f;
+			break;
+
+		case fex_double:
+			*(double *)ea = r.val.d;
+			break;
+
+		case fex_ldouble:
+			*(double *)ea = (double) r.val.q;
+			break;
+		}
+		if (ex != FPE_FLTRES && (op & 8) != 0)
+			pop(uap);
+		return;
+
+	case 0x710:
+	case 0x718:
+	case 0x750:
+	case 0x758:
+	case 0x790:
+	case 0x798:
+		if (!ea)
+			return;
+		switch (r.type) {
+		case fex_int:
+			*(short *)ea = (short) r.val.i;
+			break;
+
+		case fex_llong:
+			*(short *)ea = (short) r.val.l;
+			break;
+
+		case fex_float:
+			*(short *)ea = (short) r.val.f;
+			break;
+
+		case fex_double:
+			*(short *)ea = (short) r.val.d;
+			break;
+
+		case fex_ldouble:
+			*(short *)ea = (short) r.val.q;
+			break;
+		}
+		if (ex != FPE_FLTRES && (op & 8) != 0)
+			pop(uap);
+		return;
+
+	case 0x730:
+	case 0x770:
+	case 0x7b0:
+		/* fbstp; don't bother */
+		if (ea && ex != FPE_FLTRES)
+			pop(uap);
+		return;
+
+	case 0x738:
+	case 0x778:
+	case 0x7b8:
+		if (!ea)
+			return;
+		switch (r.type) {
+		case fex_int:
+			*(long long *)ea = (long long) r.val.i;
+			break;
+
+		case fex_llong:
+			*(long long *)ea = r.val.l;
+			break;
+
+		case fex_float:
+			*(long long *)ea = (long long) r.val.f;
+			break;
+
+		case fex_double:
+			*(long long *)ea = (long long) r.val.d;
+			break;
+
+		case fex_ldouble:
+			*(long long *)ea = (long long) r.val.q;
+			break;
+		}
+		if (ex != FPE_FLTRES)
+			pop(uap);
+		return;
+	}
+
+	/* for all other instructions, the result goes into a register */
+	switch (r.type) {
+	case fex_int:
+		r.val.q = (long double) r.val.i;
+		break;
+
+	case fex_llong:
+		r.val.q = (long double) r.val.l;
+		break;
+
+	case fex_float:
+		r.val.q = (long double) r.val.f;
+		break;
+
+	case fex_double:
+		r.val.q = (long double) r.val.d;
+		break;
+	}
+
+	/* for load instructions, push the result onto the stack */
+	switch (op & 0x7f8) {
+	case 0x100:
+	case 0x140:
+	case 0x180:
+	case 0x500:
+	case 0x540:
+	case 0x580:
+		if (ea)
+			push(r.val.q, uap);
+		return;
+	}
+
+	/* for all other instructions, if the exception is overflow,
+	   underflow, or inexact, the stack has already been updated */
+	stack = (ex == FPE_FLTOVF || ex == FPE_FLTUND || ex == FPE_FLTRES);
+	switch (op & 0x7f8) {
+	case 0x1f0: /* oddballs */
+		switch (op) {
+		case 0x1f1: /* fyl2x */
+		case 0x1f3: /* fpatan */
+		case 0x1f9: /* fyl2xp1 */
+			/* pop the stack, leaving the result in st */
+			if (!stack)
+				pop(uap);
+			fpreg(uap, 0) = r.val.q;
+			return;
+
+		case 0x1f2: /* fptan */
+			/* fptan pushes 1.0 afterward */
+			if (stack)
+				fpreg(uap, 1) = r.val.q;
+			else {
+				fpreg(uap, 0) = r.val.q;
+				push(1.0L, uap);
+			}
+			return;
+
+		case 0x1f4: /* fxtract */
+		case 0x1fb: /* fsincos */
+			/* leave the supplied result in st */
+			if (stack)
+				fpreg(uap, 0) = r.val.q;
+			else {
+				fpreg(uap, 0) = 0.0; /* punt */
+				push(r.val.q, uap);
+			}
+			return;
+		}
+
+		/* all others leave the stack alone and the result in st */
+		fpreg(uap, 0) = r.val.q;
+		return;
+
+	case 0x4c0:
+	case 0x4c8:
+	case 0x4e0:
+	case 0x4e8:
+	case 0x4f0:
+	case 0x4f8:
+		fpreg(uap, op & 7) = r.val.q;	/* destination is st(op & 7) */
+		return;
+
+	case 0x6c0:
+	case 0x6c8:
+	case 0x6e0:
+	case 0x6e8:
+	case 0x6f0:
+	case 0x6f8:
+		/* stack is popped afterward */
+		if (stack)
+			fpreg(uap, (op - 1) & 7) = r.val.q;	/* already popped, so the target sits one slot lower */
+		else {
+			fpreg(uap, op & 7) = r.val.q;
+			pop(uap);
+		}
+		return;
+
+	default:
+		fpreg(uap, 0) = r.val.q;
+		return;
+	}
+}
diff --git a/usr/src/libm/src/m9x/__fex_sparc.c b/usr/src/libm/src/m9x/__fex_sparc.c
new file mode 100644
index 0000000..7682afa
--- /dev/null
+++ b/usr/src/libm/src/m9x/__fex_sparc.c
@@ -0,0 +1,864 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"@(#)__fex_sparc.c	1.7	06/01/31 SMI"
+
+#if defined(__sparc)
+#include "fenv_synonyms.h"
+#include <stdio.h>
+#include <unistd.h>
+#include <string.h>
+#include <signal.h>
+#include <siginfo.h>
+#include <thread.h>
+#include <ucontext.h>
+#include <math.h>
+#include <sunmath.h>
+#include <fenv.h>
+
+#ifdef __sparcv9	/* every macro below expands to an expression that uses a variable named uap */
+/* FPreg(X): address of element X of the saved fpu_regs array */
+#define FPreg(X)	&uap->uc_mcontext.fpregs.fpu_fr.fpu_regs[X]
+/* FPREG(X): address of a saved fpu_dregs element; the index is X >> 1 with bit 0 of X moved up to bit 4 */
+#define FPREG(X)	&uap->uc_mcontext.fpregs.fpu_fr.fpu_dregs[(X>>1)| \
+					((X&1)<<4)]
+
+#else
+
+#include <sys/procfs.h>
+/* FPxreg(X): address of element X of pr_regs in the prxregset_t that uc_mcontext.xrs.xrs_ptr points to */
+#define FPxreg(X)	&((prxregset_t*)uap->uc_mcontext.xrs.xrs_ptr)->pr_un.pr_v8p.pr_xfr.pr_regs[X]
+/* FPreg(X): address of element X of the saved fpu_regs array */
+#define FPreg(X)	&uap->uc_mcontext.fpregs.fpu_fr.fpu_regs[X]
+/* FPREG(X): odd X selects FPxreg(X - 1), even X selects FPreg(X); presumably odd encodings name the extra upper registers (TODO confirm) */
+#define FPREG(X)	((X & 1)? FPxreg(X - 1) : FPreg(X))
+
+#endif	/* __sparcv9 */
+
+#include "fex_handler.h"
+
+/*
+*  Classify the quad precision value at *a by examining its four 32-bit
+*  words directly, first word being the most significant (this avoids a
+*  dependence on libsunmath).
+*/
+static enum fp_class_type
+my_fp_classl(long double *a)
+{
+	const int	*w = (const int *)a;
+	int		hi = w[0] & ~0x80000000;	/* first word with the sign bit cleared */
+	int		lo = w[1] | w[2] | w[3];	/* nonzero iff any later word is nonzero */
+
+	if (hi >= 0x7fff0000) {	/* exponent field all ones: infinity or NaN */
+		if (((hi & 0xffff) | lo) == 0)
+			return fp_infinity;
+		return (hi & 0x8000)? fp_quiet : fp_signaling;
+	}
+	if (hi < 0x10000)	/* exponent field zero: zero or subnormal */
+		return ((hi | lo) == 0)? fp_zero : fp_subnormal;
+	return fp_normal;
+}
+
+/*
+*  Determine which type of invalid operation exception occurred.  The
+*  trapping instruction is taken from the saved floating point queue in
+*  *uap and its operands from the saved registers; sip is not examined.
+*
+*  Returns one of the fex_inv_* codes, or (enum fex_exception) -1 when
+*  the instruction is not one this routine recognizes.
+*/
+enum fex_exception
+__fex_get_invalid_type(siginfo_t *sip, ucontext_t *uap)
+{
+	unsigned			insn, opcode, src1, src2;
+	enum fp_class_type	cls1, cls2;
+
+	/* extract the opf and source register fields of the instruction */
+	insn = uap->uc_mcontext.fpregs.fpu_q->FQu.fpq.fpq_instr;
+	opcode = (insn >> 5) & 0x1ff;
+	src1 = (insn >> 14) & 0x1f;
+	src2 = insn & 0x1f;
+
+	/* classify both operands; the low two bits of opf select the format */
+	switch (opcode & 3) {
+	case 1: /* single */
+		cls1 = fp_classf(*(float*)FPreg(src1));
+		cls2 = fp_classf(*(float*)FPreg(src2));
+		break;
+
+	case 2: /* double */
+		cls1 = fp_class(*(double*)FPREG(src1));
+		cls2 = fp_class(*(double*)FPREG(src2));
+		break;
+
+	case 3: /* quad */
+		cls1 = my_fp_classl((long double*)FPREG(src1));
+		cls2 = my_fp_classl((long double*)FPREG(src2));
+		break;
+
+	default: /* integer operands never cause an invalid operation */
+		return (enum fex_exception) -1;
+	}
+
+	/* a signaling second operand settles the matter for every operation */
+	if (cls2 == fp_signaling)
+		return fex_inv_snan;
+
+	/* otherwise the answer depends on the operation */
+	switch ((insn >> 19) & 0x183f) {
+	case 0x1035: /* compare */
+		return (cls1 == fp_signaling)?
+			fex_inv_snan : fex_inv_cmp;
+
+	case 0x1034: /* add, subtract, multiply, divide, square root, convert */
+		switch (opcode & 0x1fc) {
+		case 0x28: /* square root */
+			return fex_inv_sqrt;
+
+		case 0x80:
+		case 0xd0: /* convert to integer */
+			return fex_inv_int;
+
+		/* the first operand's class is consulted only for the
+		   two-operand arithmetic operations below */
+		case 0x40:
+		case 0x44: /* add or subtract */
+			return (cls1 == fp_signaling)?
+				fex_inv_snan : fex_inv_isi;
+
+		case 0x48:
+		case 0x68:
+		case 0x6c: /* multiply */
+			return (cls1 == fp_signaling)?
+				fex_inv_snan : fex_inv_zmi;
+
+		case 0x4c: /* divide */
+			if (cls1 == fp_signaling)
+				return fex_inv_snan;
+			return (cls1 == fp_zero)?
+				fex_inv_zdz : fex_inv_idi;
+		}
+		break;
+	}
+
+	return (enum fex_exception) -1;
+}
+
+#ifdef __sparcv9	/* quad precision square root helpers used by __fex_get_op */
+extern void _Qp_sqrt(long double *, const long double *);	/* called as _Qp_sqrt(&result, &operand) */
+#else
+extern long double _Q_sqrt(long double);	/* called as result = _Q_sqrt(operand) */
+#endif
+
+/*
+*  Get the operands of the trapping instruction (from the saved queue and registers in *uap) into info->op1/op2, generate the default untrapped
+*  result with exceptions into info->res and info->flags, and set info->op to a code indicating the type of operation.  sip is not examined.
+*/
+void
+__fex_get_op(siginfo_t *sip, ucontext_t *uap, fex_info_t *info)
+{
+	unsigned long	fsr;
+	unsigned		instr, opf, rs1, rs2;
+	volatile int	c;	/* sink for compare results; presumably volatile so the compares are not optimized away */
+
+	/* parse the instruction which caused the exception */
+	instr = uap->uc_mcontext.fpregs.fpu_q->FQu.fpq.fpq_instr;
+	opf = (instr >> 5) & 0x1ff;
+	rs1 = (instr >> 14) & 0x1f;
+	rs2 = instr & 0x1f;
+
+	/* get the operands; the low two bits of opf select their format */
+	switch (opf & 3) {
+	case 0: /* integer */
+		info->op1.type = fex_nodata;
+		if (opf & 0x40) {
+			info->op2.type = fex_int;
+			info->op2.val.i = *(int*)FPreg(rs2);
+		}
+		else {
+			info->op2.type = fex_llong;
+			info->op2.val.l = *(long long*)FPREG(rs2);
+		}
+		break;
+
+	case 1: /* single */
+		info->op1.type = info->op2.type = fex_float;
+		info->op1.val.f = *(float*)FPreg(rs1);
+		info->op2.val.f = *(float*)FPreg(rs2);
+		break;
+
+	case 2: /* double */
+		info->op1.type = info->op2.type = fex_double;
+		info->op1.val.d = *(double*)FPREG(rs1);
+		info->op2.val.d = *(double*)FPREG(rs2);
+		break;
+
+	case 3: /* quad */
+		info->op1.type = info->op2.type = fex_ldouble;
+		info->op1.val.q = *(long double*)FPREG(rs1);
+		info->op2.val.q = *(long double*)FPREG(rs2);
+		break;
+	}
+
+	/* initialize res to the default untrapped result and ex to the
+	   corresponding flags (assume trapping is disabled and flags
+	   are clear) */
+	info->op = fex_other;
+	info->res.type = fex_nodata;
+	switch ((instr >> 19) & 0x183f) {
+	case 0x1035: /* compare */
+		info->op = fex_cmp;	/* compares leave res as fex_nodata; they are redone only for their flags */
+		switch (opf) {
+		case 0x51: /* compare single */
+			c = (info->op1.val.f == info->op2.val.f);
+			break;
+
+		case 0x52: /* compare double */
+			c = (info->op1.val.d == info->op2.val.d);
+			break;
+
+		case 0x53: /* compare quad */
+			c = (info->op1.val.q == info->op2.val.q);
+			break;
+
+		case 0x55: /* compare single with exception */
+			c = (info->op1.val.f < info->op2.val.f);
+			break;
+
+		case 0x56: /* compare double with exception */
+			c = (info->op1.val.d < info->op2.val.d);
+			break;
+
+		case 0x57: /* compare quad with exception */
+			c = (info->op1.val.q < info->op2.val.q);
+			break;
+		}
+		break;
+
+	case 0x1034: /* add, subtract, multiply, divide, square root, convert */
+		switch (opf) {
+		case 0x41: /* add single */
+			info->op = fex_add;
+			info->res.type = fex_float;
+			info->res.val.f = info->op1.val.f + info->op2.val.f;
+			break;
+
+		case 0x42: /* add double */
+			info->op = fex_add;
+			info->res.type = fex_double;
+			info->res.val.d = info->op1.val.d + info->op2.val.d;
+			break;
+
+		case 0x43: /* add quad */
+			info->op = fex_add;
+			info->res.type = fex_ldouble;
+			info->res.val.q = info->op1.val.q + info->op2.val.q;
+			break;
+
+		case 0x45: /* subtract single */
+			info->op = fex_sub;
+			info->res.type = fex_float;
+			info->res.val.f = info->op1.val.f - info->op2.val.f;
+			break;
+
+		case 0x46: /* subtract double */
+			info->op = fex_sub;
+			info->res.type = fex_double;
+			info->res.val.d = info->op1.val.d - info->op2.val.d;
+			break;
+
+		case 0x47: /* subtract quad */
+			info->op = fex_sub;
+			info->res.type = fex_ldouble;
+			info->res.val.q = info->op1.val.q - info->op2.val.q;
+			break;
+
+		case 0x49: /* multiply single */
+			info->op = fex_mul;
+			info->res.type = fex_float;
+			info->res.val.f = info->op1.val.f * info->op2.val.f;
+			break;
+
+		case 0x4a: /* multiply double */
+			info->op = fex_mul;
+			info->res.type = fex_double;
+			info->res.val.d = info->op1.val.d * info->op2.val.d;
+			break;
+
+		case 0x4b: /* multiply quad */
+			info->op = fex_mul;
+			info->res.type = fex_ldouble;
+			info->res.val.q = info->op1.val.q * info->op2.val.q;
+			break;
+
+		case 0x69: /* fsmuld: single operands, double result */
+			info->op = fex_mul;
+			info->res.type = fex_double;
+			info->res.val.d = (double)info->op1.val.f * (double)info->op2.val.f;
+			break;
+
+		case 0x6e: /* fdmulq: double operands, quad result */
+			info->op = fex_mul;
+			info->res.type = fex_ldouble;
+			info->res.val.q = (long double)info->op1.val.d *
+				(long double)info->op2.val.d;
+			break;
+
+		case 0x4d: /* divide single */
+			info->op = fex_div;
+			info->res.type = fex_float;
+			info->res.val.f = info->op1.val.f / info->op2.val.f;
+			break;
+
+		case 0x4e: /* divide double */
+			info->op = fex_div;
+			info->res.type = fex_double;
+			info->res.val.d = info->op1.val.d / info->op2.val.d;
+			break;
+
+		case 0x4f: /* divide quad */
+			info->op = fex_div;
+			info->res.type = fex_ldouble;
+			info->res.val.q = info->op1.val.q / info->op2.val.q;
+			break;
+
+		case 0x29: /* square root single */
+			info->op = fex_sqrt;
+			info->op1 = info->op2;	/* the sole operand came from rs2; report it as op1 */
+			info->op2.type = fex_nodata;
+			info->res.type = fex_float;
+			info->res.val.f = sqrtf(info->op1.val.f);
+			break;
+
+		case 0x2a: /* square root double */
+			info->op = fex_sqrt;
+			info->op1 = info->op2;
+			info->op2.type = fex_nodata;
+			info->res.type = fex_double;
+			info->res.val.d = sqrt(info->op1.val.d);
+			break;
+
+		case 0x2b: /* square root quad */
+			info->op = fex_sqrt;
+			info->op1 = info->op2;
+			info->op2.type = fex_nodata;
+			info->res.type = fex_ldouble;
+#ifdef __sparcv9
+			_Qp_sqrt(&info->res.val.q, &info->op1.val.q);
+#else
+			info->res.val.q = _Q_sqrt(info->op1.val.q);
+#endif
+			break;
+
+		default: /* conversions; any opf not listed below leaves res as fex_nodata */
+			info->op = fex_cnvt;
+			info->op1 = info->op2;	/* the sole operand came from rs2; report it as op1 */
+			info->op2.type = fex_nodata;
+			switch (opf) {
+			case 0xd1: /* convert single to int */
+				info->res.type = fex_int;
+				info->res.val.i = (int) info->op1.val.f;
+				break;
+
+			case 0xd2: /* convert double to int */
+				info->res.type = fex_int;
+				info->res.val.i = (int) info->op1.val.d;
+				break;
+
+			case 0xd3: /* convert quad to int */
+				info->res.type = fex_int;
+				info->res.val.i = (int) info->op1.val.q;
+				break;
+
+			case 0x81: /* convert single to long long */
+				info->res.type = fex_llong;
+				info->res.val.l = (long long) info->op1.val.f;
+				break;
+
+			case 0x82: /* convert double to long long */
+				info->res.type = fex_llong;
+				info->res.val.l = (long long) info->op1.val.d;
+				break;
+
+			case 0x83: /* convert quad to long long */
+				info->res.type = fex_llong;
+				info->res.val.l = (long long) info->op1.val.q;
+				break;
+
+			case 0xc4: /* convert int to single */
+				info->res.type = fex_float;
+				info->res.val.f = (float) info->op1.val.i;
+				break;
+
+			case 0x84: /* convert long long to single */
+				info->res.type = fex_float;
+				info->res.val.f = (float) info->op1.val.l;
+				break;
+
+			case 0x88: /* convert long long to double */
+				info->res.type = fex_double;
+				info->res.val.d = (double) info->op1.val.l;
+				break;
+
+			case 0xc6: /* convert double to single */
+				info->res.type = fex_float;
+				info->res.val.f = (float) info->op1.val.d;
+				break;
+
+			case 0xc7: /* convert quad to single */
+				info->res.type = fex_float;
+				info->res.val.f = (float) info->op1.val.q;
+				break;
+
+			case 0xc9: /* convert single to double */
+				info->res.type = fex_double;
+				info->res.val.d = (double) info->op1.val.f;
+				break;
+
+			case 0xcb: /* convert quad to double */
+				info->res.type = fex_double;
+				info->res.val.d = (double) info->op1.val.q;
+				break;
+
+			case 0xcd: /* convert single to quad */
+				info->res.type = fex_ldouble;
+				info->res.val.q = (long double) info->op1.val.f;
+				break;
+
+			case 0xce: /* convert double to quad */
+				info->res.type = fex_ldouble;
+				info->res.val.q = (long double) info->op1.val.d;
+				break;
+			}
+		}
+		break;
+	}
+	__fenv_getfsr(&fsr);	/* collect whatever flags the recomputation above raised ... */
+	info->flags = (int)__fenv_get_ex(fsr);
+	__fenv_set_ex(fsr, 0);	/* ... then clear them and write the FSR back */
+	__fenv_setfsr(&fsr);
+}
+
+/*
+*  Store the specified result (info->res) in the destination register of the trapping instruction, converting it to the destination format; if no result
+*  is given but the exception (sip->si_code) is underflow or overflow, supply the default trapped (exponent-wrapped) result.  Compares only get fcc set to unordered.
+*/
+void
+__fex_st_result(siginfo_t *sip, ucontext_t *uap, fex_info_t *info)
+{
+	unsigned		instr, opf, rs1, rs2, rd;
+	long double		qscl;
+	double			dscl;
+	float			fscl;
+
+	/* parse the instruction which caused the exception */
+	instr = uap->uc_mcontext.fpregs.fpu_q->FQu.fpq.fpq_instr;
+	opf = (instr >> 5) & 0x1ff;
+	rs1 = (instr >> 14) & 0x1f;
+	rs2 = instr & 0x1f;
+	rd = (instr >> 25) & 0x1f;
+
+	/* if the instruction is a compare, just set fcc to unordered */
+	if (((instr >> 19) & 0x183f) == 0x1035) {
+		if (rd == 0)
+			uap->uc_mcontext.fpregs.fpu_fsr |= 0xc00;
+		else {
+#ifdef __sparcv9
+			uap->uc_mcontext.fpregs.fpu_fsr |= (3l << ((rd << 1) + 30));
+#else
+			((prxregset_t*)uap->uc_mcontext.xrs.xrs_ptr)->pr_un.pr_v8p.pr_xfsr |= (3 << ((rd - 1) << 1));
+#endif
+		}
+		return;
+	}
+
+	/* if there is no result available, generate the default trapped
+	   (wrapped) result for overflow/underflow, else the untrapped default */
+	if (info->res.type == fex_nodata) {
+		/* set scale factors for exponent wrapping */
+		switch (sip->si_code) {
+		case FPE_FLTOVF:
+			fscl = 1.262177448e-29f;	/* 2^-96 */
+			dscl = 6.441148769597133308e-232;	/* 2^-768 */
+			qscl = 8.778357852076208839765066529179033145e-3700l;/* 2^-12288 */
+			break;
+
+		case FPE_FLTUND:
+			fscl = 7.922816251e+28f;	/* 2^96 */
+			dscl = 1.552518092300708935e+231;	/* 2^768 */
+			qscl = 1.139165225263043370845938579315932009e+3699l;/* 2^12288 */
+			break;
+
+		default:
+			/* user may have blown away the default result by mistake,
+			   so try to regenerate it */
+			(void) __fex_get_op(sip, uap, info);
+			if (info->res.type != fex_nodata)
+				goto stuff;
+			/* couldn't do it */
+			return;
+		}
+
+		/* get the operands (note: this overwrites info->op1/op2 values with the raw rs1/rs2 contents) */
+		switch (opf & 3) {
+		case 1: /* single */
+			info->op1.val.f = *(float*)FPreg(rs1);
+			info->op2.val.f = *(float*)FPreg(rs2);
+			break;
+
+		case 2: /* double */
+			info->op1.val.d = *(double*)FPREG(rs1);
+			info->op2.val.d = *(double*)FPREG(rs2);
+			break;
+
+		case 3: /* quad */
+			info->op1.val.q = *(long double*)FPREG(rs1);
+			info->op2.val.q = *(long double*)FPREG(rs2);
+			break;
+		}
+
+		/* generate the wrapped result; each scale factor is applied twice in total */
+		switch (opf) {
+		case 0x41: /* add single */
+			info->res.type = fex_float;
+			info->res.val.f = fscl * ( fscl * info->op1.val.f +
+				fscl * info->op2.val.f );
+			break;
+
+		case 0x42: /* add double */
+			info->res.type = fex_double;
+			info->res.val.d = dscl * ( dscl * info->op1.val.d +
+				dscl * info->op2.val.d );
+			break;
+
+		case 0x43: /* add quad */
+			info->res.type = fex_ldouble;
+			info->res.val.q = qscl * ( qscl * info->op1.val.q +
+				qscl * info->op2.val.q );
+			break;
+
+		case 0x45: /* subtract single */
+			info->res.type = fex_float;
+			info->res.val.f = fscl * ( fscl * info->op1.val.f -
+				fscl * info->op2.val.f );
+			break;
+
+		case 0x46: /* subtract double */
+			info->res.type = fex_double;
+			info->res.val.d = dscl * ( dscl * info->op1.val.d -
+				dscl * info->op2.val.d );
+			break;
+
+		case 0x47: /* subtract quad */
+			info->res.type = fex_ldouble;
+			info->res.val.q = qscl * ( qscl * info->op1.val.q -
+				qscl * info->op2.val.q );
+			break;
+
+		case 0x49: /* multiply single */
+			info->res.type = fex_float;
+			info->res.val.f = ( fscl * info->op1.val.f ) *
+				( fscl * info->op2.val.f );
+			break;
+
+		case 0x4a: /* multiply double */
+			info->res.type = fex_double;
+			info->res.val.d = ( dscl * info->op1.val.d ) *
+				( dscl * info->op2.val.d );
+			break;
+
+		case 0x4b: /* multiply quad */
+			info->res.type = fex_ldouble;
+			info->res.val.q = ( qscl * info->op1.val.q ) *
+				( qscl * info->op2.val.q );
+			break;
+
+		case 0x4d: /* divide single */
+			info->res.type = fex_float;
+			info->res.val.f = ( fscl * info->op1.val.f ) /
+				( info->op2.val.f / fscl );
+			break;
+
+		case 0x4e: /* divide double */
+			info->res.type = fex_double;
+			info->res.val.d = ( dscl * info->op1.val.d ) /
+				( info->op2.val.d / dscl );
+			break;
+
+		case 0x4f: /* divide quad */
+			info->res.type = fex_ldouble;
+			info->res.val.q = ( qscl * info->op1.val.q ) /
+				( info->op2.val.q / qscl );
+			break;
+
+		case 0xc6: /* convert double to single; the operand of a conversion is in rs2, i.e. op2 */
+			info->res.type = fex_float;
+			info->res.val.f = (float) ( fscl * ( fscl * info->op2.val.d ) );
+			break;
+
+		case 0xc7: /* convert quad to single; operand is op2 (rs2) */
+			info->res.type = fex_float;
+			info->res.val.f = (float) ( fscl * ( fscl * info->op2.val.q ) );
+			break;
+
+		case 0xcb: /* convert quad to double; operand is op2 (rs2) */
+			info->res.type = fex_double;
+			info->res.val.d = (double) ( dscl * ( dscl * info->op2.val.q ) );
+			break;
+		}
+
+		if (info->res.type == fex_nodata)
+			/* couldn't do it */
+			return;
+	}
+
+stuff:
+	/* stick the result in the destination */
+	if (opf & 0x80) { /* conversion */
+		if (opf & 0x10) { /* result is an int */
+			switch (info->res.type) {
+			case fex_llong:
+				info->res.val.i = (int) info->res.val.l;
+				break;
+
+			case fex_float:
+				info->res.val.i = (int) info->res.val.f;
+				break;
+
+			case fex_double:
+				info->res.val.i = (int) info->res.val.d;
+				break;
+
+			case fex_ldouble:
+				info->res.val.i = (int) info->res.val.q;
+				break;
+			}
+			*(int*)FPreg(rd) = info->res.val.i;
+			return;
+		}
+
+		switch (opf & 0xc) {	/* bits 2-3 of opf select the destination format */
+		case 0: /* result is long long */
+			switch (info->res.type) {
+			case fex_int:
+				info->res.val.l = (long long) info->res.val.i;
+				break;
+
+			case fex_float:
+				info->res.val.l = (long long) info->res.val.f;
+				break;
+
+			case fex_double:
+				info->res.val.l = (long long) info->res.val.d;
+				break;
+
+			case fex_ldouble:
+				info->res.val.l = (long long) info->res.val.q;
+				break;
+			}
+			*(long long*)FPREG(rd) = info->res.val.l;
+			break;
+
+		case 0x4: /* result is float */
+			switch (info->res.type) {
+			case fex_int:
+				info->res.val.f = (float) info->res.val.i;
+				break;
+
+			case fex_llong:
+				info->res.val.f = (float) info->res.val.l;
+				break;
+
+			case fex_double:
+				info->res.val.f = (float) info->res.val.d;
+				break;
+
+			case fex_ldouble:
+				info->res.val.f = (float) info->res.val.q;
+				break;
+			}
+			*(float*)FPreg(rd) = info->res.val.f;
+			break;
+
+		case 0x8: /* result is double */
+			switch (info->res.type) {
+			case fex_int:
+				info->res.val.d = (double) info->res.val.i;
+				break;
+
+			case fex_llong:
+				info->res.val.d = (double) info->res.val.l;
+				break;
+
+			case fex_float:
+				info->res.val.d = (double) info->res.val.f;
+				break;
+
+			case fex_ldouble:
+				info->res.val.d = (double) info->res.val.q;
+				break;
+			}
+			*(double*)FPREG(rd) = info->res.val.d;
+			break;
+
+		case 0xc: /* result is long double */
+			switch (info->res.type) {
+			case fex_int:
+				info->res.val.q = (long double) info->res.val.i;
+				break;
+
+			case fex_llong:
+				info->res.val.q = (long double) info->res.val.l;
+				break;
+
+			case fex_float:
+				info->res.val.q = (long double) info->res.val.f;
+				break;
+
+			case fex_double:
+				info->res.val.q = (long double) info->res.val.d;
+				break;
+			}
+			*(long double*)FPREG(rd) = info->res.val.q;
+			break;
+		}
+		return;
+	}
+
+	if ((opf & 0xf0) == 0x60) { /* fsmuld, fdmulq */
+		switch (opf & 0xc) {	/* 0x69 & 0xc == 0x8 (double), 0x6e & 0xc == 0xc (quad) */
+		case 0x8: /* result is double */
+			switch (info->res.type) {
+			case fex_int:
+				info->res.val.d = (double) info->res.val.i;
+				break;
+
+			case fex_llong:
+				info->res.val.d = (double) info->res.val.l;
+				break;
+
+			case fex_float:
+				info->res.val.d = (double) info->res.val.f;
+				break;
+
+			case fex_ldouble:
+				info->res.val.d = (double) info->res.val.q;
+				break;
+			}
+			*(double*)FPREG(rd) = info->res.val.d;
+			break;
+
+		case 0xc: /* result is long double */
+			switch (info->res.type) {
+			case fex_int:
+				info->res.val.q = (long double) info->res.val.i;
+				break;
+
+			case fex_llong:
+				info->res.val.q = (long double) info->res.val.l;
+				break;
+
+			case fex_float:
+				info->res.val.q = (long double) info->res.val.f;
+				break;
+
+			case fex_double:
+				info->res.val.q = (long double) info->res.val.d;
+				break;
+			}
+			*(long double*)FPREG(rd) = info->res.val.q;
+			break;
+		}
+		return;
+	}
+
+	switch (opf & 3) { /* other arithmetic op */
+	case 1: /* result is float */
+		switch (info->res.type) {
+		case fex_int:
+			info->res.val.f = (float) info->res.val.i;
+			break;
+
+		case fex_llong:
+			info->res.val.f = (float) info->res.val.l;
+			break;
+
+		case fex_double:
+			info->res.val.f = (float) info->res.val.d;
+			break;
+
+		case fex_ldouble:
+			info->res.val.f = (float) info->res.val.q;
+			break;
+		}
+		*(float*)FPreg(rd) = info->res.val.f;
+		break;
+
+	case 2: /* result is double */
+		switch (info->res.type) {
+		case fex_int:
+			info->res.val.d = (double) info->res.val.i;
+			break;
+
+		case fex_llong:
+			info->res.val.d = (double) info->res.val.l;
+			break;
+
+		case fex_float:
+			info->res.val.d = (double) info->res.val.f;
+			break;
+
+		case fex_ldouble:
+			info->res.val.d = (double) info->res.val.q;
+			break;
+		}
+		*(double*)FPREG(rd) = info->res.val.d;
+		break;
+
+	case 3: /* result is long double */
+		switch (info->res.type) {
+		case fex_int:
+			info->res.val.q = (long double) info->res.val.i;
+			break;
+
+		case fex_llong:
+			info->res.val.q = (long double) info->res.val.l;
+			break;
+
+		case fex_float:
+			info->res.val.q = (long double) info->res.val.f;
+			break;
+
+		case fex_double:
+			info->res.val.q = (long double) info->res.val.d;
+			break;
+		}
+		*(long double*)FPREG(rd) = info->res.val.q;
+		break;
+	}
+}
+#endif	/* defined(__sparc) */
diff --git a/usr/src/libm/src/m9x/__fex_sse.c b/usr/src/libm/src/m9x/__fex_sse.c
new file mode 100644
index 0000000..e1743a9
--- /dev/null
+++ b/usr/src/libm/src/m9x/__fex_sse.c
@@ -0,0 +1,1581 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"@(#)__fex_sse.c	1.3	06/01/31 SMI"
+
+#include "fenv_synonyms.h"
+#if defined(__i386) && !defined(__amd64)
+/* for now, pick up local copy of Solaris 10 sys/regset.h; we can get rid
+   of this once we no longer need to build on Solaris 8 */
+#include "regset.h"
+#endif
+#include <ucontext.h>
+#include <fenv.h>
+#include <sunmath.h>
+#include "fex_handler.h"
+
+#if !defined(REG_PC)
+#define REG_PC	EIP
+#endif
+
+#if !defined(REG_PS)
+#define REG_PS	EFL
+#endif
+
+/*
+ * regno(X) converts a register number X, as assembled from the ModRM/SIB
+ * fields (and REX extension bits on amd64) of an instruction, into the
+ * index of that register's slot in uc_mcontext.gregs[].  On amd64,
+ * register number 4 is mapped to REG_RSP explicitly and the remaining
+ * numbers are computed relative to REG_RAX; on i386 every number is
+ * computed relative to EAX.
+ *
+ * Note that X is evaluated more than once, so it must not have side
+ * effects.
+ */
+#ifdef __amd64
+#define regno(X)	(((X) < 4)? REG_RAX - (X) : \
+			(((X) > 4)? REG_RAX + 1 - (X) : REG_RSP))
+#else
+#define regno(X)	(EAX - (X))
+#endif
+
+/*
+ * Support for SSE instructions
+ */
+
+/*
+ * Decode the SSE instruction located at the program counter saved in
+ * *uap.  On success, fill in *inst and return the length of the
+ * instruction in bytes.  Return 0 if the instruction is not recognized.
+ *
+ * The fields filled in are inst->op (the operation), inst->op1 and
+ * inst->op2 (pointers to the two operands), and, for the cmp
+ * instructions only, inst->imm (the immediate byte).  inst->op1 always
+ * points at a register image saved in *uap (general purpose, mmx, or
+ * xmm); inst->op2 points either at such a register image or at the
+ * memory location addressed by the instruction.
+ *
+ * Only an optional F3/66/F2 prefix followed by an optional REX prefix
+ * is accepted ahead of the 0F opcode escape; any other leading byte
+ * causes the instruction to be rejected.
+ */
+int
+__fex_parse_sse(ucontext_t *uap, sseinst_t *inst)
+{
+	unsigned char	*ip;
+	char		*addr;
+	int		i, dbl, simd, rex, modrm, sib, r;
+
+	/* i is the offset of the next undecoded byte of the instruction */
+	i = 0;
+	ip = (unsigned char *)uap->uc_mcontext.gregs[REG_PC];
+
+	/*
+	 * look for pseudo-prefixes: none means packed single, F3 means
+	 * scalar single, 66 means packed double, F2 means scalar double
+	 */
+	dbl = 0;
+	simd = SIMD;
+	if (ip[i] == 0xF3) {
+		simd = 0;
+		i++;
+	} else if (ip[i] == 0x66) {
+		dbl = DOUBLE;
+		i++;
+	} else if (ip[i] == 0xF2) {
+		dbl = DOUBLE;
+		simd = 0;
+		i++;
+	}
+
+	/*
+	 * look for AMD64 REX prefix; rex stays 0 when there is none, so
+	 * the (rex & n) terms below then contribute nothing
+	 */
+	rex = 0;
+	if (ip[i] >= 0x40 && ip[i] <= 0x4F) {
+		rex = ip[i];
+		i++;
+	}
+
+	/*
+	 * parse opcode: every supported instruction is 0F followed by
+	 * one opcode byte.  Most cases form the operation by adding simd
+	 * and dbl to the scalar single op; this presumably relies on the
+	 * op values (declared elsewhere) being laid out so that those
+	 * sums name the packed and double variants.
+	 */
+	if (ip[i++] != 0x0F)
+		return 0;
+	switch (ip[i++]) {
+	case 0x2A:
+		inst->op = (int)cvtsi2ss + simd + dbl;
+		/* scalar form: REX bit 3 is added in, presumably
+		   selecting the 64-bit integer (...q) variant */
+		if (!simd)
+			inst->op = (int)inst->op + (rex & 8);
+		break;
+
+	case 0x2C:
+		inst->op = (int)cvttss2si + simd + dbl;
+		if (!simd)
+			inst->op = (int)inst->op + (rex & 8);
+		break;
+
+	case 0x2D:
+		inst->op = (int)cvtss2si + simd + dbl;
+		if (!simd)
+			inst->op = (int)inst->op + (rex & 8);
+		break;
+
+	case 0x2E:
+		/* oddball: scalar instruction in a SIMD opcode group */
+		if (!simd)
+			return 0;
+		inst->op = (int)ucomiss + dbl;
+		break;
+
+	case 0x2F:
+		/* oddball: scalar instruction in a SIMD opcode group */
+		if (!simd)
+			return 0;
+		inst->op = (int)comiss + dbl;
+		break;
+
+	case 0x51:
+		inst->op = (int)sqrtss + simd + dbl;
+		break;
+
+	case 0x58:
+		inst->op = (int)addss + simd + dbl;
+		break;
+
+	case 0x59:
+		inst->op = (int)mulss + simd + dbl;
+		break;
+
+	case 0x5A:
+		inst->op = (int)cvtss2sd + simd + dbl;
+		break;
+
+	case 0x5B:
+		/* the prefixes select among three unrelated conversions;
+		   the F2 form is rejected */
+		if (dbl) {
+			if (simd)
+				inst->op = cvtps2dq;
+			else
+				return 0;
+		} else {
+			inst->op = (simd)? cvtdq2ps : cvttps2dq;
+		}
+		break;
+
+	case 0x5C:
+		inst->op = (int)subss + simd + dbl;
+		break;
+
+	case 0x5D:
+		inst->op = (int)minss + simd + dbl;
+		break;
+
+	case 0x5E:
+		inst->op = (int)divss + simd + dbl;
+		break;
+
+	case 0x5F:
+		inst->op = (int)maxss + simd + dbl;
+		break;
+
+	case 0xC2:
+		inst->op = (int)cmpss + simd + dbl;
+		break;
+
+	case 0xE6:
+		/* as with 0x5B, the prefixes select among three
+		   conversions; the unprefixed form is rejected */
+		if (simd) {
+			if (dbl)
+				inst->op = cvttpd2dq;
+			else
+				return 0;
+		} else {
+			inst->op = (dbl)? cvtpd2dq : cvtdq2pd;
+		}
+		break;
+
+	default:
+		return 0;
+	}
+
+	/* locate operands */
+	modrm = ip[i++];
+
+	/*
+	 * op1 is the register named by bits 3-5 of the modrm byte,
+	 * extended by REX bit 2 for gp and xmm registers
+	 */
+	if (inst->op == cvtss2si || inst->op == cvttss2si ||
+	    inst->op == cvtsd2si || inst->op == cvttsd2si ||
+	    inst->op == cvtss2siq || inst->op == cvttss2siq ||
+	    inst->op == cvtsd2siq || inst->op == cvttsd2siq) {
+		/* op1 is a gp register */
+		r = ((rex & 4) << 1) | ((modrm >> 3) & 7);
+		inst->op1 = (sseoperand_t *)&uap->uc_mcontext.gregs[regno(r)];
+	} else if (inst->op == cvtps2pi || inst->op == cvttps2pi ||
+	    inst->op == cvtpd2pi || inst->op == cvttpd2pi) {
+		/* op1 is a mmx register */
+#ifdef __amd64
+		inst->op1 = (sseoperand_t *)&uap->uc_mcontext.fpregs.fp_reg_set.
+		    fpchip_state.st[(modrm >> 3) & 7];
+#else
+		/* registers are located at 10-byte intervals starting
+		   at &state[7] in the saved image */
+		inst->op1 = (sseoperand_t *)(10 * ((modrm >> 3) & 7) +
+		    (char *)&uap->uc_mcontext.fpregs.fp_reg_set.
+		    fpchip_state.state[7]);
+#endif
+	} else {
+		/* op1 is a xmm register */
+		r = ((rex & 4) << 1) | ((modrm >> 3) & 7);
+		inst->op1 = (sseoperand_t *)&uap->uc_mcontext.fpregs.
+		    fp_reg_set.fpchip_state.xmm[r];
+	}
+
+	/*
+	 * op2 is described by the top two bits (mod) and low three bits
+	 * (r/m) of the modrm byte: mod == 3 names a register, extended
+	 * by REX bit 0; anything else names a memory location
+	 */
+	if ((modrm >> 6) == 3) {
+		if (inst->op == cvtsi2ss || inst->op == cvtsi2sd ||
+		    inst->op == cvtsi2ssq || inst->op == cvtsi2sdq) {
+			/* op2 is a gp register */
+			r = ((rex & 1) << 3) | (modrm & 7);
+			inst->op2 = (sseoperand_t *)&uap->uc_mcontext.
+			    gregs[regno(r)];
+		} else if (inst->op == cvtpi2ps || inst->op == cvtpi2pd) {
+			/* op2 is a mmx register */
+#ifdef __amd64
+			inst->op2 = (sseoperand_t *)&uap->uc_mcontext.fpregs.
+			    fp_reg_set.fpchip_state.st[modrm & 7];
+#else
+			inst->op2 = (sseoperand_t *)(10 * (modrm & 7) +
+			    (char *)&uap->uc_mcontext.fpregs.fp_reg_set.
+			    fpchip_state.state[7]);
+#endif
+		} else {
+			/* op2 is a xmm register */
+			r = ((rex & 1) << 3) | (modrm & 7);
+			inst->op2 = (sseoperand_t *)&uap->uc_mcontext.fpregs.
+			    fp_reg_set.fpchip_state.xmm[r];
+		}
+	} else if ((modrm & 0xc7) == 0x05) {
+		/* mod == 0 and r/m == 5: a bare 32-bit displacement */
+#ifdef __amd64
+		/*
+		 * address of next instruction + offset; the next
+		 * instruction starts after the 4 displacement bytes and,
+		 * for the cmp instructions, the immediate byte that
+		 * follows them
+		 */
+		r = i + 4;
+		if (inst->op == cmpss || inst->op == cmpps ||
+		    inst->op == cmpsd || inst->op == cmppd)
+			r++;
+		inst->op2 = (sseoperand_t *)(ip + r + *(int *)(ip + i));
+#else
+		/* absolute address */
+		inst->op2 = (sseoperand_t *)(*(int *)(ip + i));
+#endif
+		i += 4;
+	} else {
+		/* complex address */
+		if ((modrm & 7) == 4) {
+			/* parse sib byte */
+			sib = ip[i++];
+			if ((sib & 7) == 5 && (modrm >> 6) == 0) {
+				/* start with absolute address */
+				addr = (char *)(*(int *)(ip + i));
+				i += 4;
+			} else {
+				/* start with base */
+				r = ((rex & 1) << 3) | (sib & 7);
+				addr = (char *)uap->uc_mcontext.gregs[regno(r)];
+			}
+			/* index register number 4 means no index */
+			r = ((rex & 2) << 2) | ((sib >> 3) & 7);
+			if (r != 4) {
+				/* add scaled index */
+				addr += uap->uc_mcontext.gregs[regno(r)]
+				    << (sib >> 6);
+			}
+		} else {
+			/* no sib byte: the base register is named by r/m */
+			r = ((rex & 1) << 3) | (modrm & 7);
+			addr = (char *)uap->uc_mcontext.gregs[regno(r)];
+		}
+
+		/*
+		 * add displacement, if any: one byte when mod == 1 (the
+		 * cast to char is what makes it sign-extend, so this
+		 * assumes plain char is signed), four bytes when mod == 2
+		 */
+		if ((modrm >> 6) == 1) {
+			addr += (char)ip[i++];
+		} else if ((modrm >> 6) == 2) {
+			addr += *(int *)(ip + i);
+			i += 4;
+		}
+		inst->op2 = (sseoperand_t *)addr;
+	}
+
+	if (inst->op == cmpss || inst->op == cmpps || inst->op == cmpsd ||
+	    inst->op == cmppd) {
+		/* get the immediate operand */
+		inst->imm = ip[i++];
+	}
+
+	/* i now counts every byte consumed, i.e., the instruction length */
+	return i;
+}
+
+/*
+ * Classify the single precision value at *x by inspecting its bit
+ * pattern directly, without performing any floating point operation
+ * on it.
+ */
+static enum fp_class_type
+my_fp_classf(float *x)
+{
+	/* the bit pattern with the sign bit cleared */
+	int	mag = *(int *)x & ~0x80000000;
+
+	if (mag == 0)
+		return fp_zero;
+	if (mag < 0x00800000)
+		return fp_subnormal;
+	if (mag < 0x7f800000)
+		return fp_normal;
+	if (mag == 0x7f800000)
+		return fp_infinity;
+
+	/* a nan: the most significant fraction bit marks it quiet */
+	return ((mag & 0x400000)? fp_quiet : fp_signaling);
+}
+
+/*
+ * Classify the double precision value at *x by inspecting its bit
+ * pattern directly, without performing any floating point operation
+ * on it.  The value is read as two ints, with the sign and exponent
+ * taken from the second one.
+ */
+static enum fp_class_type
+my_fp_class(double *x)
+{
+	int	*w = (int *)x;
+	int	hi = w[1] & ~0x80000000;	/* sign bit cleared */
+	int	lo = w[0];
+
+	if (hi < 0x00100000)
+		return (((hi | lo) == 0)? fp_zero : fp_subnormal);
+	if (hi < 0x7ff00000)
+		return fp_normal;
+	if (hi == 0x7ff00000 && lo == 0)
+		return fp_infinity;
+
+	/* a nan: the most significant fraction bit marks it quiet */
+	return ((hi & 0x80000)? fp_quiet : fp_signaling);
+}
+
+/*
+ * Given a scalar SSE instruction that raised an invalid operation
+ * exception, work out from the instruction and the classes of its
+ * operands which kind of invalid operation occurred.  Return
+ * (enum fex_exception)-1 if no kind can be identified.
+ */
+static enum fex_exception
+__fex_get_sse_invalid_type(sseinst_t *inst)
+{
+	enum fp_class_type	c1, c2;
+	int			wide;
+
+	/* nonzero when the operands are to be read as doubles */
+	wide = (int)inst->op & DOUBLE;
+
+	/* a signaling nan in op2 accounts for the exception by itself */
+	if (wide)
+		c2 = my_fp_class(&inst->op2->d[0]);
+	else
+		c2 = my_fp_classf(&inst->op2->f[0]);
+	if (c2 == fp_signaling)
+		return fex_inv_snan;
+
+	/* instructions that have op2 as their only source are settled */
+	switch (inst->op) {
+	case sqrtss:
+	case sqrtsd:
+		return fex_inv_sqrt;
+
+	case cvtss2si:
+	case cvttss2si:
+	case cvtss2siq:
+	case cvttss2siq:
+	case cvtsd2si:
+	case cvttsd2si:
+	case cvtsd2siq:
+	case cvttsd2siq:
+		return fex_inv_int;
+
+	case cvtss2sd:
+	case cvtsd2ss:
+		/* no explanation other than a signaling nan is expected */
+		return (enum fex_exception) -1;
+	}
+
+	/* likewise, a signaling nan in op1 accounts for the exception */
+	if (wide)
+		c1 = my_fp_class(&inst->op1->d[0]);
+	else
+		c1 = my_fp_classf(&inst->op1->f[0]);
+	if (c1 == fp_signaling)
+		return fex_inv_snan;
+
+	/* otherwise the kind depends on the operation and operand classes */
+	switch (inst->op) {
+	case addss:
+	case addsd:
+	case subss:
+	case subsd:
+		if (c1 == fp_infinity && c2 == fp_infinity)
+			return fex_inv_isi;
+		break;
+
+	case mulss:
+	case mulsd:
+		if (c1 == fp_zero && c2 == fp_infinity)
+			return fex_inv_zmi;
+		if (c1 == fp_infinity && c2 == fp_zero)
+			return fex_inv_zmi;
+		break;
+
+	case divss:
+	case divsd:
+		if (c1 == fp_zero && c2 == fp_zero)
+			return fex_inv_zdz;
+		if (c1 == fp_infinity && c2 == fp_infinity)
+			return fex_inv_idi;
+		break;
+
+	case cmpss:
+	case cmpsd:
+	case comiss:
+	case comisd:
+	case minss:
+	case minsd:
+	case maxss:
+	case maxsd:
+		return fex_inv_cmp;
+	}
+
+	return (enum fex_exception)-1;
+}
+
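+/* prototypes for the inline templates that execute one scalar SSE
+   instruction each on operands and results held in memory */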
+extern void sse_cmpeqss(float *, float *, int *);
+extern void sse_cmpltss(float *, float *, int *);
+extern void sse_cmpless(float *, float *, int *);
+extern void sse_cmpunordss(float *, float *, int *);
+extern void sse_minss(float *, float *, float *);
+extern void sse_maxss(float *, float *, float *);
+extern void sse_addss(float *, float *, float *);
+extern void sse_subss(float *, float *, float *);
+extern void sse_mulss(float *, float *, float *);
+extern void sse_divss(float *, float *, float *);
+extern void sse_sqrtss(float *, float *);
+extern void sse_ucomiss(float *, float *);
+extern void sse_comiss(float *, float *);
+extern void sse_cvtss2sd(float *, double *);
+extern void sse_cvtsi2ss(int *, float *);
+extern void sse_cvttss2si(float *, int *);
+extern void sse_cvtss2si(float *, int *);
+#ifdef __amd64
+extern void sse_cvtsi2ssq(long long *, float *);
+extern void sse_cvttss2siq(float *, long long *);
+extern void sse_cvtss2siq(float *, long long *);
+#endif
+extern void sse_cmpeqsd(double *, double *, long long *);
+extern void sse_cmpltsd(double *, double *, long long *);
+extern void sse_cmplesd(double *, double *, long long *);
+extern void sse_cmpunordsd(double *, double *, long long *);
+extern void sse_minsd(double *, double *, double *);
+extern void sse_maxsd(double *, double *, double *);
+extern void sse_addsd(double *, double *, double *);
+extern void sse_subsd(double *, double *, double *);
+extern void sse_mulsd(double *, double *, double *);
+extern void sse_divsd(double *, double *, double *);
+extern void sse_sqrtsd(double *, double *);
+extern void sse_ucomisd(double *, double *);
+extern void sse_comisd(double *, double *);
+extern void sse_cvtsd2ss(double *, float *);
+extern void sse_cvtsi2sd(int *, double *);
+extern void sse_cvttsd2si(double *, int *);
+extern void sse_cvtsd2si(double *, int *);
+#ifdef __amd64
+extern void sse_cvtsi2sdq(long long *, double *);
+extern void sse_cvttsd2siq(double *, long long *);
+extern void sse_cvtsd2siq(double *, long long *);
+#endif
+
+/*
+ * Fill in *info with the operands, default untrapped result, and
+ * flags produced by a scalar SSE instruction, and return the type
+ * of trapped exception (if any).  On entry, the mxcsr must have
+ * all exceptions masked and all flags clear.  The same conditions
+ * will hold on exit.
+ *
+ * The operands are copied from inst->op1 and inst->op2 into *info,
+ * and the instruction is then re-executed on those copies, so the
+ * saved register images in *uap are not modified.  The set of traps
+ * that were enabled is taken from the mxcsr saved in *uap.  If no
+ * enabled exception occurs, the return value is (enum fex_exception)-1.
+ *
+ * This routine does not work if the instruction specified by *inst
+ * is not a scalar instruction.
+ */
+enum fex_exception
+__fex_get_sse_op(ucontext_t *uap, sseinst_t *inst, fex_info_t *info)
+{
+	unsigned int	e, te, mxcsr, oldmxcsr, subnorm;
+
+	/*
+	 * Perform the operation with traps disabled and check the
+	 * exception flags.  If the underflow trap was enabled, also
+	 * check for an exact subnormal result.
+	 */
+	__fenv_getmxcsr(&oldmxcsr);
+	subnorm = 0;
+	if ((int)inst->op & DOUBLE) {
+		/*
+		 * collect the operands: conversions and square root take
+		 * their single source from op2 of the instruction; all
+		 * other operations take op1 and op2
+		 */
+		if (inst->op == cvtsi2sd) {
+			info->op1.type = fex_int;
+			info->op1.val.i = inst->op2->i[0];
+			info->op2.type = fex_nodata;
+		} else if (inst->op == cvtsi2sdq) {
+			info->op1.type = fex_llong;
+			info->op1.val.l = inst->op2->l[0];
+			info->op2.type = fex_nodata;
+		} else if (inst->op == sqrtsd || inst->op == cvtsd2ss ||
+		    inst->op == cvttsd2si || inst->op == cvtsd2si ||
+		    inst->op == cvttsd2siq || inst->op == cvtsd2siq) {
+			info->op1.type = fex_double;
+			info->op1.val.d = inst->op2->d[0];
+			info->op2.type = fex_nodata;
+		} else {
+			info->op1.type = fex_double;
+			info->op1.val.d = inst->op1->d[0];
+			info->op2.type = fex_double;
+			info->op2.val.d = inst->op2->d[0];
+		}
+		/* result type defaults to double; the cases below
+		   override it where the instruction produces another type */
+		info->res.type = fex_double;
+		switch (inst->op) {
+		case cmpsd:
+			info->op = fex_cmp;
+			info->res.type = fex_llong;
+			/* the low two bits of the immediate select the
+			   predicate; bit 2 inverts the resulting mask */
+			switch (inst->imm & 3) {
+			case 0:
+				sse_cmpeqsd(&info->op1.val.d, &info->op2.val.d,
+				    &info->res.val.l);
+				break;
+
+			case 1:
+				sse_cmpltsd(&info->op1.val.d, &info->op2.val.d,
+				    &info->res.val.l);
+				break;
+
+			case 2:
+				sse_cmplesd(&info->op1.val.d, &info->op2.val.d,
+				    &info->res.val.l);
+				break;
+
+			case 3:
+				sse_cmpunordsd(&info->op1.val.d,
+				    &info->op2.val.d, &info->res.val.l);
+			}
+			if (inst->imm & 4)
+				info->res.val.l ^= 0xffffffffffffffffull;
+			break;
+
+		case minsd:
+			info->op = fex_other;
+			sse_minsd(&info->op1.val.d, &info->op2.val.d,
+			    &info->res.val.d);
+			break;
+
+		case maxsd:
+			info->op = fex_other;
+			sse_maxsd(&info->op1.val.d, &info->op2.val.d,
+			    &info->res.val.d);
+			break;
+
+		case addsd:
+			info->op = fex_add;
+			sse_addsd(&info->op1.val.d, &info->op2.val.d,
+			    &info->res.val.d);
+			/* note a subnormal result for the underflow
+			   test at the end */
+			if (my_fp_class(&info->res.val.d) == fp_subnormal)
+				subnorm = 1;
+			break;
+
+		case subsd:
+			info->op = fex_sub;
+			sse_subsd(&info->op1.val.d, &info->op2.val.d,
+			    &info->res.val.d);
+			if (my_fp_class(&info->res.val.d) == fp_subnormal)
+				subnorm = 1;
+			break;
+
+		case mulsd:
+			info->op = fex_mul;
+			sse_mulsd(&info->op1.val.d, &info->op2.val.d,
+			    &info->res.val.d);
+			if (my_fp_class(&info->res.val.d) == fp_subnormal)
+				subnorm = 1;
+			break;
+
+		case divsd:
+			info->op = fex_div;
+			sse_divsd(&info->op1.val.d, &info->op2.val.d,
+			    &info->res.val.d);
+			if (my_fp_class(&info->res.val.d) == fp_subnormal)
+				subnorm = 1;
+			break;
+
+		case sqrtsd:
+			info->op = fex_sqrt;
+			sse_sqrtsd(&info->op1.val.d, &info->res.val.d);
+			break;
+
+		case cvtsd2ss:
+			info->op = fex_cnvt;
+			info->res.type = fex_float;
+			sse_cvtsd2ss(&info->op1.val.d, &info->res.val.f);
+			if (my_fp_classf(&info->res.val.f) == fp_subnormal)
+				subnorm = 1;
+			break;
+
+		case cvtsi2sd:
+			info->op = fex_cnvt;
+			sse_cvtsi2sd(&info->op1.val.i, &info->res.val.d);
+			break;
+
+		case cvttsd2si:
+			info->op = fex_cnvt;
+			info->res.type = fex_int;
+			sse_cvttsd2si(&info->op1.val.d, &info->res.val.i);
+			break;
+
+		case cvtsd2si:
+			info->op = fex_cnvt;
+			info->res.type = fex_int;
+			sse_cvtsd2si(&info->op1.val.d, &info->res.val.i);
+			break;
+
+#ifdef __amd64
+		case cvtsi2sdq:
+			info->op = fex_cnvt;
+			sse_cvtsi2sdq(&info->op1.val.l, &info->res.val.d);
+			break;
+
+		case cvttsd2siq:
+			info->op = fex_cnvt;
+			info->res.type = fex_llong;
+			sse_cvttsd2siq(&info->op1.val.d, &info->res.val.l);
+			break;
+
+		case cvtsd2siq:
+			info->op = fex_cnvt;
+			info->res.type = fex_llong;
+			sse_cvtsd2siq(&info->op1.val.d, &info->res.val.l);
+			break;
+#endif
+
+		case ucomisd:
+			/* these compares deliver no data result; they are
+			   executed only for the flags they raise */
+			info->op = fex_cmp;
+			info->res.type = fex_nodata;
+			sse_ucomisd(&info->op1.val.d, &info->op2.val.d);
+			break;
+
+		case comisd:
+			info->op = fex_cmp;
+			info->res.type = fex_nodata;
+			sse_comisd(&info->op1.val.d, &info->op2.val.d);
+			break;
+		}
+	} else {
+		/* single precision: same structure as the double case */
+		if (inst->op == cvtsi2ss) {
+			info->op1.type = fex_int;
+			info->op1.val.i = inst->op2->i[0];
+			info->op2.type = fex_nodata;
+		} else if (inst->op == cvtsi2ssq) {
+			info->op1.type = fex_llong;
+			info->op1.val.l = inst->op2->l[0];
+			info->op2.type = fex_nodata;
+		} else if (inst->op == sqrtss || inst->op == cvtss2sd ||
+		    inst->op == cvttss2si || inst->op == cvtss2si ||
+		    inst->op == cvttss2siq || inst->op == cvtss2siq) {
+			info->op1.type = fex_float;
+			info->op1.val.f = inst->op2->f[0];
+			info->op2.type = fex_nodata;
+		} else {
+			info->op1.type = fex_float;
+			info->op1.val.f = inst->op1->f[0];
+			info->op2.type = fex_float;
+			info->op2.val.f = inst->op2->f[0];
+		}
+		info->res.type = fex_float;
+		switch (inst->op) {
+		case cmpss:
+			info->op = fex_cmp;
+			info->res.type = fex_int;
+			switch (inst->imm & 3) {
+			case 0:
+				sse_cmpeqss(&info->op1.val.f, &info->op2.val.f,
+				    &info->res.val.i);
+				break;
+
+			case 1:
+				sse_cmpltss(&info->op1.val.f, &info->op2.val.f,
+				    &info->res.val.i);
+				break;
+
+			case 2:
+				sse_cmpless(&info->op1.val.f, &info->op2.val.f,
+				    &info->res.val.i);
+				break;
+
+			case 3:
+				sse_cmpunordss(&info->op1.val.f,
+				    &info->op2.val.f, &info->res.val.i);
+			}
+			if (inst->imm & 4)
+				info->res.val.i ^= 0xffffffffu;
+			break;
+
+		case minss:
+			info->op = fex_other;
+			sse_minss(&info->op1.val.f, &info->op2.val.f,
+			    &info->res.val.f);
+			break;
+
+		case maxss:
+			info->op = fex_other;
+			sse_maxss(&info->op1.val.f, &info->op2.val.f,
+			    &info->res.val.f);
+			break;
+
+		case addss:
+			info->op = fex_add;
+			sse_addss(&info->op1.val.f, &info->op2.val.f,
+			    &info->res.val.f);
+			if (my_fp_classf(&info->res.val.f) == fp_subnormal)
+				subnorm = 1;
+			break;
+
+		case subss:
+			info->op = fex_sub;
+			sse_subss(&info->op1.val.f, &info->op2.val.f,
+			    &info->res.val.f);
+			if (my_fp_classf(&info->res.val.f) == fp_subnormal)
+				subnorm = 1;
+			break;
+
+		case mulss:
+			info->op = fex_mul;
+			sse_mulss(&info->op1.val.f, &info->op2.val.f,
+			    &info->res.val.f);
+			if (my_fp_classf(&info->res.val.f) == fp_subnormal)
+				subnorm = 1;
+			break;
+
+		case divss:
+			info->op = fex_div;
+			sse_divss(&info->op1.val.f, &info->op2.val.f,
+			    &info->res.val.f);
+			if (my_fp_classf(&info->res.val.f) == fp_subnormal)
+				subnorm = 1;
+			break;
+
+		case sqrtss:
+			info->op = fex_sqrt;
+			sse_sqrtss(&info->op1.val.f, &info->res.val.f);
+			break;
+
+		case cvtss2sd:
+			info->op = fex_cnvt;
+			info->res.type = fex_double;
+			sse_cvtss2sd(&info->op1.val.f, &info->res.val.d);
+			break;
+
+		case cvtsi2ss:
+			info->op = fex_cnvt;
+			sse_cvtsi2ss(&info->op1.val.i, &info->res.val.f);
+			break;
+
+		case cvttss2si:
+			info->op = fex_cnvt;
+			info->res.type = fex_int;
+			sse_cvttss2si(&info->op1.val.f, &info->res.val.i);
+			break;
+
+		case cvtss2si:
+			info->op = fex_cnvt;
+			info->res.type = fex_int;
+			sse_cvtss2si(&info->op1.val.f, &info->res.val.i);
+			break;
+
+#ifdef __amd64
+		case cvtsi2ssq:
+			info->op = fex_cnvt;
+			sse_cvtsi2ssq(&info->op1.val.l, &info->res.val.f);
+			break;
+
+		case cvttss2siq:
+			info->op = fex_cnvt;
+			info->res.type = fex_llong;
+			sse_cvttss2siq(&info->op1.val.f, &info->res.val.l);
+			break;
+
+		case cvtss2siq:
+			info->op = fex_cnvt;
+			info->res.type = fex_llong;
+			sse_cvtss2siq(&info->op1.val.f, &info->res.val.l);
+			break;
+#endif
+
+		case ucomiss:
+			info->op = fex_cmp;
+			info->res.type = fex_nodata;
+			sse_ucomiss(&info->op1.val.f, &info->op2.val.f);
+			break;
+
+		case comiss:
+			info->op = fex_cmp;
+			info->res.type = fex_nodata;
+			sse_comiss(&info->op1.val.f, &info->op2.val.f);
+			break;
+		}
+	}
+	/*
+	 * capture the flags the operation raised, then put back the
+	 * mxcsr value read on entry; the mask 0x3d keeps bits 0 and 2-5
+	 * and drops bit 1, which has no counterpart among the FE_*
+	 * flags tested below
+	 */
+	__fenv_getmxcsr(&mxcsr);
+	info->flags = mxcsr & 0x3d;
+	__fenv_setmxcsr(&oldmxcsr);
+
+	/*
+	 * determine which exception would have been trapped: shifting
+	 * the saved mxcsr right by 7 lines its mask bits up with the
+	 * flag bits, and complementing turns "masked" into "trap
+	 * enabled"
+	 */
+	te = ~(uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.mxcsr
+	    >> 7) & 0x3d;
+	e = mxcsr & te;
+	if (e & FE_INVALID)
+		return __fex_get_sse_invalid_type(inst);
+	if (e & FE_DIVBYZERO)
+		return fex_division;
+	if (e & FE_OVERFLOW)
+		return fex_overflow;
+	/* with the underflow trap enabled, a subnormal result counts
+	   as underflow even if the underflow flag was not raised */
+	if ((e & FE_UNDERFLOW) || (subnorm && (te & FE_UNDERFLOW)))
+		return fex_underflow;
+	if (e & FE_INEXACT)
+		return fex_inexact;
+	return (enum fex_exception)-1;
+}
+
+/*
+ * Return a pointer to the k-th part of the packed operand *v, where
+ * kind says how the operand is to be viewed: 'd' for doubles, 'i' for
+ * ints, and anything else for floats.
+ */
+static sseoperand_t *
+simd_part(sseoperand_t *v, int kind, int k)
+{
+	switch (kind) {
+	case 'd':
+		return (sseoperand_t *)&v->d[k];
+
+	case 'i':
+		return (sseoperand_t *)&v->i[k];
+	}
+	return (sseoperand_t *)&v->f[k];
+}
+
+/*
+ * Emulate a SIMD SSE instruction one part at a time.  Each part is
+ * handed to __fex_get_sse_op as the equivalent scalar instruction
+ * acting on the corresponding elements of the two operands.  For each
+ * part i that the instruction has, e[i] receives the trapped exception
+ * that part would incur if executed in isolation, and info[i] receives
+ * its operands, default untrapped result, and flags.  e[i] is -1 when
+ * part i incurs no trapped exception and also when the instruction has
+ * fewer than i+1 parts (or is not one of the instructions handled
+ * here); info[i] is left untouched for parts that do not exist.
+ *
+ * This routine does not work if the instruction specified by *inst
+ * is not a SIMD instruction.
+ */
+void
+__fex_get_simd_op(ucontext_t *uap, sseinst_t *inst, enum fex_exception *e,
+    fex_info_t *info)
+{
+	sseinst_t	scalar;
+	int		k, nparts, dst, src;
+
+	e[0] = e[1] = e[2] = e[3] = -1;
+
+	/* select the scalar instruction that performs a single part */
+	switch (inst->op) {
+	case cmpps:
+		scalar.op = cmpss;
+		scalar.imm = inst->imm;
+		break;
+
+	case minps:
+		scalar.op = minss;
+		break;
+
+	case maxps:
+		scalar.op = maxss;
+		break;
+
+	case addps:
+		scalar.op = addss;
+		break;
+
+	case subps:
+		scalar.op = subss;
+		break;
+
+	case mulps:
+		scalar.op = mulss;
+		break;
+
+	case divps:
+		scalar.op = divss;
+		break;
+
+	case sqrtps:
+		scalar.op = sqrtss;
+		break;
+
+	case cvtdq2ps:
+	case cvtpi2ps:
+		scalar.op = cvtsi2ss;
+		break;
+
+	case cvttps2dq:
+	case cvttps2pi:
+		scalar.op = cvttss2si;
+		break;
+
+	case cvtps2dq:
+	case cvtps2pi:
+		scalar.op = cvtss2si;
+		break;
+
+	case cmppd:
+		scalar.op = cmpsd;
+		scalar.imm = inst->imm;
+		break;
+
+	case minpd:
+		scalar.op = minsd;
+		break;
+
+	case maxpd:
+		scalar.op = maxsd;
+		break;
+
+	case addpd:
+		scalar.op = addsd;
+		break;
+
+	case subpd:
+		scalar.op = subsd;
+		break;
+
+	case mulpd:
+		scalar.op = mulsd;
+		break;
+
+	case divpd:
+		scalar.op = divsd;
+		break;
+
+	case sqrtpd:
+		scalar.op = sqrtsd;
+		break;
+
+	case cvtpi2pd:
+	case cvtdq2pd:
+		scalar.op = cvtsi2sd;
+		break;
+
+	case cvttpd2pi:
+	case cvttpd2dq:
+		scalar.op = cvttsd2si;
+		break;
+
+	case cvtpd2pi:
+	case cvtpd2dq:
+		scalar.op = cvtsd2si;
+		break;
+
+	case cvtps2pd:
+		scalar.op = cvtss2sd;
+		break;
+
+	case cvtpd2ps:
+		scalar.op = cvtsd2ss;
+		break;
+
+	default:
+		/* not an instruction handled here: nothing to emulate */
+		return;
+	}
+
+	/*
+	 * select the number of parts and how the destination (op1) and
+	 * source (op2) are to be viewed when stepping through them
+	 */
+	switch (inst->op) {
+	case cvtdq2ps:
+		nparts = 4;
+		dst = 'f';
+		src = 'i';
+		break;
+
+	case cvttps2dq:
+	case cvtps2dq:
+		nparts = 4;
+		dst = 'i';
+		src = 'f';
+		break;
+
+	case cvtpi2ps:
+		nparts = 2;
+		dst = 'f';
+		src = 'i';
+		break;
+
+	case cvttps2pi:
+	case cvtps2pi:
+		nparts = 2;
+		dst = 'i';
+		src = 'f';
+		break;
+
+	case cmppd:
+	case minpd:
+	case maxpd:
+	case addpd:
+	case subpd:
+	case mulpd:
+	case divpd:
+	case sqrtpd:
+		nparts = 2;
+		dst = 'd';
+		src = 'd';
+		break;
+
+	case cvtpi2pd:
+	case cvtdq2pd:
+		nparts = 2;
+		dst = 'd';
+		src = 'i';
+		break;
+
+	case cvttpd2pi:
+	case cvttpd2dq:
+	case cvtpd2pi:
+	case cvtpd2dq:
+		nparts = 2;
+		dst = 'i';
+		src = 'd';
+		break;
+
+	case cvtps2pd:
+		nparts = 2;
+		dst = 'd';
+		src = 'f';
+		break;
+
+	case cvtpd2ps:
+		nparts = 2;
+		dst = 'f';
+		src = 'd';
+		break;
+
+	default:
+		/* what remains are the packed single arithmetic,
+		   compare, min/max, and square root instructions */
+		nparts = 4;
+		dst = 'f';
+		src = 'f';
+		break;
+	}
+
+	/* perform each part of the SIMD operation */
+	for (k = 0; k < nparts; k++) {
+		scalar.op1 = simd_part(inst->op1, dst, k);
+		scalar.op2 = simd_part(inst->op2, src, k);
+		e[k] = __fex_get_sse_op(uap, &scalar, &info[k]);
+	}
+}
+
+/*
+ * Store the result value from *info in the destination of the scalar
+ * SSE instruction specified by *inst.  If no result is given but the
+ * exception is underflow or overflow, supply the default trapped result
+ * (the exponent-wrapped value); for any other exception, supply the
+ * default untrapped result computed by __fex_get_sse_op.
+ *
+ * uap	saved context; its eflags image is updated for the compares
+ *	that write eflags
+ * inst	decoded instruction; inst->op1 is the destination
+ * e	the exception that occurred
+ * info	result supplied by the handler (info->res), converted to the
+ *	type of the destination before it is stored; *info may be
+ *	overwritten when a default result has to be generated
+ *
+ * Nothing is stored if no result can be generated or if info->res.type
+ * is not a recognized type.
+ *
+ * This routine does not work if the instruction specified by *inst
+ * is not a scalar instruction.
+ */
+void
+__fex_st_sse_result(ucontext_t *uap, sseinst_t *inst, enum fex_exception e,
+    fex_info_t *info)
+{
+	int		i;
+	long long	l;
+	float		f, fscl;
+	double		d, dscl;
+
+	/* for compares that write eflags, just set the flags
+	   to indicate "unordered": ZF, PF, and CF (0x45) are set and,
+	   as the instructions themselves do, OF, SF, and AF (0x890)
+	   are cleared */
+	if (inst->op == ucomiss || inst->op == comiss ||
+	    inst->op == ucomisd || inst->op == comisd) {
+		uap->uc_mcontext.gregs[REG_PS] =
+		    (uap->uc_mcontext.gregs[REG_PS] & ~0x890) | 0x45;
+		return;
+	}
+
+	/* if info doesn't specify a result value, try to generate
+	   the default trapped result */
+	if (info->res.type == fex_nodata) {
+		/* set scale factors for exponent wrapping; each factor
+		   is applied twice below, for a total adjustment of
+		   2^-+192 (single) or 2^-+1536 (double) */
+		switch (e) {
+		case fex_overflow:
+			fscl = 1.262177448e-29f; /* 2^-96 */
+			dscl = 6.441148769597133308e-232; /* 2^-768 */
+			break;
+
+		case fex_underflow:
+			fscl = 7.922816251e+28f; /* 2^96 */
+			dscl = 1.552518092300708935e+231; /* 2^768 */
+			break;
+
+		default:
+			/* no wrapped result applies; fall back on the
+			   default untrapped result, if there is one */
+			(void) __fex_get_sse_op(uap, inst, info);
+			if (info->res.type == fex_nodata)
+				return;
+			goto stuff;
+		}
+
+		/* generate the wrapped result */
+		if (inst->op == cvtsd2ss) {
+			info->op1.type = fex_double;
+			info->op1.val.d = inst->op2->d[0];
+			info->op2.type = fex_nodata;
+			info->res.type = fex_float;
+			info->res.val.f = (float)(fscl * (fscl *
+			    info->op1.val.d));
+		} else if ((int)inst->op & DOUBLE) {
+			info->op1.type = fex_double;
+			info->op1.val.d = inst->op1->d[0];
+			info->op2.type = fex_double;
+			info->op2.val.d = inst->op2->d[0];
+			info->res.type = fex_double;
+			switch (inst->op) {
+			case addsd:
+				info->res.val.d = dscl * (dscl *
+				    info->op1.val.d + dscl * info->op2.val.d);
+				break;
+
+			case subsd:
+				info->res.val.d = dscl * (dscl *
+				    info->op1.val.d - dscl * info->op2.val.d);
+				break;
+
+			case mulsd:
+				info->res.val.d = (dscl * info->op1.val.d) *
+				    (dscl * info->op2.val.d);
+				break;
+
+			case divsd:
+				info->res.val.d = (dscl * info->op1.val.d) /
+				    (info->op2.val.d / dscl);
+				break;
+
+			default:
+				/* no wrapped result is defined for
+				   other operations */
+				return;
+			}
+		} else {
+			info->op1.type = fex_float;
+			info->op1.val.f = inst->op1->f[0];
+			info->op2.type = fex_float;
+			info->op2.val.f = inst->op2->f[0];
+			info->res.type = fex_float;
+			switch (inst->op) {
+			case addss:
+				info->res.val.f = fscl * (fscl *
+				    info->op1.val.f + fscl * info->op2.val.f);
+				break;
+
+			case subss:
+				info->res.val.f = fscl * (fscl *
+				    info->op1.val.f - fscl * info->op2.val.f);
+				break;
+
+			case mulss:
+				info->res.val.f = (fscl * info->op1.val.f) *
+				    (fscl * info->op2.val.f);
+				break;
+
+			case divss:
+				info->res.val.f = (fscl * info->op1.val.f) /
+				    (info->op2.val.f / fscl);
+				break;
+
+			default:
+				/* no wrapped result is defined for
+				   other operations */
+				return;
+			}
+		}
+	}
+
+	/* put the result in the destination, converting it from the
+	   type given in info->res to the type the instruction delivers;
+	   an unrecognized result type stores nothing rather than an
+	   indeterminate value */
+stuff:
+	if (inst->op == cmpss || inst->op == cvttss2si || inst->op == cvtss2si
+	    || inst->op == cvttsd2si || inst->op == cvtsd2si) {
+		/* destination is a 32-bit integer */
+		switch (info->res.type) {
+		case fex_int:
+			i = info->res.val.i;
+			break;
+
+		case fex_llong:
+			i = info->res.val.l;
+			break;
+
+		case fex_float:
+			i = info->res.val.f;
+			break;
+
+		case fex_double:
+			i = info->res.val.d;
+			break;
+
+		case fex_ldouble:
+			i = info->res.val.q;
+			break;
+
+		default:
+			return;
+		}
+		inst->op1->i[0] = i;
+	} else if (inst->op == cmpsd || inst->op == cvttss2siq ||
+	    inst->op == cvtss2siq || inst->op == cvttsd2siq ||
+	    inst->op == cvtsd2siq) {
+		/* destination is a 64-bit integer */
+		switch (info->res.type) {
+		case fex_int:
+			l = info->res.val.i;
+			break;
+
+		case fex_llong:
+			l = info->res.val.l;
+			break;
+
+		case fex_float:
+			l = info->res.val.f;
+			break;
+
+		case fex_double:
+			l = info->res.val.d;
+			break;
+
+		case fex_ldouble:
+			l = info->res.val.q;
+			break;
+
+		default:
+			return;
+		}
+		inst->op1->l[0] = l;
+	} else if ((((int)inst->op & DOUBLE) && inst->op != cvtsd2ss) ||
+	    inst->op == cvtss2sd) {
+		/* destination is a double */
+		switch (info->res.type) {
+		case fex_int:
+			d = info->res.val.i;
+			break;
+
+		case fex_llong:
+			d = info->res.val.l;
+			break;
+
+		case fex_float:
+			d = info->res.val.f;
+			break;
+
+		case fex_double:
+			d = info->res.val.d;
+			break;
+
+		case fex_ldouble:
+			d = info->res.val.q;
+			break;
+
+		default:
+			return;
+		}
+		inst->op1->d[0] = d;
+	} else {
+		/* destination is a float */
+		switch (info->res.type) {
+		case fex_int:
+			f = info->res.val.i;
+			break;
+
+		case fex_llong:
+			f = info->res.val.l;
+			break;
+
+		case fex_float:
+			f = info->res.val.f;
+			break;
+
+		case fex_double:
+			f = info->res.val.d;
+			break;
+
+		case fex_ldouble:
+			f = info->res.val.q;
+			break;
+
+		default:
+			return;
+		}
+		inst->op1->f[0] = f;
+	}
+}
+
+/*
+ * Store the results from a SIMD instruction.  For each i, store
+ * the result value from info[i] in the i-th part of the destination
+ * of the SIMD SSE instruction specified by *inst.  If no result
+ * is given but the exception indicated by e[i] is underflow or
+ * overflow, supply the default trapped result.
+ *
+ * This routine does not work if the instruction specified by *inst
+ * is not a SIMD instruction.
+ */
+void
+__fex_st_simd_result(ucontext_t *uap, sseinst_t *inst, enum fex_exception *e,
+    fex_info_t *info)
+{
+	sseinst_t	dummy;
+	int		i;
+
+	/* store each part */
+	switch (inst->op) {
+	case cmpps:
+		dummy.op = cmpss;
+		dummy.imm = inst->imm;
+		for (i = 0; i < 4; i++) {
+			dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
+			dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
+			__fex_st_sse_result(uap, &dummy, e[i], &info[i]);
+		}
+		break;
+
+	case minps:
+		dummy.op = minss;
+		for (i = 0; i < 4; i++) {
+			dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
+			dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
+			__fex_st_sse_result(uap, &dummy, e[i], &info[i]);
+		}
+		break;
+
+	case maxps:
+		dummy.op = maxss;
+		for (i = 0; i < 4; i++) {
+			dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
+			dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
+			__fex_st_sse_result(uap, &dummy, e[i], &info[i]);
+		}
+		break;
+
+	case addps:
+		dummy.op = addss;
+		for (i = 0; i < 4; i++) {
+			dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
+			dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
+			__fex_st_sse_result(uap, &dummy, e[i], &info[i]);
+		}
+		break;
+
+	case subps:
+		dummy.op = subss;
+		for (i = 0; i < 4; i++) {
+			dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
+			dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
+			__fex_st_sse_result(uap, &dummy, e[i], &info[i]);
+		}
+		break;
+
+	case mulps:
+		dummy.op = mulss;
+		for (i = 0; i < 4; i++) {
+			dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
+			dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
+			__fex_st_sse_result(uap, &dummy, e[i], &info[i]);
+		}
+		break;
+
+	case divps:
+		dummy.op = divss;
+		for (i = 0; i < 4; i++) {
+			dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
+			dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
+			__fex_st_sse_result(uap, &dummy, e[i], &info[i]);
+		}
+		break;
+
+	case sqrtps:
+		dummy.op = sqrtss;
+		for (i = 0; i < 4; i++) {
+			dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
+			dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
+			__fex_st_sse_result(uap, &dummy, e[i], &info[i]);
+		}
+		break;
+
+	case cvtdq2ps:
+		dummy.op = cvtsi2ss;
+		for (i = 0; i < 4; i++) {
+			dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
+			dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
+			__fex_st_sse_result(uap, &dummy, e[i], &info[i]);
+		}
+		break;
+
+	case cvttps2dq:
+		dummy.op = cvttss2si;
+		for (i = 0; i < 4; i++) {
+			dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
+			dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
+			__fex_st_sse_result(uap, &dummy, e[i], &info[i]);
+		}
+		break;
+
+	case cvtps2dq:
+		dummy.op = cvtss2si;
+		for (i = 0; i < 4; i++) {
+			dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
+			dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
+			__fex_st_sse_result(uap, &dummy, e[i], &info[i]);
+		}
+		break;
+
+	case cvtpi2ps:
+		dummy.op = cvtsi2ss;
+		for (i = 0; i < 2; i++) {
+			dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
+			dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
+			__fex_st_sse_result(uap, &dummy, e[i], &info[i]);
+		}
+		break;
+
+	case cvttps2pi:
+		dummy.op = cvttss2si;
+		for (i = 0; i < 2; i++) {
+			dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
+			dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
+			__fex_st_sse_result(uap, &dummy, e[i], &info[i]);
+		}
+		break;
+
+	case cvtps2pi:
+		dummy.op = cvtss2si;
+		for (i = 0; i < 2; i++) {
+			dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
+			dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
+			__fex_st_sse_result(uap, &dummy, e[i], &info[i]);
+		}
+		break;
+
+	case cmppd:
+		dummy.op = cmpsd;
+		dummy.imm = inst->imm;
+		for (i = 0; i < 2; i++) {
+			dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
+			dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
+			__fex_st_sse_result(uap, &dummy, e[i], &info[i]);
+		}
+		break;
+
+	case minpd:
+		dummy.op = minsd;
+		for (i = 0; i < 2; i++) {
+			dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
+			dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
+			__fex_st_sse_result(uap, &dummy, e[i], &info[i]);
+		}
+		break;
+
+	case maxpd:
+		dummy.op = maxsd;
+		for (i = 0; i < 2; i++) {
+			dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
+			dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
+			__fex_st_sse_result(uap, &dummy, e[i], &info[i]);
+		}
+		break;
+
+	case addpd:
+		dummy.op = addsd;
+		for (i = 0; i < 2; i++) {
+			dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
+			dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
+			__fex_st_sse_result(uap, &dummy, e[i], &info[i]);
+		}
+		break;
+
+	case subpd:
+		dummy.op = subsd;
+		for (i = 0; i < 2; i++) {
+			dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
+			dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
+			__fex_st_sse_result(uap, &dummy, e[i], &info[i]);
+		}
+		break;
+
+	case mulpd:
+		dummy.op = mulsd;
+		for (i = 0; i < 2; i++) {
+			dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
+			dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
+			__fex_st_sse_result(uap, &dummy, e[i], &info[i]);
+		}
+		break;
+
+	case divpd:
+		dummy.op = divsd;
+		for (i = 0; i < 2; i++) {
+			dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
+			dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
+			__fex_st_sse_result(uap, &dummy, e[i], &info[i]);
+		}
+		break;
+
+	case sqrtpd:
+		dummy.op = sqrtsd;
+		for (i = 0; i < 2; i++) {
+			dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
+			dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
+			__fex_st_sse_result(uap, &dummy, e[i], &info[i]);
+		}
+		break;
+
+	case cvtpi2pd:
+	case cvtdq2pd:
+		dummy.op = cvtsi2sd;
+		for (i = 0; i < 2; i++) {
+			dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
+			dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
+			__fex_st_sse_result(uap, &dummy, e[i], &info[i]);
+		}
+		break;
+
+	case cvttpd2pi:
+	case cvttpd2dq:
+		dummy.op = cvttsd2si;
+		for (i = 0; i < 2; i++) {
+			dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
+			dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
+			__fex_st_sse_result(uap, &dummy, e[i], &info[i]);
+		}
+		/* for cvttpd2dq, zero the high 64 bits of the destination */
+		if (inst->op == cvttpd2dq)
+			inst->op1->l[1] = 0ll;
+		break;
+
+	case cvtpd2pi:
+	case cvtpd2dq:
+		dummy.op = cvtsd2si;
+		for (i = 0; i < 2; i++) {
+			dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
+			dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
+			__fex_st_sse_result(uap, &dummy, e[i], &info[i]);
+		}
+		/* for cvtpd2dq, zero the high 64 bits of the destination */
+		if (inst->op == cvtpd2dq)
+			inst->op1->l[1] = 0ll;
+		break;
+
+	case cvtps2pd:
+		dummy.op = cvtss2sd;
+		for (i = 0; i < 2; i++) {
+			dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
+			dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
+			__fex_st_sse_result(uap, &dummy, e[i], &info[i]);
+		}
+		break;
+
+	case cvtpd2ps:
+		dummy.op = cvtsd2ss;
+		for (i = 0; i < 2; i++) {
+			dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
+			dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
+			__fex_st_sse_result(uap, &dummy, e[i], &info[i]);
+		}
+		/* zero the high 64 bits of the destination */
+		inst->op1->l[1] = 0ll;
+	}
+}
diff --git a/usr/src/libm/src/m9x/__fex_sym.c b/usr/src/libm/src/m9x/__fex_sym.c
new file mode 100644
index 0000000..7942493
--- /dev/null
+++ b/usr/src/libm/src/m9x/__fex_sym.c
@@ -0,0 +1,306 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"@(#)__fex_sym.c	1.7	06/01/31 SMI"
+
+#include "fenv_synonyms.h"
+#include <elf.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <procfs.h>
+#include <string.h>
+#include <sys/stat.h>
+
+#if defined(__sparcv9) || defined(__amd64)
+
+#define Elf_Ehdr	Elf64_Ehdr
+#define Elf_Phdr	Elf64_Phdr
+#define Elf_Shdr	Elf64_Shdr
+#define Elf_Sym		Elf64_Sym
+#define ELF_ST_BIND	ELF64_ST_BIND
+#define ELF_ST_TYPE	ELF64_ST_TYPE
+
+#else
+
+#define Elf_Ehdr	Elf32_Ehdr
+#define Elf_Phdr	Elf32_Phdr
+#define Elf_Shdr	Elf32_Shdr
+#define Elf_Sym		Elf32_Sym
+#define ELF_ST_BIND	ELF32_ST_BIND
+#define ELF_ST_TYPE	ELF32_ST_TYPE
+
+#endif	/* __sparcv9 */
+
+/* semi-permanent data established by __fex_sym_init */
+static	prmap_t		*pm = NULL;		/* prmap_t array */
+static	int			npm = 0;		/* number of entries in pm */
+
+/* transient data modified by __fex_sym */
+static	prmap_t		*lpm = NULL;	/* prmap_t found in last call */
+static	Elf_Phdr	*ph = NULL;		/* program header array */
+static	int			phsize = 0;		/* size of ph */
+static	int			nph;			/* number of entries in ph */
+static	char		*stbuf = NULL;	/* symbol and string table buffer */
+static	int			stbufsize = 0;	/* size of stbuf */
+static	int			stoffset;		/* offset of string table in stbuf */
+static	int			nsyms;			/* number of symbols in stbuf */
+
+/*
+ * Refresh the cached prmap_t list from /proc/self/map.  This must be
+ * called before each stack trace.  On any failure the list is left
+ * empty (pm == NULL, npm == 0).
+ */
+void
+__fex_sym_init()
+{
+	struct stat	sb;
+	long		nread;
+	int			fd;
+
+	/* discard the list left behind by an earlier call */
+	if (pm != NULL)
+		free(pm);
+	pm = lpm = NULL;
+	npm = 0;
+
+	/* size the buffer from the current length of the map file */
+	if (stat("/proc/self/map", &sb) < 0)
+		return;
+	if (sb.st_size <= 0)
+		return;
+	pm = (prmap_t*)malloc(sb.st_size);
+	if (pm == NULL)
+		return;
+
+	/* read the whole file; a short read invalidates the list */
+	fd = open("/proc/self/map", O_RDONLY);
+	if (fd >= 0)
+	{
+		nread = read(fd, pm, sb.st_size);
+		close(fd);
+		if (nread == sb.st_size)
+		{
+			npm = (int) (nread / sizeof(prmap_t));
+			return;
+		}
+	}
+
+	/* could not open or fully read the map file */
+	free(pm);
+	pm = NULL;
+}
+
+/*
+ * Read the ELF program headers, the symbol table and the string table
+ * it links to from the object open on fd.
+ *
+ * The program headers are stored in ph (nph entries).  The symbol table
+ * followed by the string table is stored in stbuf; nsyms is set to the
+ * number of symbols and stoffset to the offset of the string table
+ * within stbuf.  ph and stbuf are grown as needed and reused across
+ * calls.
+ *
+ * Returns 0 on success.  Returns -1 if the file is not an ELF object
+ * with the expected header entry sizes, if it has no usable symbol or
+ * string table, if the tables are too large to be described by the int
+ * bookkeeping used here, or if any allocation, seek or read fails.
+ */
+static int
+__fex_read_syms(int fd)
+{
+	/* INT_MAX, computed without <limits.h> */
+	const unsigned int	maxsize = ~0U >> 1;
+	Elf_Ehdr	h;
+	Elf_Shdr	*sh;
+	int			i, size;
+
+	/* read the ELF header */
+	if (read(fd, &h, sizeof(h)) != sizeof(h))
+		return -1;
+	if (h.e_ident[EI_MAG0] != ELFMAG0 ||
+		h.e_ident[EI_MAG1] != ELFMAG1 ||
+		h.e_ident[EI_MAG2] != ELFMAG2 ||
+		h.e_ident[EI_MAG3] != ELFMAG3 ||
+		h.e_phentsize != sizeof(Elf_Phdr) ||
+		h.e_shentsize != sizeof(Elf_Shdr))
+		return -1;
+
+	/* get space for the program headers */
+	size = h.e_phnum * h.e_phentsize;
+	if (size > phsize)
+	{
+		if (ph)
+			free(ph);
+		phsize = nph = 0;
+		if ((ph = (Elf_Phdr*)malloc(size)) == NULL)
+			return -1;
+		phsize = size;
+	}
+
+	/* read the program headers */
+	if (lseek(fd, h.e_phoff, SEEK_SET) != h.e_phoff ||
+		read(fd, ph, size) != (ssize_t)size)
+	{
+		nph = 0;
+		return -1;
+	}
+	nph = h.e_phnum;
+
+	/* read the section headers */
+	size = h.e_shnum * h.e_shentsize;
+	if ((sh = (Elf_Shdr*)malloc(size)) == NULL)
+		return -1;
+	if (lseek(fd, h.e_shoff, SEEK_SET) != h.e_shoff ||
+		read(fd, sh, size) != (ssize_t)size)
+	{
+		free(sh);
+		return -1;
+	}
+
+	/* find the symtab section header */
+	for (i = 0; i < h.e_shnum; i++)
+	{
+		if (sh[i].sh_type == SHT_SYMTAB)
+			break; /* assume there is only one */
+	}
+	if (i == h.e_shnum || sh[i].sh_size == 0 ||
+		sh[i].sh_entsize != sizeof(Elf_Sym) ||
+		sh[i].sh_link < 1 || sh[i].sh_link >= h.e_shnum ||
+		sh[sh[i].sh_link].sh_type != SHT_STRTAB ||
+		sh[sh[i].sh_link].sh_size == 0)
+	{
+		free(sh);
+		return -1;
+	}
+
+	/*
+	 * The sizes and offsets kept for stbuf are ints, so reject tables
+	 * whose combined size does not fit in a positive int.  Without this
+	 * check the truncated size could be smaller than the tables (or
+	 * negative), stbuf would be under-allocated, and the reads below
+	 * would run past its end.  The second comparison cannot wrap
+	 * because the first one bounds sh[i].sh_size by maxsize.
+	 */
+	if (sh[i].sh_size > maxsize ||
+		sh[sh[i].sh_link].sh_size > maxsize - sh[i].sh_size)
+	{
+		free(sh);
+		return -1;
+	}
+
+	/* get space for the symbol and string tables */
+	size = (int) (sh[i].sh_size + sh[sh[i].sh_link].sh_size);
+	if (size > stbufsize)
+	{
+		if (stbuf)
+			free(stbuf);
+		stbufsize = nsyms = 0;
+		if ((stbuf = (char*)malloc(size)) == NULL)
+		{
+			free(sh);
+			return -1;
+		}
+		stbufsize = size;
+	}
+
+	/* read the symbol table, then the string table right after it */
+	if (lseek(fd, sh[i].sh_offset, SEEK_SET) != sh[i].sh_offset ||
+		read(fd, stbuf, sh[i].sh_size) != sh[i].sh_size ||
+		lseek(fd, sh[sh[i].sh_link].sh_offset, SEEK_SET) !=
+			sh[sh[i].sh_link].sh_offset ||
+		read(fd, stbuf + sh[i].sh_size, sh[sh[i].sh_link].sh_size) !=
+			sh[sh[i].sh_link].sh_size)
+	{
+		free(sh);
+		return -1;
+	}
+	nsyms = (int) (sh[i].sh_size / sh[i].sh_entsize);
+	stoffset = (int) sh[i].sh_size;
+
+	free(sh);
+	return 0;
+}
+
+/*
+ * Find the symbol corresponding to the text address a.  On success the
+ * symbol's name is passed back through *name and the mapped address of
+ * the symbol is returned; on any failure NULL is returned.
+ *
+ * The symbol chosen is the one with the highest value not exceeding
+ * the address, among the symbols lying in the same loadable segment.
+ * The mapping found by the previous call is remembered in lpm so that
+ * consecutive lookups in the same object do not reread its tables.
+ */
+char *
+__fex_sym(char *a, char **name)
+{
+	Elf_Sym			*sym;
+	Elf_Phdr		*seg;
+	prmap_t			*map;
+	unsigned long	fo, va, value;
+	int				fd, k, rc, nm, bind, type;
+	char			fname[PRMAPSZ+20];
+
+	/* unless the mapping used by the last call still covers a ... */
+	if (lpm == NULL || a < (char*)lpm->pr_vaddr ||
+		a >= (char*)lpm->pr_vaddr + lpm->pr_size)
+	{
+		/* ... look for a prmap_t that contains the address */
+		map = NULL;
+		for (k = 0; k < npm; k++)
+		{
+			if (a >= (char*)pm[k].pr_vaddr && a < (char*)pm[k].pr_vaddr +
+				pm[k].pr_size)
+			{
+				map = &pm[k];
+				break;
+			}
+		}
+		if (map == NULL)
+			return NULL;
+
+		/* open the mapped object; anonymous mappings have no name */
+		if (map->pr_mapname[0] == '\0')
+			return NULL;
+		strcpy(fname, "/proc/self/object/");
+		strncat(fname, map->pr_mapname, PRMAPSZ);
+		fd = open(fname, O_RDONLY);
+		if (fd < 0)
+			return NULL;
+
+		/* the cached tables are about to be overwritten */
+		lpm = NULL;
+		rc = __fex_read_syms(fd);
+		close(fd);
+		if (rc < 0)
+			return NULL;
+		lpm = map;
+	}
+
+	/* translate the mapped address into a file offset */
+	fo = (a - (char*)lpm->pr_vaddr) + lpm->pr_offset;
+
+	/* locate the loadable segment whose file image holds that offset */
+	seg = NULL;
+	for (k = 0; k < nph; k++)
+	{
+		if (ph[k].p_type == PT_LOAD && fo >= ph[k].p_offset &&
+			fo < ph[k].p_offset + ph[k].p_filesz)
+		{
+			seg = &ph[k];
+			break;
+		}
+	}
+	if (seg == NULL)
+		return NULL;
+
+	/* translate the file offset into the object's virtual address */
+	va = (fo - seg->p_offset) + seg->p_vaddr;
+
+	/* scan for the best symbol: highest value that is still <= va */
+	value = 0;
+	nm = 0;
+	for (k = 0; k < nsyms; k++)
+	{
+		sym = (Elf_Sym*)stbuf + k;
+
+		/* ignore unnamed and undefined symbols */
+		if (sym->st_name == 0 || sym->st_shndx == SHN_UNDEF)
+			continue;
+
+		/* accept only local, global and weak bindings */
+		bind = ELF_ST_BIND(sym->st_info);
+		if (bind != STB_LOCAL && bind != STB_GLOBAL && bind != STB_WEAK)
+			continue;
+
+		/* accept only untyped, object and function symbols */
+		type = ELF_ST_TYPE(sym->st_info);
+		if (type != STT_NOTYPE && type != STT_OBJECT && type != STT_FUNC)
+			continue;
+
+		/* the symbol has to lie within the segment found above */
+		if (sym->st_value < seg->p_vaddr ||
+			sym->st_value >= seg->p_vaddr + seg->p_memsz)
+			continue;
+
+		/* it must not be past va, nor below the best so far */
+		if (sym->st_value < value || sym->st_value > va)
+			continue;
+
+		value = sym->st_value;
+		nm = sym->st_name;
+	}
+	if (nm == 0)
+		return NULL;
+
+	/* hand back the name and the symbol's address within the mapping */
+	*name = stbuf + stoffset + nm;
+	fo = (value - seg->p_vaddr) + seg->p_offset;
+	return (char*)lpm->pr_vaddr + (fo - lpm->pr_offset);
+}
diff --git a/usr/src/libm/src/m9x/fdim.c b/usr/src/libm/src/m9x/fdim.c
new file mode 100644
index 0000000..5f888e1
--- /dev/null
+++ b/usr/src/libm/src/m9x/fdim.c
@@ -0,0 +1,55 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"@(#)fdim.c	1.6	06/01/31 SMI"
+
+#if defined(ELFOBJ)
+#pragma weak fdim = __fdim
+#endif
+
+/*
+ * fdim(x,y) returns x - y if x > y, +0 if x <= y, and NaN if x and
+ * y are unordered.
+ *
+ * fdim(x,y) raises overflow or inexact if x > y and x - y overflows
+ * or is inexact.  It raises invalid if either operand is a signaling
+ * NaN.  Otherwise, it raises no exceptions.
+ */
+
+#include "libm.h"	/* for islessequal macro */
+
+double
+__fdim(double x, double y) {
+	int	clamp;
+
+	/* clamp is nonzero only when x and y are ordered and x <= y */
+#if defined(COMPARISON_MACRO_BUG)
+	clamp = (x == x && y == y && x <= y);
+#else
+	clamp = islessequal(x, y);
+#endif
+	if (clamp) {
+		/* turn the operands into +0 and -0; the difference below is +0 */
+		x = 0.0;
+		y = -x;
+	}
+	return (x - y);
+}
diff --git a/usr/src/libm/src/m9x/fdimf.c b/usr/src/libm/src/m9x/fdimf.c
new file mode 100644
index 0000000..84f56e5
--- /dev/null
+++ b/usr/src/libm/src/m9x/fdimf.c
@@ -0,0 +1,58 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"@(#)fdimf.c	1.5	06/01/31 SMI"
+
+#if defined(ELFOBJ)
+#pragma weak fdimf = __fdimf
+#endif
+
+#include "libm.h"	/* for islessequal macro */
+
+float
+__fdimf(float x, float y) {
+	int	clamp;
+
+	/*
+	 * On SPARC v8plus/v9, this could be implemented as follows
+	 * (assuming %f0 = x, %f1 = y, return value left in %f0):
+	 *
+	 * fcmps	%fcc0,%f0,%f1
+	 * st		%g0,[scratch]	! use fzero instead of st/ld
+	 * ld		[scratch],%f2	! if VIS is available
+	 * fnegs	%f2,%f3
+	 * fmovsle	%fcc0,%f2,%f0
+	 * fmovsle	%fcc0,%f3,%f1
+	 * fsubs	%f0,%f1,%f0
+	 */
+
+	/* clamp is nonzero only when x and y are ordered and x <= y */
+#if defined(COMPARISON_MACRO_BUG)
+	clamp = (x == x && y == y && x <= y);
+#else
+	clamp = islessequal(x, y);
+#endif
+	if (clamp) {
+		/* turn the operands into +0 and -0; the difference below is +0 */
+		x = 0.0f;
+		y = -x;
+	}
+	return (x - y);
+}
diff --git a/usr/src/libm/src/m9x/fdiml.c b/usr/src/libm/src/m9x/fdiml.c
new file mode 100644
index 0000000..3fffdc4
--- /dev/null
+++ b/usr/src/libm/src/m9x/fdiml.c
@@ -0,0 +1,46 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"@(#)fdiml.c	1.5	06/01/31 SMI"
+
+#if defined(ELFOBJ)
+#pragma weak fdiml = __fdiml
+#endif
+
+#include "libm.h"	/* for islessequal macro */
+
+long double
+__fdiml(long double x, long double y) {
+	int	clamp;
+
+	/* clamp is nonzero only when x and y are ordered and x <= y */
+#if defined(COMPARISON_MACRO_BUG)
+	clamp = (x == x && y == y && x <= y);
+#else
+	clamp = islessequal(x, y);
+#endif
+	if (clamp) {
+		/* turn the operands into +0 and -0; the difference below is +0 */
+		x = 0.0l;
+		y = -x;
+	}
+	return (x - y);
+}
diff --git a/usr/src/libm/src/m9x/feexcept.c b/usr/src/libm/src/m9x/feexcept.c
new file mode 100644
index 0000000..c4979f1
--- /dev/null
+++ b/usr/src/libm/src/m9x/feexcept.c
@@ -0,0 +1,135 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"@(#)feexcept.c	1.8	06/01/31 SMI"
+
+#pragma weak feclearexcept = __feclearexcept
+#pragma weak feraiseexcept = __feraiseexcept
+#pragma weak fetestexcept = __fetestexcept
+#pragma weak fegetexceptflag = __fegetexceptflag
+#pragma weak fesetexceptflag = __fesetexceptflag
+
+#pragma weak feclearexcept96 = __feclearexcept
+#pragma weak feraiseexcept96 = __feraiseexcept
+#pragma weak fetestexcept96 = __fetestexcept
+#pragma weak fegetexceptflag96 = __fegetexceptflag
+#pragma weak fesetexceptflag96 = __fesetexceptflag
+
+#include "fenv_synonyms.h"
+#include <fenv.h>
+#include <sys/ieeefp.h>
+#include <ucontext.h>
+#include <thread.h>
+#include "fex_handler.h"
+
+/*
+ * Clear the exception flags selected by e.  Always returns 0.
+ */
+int feclearexcept(int e)
+{
+	unsigned long	status;
+
+	/* fetch the status word, drop the selected flags, store it back */
+	__fenv_getfsr(&status);
+	__fenv_set_ex(status, __fenv_get_ex(status) & ~e);
+	__fenv_setfsr(&status);
+
+	/* with logging enabled, __fex_update_te must follow a flag change */
+	if (fex_get_log()) {
+		__fex_update_te();
+	}
+	return (0);
+}
+
+/*
+ * Raise the floating point exceptions selected by e.  Always returns 0.
+ *
+ * Invalid, division by zero and inexact are raised by carrying out an
+ * arithmetic operation on t that produces them.  Overflow and underflow
+ * are raised that way only when the corresponding trap is enabled;
+ * otherwise the flag is set directly in the status word so that inexact
+ * is not raised along with it.
+ *
+ * note - __fex_hdlr depends on fetestexcept following feraiseexcept
+ */
+int feraiseexcept(int e)
+{
+	/* NOTE(review): t is presumably volatile so the operations below */
+	/* are performed at run time rather than folded away -- confirm */
+	volatile double	t;
+	unsigned long	fsr;
+
+	if (e & FE_INVALID) {
+		/* 0/0 is an invalid operation */
+		t = 0.0;
+		t /= 0.0;
+	}
+	if (e & FE_DIVBYZERO) {
+		/* finite nonzero value divided by zero */
+		t = 1.0e300;
+		t /= 0.0;
+	}
+	if (e & FE_OVERFLOW) {
+		/* if overflow is not trapped, avoid raising inexact */
+		__fenv_getfsr(&fsr);
+		if (!(__fenv_get_te(fsr) & (1 << fp_trap_overflow))) {
+			/* set the flag directly in the status word */
+			__fenv_set_ex(fsr, __fenv_get_ex(fsr) | FE_OVERFLOW);
+			__fenv_setfsr(&fsr);
+		}
+		else {
+			/* trapped: perform a multiplication that overflows */
+			t = 1.0e300;
+			t *= 1.0e300;
+		}
+	}
+	if (e & FE_UNDERFLOW) {
+		/* if underflow is not trapped, avoid raising inexact */
+		__fenv_getfsr(&fsr);
+		if (!(__fenv_get_te(fsr) & (1 << fp_trap_underflow))) {
+			/* set the flag directly in the status word */
+			__fenv_set_ex(fsr, __fenv_get_ex(fsr) | FE_UNDERFLOW);
+			__fenv_setfsr(&fsr);
+		}
+		else {
+			/* trapped: perform a subtraction with a tiny result */
+			t = 1.0e-307;
+			t -= 1.001e-307;
+		}
+	}
+	if (e & FE_INEXACT) {
+		/* the small addend cannot be represented in the sum */
+		t = 1.0e300;
+		t += 1.0e-307;
+	}
+	return 0;
+}
+
+/*
+ * Return the subset of the exceptions selected by e whose flags are
+ * currently set.
+ */
+int fetestexcept(int e)
+{
+	unsigned long	status;
+	int				raised;
+
+	__fenv_getfsr(&status);
+	raised = (int)__fenv_get_ex(status);
+	return (raised & e);
+}
+
+/*
+ * Store in *p the current state of the exception flags selected by e.
+ * Always returns 0.
+ */
+int fegetexceptflag(fexcept_t *p, int e)
+{
+	unsigned long	status;
+	int				raised;
+
+	__fenv_getfsr(&status);
+	raised = (int)__fenv_get_ex(status);
+	*p = raised & e;
+	return (0);
+}
+
+/*
+ * Set the exception flags selected by e to the states saved in *p,
+ * leaving the other flags alone.  Always returns 0.
+ */
+int fesetexceptflag(const fexcept_t *p, int e)
+{
+	unsigned long	status;
+	int				kept;
+
+	__fenv_getfsr(&status);
+
+	/* flags outside e keep their current state */
+	kept = (int)__fenv_get_ex(status) & ~e;
+
+	/* flags inside e are taken from *p */
+	__fenv_set_ex(status, (kept | (*p & e)) & FE_ALL_EXCEPT);
+	__fenv_setfsr(&status);
+
+	/* with logging enabled, __fex_update_te must follow a flag change */
+	if (fex_get_log()) {
+		__fex_update_te();
+	}
+	return (0);
+}
diff --git a/usr/src/libm/src/m9x/fenv.c b/usr/src/libm/src/m9x/fenv.c
new file mode 100644
index 0000000..0054871
--- /dev/null
+++ b/usr/src/libm/src/m9x/fenv.c
@@ -0,0 +1,116 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"@(#)fenv.c	1.9	06/01/31 SMI"
+
+#pragma weak fex_merge_flags = __fex_merge_flags
+
+#pragma weak feholdexcept = __feholdexcept
+#pragma weak feupdateenv = __feupdateenv
+#pragma weak fegetenv = __fegetenv
+#pragma weak fesetenv = __fesetenv
+
+#pragma weak feholdexcept96 = __feholdexcept96
+#pragma weak feupdateenv96 = __feupdateenv
+#pragma weak fegetenv96 = __fegetenv
+#pragma weak fesetenv96 = __fesetenv
+
+#include "fenv_synonyms.h"
+#include <fenv.h>
+#include <ucontext.h>
+#include <thread.h>
+#include "fex_handler.h"
+
+/*
+ * Default floating point environment.  The first member holds twelve
+ * identical entries, each FEX_NONSTOP with a null handler pointer; the
+ * second member is 0x13000000 when __i386 is defined and 0 otherwise.
+ * NOTE(review): these presumably initialize the __handlers and __fsr
+ * members that fegetenv and fesetenv access -- confirm against the
+ * declaration of fenv_t.
+ */
+const fenv_t __fenv_dfl_env = {
+	{
+		{ FEX_NONSTOP, (void(*)())0 },
+		{ FEX_NONSTOP, (void(*)())0 },
+		{ FEX_NONSTOP, (void(*)())0 },
+		{ FEX_NONSTOP, (void(*)())0 },
+		{ FEX_NONSTOP, (void(*)())0 },
+		{ FEX_NONSTOP, (void(*)())0 },
+		{ FEX_NONSTOP, (void(*)())0 },
+		{ FEX_NONSTOP, (void(*)())0 },
+		{ FEX_NONSTOP, (void(*)())0 },
+		{ FEX_NONSTOP, (void(*)())0 },
+		{ FEX_NONSTOP, (void(*)())0 },
+		{ FEX_NONSTOP, (void(*)())0 },
+	},
+#ifdef __i386
+	0x13000000
+#else
+	0
+#endif
+};
+
+/*
+ * Save the current environment in *p, clear all exception flags and
+ * request nonstop handling for every exception.  Returns 0 when
+ * fex_set_handling reports a nonzero result and 1 when it reports 0.
+ */
+int feholdexcept(fenv_t *p)
+{
+	(void) fegetenv(p);
+	(void) feclearexcept(FE_ALL_EXCEPT);
+	if (fex_set_handling(FEX_ALL, FEX_NONSTOP, NULL))
+		return (0);
+	return (1);
+}
+
+/*
+ * Same steps as feholdexcept, except that the result of
+ * fex_set_handling is returned unchanged instead of being negated.
+ */
+int feholdexcept96(fenv_t *p)
+{
+	int	rc;
+
+	(void) fegetenv(p);
+	(void) feclearexcept(FE_ALL_EXCEPT);
+	rc = fex_set_handling(FEX_ALL, FEX_NONSTOP, NULL);
+	return (rc);
+}
+
+/*
+ * Install the environment *p, then re-raise the exceptions whose flags
+ * were set before the switch.  Always returns 0.
+ */
+int feupdateenv(const fenv_t *p)
+{
+	unsigned long	prior;
+
+	/* capture the status word before it is replaced */
+	__fenv_getfsr(&prior);
+
+	(void) fesetenv(p);
+
+	/* raise whatever was pending in the old environment */
+	(void) feraiseexcept((int)__fenv_get_ex(prior));
+	return (0);
+}
+
+/*
+ * Save the current environment in *p: the handlers for all exceptions
+ * go to p->__handlers and the status word to p->__fsr.  Always
+ * returns 0.
+ */
+int fegetenv(fenv_t *p)
+{
+	/* handlers first, then the status word */
+	fex_getexcepthandler(&p->__handlers, FEX_ALL);
+	__fenv_getfsr(&p->__fsr);
+
+	return (0);
+}
+
+/*
+ * Install the environment saved in *p.  Always returns 0.
+ */
+int fesetenv(const fenv_t *p)
+{
+	/* status word first, then the handlers for all exceptions */
+	__fenv_setfsr(&p->__fsr);
+	fex_setexcepthandler(&p->__handlers, FEX_ALL);
+
+	return (0);
+}
+
+/*
+ * Add the exception flags recorded in the saved environment *p to the
+ * flags that are currently set.
+ */
+void fex_merge_flags(const fenv_t *p)
+{
+	unsigned long	cur;
+
+	/* OR the saved flags into the current status word */
+	__fenv_getfsr(&cur);
+	__fenv_set_ex(cur, __fenv_get_ex(cur) | __fenv_get_ex(p->__fsr));
+	__fenv_setfsr(&cur);
+
+	/* with logging enabled, __fex_update_te must follow a flag change */
+	if (fex_get_log()) {
+		__fex_update_te();
+	}
+}
diff --git a/usr/src/libm/src/m9x/fenv_synonyms.h b/usr/src/libm/src/m9x/fenv_synonyms.h
new file mode 100644
index 0000000..14b32fe
--- /dev/null
+++ b/usr/src/libm/src/m9x/fenv_synonyms.h
@@ -0,0 +1,100 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"@(#)fenv_synonyms.h	1.7	06/01/31 SMI"
+
+/*
+ * Each macro below maps a public name onto an underscore-prefixed
+ * internal name, so that sources including this header define and call
+ * the internal names.  The groups are labelled with the file (or
+ * library) that provides the names.
+ */
+
+/* feexcept.c */
+#define feclearexcept	__feclearexcept
+#define feraiseexcept	__feraiseexcept
+#define fetestexcept	__fetestexcept
+#define fegetexceptflag	__fegetexceptflag
+#define fesetexceptflag	__fesetexceptflag
+
+/* fenv.c */
+#define feholdexcept	__feholdexcept
+#define feholdexcept96	__feholdexcept96
+#define feupdateenv		__feupdateenv
+#define fegetenv		__fegetenv
+#define fesetenv		__fesetenv
+#define fex_merge_flags	__fex_merge_flags
+
+#ifdef __i386
+/* feprec.c */
+#define fegetprec		__fegetprec
+#define fesetprec		__fesetprec
+#endif
+
+/* feround.c */
+#define fegetround		__fegetround
+#define fesetround		__fesetround
+#define fesetround96	__fesetround96
+
+/* fex_handler.c */
+#define fex_get_handling		__fex_get_handling
+#define fex_set_handling		__fex_set_handling
+#define fex_getexcepthandler	__fex_getexcepthandler
+#define fex_setexcepthandler	__fex_setexcepthandler
+
+/* fex_log.c */
+#define fex_get_log			__fex_get_log
+#define fex_set_log			__fex_set_log
+#define fex_get_log_depth	__fex_get_log_depth
+#define fex_set_log_depth	__fex_set_log_depth
+#define fex_log_entry		__fex_log_entry
+
+/* libc, libthread */
+#define close			_close
+#define getcontext		_getcontext
+#define getpid			_getpid
+#define kill			_kill
+#define lseek			_lseek
+#define mutex_lock		_mutex_lock
+#define mutex_unlock	_mutex_unlock
+#define open			_open
+#define read			_read
+#define sigaction		_sigaction
+#define sigemptyset		_sigemptyset
+#define sigismember		_sigismember
+#define sigprocmask		_sigprocmask
+#define stat			_stat
+#define thr_getspecific	_thr_getspecific
+#define thr_keycreate	_thr_keycreate
+#define thr_main		_thr_main
+#define thr_setspecific	_thr_setspecific
+#define write			_write
+
+/* ??? see V9 /usr/include/stdio.h */
+#ifdef __sparcv9
+#define fileno			_fileno
+#endif
+
+#ifdef __sparc
+/* libm, libsunmath */
+#define fp_class		__fp_class
+#define fp_classf		__fp_classf
+#define sqrt			__sqrt
+#define sqrtf			__sqrtf
+#endif
diff --git a/usr/src/libm/src/m9x/feprec.c b/usr/src/libm/src/m9x/feprec.c
new file mode 100644
index 0000000..56a64e2
--- /dev/null
+++ b/usr/src/libm/src/m9x/feprec.c
@@ -0,0 +1,56 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"@(#)feprec.c	1.5	06/01/31 SMI"
+
+#pragma weak fegetprec = __fegetprec
+#pragma weak fesetprec = __fesetprec
+
+#include "fenv_synonyms.h"
+#include <fenv.h>
+#include <ucontext.h>
+#include <thread.h>
+#include "fex_handler.h"
+
+/* Return the rounding-precision field of the current fp control state. */
+int fegetprec(void)
+{
+	unsigned long	state;	/* filled in by __fenv_getfsr() */
+	__fenv_getfsr(&state);
+	return (__fenv_get_rp(state));
+}
+
+/* Select rounding precision r; returns 1 on success, 0 if r is unknown. */
+int fesetprec(int r)
+{
+	unsigned long	state;
+	if (!(r == FE_FLTPREC || r == FE_DBLPREC || r == FE_LDBLPREC))
+		return (0);
+	__fenv_getfsr(&state);	/* read-modify-write the precision field */
+	__fenv_set_rp(state, r);
+	__fenv_setfsr(&state);
+	return (1);
+}
diff --git a/usr/src/libm/src/m9x/feround.c b/usr/src/libm/src/m9x/feround.c
new file mode 100644
index 0000000..2f0bc99
--- /dev/null
+++ b/usr/src/libm/src/m9x/feround.c
@@ -0,0 +1,81 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"@(#)feround.c	1.9	06/01/31 SMI"
+
+#pragma weak fegetround = __fegetround
+#pragma weak fesetround = __fesetround
+
+#pragma weak fegetround96 = __fegetround
+#pragma weak fesetround96 = __fesetround96
+
+#include "fenv_synonyms.h"
+#include <fenv.h>
+#include <ucontext.h>
+#include <thread.h>
+#include "fex_handler.h"
+
+#if defined(__i386) && !defined(__amd64)
+#include <float.h>
+#endif
+
+/* Return the rounding-direction field of the current fp control state. */
+int fegetround(void)
+{
+	unsigned long	state;	/* filled in by __fenv_getfsr() */
+	__fenv_getfsr(&state);
+	return ((int)__fenv_get_rd(state));
+}
+
+/* Set rounding direction r (0..3): 0 on success, -1 if r is out of range. */
+int fesetround(int r)
+{
+	unsigned long	fsr;
+	if (r & ~3)
+		return -1;
+	__fenv_getfsr(&fsr);
+	__fenv_set_rd(fsr, (unsigned long)r);	/* unsigned: no int overflow in shift */
+	__fenv_setfsr(&fsr);
+#if defined(__i386) && !defined(__amd64)
+	FLT_ROUNDS = (0x2D >> (r << 1)) & 3;	/* 0->1, 1->3, 2->2, 3->0 */
+#endif
+	return 0;
+}
+
+/* Like fesetround(), but returns 1 on success and 0 if r is out of range. */
+int fesetround96(int r)
+{
+	unsigned long	fsr;
+	if (r & ~3)
+		return 0;
+	__fenv_getfsr(&fsr);
+	__fenv_set_rd(fsr, (unsigned long)r);	/* unsigned: no int overflow in shift */
+	__fenv_setfsr(&fsr);
+#if defined(__i386) && !defined(__amd64)
+	FLT_ROUNDS = (0x2D >> (r << 1)) & 3;	/* 0->1, 1->3, 2->2, 3->0 */
+#endif
+	return 1;
+}
diff --git a/usr/src/libm/src/m9x/fex_handler.c b/usr/src/libm/src/m9x/fex_handler.c
new file mode 100644
index 0000000..3491e1c
--- /dev/null
+++ b/usr/src/libm/src/m9x/fex_handler.c
@@ -0,0 +1,91 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"@(#)fex_handler.c	1.5	06/01/31 SMI"
+
+#pragma weak fex_get_handling = __fex_get_handling
+#pragma weak fex_set_handling = __fex_set_handling
+#pragma weak fex_getexcepthandler = __fex_getexcepthandler
+#pragma weak fex_setexcepthandler = __fex_setexcepthandler
+
+#include "fenv_synonyms.h"
+#include <fenv.h>
+#include <ucontext.h>
+#include <thread.h>
+#include "fex_handler.h"
+
+/* Return the handling mode of the lowest-numbered exception selected in e. */
+int fex_get_handling(int e)
+{
+	struct fex_handler_data	*tab = __fex_get_thr_handlers();
+	int			bit = 0;
+	while (bit < FEX_NUM_EXC && !(e & (1 << bit)))
+		bit++;		/* skip exceptions not selected in e */
+	if (bit == FEX_NUM_EXC)
+		return FEX_NOHANDLER;	/* e selects none of them */
+	return tab[bit].__mode;
+}
+
+/* Set mode and handler for each exception selected in e; 0 if e is invalid. */
+int fex_set_handling(int e, int mode, void (*handler)())
+{
+	struct fex_handler_data	*slot;
+	int			bit;
+	if ((e & ~((1 << FEX_NUM_EXC) - 1)) != 0)
+		return (0);	/* unknown exception bits: change nothing */
+	slot = __fex_get_thr_handlers();
+	for (bit = 0; bit < FEX_NUM_EXC; bit++, slot++) {
+		if (!(e & (1 << bit)))
+			continue;
+		slot->__mode = mode;
+		slot->__handler = handler;
+	}
+	__fex_update_te();
+	return (1);
+}
+
+/* Copy the handler entries for the exceptions selected in e into *buf. */
+void fex_getexcepthandler(fex_handler_t *buf, int e)
+{
+	struct fex_handler_data	*src = __fex_get_thr_handlers();
+	int			n;
+	for (n = 0; n != FEX_NUM_EXC; n++) {
+		if (e & (1 << n))
+			(*buf)[n] = src[n];
+	}
+}
+
+/* Load handler entries selected in e from *buf, then call __fex_update_te(). */
+void fex_setexcepthandler(const fex_handler_t *buf, int e)
+{
+	struct fex_handler_data	*dst = __fex_get_thr_handlers();
+	int			n;
+	for (n = 0; n != FEX_NUM_EXC; n++) {
+		if (e & (1 << n))
+			dst[n] = (*buf)[n];
+	}
+	__fex_update_te();
+}
diff --git a/usr/src/libm/src/m9x/fex_handler.h b/usr/src/libm/src/m9x/fex_handler.h
new file mode 100644
index 0000000..9f8c259
--- /dev/null
+++ b/usr/src/libm/src/m9x/fex_handler.h
@@ -0,0 +1,215 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"@(#)fex_handler.h	1.8	06/01/31 SMI"
+
+/* the following enums must match the bit positions in fenv.h */
+enum fex_exception {
+	fex_inexact		= 0,	/* inexact result */
+	fex_division	= 1,	/* division by zero */
+	fex_underflow	= 2,	/* underflow */
+	fex_overflow	= 3,	/* overflow */
+	fex_inv_zdz		= 4,	/* invalid operation (0/0) */
+	fex_inv_idi		= 5,	/* invalid operation (inf/inf) */
+	fex_inv_isi		= 6,	/* invalid operation (inf-inf) */
+	fex_inv_zmi		= 7,	/* invalid operation (0*inf) */
+	fex_inv_sqrt	= 8,	/* invalid operation (sqrt) */
+	fex_inv_snan	= 9,	/* invalid operation (snan) */
+	fex_inv_int		= 10,	/* invalid operation (int) */
+	fex_inv_cmp		= 11	/* invalid operation (cmp) */
+};
+
+
+/* auxiliary functions in __fex_hdlr.c */
+extern struct fex_handler_data *__fex_get_thr_handlers(void);
+extern void __fex_update_te(void);
+
+/* auxiliary functions in __fex_sym.c */
+extern void __fex_sym_init(void);
+extern char *__fex_sym(char *, char **);
+
+/* auxiliary functions in fex_log.c */
+extern void __fex_mklog(ucontext_t *, char *, int, enum fex_exception,
+	int, void *);
+
+/* system-dependent auxiliary functions */
+extern enum fex_exception __fex_get_invalid_type(siginfo_t *, ucontext_t *);
+extern void __fex_get_op(siginfo_t *, ucontext_t *, fex_info_t *);
+extern void __fex_st_result(siginfo_t *, ucontext_t *, fex_info_t *);
+
+/* inline templates and macros for accessing fp state */
+#ifdef __sparcv9
+#define __fenv_getfsr	__fenv_getfsrx
+#define __fenv_setfsr	__fenv_setfsrx
+#endif
+extern void __fenv_getfsr(unsigned long *);
+extern void __fenv_setfsr(const unsigned long *);
+
+#if defined(__sparc)
+
+#define __fenv_get_rd(X)	(((X)>>30)&0x3)	/* fsr bits 31:30 */
+#define __fenv_set_rd(X,Y)	((X)=((X)&~0xc0000000ul)|((unsigned long)(Y)<<30))
+/* Y is widened before shifting: as an int, 2<<30 and 3<<30 would overflow */
+#define __fenv_get_te(X)	(((X)>>23)&0x1f)	/* fsr bits 27:23 */
+#define __fenv_set_te(X,Y)	((X)=((X)&~0x0f800000ul)|((unsigned long)(Y)<<23))
+/* all macro arguments are parenthesized so expression arguments are safe */
+#define __fenv_get_ex(X)	(((X)>>5)&0x1f)	/* fsr bits 9:5 */
+#define __fenv_set_ex(X,Y)	((X)=((X)&~0x000003e0ul)|((unsigned long)(Y)<<5))
+
+#elif defined(__i386)
+
+extern void __fenv_getcwsw(unsigned int *);
+extern void __fenv_setcwsw(const unsigned int *);
+
+extern void __fenv_getmxcsr(unsigned int *);
+extern void __fenv_setmxcsr(const unsigned int *);
+
+#define __fenv_get_rd(X)	(((X)>>26)&3)
+#define __fenv_set_rd(X,Y)	((X)=((X)&~0x0c000000)|((Y)<<26))
+/* rd occupies bits 27:26 and rp bits 25:24 of the combined word */
+#define __fenv_get_rp(X)	(((X)>>24)&3)
+#define __fenv_set_rp(X,Y)	((X)=((X)&~0x03000000)|((Y)<<24))
+/* te and ex use mask 0x3d, at bits 21:16 and 5:0 respectively */
+#define __fenv_get_te(X)	(((X)>>16)&0x3d)
+#define __fenv_set_te(X,Y)	((X)=((X)&~0x003d0000)|((Y)<<16))
+/* arguments are parenthesized so that expressions such as a|b are safe */
+#define __fenv_get_ex(X)	((X)&0x3d)
+#define __fenv_set_ex(X,Y)	((X)=((X)&~0x0000003d)|(Y))
+
+/*
+ * These macros define some useful distinctions between various
+ * SSE instructions.  In some cases, distinctions are made for
+ * the purpose of simplifying the decoding of instructions, while
+ * in other cases, they are made for the purpose of simplifying the
+ * emulation.  Note that these values serve as bit flags within
+ * the enum values in sseinst_t.
+ */
+#define DOUBLE		0x100
+#define SIMD		0x080
+#define INTREG		0x040
+
+typedef union {	/* one operand, viewed through any of four layouts */
+	double		d[2];	/* as two doubles */
+	long long	l[2];	/* as two long longs */
+	float		f[4];	/* as four floats */
+	int		i[4];	/* as four ints */
+} sseoperand_t;
+
+/* structure to hold a decoded SSE instruction */
+typedef struct {
+	enum {
+		/* single precision scalar instructions */
+		cmpss		= 0,
+		minss		= 1,
+		maxss		= 2,
+		addss		= 3,
+		subss		= 4,
+		mulss		= 5,
+		divss		= 6,
+		sqrtss		= 7,
+		ucomiss		= 16,
+		comiss		= 17,
+		cvtss2sd	= 32,
+		cvtsi2ss	= INTREG + 0,
+		cvttss2si	= INTREG + 1,
+		cvtss2si	= INTREG + 2,
+		cvtsi2ssq	= INTREG + 8,
+		cvttss2siq	= INTREG + 9,
+		cvtss2siq	= INTREG + 10,
+
+		/* single precision SIMD instructions */
+		cmpps		= SIMD + 0,
+		minps		= SIMD + 1,
+		maxps		= SIMD + 2,
+		addps		= SIMD + 3,
+		subps		= SIMD + 4,
+		mulps		= SIMD + 5,
+		divps		= SIMD + 6,
+		sqrtps		= SIMD + 7,
+		cvtps2pd	= SIMD + 32,
+		cvtdq2ps	= SIMD + 34,
+		cvttps2dq	= SIMD + 35,
+		cvtps2dq	= SIMD + 36,
+		cvtpi2ps	= SIMD + INTREG + 0,
+		cvttps2pi	= SIMD + INTREG + 1,
+		cvtps2pi	= SIMD + INTREG + 2,
+
+		/* double precision scalar instructions */
+		cmpsd		= DOUBLE + 0,
+		minsd		= DOUBLE + 1,
+		maxsd		= DOUBLE + 2,
+		addsd		= DOUBLE + 3,
+		subsd		= DOUBLE + 4,
+		mulsd		= DOUBLE + 5,
+		divsd		= DOUBLE + 6,
+		sqrtsd		= DOUBLE + 7,
+		ucomisd		= DOUBLE + 16,
+		comisd		= DOUBLE + 17,
+		cvtsd2ss	= DOUBLE + 32,
+		cvtsi2sd	= DOUBLE + INTREG + 0,
+		cvttsd2si	= DOUBLE + INTREG + 1,
+		cvtsd2si	= DOUBLE + INTREG + 2,
+		cvtsi2sdq	= DOUBLE + INTREG + 8,
+		cvttsd2siq	= DOUBLE + INTREG + 9,
+		cvtsd2siq	= DOUBLE + INTREG + 10,
+
+		/* double precision SIMD instructions */
+		cmppd		= DOUBLE + SIMD + 0,
+		minpd		= DOUBLE + SIMD + 1,
+		maxpd		= DOUBLE + SIMD + 2,
+		addpd		= DOUBLE + SIMD + 3,
+		subpd		= DOUBLE + SIMD + 4,
+		mulpd		= DOUBLE + SIMD + 5,
+		divpd		= DOUBLE + SIMD + 6,
+		sqrtpd		= DOUBLE + SIMD + 7,
+		cvtpd2ps	= DOUBLE + SIMD + 32,
+		cvtdq2pd	= DOUBLE + SIMD + 34,
+		cvttpd2dq	= DOUBLE + SIMD + 35,
+		cvtpd2dq	= DOUBLE + SIMD + 36,
+		cvtpi2pd	= DOUBLE + SIMD + INTREG + 0,
+		cvttpd2pi	= DOUBLE + SIMD + INTREG + 1,
+		cvtpd2pi	= DOUBLE + SIMD + INTREG + 2,
+	} op;	/* which instruction; DOUBLE, SIMD and INTREG act as flag bits */
+	int		imm;	/* presumably the immediate operand -- TODO confirm */
+	sseoperand_t	*op1, *op2;	/* the instruction's two operands */
+} sseinst_t;
+
+/* x86-specific auxiliary functions */
+extern int *__fex_accrued(void);
+extern void __fex_get_x86_exc(siginfo_t *, ucontext_t *);
+extern int __fex_parse_sse(ucontext_t *, sseinst_t *);
+extern enum fex_exception __fex_get_sse_op(ucontext_t *, sseinst_t *,
+	fex_info_t *);
+extern void __fex_get_simd_op(ucontext_t *, sseinst_t *,
+	enum fex_exception *, fex_info_t *);
+extern void __fex_st_sse_result(ucontext_t *, sseinst_t *,
+	enum fex_exception, fex_info_t *);
+extern void __fex_st_simd_result(ucontext_t *, sseinst_t *,
+	enum fex_exception *, fex_info_t *);
+
+#else
+#error Unknown architecture
+#endif
diff --git a/usr/src/libm/src/m9x/fex_log.c b/usr/src/libm/src/m9x/fex_log.c
new file mode 100644
index 0000000..62a0939
--- /dev/null
+++ b/usr/src/libm/src/m9x/fex_log.c
@@ -0,0 +1,398 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"@(#)fex_log.c	1.13	06/01/31 SMI"
+
+#pragma weak fex_get_log = __fex_get_log
+#pragma weak fex_set_log = __fex_set_log
+#pragma weak fex_get_log_depth = __fex_get_log_depth
+#pragma weak fex_set_log_depth = __fex_set_log_depth
+#pragma weak fex_log_entry = __fex_log_entry
+
+#include "fenv_synonyms.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <signal.h>
+#include <ucontext.h>
+#include <sys/frame.h>
+#include <fenv.h>
+#include <sys/ieeefp.h>
+#include <thread.h>
+#include "fex_handler.h"
+
+#if !defined(PC)
+#if defined(REG_PC)
+#define	PC	REG_PC
+#else
+#error Neither PC nor REG_PC is defined!
+#endif
+#endif
+
+static FILE *log_fp = NULL;	/* log stream; NULL means logging is disabled */
+static mutex_t log_lock = DEFAULTMUTEX;	/* held while reading or writing the log state */
+static int log_depth = 100;	/* limit on frames recorded and printed per entry */
+
+/* Return the current log stream (NULL if none), read under log_lock. */
+FILE *fex_get_log(void)
+{
+	FILE	*cur;
+	mutex_lock(&log_lock);
+	cur = log_fp;
+	mutex_unlock(&log_lock);
+	return (cur);
+}
+
+int fex_set_log(FILE *fp)	/* always returns 1 */
+{
+	(void) mutex_lock(&log_lock);
+	log_fp = fp;		/* a NULL fp disables logging */
+	(void) mutex_unlock(&log_lock);
+	__fex_update_te();	/* called only after the lock is released */
+	return (1);
+}
+
+/* Return the current stack-trace depth limit, read under log_lock. */
+int fex_get_log_depth(void)
+{
+	int	depth;
+	mutex_lock(&log_lock);
+	depth = log_depth;
+	mutex_unlock(&log_lock);
+	return (depth);
+}
+
+int fex_set_log_depth(int d)	/* returns 1 if d was accepted, else 0 */
+{
+	if (d >= 0) {
+		mutex_lock(&log_lock);
+		log_depth = d;
+		mutex_unlock(&log_lock);
+	}
+	return (d >= 0);	/* a negative depth is rejected, state untouched */
+}
+
+static struct exc_list {	/* one record per distinct entry already logged */
+	struct exc_list		*next;	/* singly linked; new nodes go on the tail */
+	char			*addr;	/* address key given to check_exc_list() */
+	unsigned long		code;	/* code key given to check_exc_list() */
+	int			nstack;	/* entries used in stack[]; 0 if log_depth < 1 */
+	char			*stack[1]; /* actual length is max(1,nstack) */
+} *list = NULL;
+
+#ifdef __sparcv9	/* odd values get 2047 added (presumably the V9 stack bias) */
+#define FRAMEP(X)	(struct frame *)((char*)(X)+(((long)(X)&1)?2047:0))
+#else			/* elsewhere the value is used as the frame address as is */
+#define FRAMEP(X)	(struct frame *)(X)
+#endif
+/* PDIG: field width, in hex digits, used when printing an address */
+#ifdef _LP64
+#define PDIG		"16"
+#else
+#define PDIG		"8"
+#endif
+
+/* look for a matching exc_list; return 1 if one is found,
+   otherwise add this one to the list and return 0 */
+static int check_exc_list(char *addr, unsigned long code, char *stk,
+    struct frame *fp)
+{
+	struct exc_list	*l, *ll;
+	struct frame	*f;
+	int		i, n;
+	/* ll trails l through the scan, so a full scan leaves it on the tail */
+	if (list) {
+		for (l = list; l; ll = l, l = l->next) {
+			if (l->addr != addr || l->code != code)
+				continue;
+			if (log_depth < 1 || l->nstack < 1)
+				return 1;	/* no stack to compare: keys suffice */
+			if (l->stack[0] != stk)
+				continue;
+			n = 1;	/* stays 1 unless a recorded pc differs */
+			for (i = 1, f = fp; i < log_depth && i < l->nstack &&
+			    f && f->fr_savpc; i++, f = FRAMEP(f->fr_savfp))
+				if (l->stack[i] != (char *)f->fr_savpc) {
+					n = 0;
+					break;
+				}
+			if (n)
+				return 1;
+		}
+	}
+	/* n = 1 for stk plus one per frame walked, capped at log_depth */
+	/* create a new exc_list structure and tack it on the list */
+	for (n = 1, f = fp; n < log_depth && f && f->fr_savpc;
+	    n++, f = FRAMEP(f->fr_savfp)) ;
+	if ((l = (struct exc_list *)malloc(sizeof(struct exc_list) +
+	    (n - 1) * sizeof(char *))) != NULL) {
+		l->next = NULL;
+		l->addr = addr;
+		l->code = code;
+		l->nstack = ((log_depth < 1)? 0 : n);
+		l->stack[0] = stk;
+		for (i = 1; i < n; i++) {
+			l->stack[i] = (char *)fp->fr_savpc;
+			fp = FRAMEP(fp->fr_savfp);
+		}
+		if (list)
+			ll->next = l;	/* ll was set by the scan above */
+		else
+			list = l;
+	}
+	return 0;	/* also reached when malloc fails: nothing is recorded */
+}
+
+/*
+* Warning: cleverness ahead
+*
+* In the following code, the use of sprintf+write rather than fprintf
+* to send output to the log file is intentional.  The reason is that
+* fprintf is not async-signal-safe.  "But," you protest, "SIGFPE is
+* not an asynchronous signal!  It's always handled by the same thread
+* that executed the fpop that provoked it."  That's true, but a prob-
+* lem arises because (i) base conversion in fprintf can cause a fp
+* exception and (ii) my signal handler acquires a mutex lock before
+* sending output to the log file (so that outputs for entries from
+* different threads aren't interspersed).  Therefore, if the code
+* were to use fprintf, a deadlock could occur as follows:
+*
+*	Thread A			Thread B
+*
+*	Incurs a fp exception,		Calls fprintf,
+*	acquires log_lock		acquires file rmutex lock
+*
+*	Calls fprintf,			Incurs a fp exception,
+*	waits for file rmutex lock	waits for log_lock
+*
+* (I could just verify that fprintf doesn't hold the rmutex lock while
+* it's doing the base conversion, but since efficiency is of little
+* concern here, I opted for the safe and dumb route.)
+*/
+
+/* Write up to log_depth frames to fd, one per line; stops after "main". */
+static void print_stack(int fd, char *addr, struct frame *fp)
+{
+	char	line[30], *sym;
+	int	depth = 0, len;
+	while (depth++ < log_depth && addr != NULL) {
+		if (__fex_sym(addr, &sym) == NULL) {
+			len = sprintf(line, "  0x%0" PDIG "lx\n", (long)addr);
+			write(fd, line, len);
+		} else {
+			len = sprintf(line, "  0x%0" PDIG "lx  ", (long)addr);
+			write(fd, line, len);
+			write(fd, sym, strlen(sym));
+			write(fd, "\n", 1);
+			if (strcmp(sym, "main") == 0)
+				return;
+		}
+		if (fp == NULL)
+			return;
+		addr = (char *)fp->fr_savpc;
+		fp = FRAMEP(fp->fr_savfp);
+	}
+}
+
+/*
+ * Append "fex_log_entry: <msg>" and a stack trace of the caller to the log.
+ * Does nothing if logging is disabled or check_exc_list() finds an earlier
+ * entry with the same return address, msg pointer, and stack.
+ */
+void fex_log_entry(const char *msg)
+{
+	ucontext_t	ctx;
+	struct frame	*fr;
+	char		*caller;
+	int		out;
+
+	mutex_lock(&log_lock);
+	if (log_fp == NULL)
+		goto done;		/* logging is disabled */
+
+	/* start from this function's own frame ... */
+	getcontext(&ctx);
+#if defined(__sparc) || defined(__amd64)
+	fr = FRAMEP(ctx.uc_mcontext.gregs[REG_SP]);
+#elif defined(__i386)	/* !defined(__amd64) */
+	fr = FRAMEP(ctx.uc_mcontext.gregs[EBP]);
+#else
+#error Unknown architecture
+#endif
+	if (fr == NULL)
+		goto done;
+
+	/* ... then pop it to get the caller's return address and frame */
+	caller = (char *)fr->fr_savpc;
+	fr = FRAMEP(fr->fr_savfp);
+
+	/* the msg pointer itself, not its text, is the duplicate key */
+	if (check_exc_list(caller, (unsigned long)msg, caller, fr))
+		goto done;
+
+	/* make an entry */
+	out = fileno(log_fp);
+	write(out, "fex_log_entry: ", 15);
+	write(out, msg, strlen(msg));
+	write(out, "\n", 1);
+	__fex_sym_init();
+	print_stack(out, caller, fr);
+done:
+	mutex_unlock(&log_lock);
+}
+
+static const char *exception[FEX_NUM_EXC] = {	/* indexed by enum fex_exception */
+	"inexact result",
+	"division by zero",
+	"underflow",
+	"overflow",
+	"invalid operation (0/0)",
+	"invalid operation (inf/inf)",
+	"invalid operation (inf-inf)",
+	"invalid operation (0*inf)",
+	"invalid operation (sqrt)",
+	"invalid operation (snan)",
+	"invalid operation (int)",
+	"invalid operation (cmp)"
+};
+
+/*
+ * Log floating point exception e, reported at addr, together with a stack
+ * trace that starts at the pc and frame pointer saved in uap.
+ *
+ *	uap	context of the interrupted code; supplies the pc and the
+ *		frame pointer at which the stack trace starts
+ *	addr	address printed in the entry; part of the duplicate key
+ *	f	FE_* flags that were already raised
+ *	e	the exception; also the index into exception[]
+ *	m	handling mode: FEX_NONSTOP, FEX_ABORT, FEX_NOHANDLER, or other
+ *	p	handler address; printed unless m is FEX_NONSTOP or
+ *		FEX_ABORT
+ *
+ * Nothing is written if logging is disabled, if m is FEX_NONSTOP and the
+ * flag for e is already set in f, or if check_exc_list() finds an earlier
+ * entry for the same addr, e, and stack.
+ *
+ * log_lock is held throughout, and output uses sprintf()+write() rather
+ * than fprintf(); the reasons are given in the comment above print_stack().
+ */
+void
+__fex_mklog(ucontext_t *uap, char *addr, int f, enum fex_exception e,
+    int m, void *p)
+{
+	struct frame	*fr;		/* frame where the trace starts */
+	char		*pc;		/* pc of the interrupted code */
+	char		*sym, num[30];	/* symbol name; formatted address */
+	int		out, flag, len;
+
+	mutex_lock(&log_lock);
+	if (log_fp == NULL)
+		goto done;		/* logging is disabled */
+
+	/* pick up the pc and frame pointer of the interrupted code */
+#if defined(__sparc)
+	pc = (char*)uap->uc_mcontext.gregs[REG_PC];
+	fr = FRAMEP(uap->uc_mcontext.gregs[REG_SP]);
+#elif defined(__amd64)
+	pc = (char*)uap->uc_mcontext.gregs[REG_PC];
+	fr = FRAMEP(uap->uc_mcontext.gregs[REG_RBP]);
+#elif defined(__i386)	/* !defined(__amd64) */
+	pc = (char*)uap->uc_mcontext.gregs[PC];
+	fr = FRAMEP(uap->uc_mcontext.gregs[EBP]);
+#else
+#error Unknown architecture
+#endif
+
+	/*
+	 * In the default (nonstop) mode, an exception whose flag is already
+	 * raised is not reported again.  Every fex_inv_* value shares the
+	 * single FE_INVALID flag.
+	 */
+	if (m == FEX_NONSTOP) {
+		switch (e) {
+		case fex_inexact:
+			flag = FE_INEXACT;
+			break;
+		case fex_underflow:
+			flag = FE_UNDERFLOW;
+			break;
+		case fex_overflow:
+			flag = FE_OVERFLOW;
+			break;
+		case fex_division:
+			flag = FE_DIVBYZERO;
+			break;
+		default:
+			flag = FE_INVALID;
+			break;
+		}
+		if (f & flag)
+			goto done;
+	}
+
+	/* skip exceptions already logged at this address with this stack */
+	if (check_exc_list(addr, (unsigned long)e, pc, fr))
+		goto done;
+
+	/* "Floating point <what> at 0x<addr>[ <symbol>]" */
+	out = fileno(log_fp);
+	write(out, "Floating point ", 15);
+	write(out, exception[e], strlen(exception[e]));
+	len = sprintf(num, " at 0x%0" PDIG "lx", (long)addr);
+	write(out, num, len);
+	__fex_sym_init();
+	if (__fex_sym(addr, &sym) != NULL) {
+		write(out, " ", 1);
+		write(out, sym, strlen(sym));
+	}
+
+	/* then the handling mode or the handler */
+	if (m == FEX_NONSTOP) {
+		write(out, ", nonstop mode\n", 15);
+	} else if (m == FEX_ABORT) {
+		write(out, ", abort\n", 8);
+	} else if (m == FEX_NOHANDLER && p == (void *)SIG_DFL) {
+		write(out, ", handler: SIG_DFL\n", 19);
+	} else if (m == FEX_NOHANDLER && p == (void *)SIG_IGN) {
+		write(out, ", handler: SIG_IGN\n", 19);
+	} else {
+		/* name the handler if it has a symbol, else print its address */
+		write(out, ", handler: ", 11);
+		if (__fex_sym((char *)p, &sym) != NULL) {
+			write(out, sym, strlen(sym));
+			write(out, "\n", 1);
+		} else {
+			len = sprintf(num, "0x%0" PDIG "lx\n", (long)p);
+			write(out, num, len);
+		}
+	}
+
+	/* finish with the stack trace */
+	print_stack(out, pc, fr);
+done:
+	mutex_unlock(&log_lock);
+}
diff --git a/usr/src/libm/src/m9x/fma.c b/usr/src/libm/src/m9x/fma.c
new file mode 100644
index 0000000..ff13ee3
--- /dev/null
+++ b/usr/src/libm/src/m9x/fma.c
@@ -0,0 +1,608 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"@(#)fma.c	1.5	06/01/31 SMI"
+
+#if defined(ELFOBJ)
+#pragma weak fma = __fma
+#endif
+
+#include "libm.h"
+#include "fma.h"
+
+#if defined(__sparc)
+
+static const union {
+	unsigned i[2];	/* i[0] is the high word (this branch is SPARC only) */
+	double d;
+} C[] = {
+	{ 0x3fe00000u, 0 },	/* 0.5 */
+	{ 0x40000000u, 0 },	/* 2 */
+	{ 0x43300000u, 0 },	/* 2^52 */
+	{ 0x41a00000u, 0 },	/* 2^27 */
+	{ 0x3e500000u, 0 },	/* 2^-26 */
+	{ 0x3df00000u, 0 },	/* 2^-32 */
+	{ 0x3bf00000u, 0 },	/* 2^-64 */
+	{ 0x7fe00000u, 0 },	/* 2^1023 */
+	{ 0x00100000u, 0 },	/* 2^-1022 */
+	{ 0x00100001u, 0 }	/* 2^-1022 * (1 + 2^-20) */
+};
+
+#define	half	C[0].d
+#define	two	C[1].d
+#define	two52	C[2].d
+#define	two27	C[3].d
+#define	twom26	C[4].d
+#define	twom32	C[5].d
+#define	twom64	C[6].d
+#define	huge	C[7].d
+#define	tiny	C[8].d
+#define	tiny2	C[9].d
+/* fsr image used by __fma while it works: bits 31:30 set, all others clear */
+static const unsigned fsr_rm = 0xc0000000u;
+
+/*
+ * fma for SPARC: 64-bit double precision, big-endian; computes x * y + z
+ */
+double
+__fma(double x, double y, double z) {
+	union {
+		unsigned i[2];
+		double d;
+	} xx, yy, zz;
+	double xhi, yhi, xlo, ylo, t;
+	unsigned xy0, xy1, xy2, xy3, z0, z1, z2, z3, fsr, rm, sticky;
+	int hx, hy, hz, ex, ey, ez, exy, sxy, sz, e, ibit;
+	volatile double	dummy;	/* scratch for the operations that raise exceptions */
+
+	/* extract the high order words of the arguments */
+	xx.d = x;
+	yy.d = y;
+	zz.d = z;
+	hx = xx.i[0] & ~0x80000000;	/* high word with the sign bit cleared */
+	hy = yy.i[0] & ~0x80000000;
+	hz = zz.i[0] & ~0x80000000;
+
+	/* dispense with inf, nan, and zero cases */
+	if (hx >= 0x7ff00000 || hy >= 0x7ff00000 || (hx | xx.i[1]) == 0 ||
+		(hy | yy.i[1]) == 0)	/* x or y is inf, nan, or zero */
+		return (x * y + z);
+
+	if (hz >= 0x7ff00000)	/* z is inf or nan */
+		return (x + z);	/* avoid spurious under/overflow in x * y */
+
+	if ((hz | zz.i[1]) == 0)	/* z is zero */
+		/*
+		 * x * y isn't zero but could underflow to zero,
+		 * so don't add z, lest we perturb the sign
+		 */
+		return (x * y);
+
+	/*
+	 * now x, y, and z are all finite and nonzero; save the fsr and
+	 * set round-to-negative-infinity mode (and clear nonstandard
+	 * mode before we try to scale subnormal operands)
+	 */
+	__fenv_getfsr(&fsr);
+	__fenv_setfsr(&fsr_rm);
+
+	/* extract signs and exponents, and normalize subnormals */
+	sxy = (xx.i[0] ^ yy.i[0]) & 0x80000000;
+	sz = zz.i[0] & 0x80000000;
+	ex = hx >> 20;
+	if (!ex) {
+		xx.d = x * two52;	/* scale by 2^52, then correct the exponent */
+		ex = ((xx.i[0] & ~0x80000000) >> 20) - 52;
+	}
+	ey = hy >> 20;
+	if (!ey) {
+		yy.d = y * two52;
+		ey = ((yy.i[0] & ~0x80000000) >> 20) - 52;
+	}
+	ez = hz >> 20;
+	if (!ez) {
+		zz.d = z * two52;
+		ez = ((zz.i[0] & ~0x80000000) >> 20) - 52;
+	}
+
+	/* multiply x*y to 106 bits */
+	exy = ex + ey - 0x3ff;
+	xx.i[0] = (xx.i[0] & 0xfffff) | 0x3ff00000;	/* force exponent 0x3ff */
+	yy.i[0] = (yy.i[0] & 0xfffff) | 0x3ff00000;
+	x = xx.d;
+	y = yy.d;
+	xhi = ((x + twom26) + two27) - two27;	/* split x into xhi + xlo */
+	yhi = ((y + twom26) + two27) - two27;
+	xlo = x - xhi;
+	ylo = y - yhi;
+	x *= y;
+	y = ((xhi * yhi - x) + xhi * ylo + xlo * yhi) + xlo * ylo;
+	if (x >= two) {
+		x *= half;
+		y *= half;
+		exy++;
+	}
+
+	/* extract the significands */
+	xx.d = x;
+	xy0 = (xx.i[0] & 0xfffff) | 0x100000;	/* 0x100000 is the integer bit */
+	xy1 = xx.i[1];
+	yy.d = t = y + twom32;
+	xy2 = yy.i[1];
+	yy.d = (y - (t - twom32)) + twom64;
+	xy3 = yy.i[1];
+	z0 = (zz.i[0] & 0xfffff) | 0x100000;
+	z1 = zz.i[1];
+	z2 = z3 = 0;
+
+	/*
+	 * now x*y is represented by sxy, exy, and xy[0-3], and z is
+	 * represented likewise; swap if need be so |xy| <= |z|
+	 */
+	if (exy > ez || (exy == ez && (xy0 > z0 || (xy0 == z0 &&
+		(xy1 > z1 || (xy1 == z1 && (xy2 | xy3) != 0)))))) {
+		e = sxy; sxy = sz; sz = e;
+		e = exy; exy = ez; ez = e;
+		e = xy0; xy0 = z0; z0 = e;
+		e = xy1; xy1 = z1; z1 = e;
+		z2 = xy2; xy2 = 0;
+		z3 = xy3; xy3 = 0;
+	}
+
+	/* shift the significand of xy keeping a sticky bit */
+	e = ez - exy;
+	if (e > 116) {
+		xy0 = xy1 = xy2 = 0;
+		xy3 = 1;
+	} else if (e >= 96) {
+		sticky = xy3 | xy2 | xy1 | ((xy0 << 1) << (127 - e));
+		xy3 = xy0 >> (e - 96);
+		if (sticky)
+			xy3 |= 1;
+		xy0 = xy1 = xy2 = 0;
+	} else if (e >= 64) {
+		sticky = xy3 | xy2 | ((xy1 << 1) << (95 - e));
+		xy3 = (xy1 >> (e - 64)) | ((xy0 << 1) << (95 - e));
+		if (sticky)
+			xy3 |= 1;
+		xy2 = xy0 >> (e - 64);
+		xy0 = xy1 = 0;
+	} else if (e >= 32) {
+		sticky = xy3 | ((xy2 << 1) << (63 - e));
+		xy3 = (xy2 >> (e - 32)) | ((xy1 << 1) << (63 - e));
+		if (sticky)
+			xy3 |= 1;
+		xy2 = (xy1 >> (e - 32)) | ((xy0 << 1) << (63 - e));
+		xy1 = xy0 >> (e - 32);
+		xy0 = 0;
+	} else if (e) {
+		sticky = (xy3 << 1) << (31 - e);
+		xy3 = (xy3 >> e) | ((xy2 << 1) << (31 - e));
+		if (sticky)
+			xy3 |= 1;
+		xy2 = (xy2 >> e) | ((xy1 << 1) << (31 - e));
+		xy1 = (xy1 >> e) | ((xy0 << 1) << (31 - e));
+		xy0 >>= e;
+	}
+
+	/* if this is a magnitude subtract, negate the significand of xy */
+	if (sxy ^ sz) {
+		xy0 = ~xy0;
+		xy1 = ~xy1;
+		xy2 = ~xy2;
+		xy3 = -xy3;
+		if (xy3 == 0)	/* the +1 of the negation carries upward */
+			if (++xy2 == 0)
+				if (++xy1 == 0)
+					xy0++;
+	}
+
+	/* add, propagating carries */
+	z3 += xy3;
+	e = (z3 < xy3);	/* e is reused here as the carry out of each word */
+	z2 += xy2;
+	if (e) {
+		z2++;
+		e = (z2 <= xy2);
+	} else
+		e = (z2 < xy2);
+	z1 += xy1;
+	if (e) {
+		z1++;
+		e = (z1 <= xy1);
+	} else
+		e = (z1 < xy1);
+	z0 += xy0;
+	if (e)
+		z0++;
+
+	/* postnormalize and collect rounding information into z2 */
+	if (ez < 1) {
+		/* result is tiny; shift right until exponent is within range */
+		e = 1 - ez;
+		if (e > 56) {
+			z2 = 1;	/* result can't be exactly zero */
+			z0 = z1 = 0;
+		} else if (e >= 32) {
+			sticky = z3 | z2 | ((z1 << 1) << (63 - e));
+			z2 = (z1 >> (e - 32)) | ((z0 << 1) << (63 - e));
+			if (sticky)
+				z2 |= 1;
+			z1 = z0 >> (e - 32);
+			z0 = 0;
+		} else {
+			sticky = z3 | (z2 << 1) << (31 - e);
+			z2 = (z2 >> e) | ((z1 << 1) << (31 - e));
+			if (sticky)
+				z2 |= 1;
+			z1 = (z1 >> e) | ((z0 << 1) << (31 - e));
+			z0 >>= e;
+		}
+		ez = 1;
+	} else if (z0 >= 0x200000) {
+		/* carry out; shift right by one */
+		sticky = (z2 & 1) | z3;
+		z2 = (z2 >> 1) | (z1 << 31);
+		if (sticky)
+			z2 |= 1;
+		z1 = (z1 >> 1) | (z0 << 31);
+		z0 >>= 1;
+		ez++;
+	} else {
+		if (z0 < 0x100000 && (z0 | z1 | z2 | z3) != 0) {
+			/*
+			 * borrow/cancellation; shift left as much as
+			 * exponent allows
+			 */
+			while (!(z0 | (z1 & 0xffe00000)) && ez >= 33) {
+				z0 = z1;
+				z1 = z2;
+				z2 = z3;
+				z3 = 0;
+				ez -= 32;
+			}
+			while (z0 < 0x100000 && ez > 1) {
+				z0 = (z0 << 1) | (z1 >> 31);
+				z1 = (z1 << 1) | (z2 >> 31);
+				z2 = (z2 << 1) | (z3 >> 31);
+				z3 <<= 1;
+				ez--;
+			}
+		}
+		if (z3)
+			z2 |= 1;
+	}
+
+	/* get the rounding mode and clear current exceptions */
+	rm = fsr >> 30;	/* taken from the fsr saved on entry, not from fsr_rm */
+	fsr &= ~FSR_CEXC;
+
+	/* strip off the integer bit, if there is one */
+	ibit = z0 & 0x100000;
+	if (ibit)
+		z0 -= 0x100000;
+	else {
+		ez = 0;
+		if (!(z0 | z1 | z2)) { /* exact zero */
+			zz.i[0] = rm == FSR_RM ? 0x80000000 : 0;
+			zz.i[1] = 0;
+			__fenv_setfsr(&fsr);
+			return (zz.d);
+		}
+	}
+
+	/*
+	 * flip the sense of directed roundings if the result is negative;
+	 * the logic below applies to a positive result
+	 */
+	if (sz)
+		rm ^= rm >> 1;	/* maps 2 <-> 3 and leaves 0 and 1 alone */
+
+	/* round and raise exceptions */
+	if (z2) {
+		fsr |= FSR_NXC;
+
+		/* decide whether to round the fraction up */
+		if (rm == FSR_RP || (rm == FSR_RN && (z2 > 0x80000000u ||
+			(z2 == 0x80000000u && (z1 & 1))))) {
+			/* round up and renormalize if necessary */
+			if (++z1 == 0) {
+				if (++z0 == 0x100000) {
+					z0 = 0;
+					ez++;
+				}
+			}
+		}
+	}
+
+	/* check for under/overflow */
+	if (ez >= 0x7ff) {
+		if (rm == FSR_RN || rm == FSR_RP) {
+			zz.i[0] = sz | 0x7ff00000;	/* infinity */
+			zz.i[1] = 0;
+		} else {
+			zz.i[0] = sz | 0x7fefffff;	/* largest finite magnitude */
+			zz.i[1] = 0xffffffff;
+		}
+		fsr |= FSR_OFC | FSR_NXC;
+	} else {
+		zz.i[0] = sz | (ez << 20) | z0;
+		zz.i[1] = z1;
+
+		/*
+		 * !ibit => exact result was tiny before rounding,
+		 * z2 nonzero => result delivered is inexact
+		 */
+		if (!ibit) {
+			if (z2)
+				fsr |= FSR_UFC | FSR_NXC;
+			else if (fsr & FSR_UFM)
+				fsr |= FSR_UFC;
+		}
+	}
+
+	/* restore the fsr and emulate exceptions as needed */
+	if ((fsr & FSR_CEXC) & (fsr >> 23)) {
+		__fenv_setfsr(&fsr);
+		if (fsr & FSR_OFC) {
+			dummy = huge;
+			dummy *= huge;
+		} else if (fsr & FSR_UFC) {
+			dummy = tiny;
+			if (fsr & FSR_NXC)
+				dummy *= tiny;
+			else
+				dummy -= tiny2;
+		} else {
+			dummy = huge;
+			dummy += tiny;
+		}
+	} else {
+		fsr |= (fsr & 0x1f) << 5;	/* OR bits 4:0 into bits 9:5 as well */
+		__fenv_setfsr(&fsr);
+	}
+	return (zz.d);
+}
+
+#elif defined(__i386)
+
+#if defined(__amd64)
+#define	NI	4
+#else
+#define	NI	3
+#endif
+
+/*
+ * fma for x86 (little-endian): x * y + z via double extended, cast to double
+ */
+double
+__fma(double x, double y, double z) {
+	union {
+		unsigned i[NI];	/* i[0..1]: significand, i[2]: sign/exponent */
+		long double e;
+	} xx, yy, zz;
+	long double xe, ye, xhi, xlo, yhi, ylo;
+	int ex, ey, ez;
+	unsigned cwsw, oldcwsw, rm;
+
+	/* convert the operands to double extended */
+	xx.e = (long double) x;
+	yy.e = (long double) y;
+	zz.e = (long double) z;
+
+	/* extract the biased 15-bit exponent fields of the arguments */
+	ex = xx.i[2] & 0x7fff;
+	ey = yy.i[2] & 0x7fff;
+	ez = zz.i[2] & 0x7fff;
+
+	/* dispense with inf, nan, and zero cases using ordinary arithmetic */
+	if (ex == 0x7fff || ey == 0x7fff || ex == 0 || ey == 0)
+		/* x or y is inf, nan, or zero */
+		return ((double) (xx.e * yy.e + zz.e));
+
+	if (ez >= 0x7fff) /* z is inf or nan */
+		return ((double) (xx.e + zz.e));
+					/* avoid spurious inexact in x * y */
+
+	/*
+	 * save the control and status words, mask all exceptions, and
+	 * set rounding to 64-bit precision and to-nearest
+	 */
+	__fenv_getcwsw(&oldcwsw);
+	cwsw = (oldcwsw & 0xf0c0ffff) | 0x033f0000; /* FCW_P64 | FCW_ALLM */
+	__fenv_setcwsw(&cwsw);
+
+	/* multiply x*y to 106 bits, held as the pair (xe, ye) */
+	xe = xx.e;
+	xx.i[0] = 0;	/* drop the low 32 significand bits */
+	xhi = xx.e; /* hi 32 bits */
+	xlo = xe - xhi; /* lo 21 bits */
+	ye = yy.e;
+	yy.i[0] = 0;	/* likewise split y into yhi + ylo */
+	yhi = yy.e;
+	ylo = ye - yhi;
+	xe = xe * ye;
+	ye = ((xhi * yhi - xe) + xhi * ylo + xlo * yhi) + xlo * ylo;
+
+	/* distill the sum of xe, ye, and z */
+	xhi = ye + zz.e;
+	yhi = xhi - ye;
+	xlo = (zz.e - yhi) + (ye - (xhi - yhi));
+						/* now (xhi,xlo) = ye + z */
+
+	yhi = xe + xhi;
+	ye = yhi - xe;
+	ylo = (xhi - ye) + (xe - (yhi - ye));	/* now (yhi,ylo) = xe + xhi */
+
+	xhi = xlo + ylo;
+	xe = xhi - xlo;
+	xlo = (ylo - xe) + (xlo - (xhi - xe));	/* now (xhi,xlo) = xlo + ylo */
+
+	yy.e = yhi + xhi;
+	ylo = (yhi - yy.e) + xhi;		/* now (yy.e,ylo) = xhi + yhi */
+
+	if (yy.i[1] != 0) {	/* yy.e is nonzero */
+		/* perturb yy.e if its least significant 10 bits are zero */
+		if (!(yy.i[0] & 0x3ff)) {
+			xx.e = ylo + xlo;	/* what yy.e left out */
+			if (xx.i[1] != 0) {	/* nonzero: use 1 ulp of yy.e */
+				xx.i[2] = (xx.i[2] & 0x8000) |
+					((yy.i[2] & 0x7fff) - 63);
+				xx.i[1] = 0x80000000;
+				xx.i[0] = 0;
+				yy.e += xx.e;
+			}
+		}
+	} else {
+		/* set sign of zero result according to rounding direction */
+		rm = oldcwsw & 0x0c000000;	/* rounding-control field */
+		yy.i[2] = ((rm == FCW_RM)? 0x8000 : 0);
+	}
+
+	/*
+	 * restore the control and status words and convert the result
+	 * to double
+	 */
+	__fenv_setcwsw(&oldcwsw);
+	return ((double) yy.e);
+}
+
+#if 0
+/*
+ * another fma for x86: assumes return value will be left in
+ * long double (80-bit double extended) precision; disabled by #if 0
+ */
+long double
+__fma(double x, double y, double z) {
+	union {
+		unsigned i[3];	/* i[0..1]: significand, i[2]: sign/exponent */
+		long double e;
+	} xx, yy, zz, tt;
+	long double xe, ye, xhi, xlo, yhi, ylo, zhi, zlo;
+	int ex, ey, ez;
+	unsigned cwsw, oldcwsw, s;
+
+	/* convert the operands to double extended */
+	xx.e = (long double) x;
+	yy.e = (long double) y;
+	zz.e = (long double) z;
+
+	/* extract the biased 15-bit exponent fields of the arguments */
+	ex = xx.i[2] & 0x7fff;
+	ey = yy.i[2] & 0x7fff;
+	ez = zz.i[2] & 0x7fff;
+
+	/* dispense with inf, nan, and zero cases */
+	if (ex == 0x7fff || ey == 0x7fff || ex == 0 || ey == 0)
+		/* x or y is inf, nan, or zero */
+		return (xx.e * yy.e + zz.e);
+
+	if (ez >= 0x7fff) /* z is inf or nan */
+		return (xx.e + zz.e);	/* avoid spurious inexact in x * y */
+
+	if (ez == 0) /* z is zero */
+		return (xx.e * yy.e);	/* x * y isn't zero; no need to add z */
+
+	/*
+	 * save the control and status words, mask all exceptions, and
+	 * set rounding to 64-bit precision and to-nearest
+	 */
+	__fenv_getcwsw(&oldcwsw);
+	cwsw = (oldcwsw & 0xf0c0ffff) | 0x033f0000; /* FCW_P64 | FCW_ALLM */
+	__fenv_setcwsw(&cwsw);
+
+	/* multiply x*y to 106 bits, held as the pair (xx.e, yy.e) */
+	xe = xx.e;
+	xx.i[0] = 0;	/* drop the low 32 significand bits */
+	xhi = xx.e; /* hi 32 bits */
+	xlo = xe - xhi; /* lo 21 bits */
+	ye = yy.e;
+	yy.i[0] = 0;	/* likewise split y into yhi + ylo */
+	yhi = yy.e;
+	ylo = ye - yhi;
+	xx.e = xe * ye;
+	xx.i[0] &= ~0x7ff; /* 53 bits of x*y */
+	yy.e = ((xhi * yhi - xx.e) + xhi * ylo + xlo * yhi) + xlo * ylo;
+
+	/* reduce to a sum of two terms, chosen by the exponent gap ex - ez */
+	if (yy.e != 0.0) {
+		ex = xx.i[2] & 0x7fff;
+		if (ez - ex > 10) {
+			/* collapse y into a single bit and add to x */
+			yy.i[0] = 0;
+			yy.i[1] = 0x80000000;
+			yy.i[2] = (yy.i[2] & 0x8000) | (ex - 60);
+			xx.e += yy.e;
+		} else if (ex - ez <= 10) {
+			xx.e += zz.e; /* exact */
+			zz.e = yy.e;
+		} else if (ex - ez <= 42) {
+			/* split z into two pieces */
+			tt.i[0] = 0;
+			tt.i[1] = 0x80000000;
+			tt.i[2] = ex + 11;
+			zhi = (zz.e + tt.e) - tt.e;
+			zlo = zz.e - zhi;
+			xx.e += zhi;
+			zz.e = yy.e + zlo;
+		} else if (ex - ez <= 63) {
+			zz.e += yy.e; /* exact */
+		} else if (ex - ez <= 106) {
+			/*
+			 * collapse the tail of z into a sticky bit and add z
+			 * to y without error (1u: the shift count reaches 31)
+			 */
+			if (ex - ez <= 81) {
+				s = 1u << (ex - ez - 50);
+				if (zz.i[0] & (s - 1))
+					zz.i[0] |= s;
+				zz.i[0] &= ~(s - 1);
+			} else {
+				s = 1u << (ex - ez - 82);
+				if ((zz.i[1] & (s - 1)) | zz.i[0])
+					zz.i[1] |= s;
+				zz.i[1] &= ~(s - 1);
+				zz.i[0] = 0;
+			}
+			zz.e += yy.e;
+		} else {
+			/* collapse z into a single bit and add to y */
+			zz.i[0] = 0;
+			zz.i[1] = 0x80000000;
+			zz.i[2] = (zz.i[2] & 0x8000) | (ex - 113);
+			zz.e += yy.e;
+		}
+	}
+
+	/* restore the control and status words, and sum */
+	__fenv_setcwsw(&oldcwsw);
+	return (xx.e + zz.e);
+}
+#endif
+
+#else
+#error Unknown architecture
+#endif
diff --git a/usr/src/libm/src/m9x/fma.h b/usr/src/libm/src/m9x/fma.h
new file mode 100644
index 0000000..9e2b718
--- /dev/null
+++ b/usr/src/libm/src/m9x/fma.h
@@ -0,0 +1,125 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _FMA_H
+#define	_FMA_H
+
+#pragma ident	"@(#)fma.h	1.3	06/01/31 SMI"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef __sparc
+
+/*
+ * Common definitions for fma routines (SPARC)
+ */
+
+/* fsr fields (masks for the 32-bit word handled by __fenv_getfsr) */
+
+/* current exception (cexc) bits: the five lowest bits of the fsr */
+#define	FSR_NXC		0x1	/* inexact */
+#define	FSR_DZC		0x2	/* division by zero */
+#define	FSR_UFC		0x4	/* underflow */
+#define	FSR_OFC		0x8	/* overflow */
+#define	FSR_NVC		0x10	/* invalid */
+#define	FSR_CEXC	0x1f	/* mask for all cexc bits */
+
+/* accrued exception bits: the matching cexc bit shifted left by 5 */
+#define	FSR_NXA		0x20
+#define	FSR_DZA		0x40
+#define	FSR_UFA		0x80
+#define	FSR_OFA		0x100
+#define	FSR_NVA		0x200
+
+/* trap enable bits: the matching cexc bit shifted left by 23 */
+#define	FSR_NXM		0x00800000
+#define	FSR_DZM		0x01000000
+#define	FSR_UFM		0x02000000
+#define	FSR_OFM		0x04000000
+#define	FSR_NVM		0x08000000
+
+/* rounding directions (right-adjusted: compare against fsr >> 30) */
+#define	FSR_RN		0	/* to nearest */
+#define	FSR_RZ		1	/* toward zero */
+#define	FSR_RP		2	/* toward +infinity */
+#define	FSR_RM		3	/* toward -infinity */
+
+/* inline templates: read the fsr into / load it from the pointed-to word */
+extern void __fenv_getfsr(unsigned int *);
+extern void __fenv_setfsr(const unsigned int *);
+
+#endif /* __sparc */
+
+
+#ifdef __i386
+
+/*
+ * Common definitions for fma routines (x86)
+ */
+
+/* fields of the combined word: status in bits 0-15, control in 16-31 */
+
+/* exception flags (status word) */
+#define	FSW_NV		0x1	/* invalid operation */
+#define	FSW_DN		0x2	/* denormal operand */
+#define	FSW_DZ		0x4	/* division by zero */
+#define	FSW_OF		0x8	/* overflow */
+#define	FSW_UF		0x10	/* underflow */
+#define	FSW_NX		0x20	/* inexact */
+
+/* exception masks (control word): the matching flag shifted left by 16 */
+#define	FCW_NVM		0x00010000
+#define	FCW_DNM		0x00020000
+#define	FCW_DZM		0x00040000
+#define	FCW_OFM		0x00080000
+#define	FCW_UFM		0x00100000
+#define	FCW_NXM		0x00200000
+#define FCW_ALLM	0x003f0000	/* all six masks */
+
+/* rounding directions (FCW_RZ also covers the whole two-bit field) */
+#define	FCW_RN		0x00000000	/* to nearest */
+#define	FCW_RM		0x04000000	/* toward -infinity */
+#define	FCW_RP		0x08000000	/* toward +infinity */
+#define	FCW_RZ		0x0c000000	/* toward zero */
+
+/* rounding precisions (FCW_P64 also covers the whole two-bit field) */
+#define FCW_P24		0x00000000	/* 24-bit significand */
+#define FCW_P53		0x02000000	/* 53-bit significand */
+#define FCW_P64		0x03000000	/* 64-bit significand */
+
+/* inline templates: read / load the combined control and status word */
+extern void __fenv_getcwsw(unsigned int *);
+extern void __fenv_setcwsw(const unsigned int *);
+
+#endif /* __i386 */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif	/* _FMA_H */
diff --git a/usr/src/libm/src/m9x/fmaf.c b/usr/src/libm/src/m9x/fmaf.c
new file mode 100644
index 0000000..f0799b7
--- /dev/null
+++ b/usr/src/libm/src/m9x/fmaf.c
@@ -0,0 +1,241 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"@(#)fmaf.c	1.5	06/01/31 SMI"
+
+#if defined(ELFOBJ)
+#pragma weak fmaf = __fmaf
+#endif
+
+#include "libm.h"
+#include "fma.h"
+
+#if defined(__sparc)
+
+/*
+ * fmaf for SPARC (big-endian): x * y + z for float, summed in double
+ */
+float
+__fmaf(float x, float y, float z) {
+	union {
+		unsigned i[2];	/* i[0]: high word, i[1]: low word */
+		double d;
+	} xy, zz;
+	unsigned u, s;
+	int exy, ez;
+
+	/*
+	 * the following operations can only raise the invalid exception,
+	 * and then only if either x*y is of the form Inf*0 or one of x,
+	 * y, or z is a signaling NaN
+	 */
+	xy.d = (double) x * y;
+	zz.d = (double) z;
+
+	/*
+	 * if the sum xy + z will be exact, just compute it and cast the
+	 * result to float (also taken for zero, inf, and nan operands)
+	 */
+	exy = (xy.i[0] >> 20) & 0x7ff;
+	ez = (zz.i[0] >> 20) & 0x7ff;
+	if ((ez - exy <= 4 && exy - ez <= 28) || exy == 0x7ff || exy == 0 ||
+		ez == 0x7ff || ez == 0) {
+		return ((float) (xy.d + zz.d));
+	}
+
+	/*
+	 * collapse the tail of the smaller summand into a "sticky bit" so
+	 * the sum is exact; unsigned shifts, since 2 << 30 overflows an int
+	 */
+	if (ez > exy) {
+		if (ez - exy < 31) {
+			u = xy.i[1];
+			s = 2u << (ez - exy);
+			if (u & (s - 1))
+				u |= s;
+			xy.i[1] = u & ~(s - 1);
+		} else if (ez - exy < 51) {
+			u = xy.i[0];
+			s = 1u << (ez - exy - 31);
+			if ((u & (s - 1)) | xy.i[1])
+				u |= s;
+			xy.i[0] = u & ~(s - 1);
+			xy.i[1] = 0;
+		} else {
+			/* collapse all of xy into a single bit */
+			xy.i[0] = (xy.i[0] & 0x80000000) | ((ez - 51) << 20);
+			xy.i[1] = 0;
+		}
+	} else {
+		if (exy - ez < 31) {
+			u = zz.i[1];
+			s = 2u << (exy - ez);
+			if (u & (s - 1))
+				u |= s;
+			zz.i[1] = u & ~(s - 1);
+		} else if (exy - ez < 51) {
+			u = zz.i[0];
+			s = 1u << (exy - ez - 31);
+			if ((u & (s - 1)) | zz.i[1])
+				u |= s;
+			zz.i[0] = u & ~(s - 1);
+			zz.i[1] = 0;
+		} else {
+			/* collapse all of zz into a single bit */
+			zz.i[0] = (zz.i[0] & 0x80000000) | ((exy - 51) << 20);
+			zz.i[1] = 0;
+		}
+	}
+
+	return ((float) (xy.d + zz.d));
+}
+
+#elif defined(__i386)
+
+#if defined(__amd64)
+#define	NI	4
+#else
+#define	NI	3
+#endif
+
+/*
+ * fmaf for x86 (little-endian): x * y + z for float, summed in long double
+ */
+float
+__fmaf(float x, float y, float z) {
+	union {
+		unsigned i[NI];	/* i[0..1]: significand, i[2]: sign/exponent */
+		long double e;
+	} xy, zz;
+	unsigned u, s, cwsw, oldcwsw;
+	int exy, ez;
+
+	/* set rounding precision to 64 bits (0x03000000 is FCW_P64) */
+	__fenv_getcwsw(&oldcwsw);
+	cwsw = (oldcwsw & 0xfcffffff) | 0x03000000;
+	__fenv_setcwsw(&cwsw);
+
+	/*
+	 * the following operations can only raise the invalid exception,
+	 * and then only if either x*y is of the form Inf*0 or one of x,
+	 * y, or z is a signaling NaN
+	 */
+	xy.e = (long double) x * y;
+	zz.e = (long double) z;
+
+	/*
+	 * if the sum xy + z will be exact, just compute it and cast the
+	 * result to float (also taken for zero, inf, and nan operands)
+	 */
+	exy = xy.i[2] & 0x7fff;
+	ez = zz.i[2] & 0x7fff;
+	if ((ez - exy <= 15 && exy - ez <= 39) || exy == 0x7fff || exy == 0 ||
+		ez == 0x7fff || ez == 0) {
+		goto cont;
+	}
+
+	/*
+	 * collapse the tail of the smaller summand into a "sticky bit" so
+	 * the sum is exact; unsigned shifts, since 2 << 30 overflows an int
+	 */
+	if (ez > exy) {
+		if (ez - exy < 31) {
+			u = xy.i[0];
+			s = 2u << (ez - exy);
+			if (u & (s - 1))
+				u |= s;
+			xy.i[0] = u & ~(s - 1);
+		} else if (ez - exy < 62) {
+			u = xy.i[1];
+			s = 1u << (ez - exy - 31);
+			if ((u & (s - 1)) | xy.i[0])
+				u |= s;
+			xy.i[1] = u & ~(s - 1);
+			xy.i[0] = 0;
+		} else {
+			/* collapse all of xy into a single bit */
+			xy.i[0] = 0;
+			xy.i[1] = 0x80000000;
+			xy.i[2] = (xy.i[2] & 0x8000) | (ez - 62);
+		}
+	} else {
+		if (exy - ez < 62) {
+			u = zz.i[1];
+			s = 1u << (exy - ez - 31);
+			if ((u & (s - 1)) | zz.i[0])
+				u |= s;
+			zz.i[1] = u & ~(s - 1);
+			zz.i[0] = 0;
+		} else {
+			/* collapse all of zz into a single bit */
+			zz.i[0] = 0;
+			zz.i[1] = 0x80000000;
+			zz.i[2] = (zz.i[2] & 0x8000) | (exy - 62);
+		}
+	}
+
+cont:
+	xy.e += zz.e;
+
+	/* restore only the rounding precision; other current bits are kept */
+	__fenv_getcwsw(&cwsw);
+	cwsw = (cwsw & 0xfcffffff) | (oldcwsw & 0x03000000);
+	__fenv_setcwsw(&cwsw);
+
+	return ((float) xy.e);
+}
+
+#if 0
+/*
+ * Alternative x86 fmaf whose result is meant to stay in long double
+ * (80-bit double extended) precision rather than being rounded to float.
+ */
+long double
+__fmaf(float x, float y, float z) {
+	long double xy;
+
+	/*
+	 * This relies on the rounding precision mode being the default
+	 * (round to 64 bits).  To support non-default rounding precision
+	 * modes, bracket only the multiplication:
+	 *
+	 *   <set rp mode to round to 64 bit precision>
+	 *   xy = x * y;
+	 *   <restore rp mode>
+	 *   return xy + z;
+	 *
+	 * The code that switches rounding precision must leave the
+	 * exception masks and flags alone, because x * y may raise an
+	 * invalid operation exception.
+	 */
+	xy = (long double) x * y;
+	return (xy + z);
+}
+#endif
+
+#else
+#error Unknown architecture
+#endif
diff --git a/usr/src/libm/src/m9x/fmal.c b/usr/src/libm/src/m9x/fmal.c
new file mode 100644
index 0000000..7fb9a62
--- /dev/null
+++ b/usr/src/libm/src/m9x/fmal.c
@@ -0,0 +1,1224 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"@(#)fmal.c	1.5	06/01/31 SMI"
+
+#if defined(ELFOBJ)
+#pragma weak fmal = __fmal
+#endif
+
+#include "libm.h"
+#include "fma.h"
+
+#if defined(__sparc)
+
+static const union {	/* doubles spelled out as two 32-bit words */
+	unsigned i[2];	/* i[0]: high word (sign, exponent), i[1]: low word */
+	double d;
+} C[] = {
+	{ 0x3fe00000u, 0 },	/* 2^-1 */
+	{ 0x40000000u, 0 },	/* 2^1 */
+	{ 0x3ef00000u, 0 },	/* 2^-16 */
+	{ 0x3e700000u, 0 },	/* 2^-24 */
+	{ 0x41300000u, 0 },	/* 2^20 */
+	{ 0x3e300000u, 0 },	/* 2^-28 */
+	{ 0x3b300000u, 0 },	/* 2^-76 */
+	{ 0x38300000u, 0 },	/* 2^-124 */
+	{ 0x42300000u, 0 },	/* 2^36 */
+	{ 0x3df00000u, 0 },	/* 2^-32 */
+	{ 0x7fe00000u, 0 },	/* 2^1023 */
+	{ 0x00100000u, 0 },	/* 2^-1022, the smallest normal double */
+	{ 0x00100001u, 0 },	/* 2^-1022 * (1 + 2^-20) */
+	{ 0, 0 },		/* +0 */
+	{ 0x7ff00000u, 0 },	/* +infinity */
+	{ 0x7ff00001u, 0 }	/* a NaN with the quiet bit clear */
+};
+
+#define	half	C[0].d
+#define	two	C[1].d
+#define	twom16	C[2].d
+#define	twom24	C[3].d
+#define	two20	C[4].d
+#define	twom28	C[5].d
+#define	twom76	C[6].d
+#define	twom124	C[7].d
+#define	two36	C[8].d
+#define	twom32	C[9].d
+#define	huge	C[10].d
+#define	tiny	C[11].d
+#define	tiny2	C[12].d
+#define	zero	C[13].d
+#define	inf	C[14].d
+#define	snan	C[15].d
+
+static const unsigned fsr_rm = 0xc0000000u;	/* FSR_RM << 30, all else 0 */
+
+/*
+ * fmal for SPARC: 128-bit quad precision, big-endian
+ */
+long double
+__fmal(long double x, long double y, long double z) {
+	union {
+		unsigned i[4];
+		long double q;
+	} xx, yy, zz;
+	union {
+		unsigned i[2];
+		double d;
+	} u;
+	double dx[5], dy[5], dxy[9], c, s;
+	unsigned xy0, xy1, xy2, xy3, xy4, xy5, xy6, xy7;
+	unsigned z0, z1, z2, z3, z4, z5, z6, z7;
+	unsigned fsr, rm, sticky;
+	int hx, hy, hz, ex, ey, ez, exy, sxy, sz, e, ibit;
+	int cx, cy, cz;
+	volatile double	dummy;
+
+	/* extract the high order words of the arguments */
+	xx.q = x;
+	yy.q = y;
+	zz.q = z;
+	hx = xx.i[0] & ~0x80000000;
+	hy = yy.i[0] & ~0x80000000;
+	hz = zz.i[0] & ~0x80000000;
+
+	/*
+	 * distinguish zero, finite nonzero, infinite, and quiet nan
+	 * arguments; raise invalid and return for signaling nans
+	 */
+	if (hx >= 0x7fff0000) {
+		if ((hx & 0xffff) | xx.i[1] | xx.i[2] | xx.i[3]) {
+			if (!(hx & 0x8000)) {
+				/* signaling nan, raise invalid */
+				dummy = snan;
+				dummy += snan;
+				xx.i[0] |= 0x8000;
+				return (xx.q);
+			}
+			cx = 3;	/* quiet nan */
+		} else
+			cx = 2;	/* inf */
+	} else if (hx == 0) {
+		cx = (xx.i[1] | xx.i[2] | xx.i[3]) ? 1 : 0;
+				/* subnormal or zero */
+	} else
+		cx = 1;		/* finite nonzero */
+
+	if (hy >= 0x7fff0000) {
+		if ((hy & 0xffff) | yy.i[1] | yy.i[2] | yy.i[3]) {
+			if (!(hy & 0x8000)) {
+				dummy = snan;
+				dummy += snan;
+				yy.i[0] |= 0x8000;
+				return (yy.q);
+			}
+			cy = 3;
+		} else
+			cy = 2;
+	} else if (hy == 0) {
+		cy = (yy.i[1] | yy.i[2] | yy.i[3]) ? 1 : 0;
+	} else
+		cy = 1;
+
+	if (hz >= 0x7fff0000) {
+		if ((hz & 0xffff) | zz.i[1] | zz.i[2] | zz.i[3]) {
+			if (!(hz & 0x8000)) {
+				dummy = snan;
+				dummy += snan;
+				zz.i[0] |= 0x8000;
+				return (zz.q);
+			}
+			cz = 3;
+		} else
+			cz = 2;
+	} else if (hz == 0) {
+		cz = (zz.i[1] | zz.i[2] | zz.i[3]) ? 1 : 0;
+	} else
+		cz = 1;
+
+	/* get the fsr and clear current exceptions */
+	__fenv_getfsr(&fsr);
+	fsr &= ~FSR_CEXC;
+
+	/* handle all other zero, inf, and nan cases */
+	if (cx != 1 || cy != 1 || cz != 1) {
+		/* if x or y is a quiet nan, return it */
+		if (cx == 3) {
+			__fenv_setfsr(&fsr);
+			return (x);
+		}
+		if (cy == 3) {
+			__fenv_setfsr(&fsr);
+			return (y);
+		}
+
+		/* if x*y is 0*inf, raise invalid and return the default nan */
+		if ((cx == 0 && cy == 2) || (cx == 2 && cy == 0)) {
+			dummy = zero;
+			dummy *= inf;
+			zz.i[0] = 0x7fffffff;
+			zz.i[1] = zz.i[2] = zz.i[3] = 0xffffffff;
+			return (zz.q);
+		}
+
+		/* if z is a quiet nan, return it */
+		if (cz == 3) {
+			__fenv_setfsr(&fsr);
+			return (z);
+		}
+
+		/*
+		 * now none of x, y, or z is nan; handle cases where x or y
+		 * is inf
+		 */
+		if (cx == 2 || cy == 2) {
+			/*
+			 * if z is also inf, either we have inf-inf or
+			 * the result is the same as z depending on signs
+			 */
+			if (cz == 2) {
+				if ((int) ((xx.i[0] ^ yy.i[0]) ^ zz.i[0]) < 0) {
+					dummy = inf;
+					dummy -= inf;
+					zz.i[0] = 0x7fffffff;
+					zz.i[1] = zz.i[2] = zz.i[3] =
+						0xffffffff;
+					return (zz.q);
+				}
+				__fenv_setfsr(&fsr);
+				return (z);
+			}
+
+			/* otherwise the result is inf with appropriate sign */
+			zz.i[0] = ((xx.i[0] ^ yy.i[0]) & 0x80000000) |
+				0x7fff0000;
+			zz.i[1] = zz.i[2] = zz.i[3] = 0;
+			__fenv_setfsr(&fsr);
+			return (zz.q);
+		}
+
+		/* if z is inf, return it */
+		if (cz == 2) {
+			__fenv_setfsr(&fsr);
+			return (z);
+		}
+
+		/*
+		 * now x, y, and z are all finite; handle cases where x or y
+		 * is zero
+		 */
+		if (cx == 0 || cy == 0) {
+			/* either we have 0-0 or the result is the same as z */
+			if (cz == 0 && (int) ((xx.i[0] ^ yy.i[0]) ^ zz.i[0]) <
+				0) {
+				zz.i[0] = (fsr >> 30) == FSR_RM ? 0x80000000 :
+					0;
+				__fenv_setfsr(&fsr);
+				return (zz.q);
+			}
+			__fenv_setfsr(&fsr);
+			return (z);
+		}
+
+		/* if we get here, x and y are nonzero finite, z must be zero */
+		return (x * y);
+	}
+
+	/*
+	 * now x, y, and z are all finite and nonzero; set round-to-
+	 * negative-infinity mode
+	 */
+	__fenv_setfsr(&fsr_rm);
+
+	/*
+	 * get the signs and exponents and normalize the significands
+	 * of x and y
+	 */
+	sxy = (xx.i[0] ^ yy.i[0]) & 0x80000000;
+	ex = hx >> 16;
+	hx &= 0xffff;
+	if (!ex) {
+		if (hx | (xx.i[1] & 0xfffe0000)) {
+			ex = 1;
+		} else if (xx.i[1] | (xx.i[2] & 0xfffe0000)) {
+			hx = xx.i[1];
+			xx.i[1] = xx.i[2];
+			xx.i[2] = xx.i[3];
+			xx.i[3] = 0;
+			ex = -31;
+		} else if (xx.i[2] | (xx.i[3] & 0xfffe0000)) {
+			hx = xx.i[2];
+			xx.i[1] = xx.i[3];
+			xx.i[2] = xx.i[3] = 0;
+			ex = -63;
+		} else {
+			hx = xx.i[3];
+			xx.i[1] = xx.i[2] = xx.i[3] = 0;
+			ex = -95;
+		}
+		while ((hx & 0x10000) == 0) {
+			hx = (hx << 1) | (xx.i[1] >> 31);
+			xx.i[1] = (xx.i[1] << 1) | (xx.i[2] >> 31);
+			xx.i[2] = (xx.i[2] << 1) | (xx.i[3] >> 31);
+			xx.i[3] <<= 1;
+			ex--;
+		}
+	} else
+		hx |= 0x10000;
+	ey = hy >> 16;
+	hy &= 0xffff;
+	if (!ey) {
+		if (hy | (yy.i[1] & 0xfffe0000)) {
+			ey = 1;
+		} else if (yy.i[1] | (yy.i[2] & 0xfffe0000)) {
+			hy = yy.i[1];
+			yy.i[1] = yy.i[2];
+			yy.i[2] = yy.i[3];
+			yy.i[3] = 0;
+			ey = -31;
+		} else if (yy.i[2] | (yy.i[3] & 0xfffe0000)) {
+			hy = yy.i[2];
+			yy.i[1] = yy.i[3];
+			yy.i[2] = yy.i[3] = 0;
+			ey = -63;
+		} else {
+			hy = yy.i[3];
+			yy.i[1] = yy.i[2] = yy.i[3] = 0;
+			ey = -95;
+		}
+		while ((hy & 0x10000) == 0) {
+			hy = (hy << 1) | (yy.i[1] >> 31);
+			yy.i[1] = (yy.i[1] << 1) | (yy.i[2] >> 31);
+			yy.i[2] = (yy.i[2] << 1) | (yy.i[3] >> 31);
+			yy.i[3] <<= 1;
+			ey--;
+		}
+	} else
+		hy |= 0x10000;
+	exy = ex + ey - 0x3fff;
+
+	/* convert the significands of x and y to doubles */
+	c = twom16;
+	dx[0] = (double) ((int) hx) * c;
+	dy[0] = (double) ((int) hy) * c;
+
+	c *= twom24;
+	dx[1] = (double) ((int) (xx.i[1] >> 8)) * c;
+	dy[1] = (double) ((int) (yy.i[1] >> 8)) * c;
+
+	c *= twom24;
+	dx[2] = (double) ((int) (((xx.i[1] << 16) | (xx.i[2] >> 16)) &
+	    0xffffff)) * c;
+	dy[2] = (double) ((int) (((yy.i[1] << 16) | (yy.i[2] >> 16)) &
+	    0xffffff)) * c;
+
+	c *= twom24;
+	dx[3] = (double) ((int) (((xx.i[2] << 8) | (xx.i[3] >> 24)) &
+	    0xffffff)) * c;
+	dy[3] = (double) ((int) (((yy.i[2] << 8) | (yy.i[3] >> 24)) &
+	    0xffffff)) * c;
+
+	c *= twom24;
+	dx[4] = (double) ((int) (xx.i[3] & 0xffffff)) * c;
+	dy[4] = (double) ((int) (yy.i[3] & 0xffffff)) * c;
+
+	/* form the "digits" of the product */
+	dxy[0] = dx[0] * dy[0];
+	dxy[1] = dx[0] * dy[1] + dx[1] * dy[0];
+	dxy[2] = dx[0] * dy[2] + dx[1] * dy[1] + dx[2] * dy[0];
+	dxy[3] = dx[0] * dy[3] + dx[1] * dy[2] + dx[2] * dy[1] +
+	    dx[3] * dy[0];
+	dxy[4] = dx[0] * dy[4] + dx[1] * dy[3] + dx[2] * dy[2] +
+	    dx[3] * dy[1] + dx[4] * dy[0];
+	dxy[5] = dx[1] * dy[4] + dx[2] * dy[3] + dx[3] * dy[2] +
+	    dx[4] * dy[1];
+	dxy[6] = dx[2] * dy[4] + dx[3] * dy[3] + dx[4] * dy[2];
+	dxy[7] = dx[3] * dy[4] + dx[4] * dy[3];
+	dxy[8] = dx[4] * dy[4];
+
+	/* split odd-numbered terms and combine into even-numbered terms */
+	c = (dxy[1] + two20) - two20;
+	dxy[0] += c;
+	dxy[1] -= c;
+	c = (dxy[3] + twom28) - twom28;
+	dxy[2] += c + dxy[1];
+	dxy[3] -= c;
+	c = (dxy[5] + twom76) - twom76;
+	dxy[4] += c + dxy[3];
+	dxy[5] -= c;
+	c = (dxy[7] + twom124) - twom124;
+	dxy[6] += c + dxy[5];
+	dxy[8] += (dxy[7] - c);
+
+	/* propagate carries, adjusting the exponent if need be */
+	dxy[7] = dxy[6] + dxy[8];
+	dxy[5] = dxy[4] + dxy[7];
+	dxy[3] = dxy[2] + dxy[5];
+	dxy[1] = dxy[0] + dxy[3];
+	if (dxy[1] >= two) {
+		dxy[0] *= half;
+		dxy[1] *= half;
+		dxy[2] *= half;
+		dxy[3] *= half;
+		dxy[4] *= half;
+		dxy[5] *= half;
+		dxy[6] *= half;
+		dxy[7] *= half;
+		dxy[8] *= half;
+		exy++;
+	}
+
+	/* extract the significand of x*y */
+	s = two36;
+	u.d = c = dxy[1] + s;
+	xy0 = u.i[1];
+	c -= s;
+	dxy[1] -= c;
+	dxy[0] -= c;
+
+	s *= twom32;
+	u.d = c = dxy[1] + s;
+	xy1 = u.i[1];
+	c -= s;
+	dxy[2] += (dxy[0] - c);
+	dxy[3] = dxy[2] + dxy[5];
+
+	s *= twom32;
+	u.d = c = dxy[3] + s;
+	xy2 = u.i[1];
+	c -= s;
+	dxy[4] += (dxy[2] - c);
+	dxy[5] = dxy[4] + dxy[7];
+
+	s *= twom32;
+	u.d = c = dxy[5] + s;
+	xy3 = u.i[1];
+	c -= s;
+	dxy[4] -= c;
+	dxy[5] = dxy[4] + dxy[7];
+
+	s *= twom32;
+	u.d = c = dxy[5] + s;
+	xy4 = u.i[1];
+	c -= s;
+	dxy[6] += (dxy[4] - c);
+	dxy[7] = dxy[6] + dxy[8];
+
+	s *= twom32;
+	u.d = c = dxy[7] + s;
+	xy5 = u.i[1];
+	c -= s;
+	dxy[8] += (dxy[6] - c);
+
+	s *= twom32;
+	u.d = c = dxy[8] + s;
+	xy6 = u.i[1];
+	c -= s;
+	dxy[8] -= c;
+
+	s *= twom32;
+	u.d = c = dxy[8] + s;
+	xy7 = u.i[1];
+
+	/* extract the sign, exponent, and significand of z */
+	sz = zz.i[0] & 0x80000000;
+	ez = hz >> 16;
+	z0 = hz & 0xffff;
+	if (!ez) {
+		if (z0 | (zz.i[1] & 0xfffe0000)) {
+			z1 = zz.i[1];
+			z2 = zz.i[2];
+			z3 = zz.i[3];
+			ez = 1;
+		} else if (zz.i[1] | (zz.i[2] & 0xfffe0000)) {
+			z0 = zz.i[1];
+			z1 = zz.i[2];
+			z2 = zz.i[3];
+			z3 = 0;
+			ez = -31;
+		} else if (zz.i[2] | (zz.i[3] & 0xfffe0000)) {
+			z0 = zz.i[2];
+			z1 = zz.i[3];
+			z2 = z3 = 0;
+			ez = -63;
+		} else {
+			z0 = zz.i[3];
+			z1 = z2 = z3 = 0;
+			ez = -95;
+		}
+		while ((z0 & 0x10000) == 0) {
+			z0 = (z0 << 1) | (z1 >> 31);
+			z1 = (z1 << 1) | (z2 >> 31);
+			z2 = (z2 << 1) | (z3 >> 31);
+			z3 <<= 1;
+			ez--;
+		}
+	} else {
+		z0 |= 0x10000;
+		z1 = zz.i[1];
+		z2 = zz.i[2];
+		z3 = zz.i[3];
+	}
+	z4 = z5 = z6 = z7 = 0;
+
+	/*
+	 * now x*y is represented by sxy, exy, and xy[0-7], and z is
+	 * represented likewise; swap if need be so |xy| <= |z|
+	 */
+	if (exy > ez || (exy == ez && (xy0 > z0 || (xy0 == z0 && (xy1 > z1 ||
+		(xy1 == z1 && (xy2 > z2 || (xy2 == z2 && (xy3 > z3 ||
+		(xy3 == z3 && (xy4 | xy5 | xy6 | xy7) != 0)))))))))) {
+		e = sxy; sxy = sz; sz = e;
+		e = exy; exy = ez; ez = e;
+		e = xy0; xy0 = z0; z0 = e;
+		e = xy1; xy1 = z1; z1 = e;
+		e = xy2; xy2 = z2; z2 = e;
+		e = xy3; xy3 = z3; z3 = e;
+		z4 = xy4; xy4 = 0;
+		z5 = xy5; xy5 = 0;
+		z6 = xy6; xy6 = 0;
+		z7 = xy7; xy7 = 0;
+	}
+
+	/* shift the significand of xy keeping a sticky bit */
+	e = ez - exy;
+	if (e > 236) {
+		xy0 = xy1 = xy2 = xy3 = xy4 = xy5 = xy6 = 0;
+		xy7 = 1;
+	} else if (e >= 224) {
+		sticky = xy7 | xy6 | xy5 | xy4 | xy3 | xy2 | xy1 |
+			((xy0 << 1) << (255 - e));
+		xy7 = xy0 >> (e - 224);
+		if (sticky)
+			xy7 |= 1;
+		xy0 = xy1 = xy2 = xy3 = xy4 = xy5 = xy6 = 0;
+	} else if (e >= 192) {
+		sticky = xy7 | xy6 | xy5 | xy4 | xy3 | xy2 |
+			((xy1 << 1) << (223 - e));
+		xy7 = (xy1 >> (e - 192)) | ((xy0 << 1) << (223 - e));
+		if (sticky)
+			xy7 |= 1;
+		xy6 = xy0 >> (e - 192);
+		xy0 = xy1 = xy2 = xy3 = xy4 = xy5 = 0;
+	} else if (e >= 160) {
+		sticky = xy7 | xy6 | xy5 | xy4 | xy3 |
+			((xy2 << 1) << (191 - e));
+		xy7 = (xy2 >> (e - 160)) | ((xy1 << 1) << (191 - e));
+		if (sticky)
+			xy7 |= 1;
+		xy6 = (xy1 >> (e - 160)) | ((xy0 << 1) << (191 - e));
+		xy5 = xy0 >> (e - 160);
+		xy0 = xy1 = xy2 = xy3 = xy4 = 0;
+	} else if (e >= 128) {
+		sticky = xy7 | xy6 | xy5 | xy4 | ((xy3 << 1) << (159 - e));
+		xy7 = (xy3 >> (e - 128)) | ((xy2 << 1) << (159 - e));
+		if (sticky)
+			xy7 |= 1;
+		xy6 = (xy2 >> (e - 128)) | ((xy1 << 1) << (159 - e));
+		xy5 = (xy1 >> (e - 128)) | ((xy0 << 1) << (159 - e));
+		xy4 = xy0 >> (e - 128);
+		xy0 = xy1 = xy2 = xy3 = 0;
+	} else if (e >= 96) {
+		sticky = xy7 | xy6 | xy5 | ((xy4 << 1) << (127 - e));
+		xy7 = (xy4 >> (e - 96)) | ((xy3 << 1) << (127 - e));
+		if (sticky)
+			xy7 |= 1;
+		xy6 = (xy3 >> (e - 96)) | ((xy2 << 1) << (127 - e));
+		xy5 = (xy2 >> (e - 96)) | ((xy1 << 1) << (127 - e));
+		xy4 = (xy1 >> (e - 96)) | ((xy0 << 1) << (127 - e));
+		xy3 = xy0 >> (e - 96);
+		xy0 = xy1 = xy2 = 0;
+	} else if (e >= 64) {
+		sticky = xy7 | xy6 | ((xy5 << 1) << (95 - e));
+		xy7 = (xy5 >> (e - 64)) | ((xy4 << 1) << (95 - e));
+		if (sticky)
+			xy7 |= 1;
+		xy6 = (xy4 >> (e - 64)) | ((xy3 << 1) << (95 - e));
+		xy5 = (xy3 >> (e - 64)) | ((xy2 << 1) << (95 - e));
+		xy4 = (xy2 >> (e - 64)) | ((xy1 << 1) << (95 - e));
+		xy3 = (xy1 >> (e - 64)) | ((xy0 << 1) << (95 - e));
+		xy2 = xy0 >> (e - 64);
+		xy0 = xy1 = 0;
+	} else if (e >= 32) {
+		sticky = xy7 | ((xy6 << 1) << (63 - e));
+		xy7 = (xy6 >> (e - 32)) | ((xy5 << 1) << (63 - e));
+		if (sticky)
+			xy7 |= 1;
+		xy6 = (xy5 >> (e - 32)) | ((xy4 << 1) << (63 - e));
+		xy5 = (xy4 >> (e - 32)) | ((xy3 << 1) << (63 - e));
+		xy4 = (xy3 >> (e - 32)) | ((xy2 << 1) << (63 - e));
+		xy3 = (xy2 >> (e - 32)) | ((xy1 << 1) << (63 - e));
+		xy2 = (xy1 >> (e - 32)) | ((xy0 << 1) << (63 - e));
+		xy1 = xy0 >> (e - 32);
+		xy0 = 0;
+	} else if (e) {
+		sticky = (xy7 << 1) << (31 - e);
+		xy7 = (xy7 >> e) | ((xy6 << 1) << (31 - e));
+		if (sticky)
+			xy7 |= 1;
+		xy6 = (xy6 >> e) | ((xy5 << 1) << (31 - e));
+		xy5 = (xy5 >> e) | ((xy4 << 1) << (31 - e));
+		xy4 = (xy4 >> e) | ((xy3 << 1) << (31 - e));
+		xy3 = (xy3 >> e) | ((xy2 << 1) << (31 - e));
+		xy2 = (xy2 >> e) | ((xy1 << 1) << (31 - e));
+		xy1 = (xy1 >> e) | ((xy0 << 1) << (31 - e));
+		xy0 >>= e;
+	}
+
+	/* if this is a magnitude subtract, negate the significand of xy */
+	if (sxy ^ sz) {
+		xy0 = ~xy0;
+		xy1 = ~xy1;
+		xy2 = ~xy2;
+		xy3 = ~xy3;
+		xy4 = ~xy4;
+		xy5 = ~xy5;
+		xy6 = ~xy6;
+		xy7 = -xy7;
+		if (xy7 == 0)
+			if (++xy6 == 0)
+				if (++xy5 == 0)
+					if (++xy4 == 0)
+						if (++xy3 == 0)
+							if (++xy2 == 0)
+								if (++xy1 == 0)
+									xy0++;
+	}
+
+	/* add, propagating carries */
+	z7 += xy7;
+	e = (z7 < xy7);
+	z6 += xy6;
+	if (e) {
+		z6++;
+		e = (z6 <= xy6);
+	} else
+		e = (z6 < xy6);
+	z5 += xy5;
+	if (e) {
+		z5++;
+		e = (z5 <= xy5);
+	} else
+		e = (z5 < xy5);
+	z4 += xy4;
+	if (e) {
+		z4++;
+		e = (z4 <= xy4);
+	} else
+		e = (z4 < xy4);
+	z3 += xy3;
+	if (e) {
+		z3++;
+		e = (z3 <= xy3);
+	} else
+		e = (z3 < xy3);
+	z2 += xy2;
+	if (e) {
+		z2++;
+		e = (z2 <= xy2);
+	} else
+		e = (z2 < xy2);
+	z1 += xy1;
+	if (e) {
+		z1++;
+		e = (z1 <= xy1);
+	} else
+		e = (z1 < xy1);
+	z0 += xy0;
+	if (e)
+		z0++;
+
+	/* postnormalize and collect rounding information into z4 */
+	if (ez < 1) {
+		/* result is tiny; shift right until exponent is within range */
+		e = 1 - ez;
+		if (e > 116) {
+			z4 = 1; /* result can't be exactly zero */
+			z0 = z1 = z2 = z3 = 0;
+		} else if (e >= 96) {
+			sticky = z7 | z6 | z5 | z4 | z3 | z2 |
+				((z1 << 1) << (127 - e));
+			z4 = (z1 >> (e - 96)) | ((z0 << 1) << (127 - e));
+			if (sticky)
+				z4 |= 1;
+			z3 = z0 >> (e - 96);
+			z0 = z1 = z2 = 0;
+		} else if (e >= 64) {
+			sticky = z7 | z6 | z5 | z4 | z3 |
+				((z2 << 1) << (95 - e));
+			z4 = (z2 >> (e - 64)) | ((z1 << 1) << (95 - e));
+			if (sticky)
+				z4 |= 1;
+			z3 = (z1 >> (e - 64)) | ((z0 << 1) << (95 - e));
+			z2 = z0 >> (e - 64);
+			z0 = z1 = 0;
+		} else if (e >= 32) {
+			sticky = z7 | z6 | z5 | z4 | ((z3 << 1) << (63 - e));
+			z4 = (z3 >> (e - 32)) | ((z2 << 1) << (63 - e));
+			if (sticky)
+				z4 |= 1;
+			z3 = (z2 >> (e - 32)) | ((z1 << 1) << (63 - e));
+			z2 = (z1 >> (e - 32)) | ((z0 << 1) << (63 - e));
+			z1 = z0 >> (e - 32);
+			z0 = 0;
+		} else {
+			sticky = z7 | z6 | z5 | (z4 << 1) << (31 - e);
+			z4 = (z4 >> e) | ((z3 << 1) << (31 - e));
+			if (sticky)
+				z4 |= 1;
+			z3 = (z3 >> e) | ((z2 << 1) << (31 - e));
+			z2 = (z2 >> e) | ((z1 << 1) << (31 - e));
+			z1 = (z1 >> e) | ((z0 << 1) << (31 - e));
+			z0 >>= e;
+		}
+		ez = 1;
+	} else if (z0 >= 0x20000) {
+		/* carry out; shift right by one */
+		sticky = (z4 & 1) | z5 | z6 | z7;
+		z4 = (z4 >> 1) | (z3 << 31);
+		if (sticky)
+			z4 |= 1;
+		z3 = (z3 >> 1) | (z2 << 31);
+		z2 = (z2 >> 1) | (z1 << 31);
+		z1 = (z1 >> 1) | (z0 << 31);
+		z0 >>= 1;
+		ez++;
+	} else {
+		if (z0 < 0x10000 && (z0 | z1 | z2 | z3 | z4 | z5 | z6 | z7)
+			!= 0) {
+			/*
+			 * borrow/cancellation; shift left as much as
+			 * exponent allows
+			 */
+			while (!(z0 | (z1 & 0xfffe0000)) && ez >= 33) {
+				z0 = z1;
+				z1 = z2;
+				z2 = z3;
+				z3 = z4;
+				z4 = z5;
+				z5 = z6;
+				z6 = z7;
+				z7 = 0;
+				ez -= 32;
+			}
+			while (z0 < 0x10000 && ez > 1) {
+				z0 = (z0 << 1) | (z1 >> 31);
+				z1 = (z1 << 1) | (z2 >> 31);
+				z2 = (z2 << 1) | (z3 >> 31);
+				z3 = (z3 << 1) | (z4 >> 31);
+				z4 = (z4 << 1) | (z5 >> 31);
+				z5 = (z5 << 1) | (z6 >> 31);
+				z6 = (z6 << 1) | (z7 >> 31);
+				z7 <<= 1;
+				ez--;
+			}
+		}
+		if (z5 | z6 | z7)
+			z4 |= 1;
+	}
+
+	/* get the rounding mode */
+	rm = fsr >> 30;
+
+	/* strip off the integer bit, if there is one */
+	ibit = z0 & 0x10000;
+	if (ibit)
+		z0 -= 0x10000;
+	else {
+		ez = 0;
+		if (!(z0 | z1 | z2 | z3 | z4)) { /* exact zero */
+			zz.i[0] = rm == FSR_RM ? 0x80000000 : 0;
+			zz.i[1] = zz.i[2] = zz.i[3] = 0;
+			__fenv_setfsr(&fsr);
+			return (zz.q);
+		}
+	}
+
+	/*
+	 * flip the sense of directed roundings if the result is negative;
+	 * the logic below applies to a positive result
+	 */
+	if (sz)
+		rm ^= rm >> 1;
+
+	/* round and raise exceptions */
+	if (z4) {
+		fsr |= FSR_NXC;
+
+		/* decide whether to round the fraction up */
+		if (rm == FSR_RP || (rm == FSR_RN && (z4 > 0x80000000u ||
+			(z4 == 0x80000000u && (z3 & 1))))) {
+			/* round up and renormalize if necessary */
+			if (++z3 == 0)
+				if (++z2 == 0)
+					if (++z1 == 0)
+						if (++z0 == 0x10000) {
+							z0 = 0;
+							ez++;
+						}
+		}
+	}
+
+	/* check for under/overflow */
+	if (ez >= 0x7fff) {
+		if (rm == FSR_RN || rm == FSR_RP) {
+			zz.i[0] = sz | 0x7fff0000;
+			zz.i[1] = zz.i[2] = zz.i[3] = 0;
+		} else {
+			zz.i[0] = sz | 0x7ffeffff;
+			zz.i[1] = zz.i[2] = zz.i[3] = 0xffffffff;
+		}
+		fsr |= FSR_OFC | FSR_NXC;
+	} else {
+		zz.i[0] = sz | (ez << 16) | z0;
+		zz.i[1] = z1;
+		zz.i[2] = z2;
+		zz.i[3] = z3;
+
+		/*
+		 * !ibit => exact result was tiny before rounding,
+		 * z4 nonzero => result delivered is inexact
+		 */
+		if (!ibit) {
+			if (z4)
+				fsr |= FSR_UFC | FSR_NXC;
+			else if (fsr & FSR_UFM)
+				fsr |= FSR_UFC;
+		}
+	}
+
+	/* restore the fsr and emulate exceptions as needed */
+	if ((fsr & FSR_CEXC) & (fsr >> 23)) {
+		__fenv_setfsr(&fsr);
+		if (fsr & FSR_OFC) {
+			dummy = huge;
+			dummy *= huge;
+		} else if (fsr & FSR_UFC) {
+			dummy = tiny;
+			if (fsr & FSR_NXC)
+				dummy *= tiny;
+			else
+				dummy -= tiny2;
+		} else {
+			dummy = huge;
+			dummy += tiny;
+		}
+	} else {
+		fsr |= (fsr & 0x1f) << 5;
+		__fenv_setfsr(&fsr);
+	}
+	return (zz.q);
+}
+
+#elif defined(__i386)
+
+static const union {
+	unsigned i[2];		/* little-endian: i[0] low word, i[1] high */
+	double d;
+} C[] = {
+	{ 0, 0x3fe00000u },	/* half = 0.5 */
+	{ 0, 0x40000000u },	/* two = 2.0 */
+	{ 0, 0x3df00000u },	/* twom32 = 2^-32 */
+	{ 0, 0x3bf00000u },	/* twom64 = 2^-64 */
+	{ 0, 0x41f00000u },	/* two32 = 2^32 */
+	{ 0, 0x43e00000u },	/* two63 = 2^63 */
+	{ 0, 0x7fe00000u },	/* huge = 2^1023 */
+	{ 0, 0x00100000u },	/* tiny = 2^-1022, the smallest normal double */
+	{ 0, 0x00100001u }	/* tiny2 = 2^-1022 * (1 + 2^-20) */
+};
+
+#define	half	C[0].d
+#define	two	C[1].d
+#define	twom32	C[2].d
+#define	twom64	C[3].d
+#define	two32	C[4].d
+#define	two63	C[5].d
+#define	huge	C[6].d
+#define	tiny	C[7].d
+#define	tiny2	C[8].d
+
+#if defined(__amd64)	/* NI: unsigned words overlaid on a long double */
+#define	NI	4
+#else
+#define	NI	3
+#endif
+
+/*
+ * fmal for x86 (80-bit extended, little-endian): x * y + z, rounded once
+ */
+long double
+__fmal(long double x, long double y, long double z) {
+	union {
+		unsigned i[NI];	/* i[0], i[1]: significand; i[2]: sign/exp */
+		long double e;
+	} xx, yy, zz;
+	long double xhi, yhi, xlo, ylo, t;
+	unsigned xy0, xy1, xy2, xy3, xy4, z0, z1, z2, z3, z4;
+	unsigned oldcwsw, cwsw, rm, sticky, carry;
+	int ex, ey, ez, exy, sxy, sz, e, tinyafter;
+	volatile double	dummy;	/* scratch used only to raise exceptions */
+
+	/* extract the (biased, 15-bit) exponents of the arguments */
+	xx.e = x;
+	yy.e = y;
+	zz.e = z;
+	ex = xx.i[2] & 0x7fff;
+	ey = yy.i[2] & 0x7fff;
+	ez = zz.i[2] & 0x7fff;
+
+	/* dispense with inf, nan, and zero cases */
+	if (ex == 0x7fff || ey == 0x7fff || (ex | xx.i[1] | xx.i[0]) == 0 ||
+		(ey | yy.i[1] | yy.i[0]) == 0)	/* x or y is inf, nan, or 0 */
+		return (x * y + z);
+
+	if (ez == 0x7fff)			/* z is inf or nan */
+		return (x + z);	/* avoid spurious under/overflow in x * y */
+
+	if ((ez | zz.i[1] | zz.i[0]) == 0)	/* z is zero */
+		/*
+		 * x * y isn't zero but could underflow to zero,
+		 * so don't add z, lest we perturb the sign
+		 */
+		return (x * y);
+
+	/*
+	 * now x, y, and z are all finite and nonzero; extract signs and
+	 * normalize the significands (this will raise the denormal operand
+	 * exception if need be)
+	 */
+	sxy = (xx.i[2] ^ yy.i[2]) & 0x8000;	/* sign of x * y */
+	sz = zz.i[2] & 0x8000;			/* sign of z */
+	if (!ex) {
+		xx.e = x * two63;		/* scale subnormal x by 2^63 */
+		ex = (xx.i[2] & 0x7fff) - 63;
+	}
+	if (!ey) {
+		yy.e = y * two63;		/* scale subnormal y by 2^63 */
+		ey = (yy.i[2] & 0x7fff) - 63;
+	}
+	if (!ez) {
+		zz.e = z * two63;		/* scale subnormal z by 2^63 */
+		ez = (zz.i[2] & 0x7fff) - 63;
+	}
+
+	/*
+	 * save the control and status words, mask all exceptions, and
+	 * set rounding to 64-bit precision and toward-zero
+	 */
+	__fenv_getcwsw(&oldcwsw);
+	cwsw = (oldcwsw & 0xf0c0ffff) | 0x0f3f0000;	/* cw is the high half */
+	__fenv_setcwsw(&cwsw);
+
+	/* multiply x*y to 128 bits: x gets the top 64 bits, y the rest */
+	exy = ex + ey - 0x3fff;
+	xx.i[2] = 0x3fff;	/* sign cleared, exponent 0: 1 <= x < 2 */
+	yy.i[2] = 0x3fff;	/* likewise for y */
+	x = xx.e;
+	y = yy.e;
+	xhi = ((x + twom32) + two32) - two32;	/* multiple of 2^-31 near x */
+	yhi = ((y + twom32) + two32) - two32;	/* multiple of 2^-31 near y */
+	xlo = x - xhi;
+	ylo = y - yhi;
+	x *= y;			/* product chopped to 64 bits */
+	y = ((xhi * yhi - x) + xhi * ylo + xlo * yhi) + xlo * ylo;
+	if (x >= two) {		/* bring the product back into [1,2) */
+		x *= half;
+		y *= half;
+		exy++;
+	}
+
+	/* extract the significands as 32-bit words, most significant first */
+	xx.e = x;
+	xy0 = xx.i[1];
+	xy1 = xx.i[0];
+	yy.e = t = y + twom32;	/* lines up product bits 2^-64..2^-95 */
+	xy2 = yy.i[0];
+	yy.e = (y - (t - twom32)) + twom64;	/* bits 2^-96..2^-127 */
+	xy3 = yy.i[0];
+	xy4 = 0;
+	z0 = zz.i[1];
+	z1 = zz.i[0];
+	z2 = z3 = z4 = 0;
+
+	/*
+	 * now x*y is represented by sxy, exy, and xy[0-4], and z is
+	 * represented likewise; swap if need be so |xy| <= |z|
+	 */
+	if (exy > ez || (exy == ez && (xy0 > z0 || (xy0 == z0 &&
+		(xy1 > z1 || (xy1 == z1 && (xy2 | xy3) != 0)))))) {
+		e = sxy; sxy = sz; sz = e;
+		e = exy; exy = ez; ez = e;
+		e = xy0; xy0 = z0; z0 = e;
+		e = xy1; xy1 = z1; z1 = e;
+		z2 = xy2; xy2 = 0;
+		z3 = xy3; xy3 = 0;
+	}
+
+	/* shift the significand of xy right by ez - exy, keeping a sticky bit */
+	e = ez - exy;
+	if (e > 130) {
+		xy0 = xy1 = xy2 = xy3 = 0;
+		xy4 = 1;
+	} else if (e >= 128) {
+		sticky = xy3 | xy2 | xy1 | ((xy0 << 1) << (159 - e));
+		xy4 = xy0 >> (e - 128);
+		if (sticky)
+			xy4 |= 1;
+		xy0 = xy1 = xy2 = xy3 = 0;
+	} else if (e >= 96) {
+		sticky = xy3 | xy2 | ((xy1 << 1) << (127 - e));
+		xy4 = (xy1 >> (e - 96)) | ((xy0 << 1) << (127 - e));
+		if (sticky)
+			xy4 |= 1;
+		xy3 = xy0 >> (e - 96);
+		xy0 = xy1 = xy2 = 0;
+	} else if (e >= 64) {
+		sticky = xy3 | ((xy2 << 1) << (95 - e));
+		xy4 = (xy2 >> (e - 64)) | ((xy1 << 1) << (95 - e));
+		if (sticky)
+			xy4 |= 1;
+		xy3 = (xy1 >> (e - 64)) | ((xy0 << 1) << (95 - e));
+		xy2 = xy0 >> (e - 64);
+		xy0 = xy1 = 0;
+	} else if (e >= 32) {
+		sticky = (xy3 << 1) << (63 - e);
+		xy4 = (xy3 >> (e - 32)) | ((xy2 << 1) << (63 - e));
+		if (sticky)
+			xy4 |= 1;
+		xy3 = (xy2 >> (e - 32)) | ((xy1 << 1) << (63 - e));
+		xy2 = (xy1 >> (e - 32)) | ((xy0 << 1) << (63 - e));
+		xy1 = xy0 >> (e - 32);
+		xy0 = 0;
+	} else if (e) {
+		xy4 = (xy3 << 1) << (31 - e);
+		xy3 = (xy3 >> e) | ((xy2 << 1) << (31 - e));
+		xy2 = (xy2 >> e) | ((xy1 << 1) << (31 - e));
+		xy1 = (xy1 >> e) | ((xy0 << 1) << (31 - e));
+		xy0 >>= e;
+	}
+
+	/* if this is a magnitude subtract, negate the significand of xy */
+	if (sxy ^ sz) {
+		xy0 = ~xy0;
+		xy1 = ~xy1;
+		xy2 = ~xy2;
+		xy3 = ~xy3;
+		xy4 = -xy4;
+		if (xy4 == 0)	/* two's complement: carry the +1 upward */
+			if (++xy3 == 0)
+				if (++xy2 == 0)
+					if (++xy1 == 0)
+						xy0++;
+	}
+
+	/* add z += xy word by word, lowest word first, propagating carries */
+	z4 += xy4;
+	carry = (z4 < xy4);
+	z3 += xy3;
+	if (carry) {
+		z3++;
+		carry = (z3 <= xy3);
+	} else
+		carry = (z3 < xy3);
+	z2 += xy2;
+	if (carry) {
+		z2++;
+		carry = (z2 <= xy2);
+	} else
+		carry = (z2 < xy2);
+	z1 += xy1;
+	if (carry) {
+		z1++;
+		carry = (z1 <= xy1);
+	} else
+		carry = (z1 < xy1);
+	z0 += xy0;
+	if (carry) {
+		z0++;
+		carry = (z0 <= xy0);
+	} else
+		carry = (z0 < xy0);
+
+	/* for a magnitude subtract, ignore the last carry out */
+	if (sxy ^ sz)
+		carry = 0;
+
+	/* postnormalize and collect rounding information into z2 */
+	if (ez < 1) {
+		/* result is tiny; shift right until exponent is within range */
+		e = 1 - ez;
+		if (e > 67) {
+			z2 = 1;	/* result can't be exactly zero */
+			z0 = z1 = 0;
+		} else if (e >= 64) {
+			sticky = z4 | z3 | z2 | z1 | ((z0 << 1) << (95 - e));
+			z2 = (z0 >> (e - 64)) | ((carry << 1) << (95 - e));
+			if (sticky)
+				z2 |= 1;
+			z1 = carry >> (e - 64);
+			z0 = 0;
+		} else if (e >= 32) {
+			sticky = z4 | z3 | z2 | ((z1 << 1) << (63 - e));
+			z2 = (z1 >> (e - 32)) | ((z0 << 1) << (63 - e));
+			if (sticky)
+				z2 |= 1;
+			z1 = (z0 >> (e - 32)) | ((carry << 1) << (63 - e));
+			z0 = carry >> (e - 32);
+		} else {
+			sticky = z4 | z3 | (z2 << 1) << (31 - e);
+			z2 = (z2 >> e) | ((z1 << 1) << (31 - e));
+			if (sticky)
+				z2 |= 1;
+			z1 = (z1 >> e) | ((z0 << 1) << (31 - e));
+			z0 = (z0 >> e) | ((carry << 1) << (31 - e));
+		}
+		ez = 1;
+	} else if (carry) {
+		/* carry out; shift right by one */
+		sticky = (z2 & 1) | z3 | z4;
+		z2 = (z2 >> 1) | (z1 << 31);
+		if (sticky)
+			z2 |= 1;
+		z1 = (z1 >> 1) | (z0 << 31);
+		z0 = (z0 >> 1) | 0x80000000;	/* the carry is the new top bit */
+		ez++;
+	} else {
+		if (z0 < 0x80000000u && (z0 | z1 | z2 | z3 | z4) != 0) {
+			/*
+			 * borrow/cancellation; shift left as much as
+			 * exponent allows
+			 */
+			while (!z0 && ez >= 33) {	/* a whole word at a time */
+				z0 = z1;
+				z1 = z2;
+				z2 = z3;
+				z3 = z4;
+				z4 = 0;
+				ez -= 32;
+			}
+			while (z0 < 0x80000000u && ez > 1) {	/* then bitwise */
+				z0 = (z0 << 1) | (z1 >> 31);
+				z1 = (z1 << 1) | (z2 >> 31);
+				z2 = (z2 << 1) | (z3 >> 31);
+				z3 = (z3 << 1) | (z4 >> 31);
+				z4 <<= 1;
+				ez--;
+			}
+		}
+		if (z3 | z4)	/* fold the discarded low words into sticky */
+			z2 |= 1;
+	}
+
+	/* get the rounding mode (rounding-control bits of the saved cw) */
+	rm = oldcwsw & 0x0c000000;
+
+	/* adjust exponent if result is subnormal (top significand bit clear) */
+	tinyafter = 0;
+	if (!(z0 & 0x80000000)) {
+		ez = 0;
+		tinyafter = 1;
+		if (!(z0 | z1 | z2)) { /* exact zero */
+			zz.i[2] = rm == FCW_RM ? 0x8000 : 0;
+			zz.i[1] = zz.i[0] = 0;
+			__fenv_setcwsw(&oldcwsw);
+			return (zz.e);
+		}
+	}
+
+	/*
+	 * flip the sense of directed roundings if the result is negative;
+	 * the logic below applies to a positive result
+	 */
+	if (sz && (rm == FCW_RM || rm == FCW_RP))
+		rm = (FCW_RM + FCW_RP) - rm;
+
+	/* round: z2 holds the bits below the 64-bit result in z0:z1 */
+	if (z2) {
+		if (rm == FCW_RP || (rm == FCW_RN && (z2 > 0x80000000u ||
+			(z2 == 0x80000000u && (z1 & 1))))) {
+			/* round up and renormalize if necessary */
+			if (++z1 == 0) {
+				if (++z0 == 0) {
+					z0 = 0x80000000;
+					ez++;
+				} else if (z0 == 0x80000000) {
+					/* rounded up to smallest normal */
+					ez = 1;
+					if ((rm == FCW_RP && z2 >
+						0x80000000u) || (rm == FCW_RN &&
+						z2 >= 0xc0000000u))
+						/*
+						 * would have rounded up to
+						 * smallest normal even with
+						 * unbounded range
+						 */
+						tinyafter = 0;
+				}
+			}
+		}
+	}
+
+	/* restore the control and status words, check for over/underflow */
+	__fenv_setcwsw(&oldcwsw);
+	if (ez >= 0x7fff) {
+		if (rm == FCW_RN || rm == FCW_RP) {
+			zz.i[2] = sz | 0x7fff;		/* infinity */
+			zz.i[1] = 0x80000000;
+			zz.i[0] = 0;
+		} else {
+			zz.i[2] = sz | 0x7ffe;		/* largest finite value */
+			zz.i[1] = 0xffffffff;
+			zz.i[0] = 0xffffffff;
+		}
+		dummy = huge;
+		dummy *= huge;		/* huge * huge overflows a double */
+	} else {
+		zz.i[2] = sz | ez;
+		zz.i[1] = z0;
+		zz.i[0] = z1;
+
+		/*
+		 * tinyafter => result rounded w/ unbounded range would be tiny,
+		 * z2 nonzero => result delivered is inexact
+		 */
+		if (tinyafter) {
+			dummy = tiny;
+			if (z2)
+				dummy *= tiny;	/* tiny * tiny: inexact underflow */
+			else
+				dummy -= tiny2;	/* exact subnormal difference */
+		} else if (z2) {
+			dummy = huge;
+			dummy += tiny;		/* huge + tiny is inexact */
+		}
+	}
+
+	return (zz.e);
+}
+
+#else
+#error Unknown architecture
+#endif
diff --git a/usr/src/libm/src/m9x/fmax.c b/usr/src/libm/src/m9x/fmax.c
new file mode 100644
index 0000000..454a38b
--- /dev/null
+++ b/usr/src/libm/src/m9x/fmax.c
@@ -0,0 +1,79 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"@(#)fmax.c	1.6	06/01/31 SMI"
+
+#if defined(ELFOBJ)
+#pragma weak fmax = __fmax
+#endif
+
+/*
+ * fmax(x,y) returns the larger of x and y.  If just one of the
+ * arguments is NaN, fmax returns the other argument.  If both
+ * arguments are NaN, fmax returns NaN.
+ *
+ * See fmaxf.c for a discussion of implementation trade-offs.
+ */
+
+#include "libm.h"	/* for isgreaterequal macro */
+
+double
+__fmax(double x, double y) {
+	union {
+		unsigned w[2];
+		double val;
+	} pick, other;
+
+	/* a NaN in y yields to x, so only x can be a lone NaN below */
+	y = (y != y) ? x : y;
+
+	/*
+	 * x gives way to y when it is smaller or the pair is unordered
+	 */
+#if defined(COMPARISON_MACRO_BUG)
+	if (x != x || x < y)
+#else
+	if (!isgreaterequal(x, y))
+#endif
+		x = y;
+
+	/*
+	 * x and y are now both NaN or both numbers; the result may keep
+	 * a set sign bit only when y's is set as well, so mask x's high
+	 * word with y's sign bit (this makes the max of -0 and +0 be +0)
+	 */
+	pick.val = x;
+	other.val = y;
+#if defined(__sparc)
+	pick.w[0] &= other.w[0] | ~0x80000000u;
+#elif defined(__i386)
+	pick.w[1] &= other.w[1] | ~0x80000000u;
+#else
+#error Unknown architecture
+#endif
+
+	return (pick.val);
+}
diff --git a/usr/src/libm/src/m9x/fmaxf.c b/usr/src/libm/src/m9x/fmaxf.c
new file mode 100644
index 0000000..f5b570d
--- /dev/null
+++ b/usr/src/libm/src/m9x/fmaxf.c
@@ -0,0 +1,143 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"@(#)fmaxf.c	1.5	06/01/31 SMI"
+
+#if defined(ELFOBJ)
+#pragma weak fmaxf = __fmaxf
+#endif
+
+/*
+ * fmax(x,y) returns the larger of x and y.  If just one of the
+ * arguments is NaN, fmax returns the other argument.  If both
+ * arguments are NaN, fmax returns NaN (ideally, one of the
+ * argument NaNs).
+ *
+ * C99 does not require that fmax(-0,+0) = fmax(+0,-0) = +0, but
+ * ideally fmax should satisfy this.
+ *
+ * C99 makes no mention of exceptions for fmax.  I suppose ideally
+ * either fmax never raises any exceptions or else it raises the
+ * invalid operation exception if and only if some argument is a
+ * signaling NaN.  In the former case, fmax should always return
+ * one of its arguments.  In the latter, fmax shouldn't return a
+ * signaling NaN, although when both arguments are signaling NaNs,
+ * this ideal is at odds with the stipulation that fmax should
+ * always return one of its arguments.
+ *
+ * Commutativity of fmax follows from the properties listed above
+ * except when both arguments are NaN.  In that case, fmax may be
+ * declared commutative by fiat because there is no portable way
+ * to tell different NaNs apart.  Ideally fmax would be truly com-
+ * mutative for all arguments.
+ *
+ * On SPARC V8, fmax must involve tests and branches.  Ideally,
+ * an implementation on SPARC V9 should avoid branching, using
+ * conditional moves instead where necessary, and be as efficient
+ * as possible in its use of other resources.
+ *
+ * It appears to be impossible to attain all of the aforementioned
+ * ideals simultaneously.  The implementation below satisfies the
+ * following (on SPARC):
+ *
+ * 1. fmax(x,y) returns the larger of x and y if neither x nor y
+ *    is NaN and the non-NaN argument if just one of x or y is NaN.
+ *    If both x and y are NaN, fmax(x,y) returns x unchanged.
+ * 2. fmax(-0,+0) = fmax(+0,-0) = +0.
+ * 3. If either argument is a signaling NaN, fmax raises the invalid
+ *    operation exception.  Otherwise, it raises no exceptions.
+ */
+
+#include "libm.h"	/* for isgreaterequal macro */
+
+float
+__fmaxf(float x, float y) {
+	/*
+	 * A branch-free rendering for SPARC v8plus/v9 could look like this
+	 * (with x in %f0 and y in %f1; the result is left in %f0):
+	 *
+	 * fcmps	%fcc0,%f1,%f1
+	 * fmovsu	%fcc0,%f0,%f1
+	 * fcmps	%fcc0,%f0,%f1
+	 * fmovsul	%fcc0,%f1,%f0
+	 * st		%f0,[x]
+	 * st		%f1,[y]
+	 * ld		[x],%l0
+	 * ld		[y],%l1
+	 * and		%l0,%l1,%l2
+	 * sethi	%hi(0x80000000),%l3
+	 * andn		%l3,%l2,%l2
+	 * andn		%l0,%l2,%l0
+	 * st		%l0,[x]
+	 * ld		[x],%f0
+	 *
+	 * With VIS instructions the sign fix-up can stay in the FP registers:
+	 *
+	 * fcmps	%fcc0,%f1,%f1
+	 * fmovsu	%fcc0,%f0,%f1
+	 * fcmps	%fcc0,%f0,%f1
+	 * fmovsul	%fcc0,%f1,%f0
+	 * fands	%f0,%f1,%f2
+	 * fzeros	%f3
+	 * fnegs	%f3,%f3
+	 * fandnot2s %f3,%f2,%f2
+	 * fandnot2s %f0,%f2,%f0
+	 *
+	 * With VIS 3.0 instructions the whole routine shrinks to:
+	 *
+	 * flcmps	%fcc0,%f0,%f1
+	 * fmovslg	%fcc0,%f1,%f0	! move if %fcc0 is 1 or 2
+	 */
+
+	union {
+		unsigned bits;
+		float val;
+	} pick, other;
+
+	/*
+	 * a NaN in y yields to x, so only x can be a lone NaN below
+	 */
+	y = (y != y) ? x : y;
+
+	/* x gives way to y when it is smaller or the pair is unordered */
+#if defined(COMPARISON_MACRO_BUG)
+	if (x != x || x < y)
+#else
+	if (!isgreaterequal(x, y))
+#endif
+		x = y;
+
+	/*
+	 * x and y are now both NaN or both numbers; the result may keep
+	 * a set sign bit only when y's is set as well, so mask x's bits
+	 * with y's sign bit (this makes the max of -0 and +0 be +0)
+	 */
+	pick.val = x;
+	other.val = y;
+	pick.bits &= other.bits | ~0x80000000u;
+
+	return (pick.val);
+}
diff --git a/usr/src/libm/src/m9x/fmaxl.c b/usr/src/libm/src/m9x/fmaxl.c
new file mode 100644
index 0000000..68a236e
--- /dev/null
+++ b/usr/src/libm/src/m9x/fmaxl.c
@@ -0,0 +1,78 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"@(#)fmaxl.c	1.5	06/01/31 SMI"
+
+#if defined(ELFOBJ)
+#pragma weak fmaxl = __fmaxl
+#endif
+
+#include "libm.h"	/* for isgreaterequal macro */
+
+long double
+__fmaxl(long double x, long double y) {
+	union {
+#if defined(__sparc)
+		unsigned w[4];
+#elif defined(__i386)
+		unsigned w[3];
+#else
+#error Unknown architecture
+#endif
+		long double val;
+	} pick, other;
+
+	/* a NaN in y yields to x, so only x can be a lone NaN below */
+	y = (y != y) ? x : y;
+
+	/*
+	 * x gives way to y when it is smaller or the pair is unordered
+	 */
+#if defined(COMPARISON_MACRO_BUG)
+	if (x != x || x < y)
+#else
+	if (!isgreaterequal(x, y))
+#endif
+		x = y;
+
+	/*
+	 * x and y are now both NaN or both numbers; the result may keep
+	 * a set sign bit only when y's is set as well, so mask the word
+	 * of x that holds the sign with y's sign bit (this makes the max
+	 * of -0 and +0 be +0)
+	 */
+	pick.val = x;
+	other.val = y;
+#if defined(__sparc)
+	pick.w[0] &= other.w[0] | ~0x80000000u;
+#elif defined(__i386)
+	pick.w[2] &= other.w[2] | ~0x8000u;
+#else
+#error Unknown architecture
+#endif
+
+	return (pick.val);
+}
diff --git a/usr/src/libm/src/m9x/fmin.c b/usr/src/libm/src/m9x/fmin.c
new file mode 100644
index 0000000..24ad2b5
--- /dev/null
+++ b/usr/src/libm/src/m9x/fmin.c
@@ -0,0 +1,80 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"@(#)fmin.c	1.6	06/01/31 SMI"
+
+#if defined(ELFOBJ)
+#pragma weak fmin = __fmin
+#endif
+
+/*
+ * fmin(x,y) returns the smaller of x and y.  If just one of the
+ * arguments is NaN, fmin returns the other argument.  If both
+ * arguments are NaN, fmin returns NaN.
+ *
+ * See fmaxf.c for a discussion of implementation trade-offs.
+ */
+
+#include "libm.h"	/* for islessequal macro */
+
+double
+__fmin(double x, double y) {
+	union {
+		unsigned w[2];
+		double val;
+	} pick, other;
+
+	/* a NaN in y yields to x, so only x can be a lone NaN below */
+	y = (y != y) ? x : y;
+
+	/*
+	 * x gives way to y when it is larger or the pair is unordered
+	 */
+#if defined(COMPARISON_MACRO_BUG)
+	if (x != x || x > y)
+#else
+	if (!islessequal(x, y))
+#endif
+		x = y;
+
+	/*
+	 * x and y are now both NaN or both numbers; the result takes a
+	 * set sign bit from either operand, so OR y's sign bit into x's
+	 * high word (this makes the min of -0 and +0 be -0, whichever
+	 * way round they were passed)
+	 */
+	pick.val = x;
+	other.val = y;
+#if defined(__sparc)
+	pick.w[0] |= other.w[0] & 0x80000000;
+#elif defined(__i386)
+	pick.w[1] |= other.w[1] & 0x80000000;
+#else
+#error Unknown architecture
+#endif
+
+	return (pick.val);
+}
diff --git a/usr/src/libm/src/m9x/fminf.c b/usr/src/libm/src/m9x/fminf.c
new file mode 100644
index 0000000..8c1c7c8
--- /dev/null
+++ b/usr/src/libm/src/m9x/fminf.c
@@ -0,0 +1,102 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"@(#)fminf.c	1.5	06/01/31 SMI"
+
+#if defined(ELFOBJ)
+#pragma weak fminf = __fminf
+#endif
+
+#include "libm.h"	/* for islessequal macro */
+
+float
+__fminf(float x, float y) {
+	/*
+	 * A branch-free rendering for SPARC v8plus/v9 could look like this
+	 * (with x in %f0 and y in %f1; the result is left in %f0):
+	 *
+	 * fcmps	%fcc0,%f1,%f1
+	 * fmovsu	%fcc0,%f0,%f1
+	 * fcmps	%fcc0,%f0,%f1
+	 * fmovsug	%fcc0,%f1,%f0
+	 * st		%f0,[x]
+	 * st		%f1,[y]
+	 * ld		[x],%l0
+	 * ld		[y],%l1
+	 * or		%l0,%l1,%l2
+	 * sethi	%hi(0x80000000),%l3
+	 * and		%l3,%l2,%l2
+	 * or		%l0,%l2,%l0
+	 * st		%l0,[x]
+	 * ld		[x],%f0
+	 *
+	 * With VIS instructions the sign fix-up can stay in the FP registers:
+	 *
+	 * fcmps	%fcc0,%f1,%f1
+	 * fmovsu	%fcc0,%f0,%f1
+	 * fcmps	%fcc0,%f0,%f1
+	 * fmovsug	%fcc0,%f1,%f0
+	 * fors		%f0,%f1,%f2
+	 * fzeros	%f3
+	 * fnegs	%f3,%f3
+	 * fands	%f3,%f2,%f2
+	 * fors		%f0,%f2,%f0
+	 *
+	 * With VIS 3.0 instructions the whole routine shrinks to:
+	 *
+	 * flcmps	%fcc0,%f0,%f1
+	 * fmovsge	%fcc0,%f1,%f0	! move if %fcc0 is 0 or 2
+	 */
+
+	union {
+		unsigned bits;
+		float val;
+	} pick, other;
+
+	/*
+	 * a NaN in y yields to x, so only x can be a lone NaN below
+	 */
+	y = (y != y) ? x : y;
+
+	/* x gives way to y when it is larger or the pair is unordered */
+#if defined(COMPARISON_MACRO_BUG)
+	if (x != x || x > y)
+#else
+	if (!islessequal(x, y))
+#endif
+		x = y;
+
+	/*
+	 * x and y are now both NaN or both numbers; the result takes a
+	 * set sign bit from either operand, so OR y's sign bit into x's
+	 * bits (this makes the min of -0 and +0 be -0 in either order)
+	 */
+	pick.val = x;
+	other.val = y;
+	pick.bits |= other.bits & 0x80000000;
+
+	return (pick.val);
+}
diff --git a/usr/src/libm/src/m9x/fminl.c b/usr/src/libm/src/m9x/fminl.c
new file mode 100644
index 0000000..07944b6
--- /dev/null
+++ b/usr/src/libm/src/m9x/fminl.c
@@ -0,0 +1,78 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"@(#)fminl.c	1.5	06/01/31 SMI"
+
+#if defined(ELFOBJ)
+#pragma weak fminl = __fminl
+#endif
+
+#include "libm.h"	/* for islessequal macro */
+
+long double
+__fminl(long double x, long double y) {
+	union {
+#if defined(__sparc)
+		unsigned w[4];
+#elif defined(__i386)
+		unsigned w[3];
+#else
+#error Unknown architecture
+#endif
+		long double val;
+	} pick, other;
+
+	/* a NaN in y yields to x, so only x can be a lone NaN below */
+	y = (y != y) ? x : y;
+
+	/*
+	 * x gives way to y when it is larger or the pair is unordered
+	 */
+#if defined(COMPARISON_MACRO_BUG)
+	if (x != x || x > y)
+#else
+	if (!islessequal(x, y))
+#endif
+		x = y;
+
+	/*
+	 * x and y are now both NaN or both numbers; the result takes a
+	 * set sign bit from either operand, so OR y's sign bit into the
+	 * word of x that holds the sign (this makes the min of -0 and +0
+	 * be -0 in either order)
+	 */
+	pick.val = x;
+	other.val = y;
+#if defined(__sparc)
+	pick.w[0] |= other.w[0] & 0x80000000;
+#elif defined(__i386)
+	pick.w[2] |= other.w[2] & 0x8000;
+#else
+#error Unknown architecture
+#endif
+
+	return (pick.val);
+}
diff --git a/usr/src/libm/src/m9x/frexp.c b/usr/src/libm/src/m9x/frexp.c
new file mode 100644
index 0000000..a5f7ebc
--- /dev/null
+++ b/usr/src/libm/src/m9x/frexp.c
@@ -0,0 +1,102 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"@(#)frexp.c	1.5	06/01/31 SMI"
+
+#if defined(ELFOBJ)
+#pragma weak frexp = __frexp
+#endif
+
+/*
+ * frexp(x, exp) returns the normalized significand of x and sets
+ * *exp so that x = r*2^(*exp) where r is the return value.  If x
+ * is finite and nonzero, 1/2 <= |r| < 1.
+ *
+ * If x is zero, infinite or NaN, frexp returns x and sets *exp = 0.
+ * (The relevant standards do not specify *exp when x is infinite or
+ * NaN, but this code sets it anyway.)
+ *
+ * If x is a signaling NaN, this code returns x without attempting
+ * to raise the invalid operation exception.  If x is subnormal,
+ * this code treats it as nonzero regardless of nonstandard mode.
+ */
+
+#include "libm.h"
+
+double
+__frexp(double x, int *exp) {
+	union {
+		unsigned i[2];
+		double d;
+	} arg, cvt;
+	double mag;
+	unsigned top;
+	int adj = 0;
+
+	arg.d = x;
+	top = arg.i[HIWORD] & 0x7fffffff;
+
+	/* zero, infinity and NaN all come back untouched with *exp = 0 */
+	if (top >= 0x7ff00000 || (top | arg.i[LOWORD]) == 0) {
+		*exp = 0;
+		return (x);
+	}
+
+	if (top < 0x00100000) {
+		/*
+		 * x is subnormal: normalize it by regarding its fraction
+		 * bits as an integer.  Planting those bits under the
+		 * exponent of 2^52 and subtracting 2^52 converts the
+		 * integer with 32-bit stores and one exact subtraction,
+		 * so no 64-bit integer arithmetic has to be trapped or
+		 * emulated.  Where 64-bit arithmetic is available (e.g.,
+		 * in SPARC V9), this would do instead:
+		 *
+		 *  long lx = ((long) top << 32) | arg.i[LOWORD];
+		 *  arg.d = ((int) arg.i[HIWORD] < 0)? -lx : lx;
+		 *
+		 * If subnormal arithmetic doesn't trap, just multiply x by
+		 * a power of two.
+		 */
+		cvt.i[HIWORD] = 0x43300000 | top;
+		cvt.i[LOWORD] = arg.i[LOWORD];
+		mag = cvt.d;
+		cvt.i[HIWORD] = 0x43300000;
+		cvt.i[LOWORD] = 0;
+		mag -= cvt.d;		/* |x| * 2^1074, exactly */
+		/* put the sign of x back */
+		if (arg.i[HIWORD] & 0x80000000)
+			mag = -mag;
+		arg.d = mag;
+		top = arg.i[HIWORD] & 0x7fffffff;
+		adj = -1074;
+	}
+
+	/* arg.d is normal here; give it the exponent field of 0.5 */
+	arg.i[HIWORD] = (arg.i[HIWORD] & ~0x7ff00000) | 0x3fe00000;
+	*exp = adj + (top >> 20) - 0x3fe;
+	return (arg.d);
+}
diff --git a/usr/src/libm/src/m9x/frexpf.c b/usr/src/libm/src/m9x/frexpf.c
new file mode 100644
index 0000000..f137adc
--- /dev/null
+++ b/usr/src/libm/src/m9x/frexpf.c
@@ -0,0 +1,69 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"@(#)frexpf.c	1.4	06/01/31 SMI"
+
+#if defined(ELFOBJ)
+#pragma weak frexpf = __frexpf
+#endif
+
+#include "libm.h"
+
+/*
+ * Single precision frexp.
+ *
+ * Returns r such that x == r * 2^(*exp).  For finite nonzero x, r has
+ * the sign of x and 1/2 <= |r| < 1.  Zero, infinity and NaN are
+ * returned unchanged with *exp set to 0.
+ */
+float
+__frexpf(float x, int *exp)
+{
+	union {
+		unsigned i;
+		float f;
+	} w;
+	unsigned mag;
+	int bias = 0;
+
+	w.f = x;
+	mag = w.i & ~0x80000000;
+
+	/* infinity, NaN and zero need no work */
+	if (mag >= 0x7f800000 || mag == 0) {
+		*exp = 0;
+		return (x);
+	}
+
+	if (mag < 0x00800000) {
+		/*
+		 * Subnormal: the fraction bits are an integer m with
+		 * |x| = m * 2^-149.  Converting that integer to float
+		 * produces a normal number holding the same digits.
+		 */
+		int m = (int) mag;
+
+		if ((int) w.i < 0)
+			m = -m;
+		w.f = m;
+		mag = w.i & ~0x80000000;
+		bias = -149;
+	}
+
+	/* w.f is normal: report its exponent relative to 1/2 ... */
+	*exp = bias + (mag >> 23) - 0x7e;
+
+	/* ... and force the exponent field to that of 1/2 */
+	w.i = (w.i & ~0x7f800000) | 0x3f000000;
+	return (w.f);
+}
diff --git a/usr/src/libm/src/m9x/frexpl.c b/usr/src/libm/src/m9x/frexpl.c
new file mode 100644
index 0000000..a0dd032
--- /dev/null
+++ b/usr/src/libm/src/m9x/frexpl.c
@@ -0,0 +1,126 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"@(#)frexpl.c	1.6	06/01/31 SMI"
+
+#if defined(ELFOBJ)
+#pragma weak frexpl = __frexpl
+#endif
+
+#include "libm.h"
+
+#if defined(__sparc)
+
+/*
+ * frexpl for the SPARC long double format, viewed here as four 32-bit
+ * words with the sign bit and exponent field in the upper half of i[0].
+ *
+ * Returns x with its exponent field replaced by 0x3ffe and stores in
+ * *exp the power of two that was factored out.  Zero, infinite and NaN
+ * arguments are returned unchanged with *exp set to 0.
+ */
+long double
+__frexpl(long double x, int *exp) {
+	union {
+		unsigned i[4];
+		long double q;
+	} xx;
+	unsigned hx;	/* i[0] without the sign bit */
+	int e, s;	/* e: exponent correction; s: saved sign bit */
+
+	xx.q = x;
+	hx = xx.i[0] & ~0x80000000;
+
+	if (hx >= 0x7fff0000) {	/* x is infinite or NaN */
+		*exp = 0;
+		return (x);
+	}
+
+	e = 0;
+	if (hx < 0x00010000) {	/* x is subnormal or zero */
+		if ((hx | xx.i[1] | xx.i[2] | xx.i[3]) == 0) {
+			*exp = 0;
+			return (x);
+		}
+
+		/* normalize x */
+		s = xx.i[0] & 0x80000000;
+		/*
+		 * While the leading 32 fraction bits are all zero, move the
+		 * fraction up one whole word; hx stays below 0x10000 because
+		 * the upper half of the old i[1] was zero.
+		 */
+		while ((hx | (xx.i[1] & 0xffff0000)) == 0) {
+			hx = xx.i[1];
+			xx.i[1] = xx.i[2];
+			xx.i[2] = xx.i[3];
+			xx.i[3] = 0;
+			e -= 32;
+		}
+		/*
+		 * Then shift one bit at a time until the leading one lands
+		 * in bit 16, the lowest bit of the exponent field.
+		 */
+		while (hx < 0x10000) {
+			hx = (hx << 1) | (xx.i[1] >> 31);
+			xx.i[1] = (xx.i[1] << 1) | (xx.i[2] >> 31);
+			xx.i[2] = (xx.i[2] << 1) | (xx.i[3] >> 31);
+			xx.i[3] <<= 1;
+			e--;
+		}
+		/* hx >> 16 is now 1; e holds minus the total shift count */
+		xx.i[0] = s | hx;
+	}
+
+	/* now xx.q is normal */
+	/* keep sign and fraction, overwrite the exponent field with 0x3ffe */
+	xx.i[0] = (xx.i[0] & ~0x7fff0000) | 0x3ffe0000;
+	*exp = e + (hx >> 16) - 0x3ffe;
+	return (xx.q);
+}
+
+#elif defined(__i386)
+
+/*
+ * frexpl for the x86 extended format: i[2] holds the sign and the
+ * 15-bit exponent in its low 16 bits, i[1] and i[0] the significand.
+ *
+ * Returns x with its exponent replaced by 0x3ffe and stores in *exp
+ * the power of two that was factored out.  Zero, infinite and NaN
+ * arguments are returned unchanged with *exp set to 0.
+ */
+long double
+__frexpl(long double x, int *exp)
+{
+	union {
+		unsigned i[3];
+		long double e;
+	} w;
+	unsigned bexp;
+	int shift = 0;
+
+	w.e = x;
+	bexp = w.i[2] & 0x7fff;
+
+	/* infinity or NaN */
+	if (bexp >= 0x7fff) {
+		*exp = 0;
+		return (x);
+	}
+
+	if (bexp < 0x0001) {
+		/* zero exponent field: either a true zero or a subnormal */
+		if ((w.i[0] | w.i[1]) == 0) {
+			*exp = 0;
+			return (x);
+		}
+
+		/* scale by 2^64 so the exponent field becomes nonzero */
+		w.e *= 18446744073709551616.0L;
+		bexp = w.i[2] & 0x7fff;
+		shift = -64;
+	}
+
+	/* w.e is normal: report its exponent relative to 1/2 ... */
+	*exp = shift + bexp - 0x3ffe;
+
+	/* ... then keep only the sign and install the exponent of 1/2 */
+	w.i[2] = (w.i[2] & 0x8000) | 0x3ffe;
+	return (w.e);
+}
+
+#else
+#error Unknown architecture
+#endif
diff --git a/usr/src/libm/src/m9x/ldexp.c b/usr/src/libm/src/m9x/ldexp.c
new file mode 100644
index 0000000..60fb5b6
--- /dev/null
+++ b/usr/src/libm/src/m9x/ldexp.c
@@ -0,0 +1,56 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"@(#)ldexp.c	1.13	06/01/31 SMI"
+
+#if defined(ELFOBJ)
+#pragma weak ldexp = __ldexp
+#endif
+
+#include "libm.h"
+#include <errno.h>
+
+/*
+ * ldexp(x, n) returns x * 2^n as computed by scalbn.
+ *
+ * A zero, infinite or NaN argument is returned as x + x without calling
+ * scalbn and without touching errno.  (When
+ * FPADD_TRAPS_INCOMPLETE_ON_NAN is defined, an argument whose high word
+ * is at least 0x7ff80000 is returned as is instead.)  Otherwise errno
+ * is set to ERANGE when the scaled result is infinite, NaN or zero.
+ *
+ * The bits of x are inspected through a union, like the other routines
+ * in this directory, rather than through an (int *) cast of &x: reading
+ * a double through an int lvalue violates the effective type rule, and
+ * an optimizer may then assume the int reads are unrelated to the
+ * stores to x.
+ */
+double
+ldexp(double x, int n)
+{
+	union {
+		unsigned i[2];
+		double d;
+	} xx;
+	unsigned ix;
+
+	xx.d = x;
+	ix = xx.i[HIWORD] & ~0x80000000;
+
+	if (ix >= 0x7ff00000 || (xx.i[LOWORD] | ix) == 0)
+#if defined(FPADD_TRAPS_INCOMPLETE_ON_NAN)
+		return (ix >= 0x7ff80000 ? x : x + x);
+		/* assumes sparc-like QNaN */
+#else
+		return (x + x);
+#endif
+
+	xx.d = scalbn(x, n);
+	ix = xx.i[HIWORD] & ~0x80000000;
+
+	/*
+	 * SVID3 requires both overflow and underflow cases to set errno
+	 * XPG3/XPG4/XPG4.2/SUSv2 requires overflow to set errno
+	 */
+	if (ix >= 0x7ff00000 || (xx.i[LOWORD] | ix) == 0)
+		errno = ERANGE;
+	return (xx.d);
+}
diff --git a/usr/src/libm/src/m9x/ldexpf.c b/usr/src/libm/src/m9x/ldexpf.c
new file mode 100644
index 0000000..881874c
--- /dev/null
+++ b/usr/src/libm/src/m9x/ldexpf.c
@@ -0,0 +1,38 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"@(#)ldexpf.c	1.3	06/01/31 SMI"
+
+#if defined(ELFOBJ)
+#pragma weak ldexpf = __ldexpf
+#endif
+
+#include "libm.h"
+
+/*
+ * ldexpf(x, n) returns x * 2^n.  It simply forwards to scalbnf and,
+ * unlike ldexp, does not examine the result or set errno itself.
+ */
+float
+ldexpf(float x, int n)
+{
+	return (scalbnf(x, n));
+}
diff --git a/usr/src/libm/src/m9x/ldexpl.c b/usr/src/libm/src/m9x/ldexpl.c
new file mode 100644
index 0000000..e40ce1f
--- /dev/null
+++ b/usr/src/libm/src/m9x/ldexpl.c
@@ -0,0 +1,38 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"@(#)ldexpl.c	1.3	06/01/31 SMI"
+
+#if defined(ELFOBJ)
+#pragma weak ldexpl = __ldexpl
+#endif
+
+#include "libm.h"
+
+/*
+ * ldexpl(x, n) returns x * 2^n.  It simply forwards to scalbnl and,
+ * unlike ldexp, does not examine the result or set errno itself.
+ */
+long double
+ldexpl(long double x, int n)
+{
+	return (scalbnl(x, n));
+}
diff --git a/usr/src/libm/src/m9x/llrint.c b/usr/src/libm/src/m9x/llrint.c
new file mode 100644
index 0000000..8d3e79b
--- /dev/null
+++ b/usr/src/libm/src/m9x/llrint.c
@@ -0,0 +1,80 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"@(#)llrint.c	1.5	06/01/31 SMI"
+
+#if defined(ELFOBJ)
+#pragma weak llrint = __llrint
+#if defined(__sparcv9) || defined(__amd64)
+#pragma weak lrint = __llrint
+#pragma weak __lrint = __llrint
+#endif
+#endif
+
+/*
+ * llrint(x) rounds its argument to the nearest integer according
+ * to the current rounding direction and converts the result to a
+ * 64 bit signed integer.
+ *
+ * If x is NaN, infinite, or so large that the nearest integer would
+ * exceed 64 bits, the invalid operation exception is raised.  If x
+ * is not an integer, the inexact exception is raised.
+ */
+
+#include "libm.h"
+
+/*
+ * Round x to an integer in the current rounding direction and convert
+ * the result to long long.
+ */
+long long
+llrint(double x)
+{
+	union {
+		unsigned i[2];
+		double d;
+	} arg, magic;
+	unsigned sign_bit;
+
+	arg.d = x;
+	sign_bit = arg.i[HIWORD] & 0x80000000;
+
+	if ((arg.i[HIWORD] & ~0x80000000) >= 0x43300000) {
+		/* |x| >= 2^52, infinite or NaN: no fraction bits to round */
+		return ((long long) x);
+	}
+
+	/*
+	 * Adding and then subtracting a power of two with the sign of x
+	 * rounds x to an integer.  2^52 is used where arithmetic is done
+	 * in double precision, 2^63 on i386.
+	 *
+	 * Note: this works on x86 (in the default rounding precision
+	 * mode), but one should just use the fistpll instruction there.
+	 */
+#if defined(__sparc) || defined(__amd64)
+	magic.i[HIWORD] = sign_bit | 0x43300000;
+#elif defined(__i386)	/* !defined(__amd64) */
+	magic.i[HIWORD] = sign_bit | 0x43e00000;
+#else
+#error Unknown architecture
+#endif
+	magic.i[LOWORD] = 0;
+	x = (x + magic.d) - magic.d;
+
+	/* x is integral now */
+	return ((long long) x);
+}
diff --git a/usr/src/libm/src/m9x/llrintf.c b/usr/src/libm/src/m9x/llrintf.c
new file mode 100644
index 0000000..70287f3
--- /dev/null
+++ b/usr/src/libm/src/m9x/llrintf.c
@@ -0,0 +1,76 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"@(#)llrintf.c	1.5	06/01/31 SMI"
+
+#if defined(ELFOBJ)
+#pragma weak llrintf = __llrintf
+#if defined(__sparcv9) || defined(__amd64)
+#pragma weak lrintf = __llrintf
+#pragma weak __lrintf = __llrintf
+#endif
+#endif
+
+#include "libm.h"
+
+/*
+ * Round x to an integer in the current rounding direction and convert
+ * the result to long long.
+ */
+long long
+llrintf(float x)
+{
+	union {
+		unsigned i;
+		float f;
+	} arg, magic;
+
+	arg.f = x;
+
+	if ((arg.i & ~0x80000000) >= 0x4b000000) {
+		/* |x| >= 2^23, infinite or NaN: no fraction bits to round */
+		return ((long long) x);
+	}
+
+	/*
+	 * Adding and then subtracting a power of two with the sign of x
+	 * rounds x to an integer.
+	 *
+	 * Note: this works on x86 (in the default rounding precision
+	 * mode), but one should just use the fistpll instruction there.
+	 */
+#if defined(__sparc) || defined(__amd64)
+	magic.i = (arg.i & 0x80000000) | 0x4b000000;	/* 2^23 */
+#elif defined(__i386)
+	/* assume 64-bit precision */
+	magic.i = (arg.i & 0x80000000) | 0x5f000000;	/* 2^63 */
+#else
+#error Unknown architecture
+#endif
+	x = (x + magic.f) - magic.f;
+
+	/*
+	 * |x| is below 2^23 here, so converting through long suffices;
+	 * on LP32 architectures this is a 32-bit conversion followed by
+	 * sign extension.
+	 */
+	return ((long) x);
+}
diff --git a/usr/src/libm/src/m9x/llrintl.c b/usr/src/libm/src/m9x/llrintl.c
new file mode 100644
index 0000000..5d1f2b2
--- /dev/null
+++ b/usr/src/libm/src/m9x/llrintl.c
@@ -0,0 +1,173 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"@(#)llrintl.c	1.6	06/01/31 SMI"
+
+#if defined(ELFOBJ)
+#pragma weak llrintl = __llrintl
+#if defined(__sparcv9) || defined(__amd64)
+#pragma weak lrintl = __llrintl
+#pragma weak __lrintl = __llrintl
+#endif
+#endif
+
+#include "libm.h"
+
+#if defined(__sparc)
+
+#include "fma.h"
+
+/*
+ * llrintl for the SPARC long double format, viewed here as four 32-bit
+ * words with the sign bit and exponent field in the upper half of i[0].
+ *
+ * The argument is rounded to an integer using the rounding direction
+ * read from the floating point status register, and the result is
+ * assembled in integer registers as a two-word long long (i[0] is the
+ * more significant word).  When the argument is NaN or the result does
+ * not fit, the function returns the conversion of a float of magnitude
+ * 2^127 with the sign of x -- presumably so that the conversion itself
+ * produces the out-of-range result and exception; confirm against the
+ * target's float to long long conversion.
+ */
+long long
+llrintl(long double x) {
+	union {
+		unsigned i[4];
+		long double q;
+	} xx;
+	union {
+		unsigned i[2];
+		long long l;
+	} zz;
+	union {
+		unsigned i;
+		float f;
+	} tt;
+	unsigned int hx, sx, frac, fsr;
+	int rm, j;
+	volatile float dummy;	/* volatile so the inexact add is not elided */
+
+	xx.q = x;
+	sx = xx.i[0] & 0x80000000;
+	hx = xx.i[0] & ~0x80000000;
+
+	/* handle trivial cases */
+	if (hx > 0x403e0000) { /* |x| > 2^63 + ... or x is nan */
+		/* convert an out-of-range float */
+		tt.i = sx | 0x7f000000;
+		return ((long long) tt.f);
+	} else if ((hx | xx.i[1] | xx.i[2] | xx.i[3]) == 0) /* x is zero */
+		return (0LL);
+
+	/* get the rounding mode */
+	__fenv_getfsr(&fsr);
+	rm = fsr >> 30;	/* top two bits of the status word */
+
+	/* flip the sense of directed roundings if x is negative */
+	/* (this exchanges the values 2 and 3 and leaves 0 and 1 alone) */
+	if (sx)
+		rm ^= rm >> 1;
+
+	/* handle |x| < 1 */
+	if (hx < 0x3fff0000) {
+		dummy = 1.0e30f; /* x is nonzero, so raise inexact */
+		dummy += 1.0e-30f;
+		/*
+		 * The magnitude rounds up to 1 in mode FSR_RP, or in mode
+		 * FSR_RN when |x| is strictly greater than 1/2.
+		 */
+		if (rm == FSR_RP || (rm == FSR_RN && (hx >= 0x3ffe0000 &&
+			((hx & 0xffff) | xx.i[1] | xx.i[2] | xx.i[3]))))
+			return (sx ? -1LL : 1LL);
+		return (0LL);
+	}
+
+	/* extract the integer and fractional parts of x */
+	/*
+	 * j is the number of significand bits that lie below the binary
+	 * point (49 <= j <= 112 here).  zz receives the integer part;
+	 * frac receives the 32 bits just below the binary point, with bit
+	 * 0 forced on when any lower bit is set.  Shifts of the form
+	 * (w << 1) << k are split in two so that the count never reaches
+	 * 32.
+	 */
+	j = 0x406f - (hx >> 16);
+	xx.i[0] = 0x10000 | (xx.i[0] & 0xffff);	/* insert the implicit bit */
+	if (j >= 96) {
+		zz.i[0] = 0;
+		zz.i[1] = xx.i[0] >> (j - 96);
+		frac = ((xx.i[0] << 1) << (127 - j)) | (xx.i[1] >> (j - 96));
+		if (((xx.i[1] << 1) << (127 - j)) | xx.i[2] | xx.i[3])
+			frac |= 1;
+	} else if (j >= 64) {
+		zz.i[0] = xx.i[0] >> (j - 64);
+		zz.i[1] = ((xx.i[0] << 1) << (95 - j)) | (xx.i[1] >> (j - 64));
+		frac = ((xx.i[1] << 1) << (95 - j)) | (xx.i[2] >> (j - 64));
+		if (((xx.i[2] << 1) << (95 - j)) | xx.i[3])
+			frac |= 1;
+	} else {
+		zz.i[0] = ((xx.i[0] << 1) << (63 - j)) | (xx.i[1] >> (j - 32));
+		zz.i[1] = ((xx.i[1] << 1) << (63 - j)) | (xx.i[2] >> (j - 32));
+		frac = ((xx.i[2] << 1) << (63 - j)) | (xx.i[3] >> (j - 32));
+		if ((xx.i[3] << 1) << (63 - j))
+			frac |= 1;
+	}
+
+	/* round */
+	/*
+	 * Increment the magnitude in mode FSR_RP whenever a fraction is
+	 * present, and in mode FSR_RN when the fraction exceeds one half
+	 * or equals one half with an odd integer part.
+	 */
+	if (frac && (rm == FSR_RP || (rm == FSR_RN && (frac > 0x80000000u ||
+		(frac == 0x80000000 && (zz.i[1] & 1)))))) {
+		if (++zz.i[1] == 0)
+			zz.i[0]++;
+	}
+
+	/* check for result out of range (note that z is |x| at this point) */
+	/* a magnitude of exactly 2^63 is accepted only for negative x */
+	if (zz.i[0] > 0x80000000u || (zz.i[0] == 0x80000000 && (zz.i[1] ||
+		!sx))) {
+		tt.i = sx | 0x7f000000;
+		return ((long long) tt.f);
+	}
+
+	/* raise inexact if need be */
+	if (frac) {
+		dummy = 1.0e30F;
+		dummy += 1.0e-30F;
+	}
+
+	/* negate result if need be */
+	/* (two-word two's complement: invert the high word, negate the low) */
+	if (sx) {
+		zz.i[0] = ~zz.i[0];
+		zz.i[1] = -zz.i[1];
+		if (zz.i[1] == 0)
+			zz.i[0]++;
+	}
+	return (zz.l);
+}
+#elif defined(__i386) || defined(__amd64)
+/*
+ * llrintl for the x86 extended format: i[2] holds the sign and the
+ * 15-bit exponent in its low 16 bits, i[1] and i[0] the significand.
+ *
+ * Rounds x to an integer in the current rounding direction and
+ * converts the result to long long.
+ */
+long long
+llrintl(long double x)
+{
+	union {
+		unsigned i[3];
+		long double e;
+	} arg, magic;
+
+	arg.e = x;
+
+	if ((int)(arg.i[2] & 0x7fff) >= 0x403e) {
+		/* |x| >= 2^63, infinite or NaN: nothing to round */
+		return ((long long) x);
+	}
+
+	/*
+	 * Adding and then subtracting 2^63 with the sign of x rounds x
+	 * to an integer.
+	 *
+	 * Note: this works on x86 (in the default rounding precision
+	 * mode), but one ought to just use the fistpll instruction.
+	 */
+	magic.i[0] = 0;
+	magic.i[1] = 0x80000000;
+	magic.i[2] = (arg.i[2] & 0x8000) | 0x403e;
+	x = (x + magic.e) - magic.e;
+
+	/* x is integral now */
+	return ((long long) x);
+}
+#else
+#error Unknown architecture
+#endif
diff --git a/usr/src/libm/src/m9x/llround.c b/usr/src/libm/src/m9x/llround.c
new file mode 100644
index 0000000..b65a9e1
--- /dev/null
+++ b/usr/src/libm/src/m9x/llround.c
@@ -0,0 +1,84 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"@(#)llround.c	1.5	06/01/31 SMI"
+
+#if defined(ELFOBJ)
+#pragma weak llround = __llround
+#if defined(__sparcv9) || defined(__amd64)
+#pragma weak lround = __llround
+#pragma weak __lround = __llround
+#endif
+#endif
+
+/*
+ * llround(x) rounds its argument to the nearest integer, rounding
+ * ties away from zero, and converts the result to a 64 bit signed
+ * integer.
+ *
+ * If x is NaN, infinite, or so large that the nearest integer
+ * would exceed 64 bits, the invalid operation exception is raised.
+ */
+
+#include "libm.h"
+
+/*
+ * Round x to the nearest integer, ties away from zero, and convert the
+ * result to long long.  The rounding is done on the bit pattern of x:
+ * one half is added at the proper bit position and every bit below the
+ * integer bit is cleared, so no floating point arithmetic is performed
+ * before the final conversion.
+ *
+ * The rounding bit is built with 1U rather than 1: for
+ * 2^20 <= |x| < 2^21 the shift count is 31, and shifting the signed
+ * constant 1 left by 31 is undefined behavior.
+ */
+long long
+llround(double x)
+{
+	union {
+		unsigned i[2];
+		double d;
+	} xx;
+	unsigned hx, sx, half;
+
+	xx.d = x;
+	hx = xx.i[HIWORD] & ~0x80000000;
+	sx = xx.i[HIWORD] & 0x80000000;
+
+	if (hx < 0x43300000) { /* |x| < 2^52 */
+		/* handle |x| < 1: 1/2 and above rounds to 1, the rest to 0 */
+		if (hx < 0x3ff00000) {
+			if (hx >= 0x3fe00000)
+				return (sx ? -1LL : 1LL);
+			return (0LL);
+		}
+
+		/* round x at the integer bit */
+		if (hx < 0x41300000) {
+			/* |x| < 2^20: the one-half bit is in the high word */
+			half = 1U << (0x412 - (hx >> 20));
+			xx.i[HIWORD] = (xx.i[HIWORD] + half) &
+			    ~(half | (half - 1));
+			xx.i[LOWORD] = 0;
+		} else {
+			/* the one-half bit is in the low word */
+			half = 1U << (0x432 - (hx >> 20));
+			xx.i[LOWORD] += half;
+			if (xx.i[LOWORD] < half)
+				xx.i[HIWORD]++;	/* carry into high word */
+			xx.i[LOWORD] &= ~(half | (half - 1));
+		}
+	}
+
+	/* now x is nan, inf, or integral */
+	return ((long long) xx.d);
+}
diff --git a/usr/src/libm/src/m9x/llroundf.c b/usr/src/libm/src/m9x/llroundf.c
new file mode 100644
index 0000000..7bdc977
--- /dev/null
+++ b/usr/src/libm/src/m9x/llroundf.c
@@ -0,0 +1,72 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"@(#)llroundf.c	1.5	06/01/31 SMI"
+
+#if defined(ELFOBJ)
+#pragma weak llroundf = __llroundf
+#if defined(__sparcv9) || defined(__amd64)
+#pragma weak lroundf = __llroundf
+#pragma weak __lroundf = __llroundf
+#endif
+#endif
+
+#include "libm.h"
+
+/*
+ * Round x to the nearest integer, ties away from zero, and convert the
+ * result to long long.  The rounding is done on the bit pattern of x.
+ */
+long long
+llroundf(float x)
+{
+	union {
+		unsigned i;
+		float f;
+	} w;
+	unsigned mag, negative, half;
+
+	w.f = x;
+	negative = w.i & 0x80000000;
+	mag = w.i & ~0x80000000;
+
+	if (mag >= 0x4b000000) {
+		/* |x| >= 2^23, infinite or NaN: nothing to round */
+		return ((long long) x);
+	}
+
+	if (mag < 0x3f800000) {
+		/* |x| < 1: 1/2 and above rounds to 1, the rest to 0 */
+		if (mag < 0x3f000000)
+			return (0LL);
+		return (negative ? -1LL : 1LL);
+	}
+
+	/* add one half in place, then clear every bit below the integer bit */
+	half = 1 << (0x95 - (mag >> 23));
+	w.i = (w.i + half) & ~((half << 1) - 1);
+
+	/*
+	 * |x| is below 2^23 here, so converting through long suffices;
+	 * on LP32 architectures this is a 32-bit conversion followed by
+	 * sign extension.
+	 */
+	return ((long) w.f);
+}
diff --git a/usr/src/libm/src/m9x/llroundl.c b/usr/src/libm/src/m9x/llroundl.c
new file mode 100644
index 0000000..0c96cdf
--- /dev/null
+++ b/usr/src/libm/src/m9x/llroundl.c
@@ -0,0 +1,165 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"@(#)llroundl.c	1.6	06/01/31 SMI"
+
+#if defined(ELFOBJ)
+#pragma weak llroundl = __llroundl
+#if defined(__sparcv9) || defined(__amd64)
+#pragma weak lroundl = __llroundl
+#pragma weak __lroundl = __llroundl
+#endif
+#endif
+
+#include "libm.h"
+
+#if defined(__sparc)
+/*
+ * llroundl for the SPARC long double format, viewed here as four 32-bit
+ * words with the sign bit and exponent field in the upper half of i[0].
+ *
+ * The argument is rounded to the nearest integer, with ties rounded
+ * away from zero, and the result is assembled in integer registers as a
+ * two-word long long (i[0] is the more significant word).  When the
+ * argument is NaN or the result does not fit, the function returns the
+ * conversion of a float of magnitude 2^127 with the sign of x --
+ * presumably so that the conversion itself produces the out-of-range
+ * result and exception; confirm against the target's float to long long
+ * conversion.
+ */
+long long
+llroundl(long double x) {
+	union {
+		unsigned i[4];
+		long double q;
+	} xx;
+	union {
+		unsigned i[2];
+		long long l;
+	} zz;
+	union {
+		unsigned i;
+		float f;
+	} tt;
+	unsigned hx, sx, frac;
+	int j;
+
+	xx.q = x;
+	sx = xx.i[0] & 0x80000000;
+	hx = xx.i[0] & ~0x80000000;
+
+	/* handle trivial cases */
+	if (hx > 0x403e0000) { /* |x| > 2^63 + ... or x is nan */
+		/* convert an out-of-range float */
+		tt.i = sx | 0x7f000000;
+		return ((long long) tt.f);
+	}
+
+	/* handle |x| < 1 */
+	/* (magnitudes of 1/2 and above round to 1, the rest to 0) */
+	if (hx < 0x3fff0000) {
+		if (hx >= 0x3ffe0000)
+			return (sx ? -1LL : 1LL);
+		return (0LL);
+	}
+
+	/* extract the integer and fractional parts of x */
+	/*
+	 * j is the number of significand bits that lie below the binary
+	 * point (49 <= j <= 112 here).  zz receives the integer part;
+	 * frac receives the 32 bits just below the binary point, with bit
+	 * 0 forced on when any lower bit is set.  Shifts of the form
+	 * (w << 1) << k are split in two so that the count never reaches
+	 * 32.
+	 */
+	j = 0x406f - (hx >> 16);
+	xx.i[0] = 0x10000 | (xx.i[0] & 0xffff);	/* insert the implicit bit */
+	if (j >= 96) {
+		zz.i[0] = 0;
+		zz.i[1] = xx.i[0] >> (j - 96);
+		frac = ((xx.i[0] << 1) << (127 - j)) | (xx.i[1] >> (j - 96));
+		if (((xx.i[1] << 1) << (127 - j)) | xx.i[2] | xx.i[3])
+			frac |= 1;
+	} else if (j >= 64) {
+		zz.i[0] = xx.i[0] >> (j - 64);
+		zz.i[1] = ((xx.i[0] << 1) << (95 - j)) | (xx.i[1] >> (j - 64));
+		frac = ((xx.i[1] << 1) << (95 - j)) | (xx.i[2] >> (j - 64));
+		if (((xx.i[2] << 1) << (95 - j)) | xx.i[3])
+			frac |= 1;
+	} else {
+		zz.i[0] = ((xx.i[0] << 1) << (63 - j)) | (xx.i[1] >> (j - 32));
+		zz.i[1] = ((xx.i[1] << 1) << (63 - j)) | (xx.i[2] >> (j - 32));
+		frac = ((xx.i[2] << 1) << (63 - j)) | (xx.i[3] >> (j - 32));
+		if ((xx.i[3] << 1) << (63 - j))
+			frac |= 1;
+	}
+
+	/* round */
+	/* (a fraction of one half or more bumps the magnitude) */
+	if (frac >= 0x80000000u) {
+		if (++zz.i[1] == 0)
+			zz.i[0]++;
+	}
+
+	/* check for result out of range (note that z is |x| at this point) */
+	/* a magnitude of exactly 2^63 is accepted only for negative x */
+	if (zz.i[0] > 0x80000000u || (zz.i[0] == 0x80000000 && (zz.i[1] ||
+		!sx))) {
+		tt.i = sx | 0x7f000000;
+		return ((long long) tt.f);
+	}
+
+	/* negate result if need be */
+	/* (two-word two's complement: invert the high word, negate the low) */
+	if (sx) {
+		zz.i[0] = ~zz.i[0];
+		zz.i[1] = -zz.i[1];
+		if (zz.i[1] == 0)
+			zz.i[0]++;
+	}
+
+	return (zz.l);
+}
+#elif defined(__i386) || defined(__amd64)
+/*
+ * llroundl for the x86 extended format: i[2] holds the sign and the
+ * 15-bit exponent in its low 16 bits, i[1] the upper 32 significand
+ * bits (including the explicit integer bit) and i[0] the lower 32.
+ *
+ * Rounds x to the nearest integer, ties away from zero, and converts
+ * the result to long long.  The rounding is done on the bit pattern:
+ * one half is added at the proper bit position and every bit below
+ * the integer bit is cleared.
+ *
+ * The rounding bit is kept in an unsigned variable built from 1U.  For
+ * 2^31 <= |x| < 2^32 the shift count is 31; with a signed int that
+ * shift is undefined, and the "- 1" used to build the mask then
+ * overflows.
+ */
+long long
+llroundl(long double x)
+{
+	union {
+		unsigned i[3];
+		long double e;
+	} xx;
+	unsigned half;
+	int ex, sx;
+
+	xx.e = x;
+	ex = xx.i[2] & 0x7fff;
+	sx = xx.i[2] & 0x8000;
+
+	if (ex < 0x403e) { /* |x| < 2^63 */
+		/* handle |x| < 1: 1/2 and above rounds to 1, the rest to 0 */
+		if (ex < 0x3fff) {
+			if (ex >= 0x3ffe)
+				return (sx ? -1LL : 1LL);
+			return (0LL);
+		}
+
+		/* round x at the integer bit */
+		if (ex < 0x401e) {
+			/* |x| < 2^31: the one-half bit is in i[1] */
+			half = 1U << (0x401d - ex);
+			xx.i[1] = (xx.i[1] + half) & ~(half | (half - 1));
+			xx.i[0] = 0;
+		} else {
+			/* the one-half bit is in i[0] */
+			half = 1U << (0x403d - ex);
+			xx.i[0] += half;
+			if (xx.i[0] < half)
+				xx.i[1]++;	/* carry into the upper word */
+			xx.i[0] &= ~(half | (half - 1));
+		}
+
+		/*
+		 * If rounding carried out of the significand, i[1] wrapped
+		 * to zero: the value is the next power of two, so bump the
+		 * exponent and restore the explicit integer bit.
+		 */
+		if (xx.i[1] == 0) {
+			xx.i[2] = sx | ++ex;
+			xx.i[1] = 0x80000000U;
+		}
+	}
+
+	/* now x is nan, inf, or integral */
+	return ((long long) xx.e);
+}
+#else
+#error Unknown architecture
+#endif
diff --git a/usr/src/libm/src/m9x/lrint.c b/usr/src/libm/src/m9x/lrint.c
new file mode 100644
index 0000000..3a5792c
--- /dev/null
+++ b/usr/src/libm/src/m9x/lrint.c
@@ -0,0 +1,80 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"@(#)lrint.c	1.3	06/01/31 SMI"
+
+#if defined(ELFOBJ)
+#pragma weak lrint = __lrint
+#endif
+
+/*
+ * lrint(x) rounds its argument to the nearest integer according
+ * to the current rounding direction and converts the result to
+ * a 32 bit signed integer.
+ *
+ * If x is NaN, infinite, or so large that the nearest integer
+ * would exceed 32 bits, the invalid operation exception is raised.
+ * If x is not an integer, the inexact exception is raised.
+ */
+
+#include <sys/isa_defs.h>	/* _ILP32 */
+#include "libm.h"
+
+#if defined(_ILP32)
+/*
+ * ILP32 lrint: round x to an integer in the current rounding direction
+ * and convert the result to long.
+ */
+long
+lrint(double x)
+{
+	union {
+		unsigned i[2];
+		double d;
+	} arg, magic;
+	unsigned sign_bit;
+
+	arg.d = x;
+	sign_bit = arg.i[HIWORD] & 0x80000000;
+
+	if ((arg.i[HIWORD] & ~0x80000000) >= 0x43300000) {
+		/* |x| >= 2^52, infinite or NaN: no fraction bits to round */
+		return ((long) x);
+	}
+
+	/*
+	 * Adding and then subtracting a power of two with the sign of x
+	 * rounds x to an integer: 2^52 on sparc, 2^63 on i386.
+	 *
+	 * Note: this works on x86 (in the default rounding precision
+	 * mode), but one should just use the fistpl instruction there.
+	 */
+#if defined(__sparc)
+	magic.i[HIWORD] = sign_bit | 0x43300000;
+#elif defined(__i386)
+	magic.i[HIWORD] = sign_bit | 0x43e00000;
+#else
+#error Unknown architecture
+#endif
+	magic.i[LOWORD] = 0;
+	x = (x + magic.d) - magic.d;
+
+	/* x is integral now */
+	return ((long) x);
+}
+#else
+#error Unsupported architecture
+#endif	/* defined(_ILP32) */
diff --git a/usr/src/libm/src/m9x/lrintf.c b/usr/src/libm/src/m9x/lrintf.c
new file mode 100644
index 0000000..92bb664
--- /dev/null
+++ b/usr/src/libm/src/m9x/lrintf.c
@@ -0,0 +1,71 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"@(#)lrintf.c	1.3	06/01/31 SMI"
+
+#if defined(ELFOBJ)
+#pragma weak lrintf = __lrintf
+#endif
+
+#include <sys/isa_defs.h>	/* _ILP32 */
+#include "libm.h"
+
+#if defined(_ILP32)
+/*
+ * ILP32 lrintf: round x to an integer in the current rounding
+ * direction and convert the result to long.
+ */
+long
+lrintf(float x)
+{
+	union {
+		unsigned i;
+		float f;
+	} arg, magic;
+
+	arg.f = x;
+
+	/* at 2^23 and above (or inf/NaN) there are no fraction bits to round */
+	if ((arg.i & ~0x80000000) < 0x4b000000) {
+		/*
+		 * Adding and then subtracting a power of two with the sign
+		 * of x rounds x to an integer.
+		 *
+		 * Note: this works on x86 (in the default rounding precision
+		 * mode), but one should just use the fistpl instruction.
+		 */
+#if defined(__sparc)
+		magic.i = (arg.i & 0x80000000) | 0x4b000000;	/* 2^23 */
+#elif defined(__i386)
+		/* assume 64-bit precision */
+		magic.i = (arg.i & 0x80000000) | 0x5f000000;	/* 2^63 */
+#else
+#error Unknown architecture
+#endif
+		x = (x + magic.f) - magic.f;
+	}
+
+	/* x is nan, inf, or integral here */
+	return ((long) x);
+}
+#else
+#error Unsupported architecture
+#endif	/* defined(_ILP32) */
diff --git a/usr/src/libm/src/m9x/lrintl.c b/usr/src/libm/src/m9x/lrintl.c
new file mode 100644
index 0000000..d827195
--- /dev/null
+++ b/usr/src/libm/src/m9x/lrintl.c
@@ -0,0 +1,154 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"@(#)lrintl.c	1.3	06/01/31 SMI"
+
+#if defined(ELFOBJ)
+#pragma weak lrintl = __lrintl
+#endif
+
+#include <sys/isa_defs.h>	/* _ILP32 */
+#include "libm.h"
+
+#if defined(_ILP32)
+#if defined(__sparc)
+
+#include "fma.h"
+
+long
+lrintl(long double x) {	/* x rounded in the fsr rounding mode, as a long */
+	union {
+		unsigned i[4];
+		long double q;
+	} xx;
+	union {
+		unsigned i;
+		float f;
+	} tt;
+	unsigned hx, sx, frac, fsr, l;	/* l, frac: integer, fraction of |x| */
+	int rm, j;
+	volatile float dummy;		/* used only to raise inexact */
+
+	xx.q = x;
+	sx = xx.i[0] & 0x80000000;
+	hx = xx.i[0] & ~0x80000000;
+
+	/* handle trivial cases */
+	if (hx > 0x401e0000) { /* |x| > 2^31 + ... or x is nan */
+		/* convert an out-of-range float */
+		tt.i = sx | 0x7f000000;		/* +/-2^127 */
+		return ((long) tt.f);
+	} else if ((hx | xx.i[1] | xx.i[2] | xx.i[3]) == 0) /* x is zero */
+		return (0L);
+
+	/* get the rounding mode */
+	__fenv_getfsr(&fsr);
+	rm = fsr >> 30;
+
+	/* flip the sense of directed roundings if x is negative */
+	if (sx)
+		rm ^= rm >> 1;
+
+	/* handle |x| < 1 */
+	if (hx < 0x3fff0000) {
+		dummy = 1.0e30F; /* x is nonzero, so raise inexact */
+		dummy += 1.0e-30F;
+		if (rm == FSR_RP || (rm == FSR_RN && (hx >= 0x3ffe0000 &&
+			((hx & 0xffff) | xx.i[1] | xx.i[2] | xx.i[3]))))
+			return (sx ? -1L : 1L);
+		return (0L);
+	}
+
+	/* extract the integer and fractional parts of x */
+	j = 0x406f - (hx >> 16);		/* 81 <= j <= 112 */
+	xx.i[0] = 0x10000 | (xx.i[0] & 0xffff);	/* make the leading 1 explicit */
+	if (j >= 96) {				/* 96 <= j <= 112 */
+		l = xx.i[0] >> (j - 96);
+		frac = ((xx.i[0] << 1) << (127 - j)) | (xx.i[1] >> (j - 96));
+		if (((xx.i[1] << 1) << (127 - j)) | xx.i[2] | xx.i[3])
+			frac |= 1;		/* sticky bit */
+	} else {				/* 81 <= j <= 95 */
+		l = (xx.i[0] << (96 - j)) | (xx.i[1] >> (j - 64));
+		frac = (xx.i[1] << (96 - j)) | (xx.i[2] >> (j - 64));
+		if ((xx.i[2] << (96 - j)) | xx.i[3])
+			frac |= 1;		/* sticky bit */
+	}
+
+	/* round */
+	if (frac && (rm == FSR_RP || (rm == FSR_RN && (frac > 0x80000000U ||
+		(frac == 0x80000000 && (l & 1))))))
+		l++;
+
+	/* check for result out of range (l is the rounded |x| at this point) */
+	if (l > 0x80000000U || (l == 0x80000000U && !sx)) {
+		tt.i = sx | 0x7f000000;		/* +/-2^127 */
+		return ((long) tt.f);
+	}
+
+	/* raise inexact if need be */
+	if (frac) {
+		dummy = 1.0e30F;
+		dummy += 1.0e-30F;
+	}
+
+	/* negate result if need be */
+	if (sx)
+		l = -l;
+	return ((long) l);
+}
+#elif defined(__i386)
+long
+lrintl(long double x) {
+	/*
+	 * Round x to an integral value by fp addition, then convert to long.
+	 * Note: this works on x86 (in the default rounding precision mode),
+	 * but one ought to just use the fistpl instruction instead.
+	 */
+	union {
+		unsigned i[3];
+		long double e;
+	} xx, yy;
+	int ex;
+
+	xx.e = x;
+	ex = xx.i[2] & 0x7fff;	/* biased exponent, sign stripped */
+	if (ex < 0x403e) {	/* |x| < 2^63 */
+		/* add and subtract a power of two to round x to an integer */
+		yy.i[2] = (xx.i[2] & 0x8000) | 0x403e;	/* sign of x, 2^63 */
+		yy.i[1] = 0x80000000;	/* only the explicit leading bit set */
+		yy.i[0] = 0;
+		x = (x + yy.e) - yy.e;
+	}
+
+	/* now x is nan, inf, or integral */
+	return ((long) x);
+}
+#else
+#error Unknown architecture
+#endif	/* defined(__sparc) || defined(__i386) */
+#else
+#error Unsupported architecture
+#endif	/* defined(_ILP32) */
diff --git a/usr/src/libm/src/m9x/lround.c b/usr/src/libm/src/m9x/lround.c
new file mode 100644
index 0000000..c009156
--- /dev/null
+++ b/usr/src/libm/src/m9x/lround.c
@@ -0,0 +1,82 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"@(#)lround.c	1.3	06/01/31 SMI"
+
+#if defined(ELFOBJ)
+#pragma weak lround = __lround
+#endif
+
+/*
+ * lround(x) rounds its argument to the nearest integer, rounding ties
+ * away from zero, and converts the result to a 32 bit signed integer.
+ *
+ * If x is NaN, infinite, or so large that the nearest integer
+ * would exceed 32 bits, the invalid operation exception is raised.
+ */
+
+#include <sys/isa_defs.h>	/* _ILP32 */
+#include "libm.h"
+
+#if defined(_ILP32)
+long
+lround(double x) {
+	union {
+		unsigned i[2];
+		double d;
+	} xx;
+	unsigned hx, sx, i;	/* i: weight of the 1/2 bit in its word */
+
+	xx.d = x;
+	hx = xx.i[HIWORD] & ~0x80000000;	/* high word of |x| */
+	sx = xx.i[HIWORD] & 0x80000000;		/* sign bit of x */
+	if (hx < 0x43300000) {	/* |x| < 2^52 */
+		if (hx < 0x3ff00000) {	/* |x| < 1 */
+			if (hx >= 0x3fe00000)	/* |x| >= 1/2 */
+				return (sx ? -1L : 1L);
+			return (0L);
+		}
+
+		/* round x at the integer bit: add 1/2, then chop the fraction */
+		if (hx < 0x41300000) {	/* the 1/2 bit is in the high word */
+			i = 1U << (0x412 - (hx >> 20));
+			xx.i[HIWORD] = (xx.i[HIWORD] + i) & ~(i | (i - 1));
+			xx.i[LOWORD] = 0;
+		} else {		/* the 1/2 bit is in the low word */
+			i = 1U << (0x432 - (hx >> 20));	/* count may be 31 */
+			xx.i[LOWORD] += i;
+			if (xx.i[LOWORD] < i)	/* carry out of the low word */
+				xx.i[HIWORD]++;
+			xx.i[LOWORD] &= ~(i | (i - 1));
+		}
+	}
+
+	/* now x is nan, inf, or integral */
+	return ((long) xx.d);
+}
+#else
+#error Unsupported architecture
+#endif	/* defined(_ILP32) */
diff --git a/usr/src/libm/src/m9x/lroundf.c b/usr/src/libm/src/m9x/lroundf.c
new file mode 100644
index 0000000..a00652c
--- /dev/null
+++ b/usr/src/libm/src/m9x/lroundf.c
@@ -0,0 +1,66 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"@(#)lroundf.c	1.3	06/01/31 SMI"
+
+#if defined(ELFOBJ)
+#pragma weak lroundf = __lroundf
+#endif
+
+#include <sys/isa_defs.h>	/* _ILP32 */
+#include "libm.h"
+
+#if defined(_ILP32)
+long
+lroundf(float x) {
+	union {
+		unsigned i;
+		float f;
+	} u;
+	unsigned mag, neg, half;
+
+	u.f = x;
+	neg = u.i & 0x80000000;
+	mag = u.i & ~0x80000000;
+
+	/* x is nan, inf, or already integral when |x| >= 2^23 */
+	if (mag >= 0x4b000000)
+		return ((long) x);
+
+	/* |x| < 1: the result is 0 below one half, otherwise +/-1 */
+	if (mag < 0x3f000000)
+		return (0L);
+	if (mag < 0x3f800000)
+		return (neg ? -1L : 1L);
+
+	/* add one half at the integer bit and chop the fraction bits */
+	half = 1 << (0x95 - (mag >> 23));
+	u.i = (u.i + half) & ~((half << 1) - 1);
+	return ((long) u.f);
+}
+#else
+#error Unsupported architecture
+#endif	/* defined(_ILP32) */
diff --git a/usr/src/libm/src/m9x/lroundl.c b/usr/src/libm/src/m9x/lroundl.c
new file mode 100644
index 0000000..bb931d4
--- /dev/null
+++ b/usr/src/libm/src/m9x/lroundl.c
@@ -0,0 +1,144 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"@(#)lroundl.c	1.4	06/01/31 SMI"
+
+#if defined(ELFOBJ)
+#pragma weak lroundl = __lroundl
+#endif
+
+#include <sys/isa_defs.h>	/* _ILP32 */
+#include "libm.h"
+
+#if defined(_ILP32)
+#if defined(__sparc)
+long
+lroundl(long double x) {	/* x rounded to nearest, ties away, as a long */
+	union {
+		unsigned i[4];
+		long double q;
+	} xx;
+	union {
+		unsigned i;
+		float f;
+	} tt;
+	unsigned hx, sx, frac, l;	/* l, frac: integer, fraction of |x| */
+	int j;
+
+	xx.q = x;
+	sx = xx.i[0] & 0x80000000;
+	hx = xx.i[0] & ~0x80000000;
+
+	/* handle trivial cases */
+	if (hx > 0x401e0000) { /* |x| > 2^31 + ... or x is nan */
+		/* convert an out-of-range float */
+		tt.i = sx | 0x7f000000;		/* +/-2^127 */
+		return ((long) tt.f);
+	}
+
+	/* handle |x| < 1 */
+	if (hx < 0x3fff0000) {
+		if (hx >= 0x3ffe0000)		/* |x| >= 1/2 */
+			return (sx ? -1L : 1L);
+		return (0L);
+	}
+
+	/* extract the integer and fractional parts of x */
+	j = 0x406f - (hx >> 16);		/* 81 <= j <= 112 */
+	xx.i[0] = 0x10000 | (xx.i[0] & 0xffff);	/* make the leading 1 explicit */
+	if (j >= 96) {				/* 96 <= j <= 112 */
+		l = xx.i[0] >> (j - 96);
+		frac = ((xx.i[0] << 1) << (127 - j)) | (xx.i[1] >> (j - 96));
+		if (((xx.i[1] << 1) << (127 - j)) | xx.i[2] | xx.i[3])
+			frac |= 1;		/* sticky bit */
+	} else {				/* 81 <= j <= 95 */
+		l = (xx.i[0] << (96 - j)) | (xx.i[1] >> (j - 64));
+		frac = (xx.i[1] << (96 - j)) | (xx.i[2] >> (j - 64));
+		if ((xx.i[2] << (96 - j)) | xx.i[3])
+			frac |= 1;		/* sticky bit */
+	}
+
+	/* round */
+	if (frac >= 0x80000000U)
+		l++;
+
+	/* check for result out of range (l is the rounded |x| at this point) */
+	if (l > 0x80000000U || (l == 0x80000000U && !sx)) {
+		tt.i = sx | 0x7f000000;		/* +/-2^127 */
+		return ((long) tt.f);
+	}
+
+	/* negate result if need be */
+	if (sx)
+		l = -l;
+	return ((long) l);
+}
+#elif defined(__i386)
+long
+lroundl(long double x) {	/* x rounded to nearest, ties away, as a long */
+	union {
+		unsigned i[3];
+		long double e;
+	} xx;
+	unsigned ex, sx, i;	/* unsigned: i can be 1U << 31 below */
+
+	xx.e = x;
+	ex = xx.i[2] & 0x7fff;	/* biased exponent */
+	sx = xx.i[2] & 0x8000;	/* sign bit */
+	if (ex < 0x403e) {	/* |x| < 2^63 */
+		if (ex < 0x3fff) {	/* |x| < 1 */
+			if (ex >= 0x3ffe)	/* |x| >= 1/2 */
+				return (sx ? -1L : 1L);
+			return (0L);
+		}
+
+		/* round x at the integer bit: add 1/2, then chop the fraction */
+		if (ex < 0x401e) {	/* the 1/2 bit is in xx.i[1] */
+			i = 1U << (0x401d - ex);
+			xx.i[1] = (xx.i[1] + i) & ~(i | (i - 1));
+			xx.i[0] = 0;
+		} else {		/* the 1/2 bit is in xx.i[0] */
+			i = 1U << (0x403d - ex);	/* count may be 31 */
+			xx.i[0] += i;
+			if (xx.i[0] < i)	/* carry out of the low word */
+				xx.i[1]++;
+			xx.i[0] &= ~(i | (i - 1));
+		}
+		if (xx.i[1] == 0) {	/* significand overflowed: renormalize */
+			xx.i[2] = sx | ++ex;
+			xx.i[1] = 0x80000000U;
+		}
+	}
+
+	/* now x is nan, inf, or integral */
+	return ((long) xx.e);
+}
+#else
+#error Unknown architecture
+#endif	/* defined(__sparc) || defined(__i386) */
+#else
+#error Unsupported architecture
+#endif	/* defined(_ILP32) */
diff --git a/usr/src/libm/src/m9x/modf.c b/usr/src/libm/src/m9x/modf.c
new file mode 100644
index 0000000..66ce2bd
--- /dev/null
+++ b/usr/src/libm/src/m9x/modf.c
@@ -0,0 +1,92 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"@(#)modf.c	1.5	06/01/31 SMI"
+
+#if defined(ELFOBJ)
+#pragma weak modf = __modf
+#pragma weak _modf = __modf
+#endif
+
+/*
+ * modf(x, iptr) decomposes x into an integral part and a fractional
+ * part both having the same sign as x.  It stores the integral part
+ * in *iptr and returns the fractional part.
+ *
+ * If x is infinite, modf sets *iptr to x and returns copysign(0.0,x).
+ * If x is NaN, modf sets *iptr to x and returns x.
+ *
+ * If x is a signaling NaN, this code does not attempt to raise the
+ * invalid operation exception.
+ */
+
+#include "libm.h"
+
+double
+__modf(double x, double *iptr) {
+	union {
+		unsigned i[2];
+		double d;
+	} xx, yy;	/* xx: x, then the fraction; yy: the integral part */
+	unsigned hx, s;
+
+	xx.d = x;
+	hx = xx.i[HIWORD] & ~0x80000000;	/* high word of |x| */
+
+	if (hx >= 0x43300000) {	/* x is NaN, infinite, or integral */
+		*iptr = x;
+		if (hx < 0x7ff00000 || (hx == 0x7ff00000 &&
+			xx.i[LOWORD] == 0)) {	/* not a NaN: return +/-0 */
+			xx.i[HIWORD] &= 0x80000000;
+			xx.i[LOWORD] = 0;
+		}
+		return (xx.d);
+	}
+
+	if (hx < 0x3ff00000) {	/* |x| < 1 */
+		xx.i[HIWORD] &= 0x80000000;	/* integral part is +/-0 */
+		xx.i[LOWORD] = 0;
+		*iptr = xx.d;
+		return (x);
+	}
+
+	/* split x at the binary point (1U: the shift count can reach 31) */
+	s = xx.i[HIWORD] & 0x80000000;
+	if (hx < 0x41400000) {	/* binary point lies in the high word */
+		yy.i[HIWORD] = xx.i[HIWORD] & ~((1U << (0x413 - (hx >> 20))) -
+			1);
+		yy.i[LOWORD] = 0;
+	} else {		/* binary point lies in the low word */
+		yy.i[HIWORD] = xx.i[HIWORD];
+		yy.i[LOWORD] = xx.i[LOWORD] & ~((1U << (0x433 - (hx >> 20))) -
+			1);
+	}
+	*iptr = yy.d;
+	xx.d -= yy.d;
+	xx.i[HIWORD] = (xx.i[HIWORD] & ~0x80000000) | s;
+							/* keep sign of x */
+	return (xx.d);
+}
diff --git a/usr/src/libm/src/m9x/modff.c b/usr/src/libm/src/m9x/modff.c
new file mode 100644
index 0000000..4931cc3
--- /dev/null
+++ b/usr/src/libm/src/m9x/modff.c
@@ -0,0 +1,68 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"@(#)modff.c	1.5	06/01/31 SMI"
+
+#if defined(ELFOBJ)
+#pragma weak modff = __modff
+#pragma weak _modff = __modff
+#endif
+
+#include "libm.h"
+
+float
+__modff(float x, float *iptr) {
+	union {
+		unsigned i;
+		float f;
+	} in, whole;
+	unsigned mag, sign;
+
+	in.f = x;
+	sign = in.i & 0x80000000;
+	mag = in.i & ~0x80000000;
+
+	if (mag < 0x3f800000) {	/* |x| < 1: integral part is +/-0 */
+		in.i = sign;
+		*iptr = in.f;
+		return (x);
+	}
+
+	/* x is NaN, infinite, or integral: no fraction bits to split off */
+	if (mag >= 0x4b000000) {
+		*iptr = x;
+		if (mag <= 0x7f800000)	/* not a NaN: return +/-0 */
+			in.i = sign;
+		return (in.f);
+	}
+
+	/* mask off the fraction bits below the binary point */
+	whole.i = in.i & ~((1 << (0x96 - (mag >> 23))) - 1);
+	*iptr = whole.f;
+	in.f -= whole.f;
+	in.i = (in.i & ~0x80000000) | sign;	/* restore sign if diff is 0 */
+	return (in.f);
+}
diff --git a/usr/src/libm/src/m9x/modfl.c b/usr/src/libm/src/m9x/modfl.c
new file mode 100644
index 0000000..134dba8
--- /dev/null
+++ b/usr/src/libm/src/m9x/modfl.c
@@ -0,0 +1,149 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"@(#)modfl.c	1.5	06/01/31 SMI"
+
+#if defined(ELFOBJ)
+#pragma weak modfl = __modfl
+#endif
+
+#include "libm.h"
+
+#if defined(__sparc)
+
+long double
+__modfl(long double x, long double *iptr) {
+	union {
+		unsigned i[4];
+		long double q;
+	} xx, yy;	/* xx: x, then the fraction; yy: the integral part */
+	unsigned hx, s;
+
+	xx.q = x;
+	hx = xx.i[0] & ~0x80000000;	/* leading word of |x| */
+
+	if (hx >= 0x406f0000) {	/* x is NaN, infinite, or integral */
+		*iptr = x;
+		if (hx < 0x7fff0000 || (hx == 0x7fff0000 &&
+			(xx.i[1] | xx.i[2] | xx.i[3]) == 0)) {
+			xx.i[0] &= 0x80000000;	/* not a NaN: return +/-0 */
+			xx.i[1] = xx.i[2] = xx.i[3] = 0;
+		}
+		return (xx.q);
+	}
+
+	if (hx < 0x3fff0000) {	/* |x| < 1 */
+		xx.i[0] &= 0x80000000;	/* integral part is +/-0 */
+		xx.i[1] = xx.i[2] = xx.i[3] = 0;
+		*iptr = xx.q;
+		return (x);
+	}
+
+	/* split x at the binary point (1U: the shift count can reach 31) */
+	s = xx.i[0] & 0x80000000;
+	if (hx < 0x40100000) {
+		yy.i[0] = xx.i[0] & ~((1U << (0x400f - (hx >> 16))) - 1);
+		yy.i[1] = yy.i[2] = yy.i[3] = 0;
+	} else if (hx < 0x40300000) {
+		yy.i[0] = xx.i[0];
+		yy.i[1] = xx.i[1] & ~((1U << (0x402f - (hx >> 16))) - 1);
+		yy.i[2] = yy.i[3] = 0;
+	} else if (hx < 0x40500000) {
+		yy.i[0] = xx.i[0];
+		yy.i[1] = xx.i[1];
+		yy.i[2] = xx.i[2] & ~((1U << (0x404f - (hx >> 16))) - 1);
+		yy.i[3] = 0;
+	} else {
+		yy.i[0] = xx.i[0];
+		yy.i[1] = xx.i[1];
+		yy.i[2] = xx.i[2];
+		yy.i[3] = xx.i[3] & ~((1U << (0x406f - (hx >> 16))) - 1);
+	}
+	*iptr = yy.q;
+
+	/*
+	 * we could implement the following more efficiently than by using
+	 * software emulation of fsubq, but we'll do it this way for now
+	 * (and hope hardware support becomes commonplace)
+	 */
+	xx.q -= yy.q;
+	xx.i[0] = (xx.i[0] & ~0x80000000) | s;	/* keep sign of x */
+	return (xx.q);
+}
+
+#elif defined(__i386)
+
+long double
+__modfl(long double x, long double *iptr) {
+	union {
+		unsigned i[3];
+		long double e;
+	} xx, yy;	/* xx: x, then the fraction; yy: the integral part */
+	unsigned hx, s;
+
+	/*
+	 * It might be faster to use one of the x86 fpops instead of
+	 * the following.
+	 */
+	xx.e = x;
+	hx = xx.i[2] & 0x7fff;	/* biased exponent */
+
+	if (hx >= 0x403e) {	/* x is NaN, infinite, or integral */
+		*iptr = x;
+		if (hx < 0x7fff || (hx == 0x7fff &&
+			((xx.i[1] << 1) | xx.i[0]) == 0)) {
+			xx.i[2] &= 0x8000;	/* not a NaN: return +/-0 */
+			xx.i[1] = xx.i[0] = 0;
+		}
+		return (xx.e);
+	}
+
+	if (hx < 0x3fff) {	/* |x| < 1 */
+		xx.i[2] &= 0x8000;	/* integral part is +/-0 */
+		xx.i[1] = xx.i[0] = 0;
+		*iptr = xx.e;
+		return (x);
+	}
+
+	/* split x at the binary point (1U: the shift count can reach 31) */
+	s = xx.i[2] & 0x8000;
+	yy.i[2] = xx.i[2];
+	if (hx < 0x401f) {	/* binary point lies in xx.i[1] */
+		yy.i[1] = xx.i[1] & ~((1U << (0x401e - hx)) - 1);
+		yy.i[0] = 0;
+	} else {		/* binary point lies in xx.i[0] */
+		yy.i[1] = xx.i[1];
+		yy.i[0] = xx.i[0] & ~((1U << (0x403e - hx)) - 1);
+	}
+	*iptr = yy.e;
+	xx.e -= yy.e;
+	xx.i[2] = (xx.i[2] & ~0x8000) | s;	/* keep sign of x */
+	return (xx.e);
+}
+
+#else
+#error Unknown architecture
+#endif
diff --git a/usr/src/libm/src/m9x/nan.c b/usr/src/libm/src/m9x/nan.c
new file mode 100644
index 0000000..6a994ca
--- /dev/null
+++ b/usr/src/libm/src/m9x/nan.c
@@ -0,0 +1,61 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"@(#)nan.c	1.5	06/01/31 SMI"
+
+#if defined(ELFOBJ)
+#pragma weak nan = __nan
+#endif
+
+/*
+ *  nan(c) returns a NaN.  This implementation ignores c.
+ */
+
+#include "libm.h"
+
+#if defined(__sparc)
+
+static const union {
+	unsigned i[2];
+	double d;
+} __nan_union = { 0x7fffffff, 0xffffffff };	/* high word first */
+
+#elif defined(__i386)
+
+static const union {
+	unsigned i[2];
+	double d;
+} __nan_union = { 0xffffffff, 0x7fffffff };	/* low word first */
+
+#else
+#error Unknown architecture
+#endif
+
+/* ARGSUSED0 */
+double
+__nan(const char *c) {
+	return (__nan_union.d);	/* sign clear, all other bits set */
+}
diff --git a/usr/src/libm/src/m9x/nanf.c b/usr/src/libm/src/m9x/nanf.c
new file mode 100644
index 0000000..08a029e
--- /dev/null
+++ b/usr/src/libm/src/m9x/nanf.c
@@ -0,0 +1,44 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"@(#)nanf.c	1.4	06/01/31 SMI"
+
+#if defined(ELFOBJ)
+#pragma weak nanf = __nanf
+#endif
+
+#include "libm.h"
+
+static const union {
+	unsigned i;
+	float f;
+} __nanf_union = { 0x7fffffff };	/* sign clear, all other bits set */
+
+/* ARGSUSED0 */
+float
+__nanf(const char *c) {
+	return (__nanf_union.f);	/* same value whatever c is */
+}
diff --git a/usr/src/libm/src/m9x/nanl.c b/usr/src/libm/src/m9x/nanl.c
new file mode 100644
index 0000000..cf00010
--- /dev/null
+++ b/usr/src/libm/src/m9x/nanl.c
@@ -0,0 +1,57 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"@(#)nanl.c	1.4	06/01/31 SMI"
+
+#if defined(ELFOBJ)
+#pragma weak nanl = __nanl
+#endif
+
+#include "libm.h"
+
+#if defined(__sparc)
+
+static const union {
+	unsigned i[4];		/* i[0] holds the sign and exponent */
+	long double ld;
+} __nanl_union = { 0x7fffffff, 0xffffffff, 0xffffffff, 0xffffffff };
+
+#elif defined(__i386)
+
+static const union {
+	unsigned i[3];		/* i[2] holds the sign and exponent */
+	long double ld;
+} __nanl_union = { 0xffffffff, 0xffffffff, 0x7fff };
+
+#else
+#error Unknown architecture
+#endif
+
+/* ARGSUSED0 */
+long double
+__nanl(const char *c) {
+	return (__nanl_union.ld);	/* same value whatever c is */
+}
diff --git a/usr/src/libm/src/m9x/nearbyint.c b/usr/src/libm/src/m9x/nearbyint.c
new file mode 100644
index 0000000..d977522
--- /dev/null
+++ b/usr/src/libm/src/m9x/nearbyint.c
@@ -0,0 +1,222 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"@(#)nearbyint.c	1.4	06/01/31 SMI"
+
+#if defined(ELFOBJ)
+#pragma weak nearbyint = __nearbyint
+#endif
+
+/*
+ * nearbyint(x) returns the nearest fp integer to x in the direction
+ * corresponding to the current rounding direction without raising
+ * the inexact exception.
+ *
+ * nearbyint(x) is x unchanged if x is +/-0 or +/-inf.  If x is NaN,
+ * nearbyint(x) is also NaN.
+ */
+
+#include "libm.h"
+#include "fenv_synonyms.h"
+#include <fenv.h>
+
+double
+__nearbyint(double x) {
+	union {
+		unsigned i[2];
+		double d;
+	} xx;
+	unsigned hx, sx, i, frac;	/* frac: discarded bits, left-justified */
+	int rm, j;			/* j: number of fraction bits in x */
+
+	xx.d = x;
+	sx = xx.i[HIWORD] & 0x80000000;
+	hx = xx.i[HIWORD] & ~0x80000000;
+
+	/* handle trivial cases */
+	if (hx >= 0x43300000) {	/* x is nan, inf, or already integral */
+		if (hx >= 0x7ff00000)	/* x is inf or nan */
+#if defined(FPADD_TRAPS_INCOMPLETE_ON_NAN)
+			return (hx >= 0x7ff80000 ? x : x + x);
+			/* assumes sparc-like QNaN */
+#else
+			return (x + x);
+#endif
+		return (x);
+	} else if ((hx | xx.i[LOWORD]) == 0)	/* x is zero */
+		return (x);
+
+	/* get the rounding mode */
+	rm = fegetround();
+
+	/* flip the sense of directed roundings if x is negative */
+	if (sx && (rm == FE_UPWARD || rm == FE_DOWNWARD))
+		rm = (FE_UPWARD + FE_DOWNWARD) - rm;
+
+	/* handle |x| < 1 */
+	if (hx < 0x3ff00000) {
+		if (rm == FE_UPWARD || (rm == FE_TONEAREST &&
+			(hx >= 0x3fe00000 && ((hx & 0xfffff) | xx.i[LOWORD]))))
+			xx.i[HIWORD] = sx | 0x3ff00000;	/* +/-1 */
+		else
+			xx.i[HIWORD] = sx;		/* +/-0 */
+		xx.i[LOWORD] = 0;
+		return (xx.d);
+	}
+
+	/* round x at the integer bit */
+	j = 0x433 - (hx >> 20);		/* 1 <= j <= 52 */
+	if (j >= 32) {			/* integer bit is in the high word */
+		i = 1U << (j - 32);
+		frac = ((xx.i[HIWORD] << 1) << (63 - j)) |
+			(xx.i[LOWORD] >> (j - 32));
+		if (xx.i[LOWORD] & (i - 1))
+			frac |= 1;	/* sticky bit */
+		if (!frac)		/* x is already integral */
+			return (x);
+		xx.i[LOWORD] = 0;
+		xx.i[HIWORD] &= ~(i - 1);
+		if (rm == FE_UPWARD || (rm == FE_TONEAREST &&
+			(frac > 0x80000000u || (frac == 0x80000000) &&
+			(xx.i[HIWORD] & i))))
+			xx.i[HIWORD] += i;
+	} else {			/* integer bit is in the low word */
+		i = 1U << j;		/* j may be 31 here */
+		frac = (xx.i[LOWORD] << 1) << (31 - j);
+		if (!frac)		/* x is already integral */
+			return (x);
+		xx.i[LOWORD] &= ~(i - 1);
+		if (rm == FE_UPWARD || (rm == FE_TONEAREST &&
+			(frac > 0x80000000u || (frac == 0x80000000) &&
+			(xx.i[LOWORD] & i)))) {
+			xx.i[LOWORD] += i;
+			if (xx.i[LOWORD] == 0)	/* carry into the high word */
+				xx.i[HIWORD]++;
+		}
+	}
+	return (xx.d);
+}
+
+#if 0
+
+/*
+*  Alternate implementations for SPARC, x86, using fp ops.  These may
+*  be faster depending on how expensive saving and restoring the fp
+*  modes and status flags is.
+*/
+
+#include "libm.h"
+#include "fma.h"
+
+#if defined(__sparc)
+
+double
+__nearbyint(double x) {	/* disabled variant: rounds with an fp add/subtract */
+	union {
+		unsigned i[2];
+		double d;
+	} xx, yy;
+	double z;
+	unsigned hx, sx, fsr, oldfsr;
+	int rm;
+
+	xx.d = x;
+	sx = xx.i[0] & 0x80000000;
+	hx = xx.i[0] & ~0x80000000;
+
+	/* handle trivial cases */
+	if (hx >= 0x43300000)	/* x is nan, inf, or already integral */
+		return (x + 0.0);
+	else if ((hx | xx.i[1]) == 0)	/* x is zero */
+		return (x);
+
+	/* save the fsr */
+	__fenv_getfsr(&oldfsr);
+
+	/* handle |x| < 1 */
+	if (hx < 0x3ff00000) {
+		/* flip the sense of directed roundings if x is negative */
+		rm = oldfsr >> 30;
+		if (sx)
+			rm ^= rm >> 1;
+		if (rm == FSR_RP || (rm == FSR_RN && (hx >= 0x3fe00000 &&
+			((hx & 0xfffff) | xx.i[1]))))
+			xx.i[0] = sx | 0x3ff00000;	/* +/-1 */
+		else
+			xx.i[0] = sx;			/* +/-0 */
+		xx.i[1] = 0;
+		return (xx.d);
+	}
+
+	/* clear the inexact trap */
+	fsr = oldfsr & ~FSR_NXM;
+	__fenv_setfsr(&fsr);
+
+	/* round x at the integer bit */
+	yy.i[0] = sx | 0x43300000;	/* yy = 2^52 with the sign of x */
+	yy.i[1] = 0;
+	z = (x + yy.d) - yy.d;
+
+	/* restore the old fsr */
+	__fenv_setfsr(&oldfsr);
+
+	return (z);
+}
+
+#elif defined(__i386)
+
+/* inline template */
+extern long double frndint(long double);
+
+double
+__nearbyint(double x) {	/* disabled variant: rounds with frndint */
+	long double z;
+	unsigned oldcwsw, cwsw;
+
+	/* save the control and status words, mask the inexact exception */
+	__fenv_getcwsw(&oldcwsw);
+	cwsw = oldcwsw | 0x00200000;	/* presumably cw sits in the high half */
+	__fenv_setcwsw(&cwsw);
+
+	z = frndint((long double) x);
+
+	/*
+	 * restore the control and status words, preserving all but the
+	 * inexact flag
+	 */
+	__fenv_getcwsw(&cwsw);
+	oldcwsw |= (cwsw & 0x1f);	/* low five bits only; 0x20 is left out */
+	__fenv_setcwsw(&oldcwsw);
+
+	/* note: the value of z is representable in double precision */
+	return (z);
+}
+
+#else
+#error Unknown architecture
+#endif
+
+#endif
diff --git a/usr/src/libm/src/m9x/nearbyintf.c b/usr/src/libm/src/m9x/nearbyintf.c
new file mode 100644
index 0000000..2251c89
--- /dev/null
+++ b/usr/src/libm/src/m9x/nearbyintf.c
@@ -0,0 +1,185 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"@(#)nearbyintf.c	1.4	06/01/31 SMI"
+
+#if defined(ELFOBJ)
+#pragma weak nearbyintf = __nearbyintf
+#endif
+
+#include "libm.h"
+#include "fenv_synonyms.h"
+#include <fenv.h>
+
+float
+__nearbyintf(float x) {
+	union {
+		unsigned i;
+		float f;
+	} u;
+	unsigned mag, sign, unit, rem;
+	int mode;
+
+	u.f = x;
+	mag = u.i & ~0x80000000;
+	sign = u.i & 0x80000000;
+
+	/* nan, inf, integral values and zeros come back without rounding */
+	if (mag > 0x7f800000)		/* x is nan */
+		return (x * x);		/* + -> * for Cheetah */
+	if (mag >= 0x4b000000 || mag == 0)
+		return (x);
+
+	/* fetch the rounding direction currently in effect */
+	mode = fegetround();
+
+	/* for negative x, upward and downward rounding trade places */
+	if (sign && mode == FE_UPWARD)
+		mode = FE_DOWNWARD;
+	else if (sign && mode == FE_DOWNWARD)
+		mode = FE_UPWARD;
+
+	/* 0 < |x| < 1: the answer is a zero or a one carrying x's sign */
+	if (mag < 0x3f800000) {
+		u.i = sign;
+		if (mode == FE_UPWARD ||
+		    (mode == FE_TONEAREST && mag > 0x3f000000))
+			u.i |= 0x3f800000;
+		return (u.f);
+	}
+
+	/* unit has weight one in x; rem is the fraction field below it */
+	unit = 1 << (0x96 - (mag >> 23));
+	rem = mag & (unit - 1);
+	if (rem == 0)
+		return (x);
+
+	/* chop the fraction, then step one unit away from zero if required */
+	mag -= rem;
+	if (mode == FE_UPWARD || (mode == FE_TONEAREST &&
+	    (rem > (unit >> 1) || (rem == (unit >> 1) && (mag & unit)))))
+		mag += unit;
+	u.i = sign | mag;
+	return (u.f);
+}
+
+#if 0
+
+/*
+ * Alternate implementations for SPARC, x86, using fp ops.  These may
+ * be faster depending on how expensive saving and restoring the fp
+ * modes and status flags is.
+ */
+
+#include "libm.h"
+#include "fma.h"
+
+#if defined(__sparc)
+
+float
+__nearbyintf(float x) {	/* SPARC variant: round x to an integer using fp adds */
+	union {
+		unsigned i;
+		float f;
+	} xx, yy;
+	float z;
+	unsigned hx, sx, fsr, oldfsr;
+	int rm;
+
+	xx.f = x;
+	sx = xx.i & 0x80000000;	/* sign bit of x */
+	hx = xx.i & ~0x80000000;	/* bits of x with the sign removed */
+
+	/* handle trivial cases */
+	if (hx >= 0x4b000000)	/* x is nan, inf, or already integral */
+		return (x + 0.0f);
+	else if (hx == 0)	/* x is zero */
+		return (x);
+
+	/* save the fsr */
+	__fenv_getfsr(&oldfsr);
+
+	/* handle |x| < 1 */
+	if (hx < 0x3f800000) {
+		/* flip the sense of directed roundings if x is negative */
+		rm = oldfsr >> 30;	/* rounding direction = top two fsr bits */
+		if (sx)
+			rm ^= rm >> 1;	/* swaps 2 and 3, leaves 0 and 1 alone */
+		if (rm == FSR_RP || (rm == FSR_RN && hx > 0x3f000000))
+			xx.i = sx | 0x3f800000;	/* result is 1 with x's sign */
+		else
+			xx.i = sx;	/* result is 0 with x's sign */
+		return (xx.f);	/* the fsr was only read on this path */
+	}
+
+	/* clear the inexact trap */
+	fsr = oldfsr & ~FSR_NXM;
+	__fenv_setfsr(&fsr);
+
+	/* round x at the integer bit */
+	yy.i = sx | 0x4b000000;	/* yy = 2^23 carrying x's sign */
+	z = (x + yy.f) - yy.f;	/* the add rounds in the current mode */
+
+	/* restore the old fsr */
+	__fenv_setfsr(&oldfsr);	/* writes back the whole word saved above */
+
+	return (z);
+}
+
+#elif defined(__i386)
+
+/* inline template */
+extern long double frndint(long double);
+
+float
+__nearbyintf(float x) {
+	unsigned saved, work;
+	long double rounded;
+
+	/* capture the control/status pair, then mask the inexact exception */
+	__fenv_getcwsw(&saved);
+	work = saved | 0x00200000;
+	__fenv_setcwsw(&work);
+
+	rounded = frndint((long double) x);
+
+	/*
+	 * fold the status flags other than inexact (mask 0x1f) back into
+	 * the saved pair and reinstate it
+	 */
+	__fenv_getcwsw(&work);
+	saved |= (work & 0x1f);
+	__fenv_setcwsw(&saved);
+
+	/* note: rounded is exactly representable in single precision */
+	return (rounded);
+}
+
+#else
+#error Unknown architecture
+#endif
+
+#endif
diff --git a/usr/src/libm/src/m9x/nearbyintl.c b/usr/src/libm/src/m9x/nearbyintl.c
new file mode 100644
index 0000000..98def46
--- /dev/null
+++ b/usr/src/libm/src/m9x/nearbyintl.c
@@ -0,0 +1,183 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"@(#)nearbyintl.c	1.4	06/01/31 SMI"
+
+#if defined(ELFOBJ)
+#pragma weak nearbyintl = __nearbyintl
+#endif
+
+#include "libm.h"
+#include "fma.h"
+
+#if defined(__sparc)
+
+static union {	/* float bit pattern used below to raise invalid */
+	unsigned i;
+	float f;
+} snan = { 0x7f800001 };	/* exponent all ones, quiet bit clear */
+
+long double
+__nearbyintl(long double x) {	/* SPARC quad: round to integer with integer ops */
+	union {
+		unsigned i[4];
+		long double q;
+	} xx;
+	unsigned hx, sx, i, frac, fsr;
+	int rm, j;
+	volatile float	dummy;
+
+	xx.q = x;
+	sx = xx.i[0] & 0x80000000;	/* sign bit of x */
+	hx = xx.i[0] & ~0x80000000;	/* high word of x with the sign removed */
+
+	/* handle trivial cases */
+	if (hx >= 0x406f0000) {	/* x is nan, inf, or already integral */
+		/* check for signaling nan */
+		if ((hx > 0x7fff0000 || (hx == 0x7fff0000 &&
+			(xx.i[1] | xx.i[2] | xx.i[3]))) && !(hx & 0x8000)) {
+			dummy = snan.f;
+			dummy += snan.f;	/* arithmetic on snan to raise invalid */
+			xx.i[0] = sx | hx | 0x8000;	/* return x with bit 0x8000 set */
+		}
+		return (xx.q);
+	} else if ((hx | xx.i[1] | xx.i[2] | xx.i[3]) == 0)	/* x is zero */
+		return (x);
+
+	/* get the rounding mode */
+	__fenv_getfsr(&fsr);
+	rm = fsr >> 30;	/* top two fsr bits */
+
+	/* flip the sense of directed roundings if x is negative */
+	if (sx)
+		rm ^= rm >> 1;	/* swaps 2 and 3, leaves 0 and 1 alone */
+
+	/* handle |x| < 1 */
+	if (hx < 0x3fff0000) {
+		if (rm == FSR_RP || (rm == FSR_RN && (hx >= 0x3ffe0000 &&
+			((hx & 0xffff) | xx.i[1] | xx.i[2] | xx.i[3]))))
+			xx.i[0] = sx | 0x3fff0000;	/* result is 1 with x's sign */
+		else
+			xx.i[0] = sx;	/* result is 0 with x's sign */
+		xx.i[1] = xx.i[2] = xx.i[3] = 0;
+		return (xx.q);
+	}
+
+	/* round x at the integer bit */
+	j = 0x406f - (hx >> 16);	/* count of fraction bits, 1..112 here */
+	if (j >= 96) {	/* integer bit lies in xx.i[0] */
+		i = 1 << (j - 96);	/* mask of the integer bit */
+		frac = ((xx.i[0] << 1) << (127 - j)) | (xx.i[1] >> (j - 96));
+		if ((xx.i[1] & (i - 1)) | xx.i[2] | xx.i[3])
+			frac |= 1;	/* sticky bit for the fraction bits shifted out */
+		if (!frac)
+			return (x);	/* no fraction bits set */
+		xx.i[1] = xx.i[2] = xx.i[3] = 0;
+		xx.i[0] &= ~(i - 1);	/* truncate toward zero */
+		if (rm == FSR_RP || (rm == FSR_RN && (frac > 0x80000000u ||
+			(frac == 0x80000000 && (xx.i[0] & i)))))	/* tie: odd only */
+			xx.i[0] += i;
+	} else if (j >= 64) {	/* integer bit lies in xx.i[1] */
+		i = 1 << (j - 64);
+		frac = ((xx.i[1] << 1) << (95 - j)) | (xx.i[2] >> (j - 64));
+		if ((xx.i[2] & (i - 1)) | xx.i[3])
+			frac |= 1;	/* sticky bit */
+		if (!frac)
+			return (x);
+		xx.i[2] = xx.i[3] = 0;
+		xx.i[1] &= ~(i - 1);
+		if (rm == FSR_RP || (rm == FSR_RN && (frac > 0x80000000u ||
+			(frac == 0x80000000 && (xx.i[1] & i))))) {
+			xx.i[1] += i;
+			if (xx.i[1] == 0)	/* carry into the next higher word */
+				xx.i[0]++;
+		}
+	} else if (j >= 32) {	/* integer bit lies in xx.i[2] */
+		i = 1 << (j - 32);
+		frac = ((xx.i[2] << 1) << (63 - j)) | (xx.i[3] >> (j - 32));
+		if (xx.i[3] & (i - 1))
+			frac |= 1;	/* sticky bit */
+		if (!frac)
+			return (x);
+		xx.i[3] = 0;
+		xx.i[2] &= ~(i - 1);
+		if (rm == FSR_RP || (rm == FSR_RN && (frac > 0x80000000u ||
+			(frac == 0x80000000 && (xx.i[2] & i))))) {
+			xx.i[2] += i;
+			if (xx.i[2] == 0)	/* propagate the carry upward */
+				if (++xx.i[1] == 0)
+					xx.i[0]++;
+		}
+	} else {	/* integer bit lies in xx.i[3] */
+		i = 1 << j;
+		frac = (xx.i[3] << 1) << (31 - j);	/* fraction, left-aligned */
+		if (!frac)
+			return (x);
+		xx.i[3] &= ~(i - 1);
+		if (rm == FSR_RP || (rm == FSR_RN && (frac > 0x80000000u ||
+			(frac == 0x80000000 && (xx.i[3] & i))))) {
+			xx.i[3] += i;
+			if (xx.i[3] == 0)	/* propagate the carry upward */
+				if (++xx.i[2] == 0)
+					if (++xx.i[1] == 0)
+						xx.i[0]++;
+		}
+	}
+
+	return (xx.q);
+}
+
+#elif defined(__i386)
+
+/* inline template */
+extern long double frndint(long double);
+
+long double
+__nearbyintl(long double x) {
+	unsigned saved, work;
+	long double rounded;
+
+	/* capture the control/status pair, then mask the inexact exception */
+	__fenv_getcwsw(&saved);
+	work = saved | 0x00200000;
+	__fenv_setcwsw(&work);
+
+	rounded = frndint(x);
+
+	/*
+	 * fold the status flags other than inexact (mask 0x1f) back into
+	 * the saved pair and reinstate it
+	 */
+	__fenv_getcwsw(&work);
+	saved |= (work & 0x1f);
+	__fenv_setcwsw(&saved);
+
+	return (rounded);
+}
+
+#else
+#error Unknown architecture
+#endif
diff --git a/usr/src/libm/src/m9x/nexttoward.c b/usr/src/libm/src/m9x/nexttoward.c
new file mode 100644
index 0000000..d9bbb55
--- /dev/null
+++ b/usr/src/libm/src/m9x/nexttoward.c
@@ -0,0 +1,222 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"@(#)nexttoward.c	1.4	06/01/31 SMI"
+
+#if defined(ELFOBJ)
+#pragma weak nexttoward = __nexttoward
+#endif
+
+/*
+ * nexttoward(x, y) delivers the next representable number after x
+ * in the direction of y.  If x and y are both zero, the result is
+ * zero with the same sign as y.  If either x or y is NaN, the result
+ * is NaN.
+ *
+ * If x != y and the result is infinite, overflow is raised; if
+ * x != y and the result is subnormal or zero, underflow is raised.
+ * (This is wrong, but it's what C99 apparently wants.)
+ */
+
+#include "libm.h"
+
+#if defined(__sparc)
+
+static union {	/* double constants given as { high word, low word } */
+	unsigned i[2];
+	double d;
+} C[] = {
+	0x00100000, 0,	/* smallest normal double */
+	0x7fe00000, 0,	/* 2^1023 */
+	0x7fffffff, 0xffffffff	/* a quiet nan */
+};
+
+#define	tiny	C[0].d	/* multiplied by itself below to raise underflow */
+#define	huge	C[1].d	/* multiplied by itself below to raise overflow */
+#define	qnan	C[2].d	/* returned when the operands are unordered */
+
+enum fcc_type {	/* comparison results delivered by _Q_cmp */
+	fcc_equal = 0,
+	fcc_less = 1,
+	fcc_greater = 2,
+	fcc_unordered = 3
+};
+
+#ifdef __sparcv9
+#define	_Q_cmp	_Qp_cmp	/* v9 builds use the _Qp_ entry point */
+#endif
+
+extern enum fcc_type _Q_cmp(const long double *, const long double *);
+
+double
+__nexttoward(double x, long double y) {	/* SPARC: step x one ulp toward y */
+	union {
+		unsigned i[2];
+		double d;
+	} xx;
+	union {
+		unsigned i[4];
+		long double q;
+	} yy;
+	long double lx;
+	unsigned hx;
+	volatile double	dummy;
+	enum fcc_type rel;
+
+	/*
+	 * It would be somewhat more efficient to check for NaN and
+	 * zero operands before converting x to long double and then
+	 * to code the comparison in line rather than calling _Q_cmp.
+	 * However, since this code probably won't get used much,
+	 * I'm opting in favor of simplicity instead.
+	 */
+	lx = xx.d = x;	/* lx is x widened for the quad comparison */
+	hx = (xx.i[0] & ~0x80000000) | xx.i[1];	/* zero iff x is +/-0 */
+
+	/* check for each of four possible orderings */
+	rel = _Q_cmp(&lx, &y);
+	if (rel == fcc_unordered)
+		return (qnan);
+
+	if (rel == fcc_equal) {
+		if (hx == 0) {	/* x is zero; return zero with y's sign */
+			yy.q = y;
+			xx.i[0] = yy.i[0];	/* high word of y, which is also zero */
+			return (xx.d);
+		}
+		return (x);
+	}
+
+	if (rel == fcc_less) {	/* x < y: move up */
+		if (hx == 0) {	/* x is zero */
+			xx.i[0] = 0;
+			xx.i[1] = 0x00000001;	/* smallest positive subnormal */
+		} else if ((int)xx.i[0] >= 0) {	/* x is positive */
+			if (++xx.i[1] == 0)	/* low word wrapped: carry */
+				xx.i[0]++;
+		} else {	/* x is negative: shrink the magnitude */
+			if (xx.i[1]-- == 0)	/* low word was zero: borrow */
+				xx.i[0]--;
+		}
+	} else {	/* x > y: move down */
+		if (hx == 0) {	/* x is zero */
+			xx.i[0] = 0x80000000;
+			xx.i[1] = 0x00000001;	/* smallest negative subnormal */
+		} else if ((int)xx.i[0] >= 0) {	/* x is positive */
+			if (xx.i[1]-- == 0)	/* low word was zero: borrow */
+				xx.i[0]--;
+		} else {	/* x is negative: grow the magnitude */
+			if (++xx.i[1] == 0)	/* low word wrapped: carry */
+				xx.i[0]++;
+		}
+	}
+
+	/* raise exceptions as needed */
+	hx = xx.i[0] & ~0x80000000;	/* high word of |result| */
+	if (hx == 0x7ff00000) {	/* result is infinite */
+		dummy = huge;
+		dummy *= huge;
+	} else if (hx < 0x00100000) {	/* result is subnormal or zero */
+		dummy = tiny;
+		dummy *= tiny;
+	}
+
+	return (xx.d);
+}
+
+#elif defined(__i386)
+
+static union {	/* double constants given as { low word, high word } */
+	unsigned i[2];
+	double d;
+} C[] = {
+	0, 0x00100000,	/* smallest normal double */
+	0, 0x7fe00000,	/* 2^1023 */
+};
+
+#define	tiny	C[0].d	/* multiplied by itself below to raise underflow */
+#define	huge	C[1].d	/* multiplied by itself below to raise overflow */
+
+double
+__nexttoward(double x, long double y) {	/* i386: step x one ulp toward y */
+	union {
+		unsigned i[2];
+		double d;
+	} xx;
+	unsigned hx;
+	long double lx;
+	volatile double	dummy;
+
+	lx = xx.d = x;	/* lx is x widened for the comparisons with y */
+	hx = (xx.i[1] & ~0x80000000) | xx.i[0];	/* zero iff x is +/-0 */
+
+	/* check for each of four possible orderings */
+	if (isunordered(lx, y))
+		return ((double) (lx + y));	/* nan result from the nan operand */
+
+	if (lx == y)
+		return ((double) y);	/* includes zero with y's sign */
+
+	if (lx < y) {	/* move up */
+		if (hx == 0) {	/* x is zero */
+			xx.i[0] = 0x00000001;	/* smallest positive subnormal */
+			xx.i[1] = 0;
+		} else if ((int)xx.i[1] >= 0) {	/* x is positive */
+			if (++xx.i[0] == 0)	/* low word wrapped: carry */
+				xx.i[1]++;
+		} else {	/* x is negative: shrink the magnitude */
+			if (xx.i[0]-- == 0)	/* low word was zero: borrow */
+				xx.i[1]--;
+		}
+	} else {	/* lx > y: move down */
+		if (hx == 0) {	/* x is zero */
+			xx.i[0] = 0x00000001;	/* smallest negative subnormal */
+			xx.i[1] = 0x80000000;
+		} else if ((int)xx.i[1] >= 0) {	/* x is positive */
+			if (xx.i[0]-- == 0)	/* low word was zero: borrow */
+				xx.i[1]--;
+		} else {	/* x is negative: grow the magnitude */
+			if (++xx.i[0] == 0)	/* low word wrapped: carry */
+				xx.i[1]++;
+		}
+	}
+
+	/* raise exceptions as needed */
+	hx = xx.i[1] & ~0x80000000;	/* high word of |result| */
+	if (hx == 0x7ff00000) {	/* result is infinite */
+		dummy = huge;
+		dummy *= huge;
+	} else if (hx < 0x00100000) {	/* result is subnormal or zero */
+		dummy = tiny;
+		dummy *= tiny;
+	}
+
+	return (xx.d);
+}
+
+#else
+#error Unknown architecture
+#endif
diff --git a/usr/src/libm/src/m9x/nexttowardf.c b/usr/src/libm/src/m9x/nexttowardf.c
new file mode 100644
index 0000000..0bf8a05
--- /dev/null
+++ b/usr/src/libm/src/m9x/nexttowardf.c
@@ -0,0 +1,184 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"@(#)nexttowardf.c	1.4	06/01/31 SMI"
+
+#if defined(ELFOBJ)
+#pragma weak nexttowardf = __nexttowardf
+#endif
+
+#include "libm.h"
+
+static union {	/* float constants given by bit pattern */
+	unsigned i;
+	float f;
+} C[] = {
+	0x00800000,	/* smallest normal float */
+	0x7f000000,	/* 2^127 */
+	0x7fffffff	/* a quiet nan */
+};
+
+#define	tiny	C[0].f	/* multiplied by itself below to raise underflow */
+#define	huge	C[1].f	/* multiplied by itself below to raise overflow */
+#define	qnan	C[2].f	/* returned by the SPARC code for unordered operands */
+
+#if defined(__sparc)
+
+enum fcc_type {	/* comparison results delivered by _Q_cmp */
+	fcc_equal = 0,
+	fcc_less = 1,
+	fcc_greater = 2,
+	fcc_unordered = 3
+};
+
+#ifdef __sparcv9
+#define	_Q_cmp	_Qp_cmp	/* v9 builds use the _Qp_ entry point */
+#endif
+
+extern enum fcc_type _Q_cmp(const long double *, const long double *);
+
+float
+__nexttowardf(float x, long double y) {	/* SPARC: step x one ulp toward y */
+	union {
+		unsigned i;
+		float f;
+	} xx;
+	union {
+		unsigned i[4];
+		long double q;
+	} yy;
+	long double lx;
+	unsigned hx;
+	volatile float dummy;
+	enum fcc_type rel;
+
+	/*
+	 * It would be somewhat more efficient to check for NaN and
+	 * zero operands before converting x to long double and then
+	 * to code the comparison in line rather than calling _Q_cmp.
+	 * However, since this code probably won't get used much,
+	 * I'm opting in favor of simplicity instead.
+	 */
+	lx = xx.f = x;	/* lx is x widened for the quad comparison */
+	hx = xx.i & ~0x80000000;	/* zero iff x is +/-0 */
+
+	/* check for each of four possible orderings */
+	rel = _Q_cmp(&lx, &y);
+	if (rel == fcc_unordered)
+		return (qnan);
+
+	if (rel == fcc_equal) {
+		if (hx == 0) {	/* x is zero; return zero with y's sign */
+			yy.q = y;
+			xx.i = yy.i[0];	/* high word of y, which is also zero */
+			return (xx.f);
+		}
+		return (x);
+	}
+
+	if (rel == fcc_less) {	/* x < y: move up */
+		if (hx == 0)	/* x is zero */
+			xx.i = 0x00000001;	/* smallest positive subnormal */
+		else if ((int) xx.i >= 0)	/* x is positive */
+			xx.i++;
+		else	/* x is negative: shrink the magnitude */
+			xx.i--;
+	} else {	/* x > y: move down */
+		if (hx == 0)	/* x is zero */
+			xx.i = 0x80000001;	/* smallest negative subnormal */
+		else if ((int) xx.i >= 0)	/* x is positive */
+			xx.i--;
+		else	/* x is negative: grow the magnitude */
+			xx.i++;
+	}
+
+	/* raise exceptions as needed */
+	hx = xx.i & ~0x80000000;	/* bits of |result| */
+	if (hx == 0x7f800000) {	/* result is infinite */
+		dummy = huge;
+		dummy *= huge;
+	} else if (hx < 0x00800000) {	/* result is subnormal or zero */
+		dummy = tiny;
+		dummy *= tiny;
+	}
+
+	return (xx.f);
+}
+
+#elif defined(__i386)
+
+float
+__nexttowardf(float x, long double y) {	/* i386: step x one ulp toward y */
+	union {
+		unsigned i;
+		float f;
+	} xx;
+	unsigned hx;
+	long double lx;
+	volatile float dummy;
+
+	lx = xx.f = x;	/* lx is x widened for the comparisons with y */
+	hx = xx.i & ~0x80000000;	/* zero iff x is +/-0 */
+
+	/* check for each of four possible orderings */
+	if (isunordered(lx, y))
+		return ((float) (lx + y));	/* nan result from the nan operand */
+
+	if (lx == y)
+		return ((float) y);	/* includes zero with y's sign */
+
+	if (lx < y) {	/* move up */
+		if (hx == 0)	/* x is zero */
+			xx.i = 0x00000001;	/* smallest positive subnormal */
+		else if ((int) xx.i >= 0)	/* x is positive */
+			xx.i++;
+		else	/* x is negative: shrink the magnitude */
+			xx.i--;
+	} else {	/* lx > y: move down */
+		if (hx == 0)	/* x is zero */
+			xx.i = 0x80000001;	/* smallest negative subnormal */
+		else if ((int) xx.i >= 0)	/* x is positive */
+			xx.i--;
+		else	/* x is negative: grow the magnitude */
+			xx.i++;
+	}
+
+	/* raise exceptions as needed */
+	hx = xx.i & ~0x80000000;	/* bits of |result| */
+	if (hx == 0x7f800000) {	/* result is infinite */
+		dummy = huge;
+		dummy *= huge;
+	} else if (hx < 0x00800000) {	/* result is subnormal or zero */
+		dummy = tiny;
+		dummy *= tiny;
+	}
+
+	return (xx.f);
+}
+
+#else
+#error Unknown architecture
+#endif
diff --git a/usr/src/libm/src/m9x/nexttowardl.c b/usr/src/libm/src/m9x/nexttowardl.c
new file mode 100644
index 0000000..4578738
--- /dev/null
+++ b/usr/src/libm/src/m9x/nexttowardl.c
@@ -0,0 +1,118 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"@(#)nexttowardl.c	1.3	06/01/31 SMI"
+
+#if defined(ELFOBJ)
+#pragma weak nexttowardl = __nexttowardl
+#endif
+
+#include "libm.h"
+#include <float.h>		/* LDBL_MAX, LDBL_MIN */
+
+#if defined(__sparc)
+#define	n0	0	/* index of the word tested for the sign */
+#define	n1	1
+#define	n2	2
+#define	n3	3	/* index of the least significant word */
+#define	X86PDNRM1(x)	/* expands to nothing on SPARC */
+#define	INC(px)	{ \
+			if (++px[n3] == 0) \
+				if (++px[n2] == 0) \
+					if (++px[n1] == 0) \
+						++px[n0]; \
+		}
+#define	DEC(px)	{ \
+			if (--px[n3] == 0xffffffff) \
+				if (--px[n2] == 0xffffffff) \
+					if (--px[n1] == 0xffffffff) \
+						--px[n0]; \
+		}
+#elif defined(__i386)
+#define	n0	2	/* index of the word tested for the sign */
+#define	n1	1	/* word whose top bit INC/DEC keep set */
+#define	n2	0	/* index of the least significant word */
+#define	n3	0	/* same word as n2: only three words are used */
+/*
+ * if pseudo-denormal (zero biased exponent but top bit of word 1 set),
+ * replace by the equivalent normal by setting the low bit of word 2
+ */
+#define	X86PDNRM1(x)	if (XBIASED_EXP(x) == 0 && (((int *) &x)[1] & \
+				0x80000000) != 0) \
+				((int *) &x)[2] |= 1
+#define	INC(px)	{ \
+			if (++px[n2] == 0) \
+				if ((++px[n1] & ~0x80000000) == 0) \
+					px[n1] = 0x80000000, ++px[n0]; \
+		}
+#define	DEC(px)	{ \
+			if (--px[n2] == 0xffffffff) \
+				if (--px[n1] == 0x7fffffff) \
+					if ((--px[n0] & 0x7fff) != 0) \
+						px[n1] |= 0x80000000; \
+		}
+#endif
+
+long double
+nexttowardl(long double x, long double y) {	/* step x one ulp toward y */
+	int *px = (int *) &x;	/* x is edited in place through px */
+	int *py = (int *) &y;
+
+	if (x == y)
+		return (y);		/* C99 requirement */
+	if (x != x || y != y)
+		return (x * y);	/* at least one nan: the product is nan */
+
+	if (ISZEROL(x)) {	/* x == 0.0 */
+		px[n0] = py[n0] & XSGNMSK;	/* take y's sign */
+		px[n1] = px[n2] = 0;
+		px[n3] = 1;	/* lowest bit set: smallest magnitude past zero */
+	} else {
+		X86PDNRM1(x);
+		if ((px[n0] & XSGNMSK) == 0) {	/* x > 0.0 */
+			if (x > y)	/* x > y */
+				DEC(px)
+			else
+				INC(px)
+		} else {	/* x < 0.0: INC/DEC act on the magnitude */
+			if (x < y)	/* x < y */
+				DEC(px)
+			else
+				INC(px)
+		}
+	}
+#ifndef lint
+	{
+		volatile long double dummy;	/* sink for the flag-raising products */
+		int k = XBIASED_EXP(x);	/* biased exponent of the result */
+
+		if (k == 0)	/* result is subnormal or zero */
+			dummy = LDBL_MIN * copysignl(LDBL_MIN, x);
+		else if (k == 0x7fff)	/* result is infinite */
+			dummy = LDBL_MAX * copysignl(LDBL_MAX, x);
+	}
+#endif
+	return (x);
+}
diff --git a/usr/src/libm/src/m9x/regset.h b/usr/src/libm/src/m9x/regset.h
new file mode 100644
index 0000000..54c9306
--- /dev/null
+++ b/usr/src/libm/src/m9x/regset.h
@@ -0,0 +1,128 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * Parts of Solaris 10 x86 /usr/include/sys/regset.h
+ */
+
+#ifndef	_SYS_REGSET_H
+#define	_SYS_REGSET_H
+
+#pragma ident	"@(#)regset.h	1.3	06/01/31 SMI"
+
+#include <sys/types.h>
+
+typedef union {	/* four 32-bit words that can also be read as a long double */
+	long double	_q;
+	uint32_t	_l[4];
+} myupad128_t;	/* element type of the xmm[] array in struct fpchip_state */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * The names and offsets defined here are specified by i386 ABI suppl.
+ */
+
+#define	SS		18	/* only stored on a privilege transition */
+#define	UESP		17	/* only stored on a privilege transition */
+#define	EFL		16
+#define	CS		15
+#define	EIP		14
+#define	ERR		13
+#define	TRAPNO		12
+#define	EAX		11
+#define	ECX		10
+#define	EDX		9
+#define	EBX		8
+#define	ESP		7
+#define	EBP		6
+#define	ESI		5
+#define	EDI		4
+#define	DS		3
+#define	ES		2
+#define	FS		1
+#define	GS		0
+
+/* aliases for portability: generic register names mapped to the indices above */
+
+#define	REG_PC	EIP
+#define	REG_FP	EBP
+#define	REG_SP	UESP
+#define	REG_PS	EFL
+#define	REG_R0	EAX
+#define	REG_R1	EDX
+
+/*
+ * A gregset_t is defined as an array type for compatibility with the reference
+ * source. This is important due to differences in the way the C language
+ * treats arrays and structures as parameters.
+ */
+#define	_NGREG	19	/* one slot per index GS (0) through SS (18) */
+
+typedef int	greg_t;	/* one general register value */
+typedef greg_t	gregset_t[_NGREG];	/* indexed by the register names above */
+
+/*
+ * This definition of the floating point structure is binary
+ * compatible with the Intel386 psABI definition, and source
+ * compatible with that specification for x87-style floating point.
+ * It also allows SSE/SSE2 state to be accessed on machines that
+ * possess such hardware capabilities.
+ */
+typedef struct fpu {	/* floating point register save area */
+	union {
+		struct fpchip_state {	/* layout when fp hardware is used */
+			uint32_t state[27];	/* 287/387 saved state */
+			uint32_t status;	/* saved at exception */
+			uint32_t mxcsr;		/* SSE control and status */
+			uint32_t xstatus;	/* SSE mxcsr at exception */
+			uint32_t __pad[2];	/* align to 128-bits */
+			myupad128_t xmm[8];	/* %xmm0-%xmm7 */
+		} fpchip_state;
+		struct fp_emul_space {		/* for emulator(s) */
+			uint8_t	fp_emul[246];
+			uint8_t	fp_epad[2];
+		} fp_emul_space;
+		uint32_t	f_fpregs[95];	/* union of the above */
+	} fp_reg_set;
+} fpregset_t;
+
+/*
+ * Structure mcontext defines the complete hardware machine state.
+ * (This structure is specified in the i386 ABI suppl.)
+ */
+typedef struct {	/* general registers followed by floating point state */
+	gregset_t	gregs;		/* general register set */
+	fpregset_t	fpregs;		/* floating point register set */
+} mcontext_t;
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* _SYS_REGSET_H */
diff --git a/usr/src/libm/src/m9x/remquo.c b/usr/src/libm/src/m9x/remquo.c
new file mode 100644
index 0000000..25d501e
--- /dev/null
+++ b/usr/src/libm/src/m9x/remquo.c
@@ -0,0 +1,267 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"@(#)remquo.c	1.10	06/01/31 SMI"
+
+#pragma weak remquo = __remquo
+
+/* INDENT OFF */
+/*
+ * double remquo(double x, double y, int *quo) returns remainder(x,y) and
+ * stores through quo an integer such that *quo = N mod {2**31}, where N
+ * is the exact value of x/y rounded to the nearest integer, ties to even.
+ *
+ * remquo calls the internal helper fmodquo.
+ */
+/* INDENT ON */
+
+#include "libm.h"
+#include "libm_synonyms.h"
+#include "libm_protos.h"
+#include <math.h>		/* fabs() */
+
+#if defined(__sparc)
+#define	HIWORD	0	/* high 32 bits of a double come first */
+#define	LOWORD	1
+#elif defined(__i386)
+#define	HIWORD	1	/* high 32 bits of a double come second */
+#define	LOWORD	0
+#else
+#error Unknown architecture
+#endif
+#define	__HI(x)	((int *) &x)[HIWORD]	/* sign, exponent, top fraction bits */
+#define	__LO(x)	((int *) &x)[LOWORD]	/* low 32 fraction bits */
+
+static const double one = 1.0, Zero[] = {0.0, -0.0};	/* Zero[] is indexed by sign */
+
+static double
+fmodquo(double x, double y, int *quo) {	/* fixed-point x mod y; quotient bits -> *quo */
+	int n, hx, hy, hz, ix, iy, sx, sq, i, m;
+	unsigned lx, ly, lz;
+
+	hx = __HI(x);		/* high word of x */
+	lx = __LO(x);		/* low  word of x */
+	hy = __HI(y);		/* high word of y */
+	ly = __LO(y);		/* low  word of y */
+	sx = hx & 0x80000000;	/* sign of x */
+	sq = (hx ^ hy) & 0x80000000;	/* sign of x/y */
+	hx ^= sx;		/* |x| */
+	hy &= 0x7fffffff;	/* |y| */
+
+	/* purge off exception values */
+	*quo = 0;
+	if ((hy | ly) == 0 || hx >= 0x7ff00000 ||	/* y=0, or x !finite */
+	    (hy | ((ly | -ly) >> 31)) > 0x7ff00000)	/* or y is NaN */
+		return ((x * y) / (x * y));
+	if (hx <= hy) {
+		if (hx < hy || lx < ly)
+			return (x);	/* |x|<|y| return x */
+		if (lx == ly) {
+			*quo = 1 + (sq >> 30);	/* 1, or -1 when sq is negative */
+			/* |x|=|y| return x*0 */
+			return (Zero[(unsigned) sx >> 31]);
+		}
+	}
+
+	/* determine ix = ilogb(x) */
+	if (hx < 0x00100000) {	/* subnormal x */
+		if (hx == 0) {
+			for (ix = -1043, i = lx; i > 0; i <<= 1)
+				ix -= 1;	/* one step per leading zero bit of lx */
+		} else {
+			for (ix = -1022, i = (hx << 11); i > 0; i <<= 1)
+				ix -= 1;	/* one step per leading zero bit of hx<<11 */
+		}
+	} else
+		ix = (hx >> 20) - 1023;	/* remove the exponent bias */
+
+	/* determine iy = ilogb(y) */
+	if (hy < 0x00100000) {	/* subnormal y */
+		if (hy == 0) {
+			for (iy = -1043, i = ly; i > 0; i <<= 1)
+				iy -= 1;
+		} else {
+			for (iy = -1022, i = (hy << 11); i > 0; i <<= 1)
+				iy -= 1;
+		}
+	} else
+		iy = (hy >> 20) - 1023;	/* remove the exponent bias */
+
+	/* set up {hx,lx}, {hy,ly} and align y to x */
+	if (ix >= -1022)
+		hx = 0x00100000 | (0x000fffff & hx);	/* add the implicit bit */
+	else {			/* subnormal x, shift x to normal */
+		n = -1022 - ix;
+		if (n <= 31) {
+			hx = (hx << n) | (lx >> (32 - n));
+			lx <<= n;
+		} else {
+			hx = lx << (n - 32);
+			lx = 0;
+		}
+	}
+	if (iy >= -1022)
+		hy = 0x00100000 | (0x000fffff & hy);	/* add the implicit bit */
+	else {			/* subnormal y, shift y to normal */
+		n = -1022 - iy;
+		if (n <= 31) {
+			hy = (hy << n) | (ly >> (32 - n));
+			ly <<= n;
+		} else {
+			hy = ly << (n - 32);
+			ly = 0;
+		}
+	}
+
+	/* fix point fmod */
+	n = ix - iy;	/* number of shift-and-subtract steps */
+	m = 0;	/* quotient bits gathered so far */
+	while (n--) {
+		hz = hx - hy;
+		lz = lx - ly;
+		if (lx < ly)
+			hz -= 1;	/* borrow from the high word */
+		if (hz < 0) {	/* x < y: quotient bit 0, just double x */
+			hx = hx + hx + (lx >> 31);
+			lx = lx + lx;
+		} else {	/* x >= y: quotient bit 1 */
+			m += 1;
+			if ((hz | lz) == 0) {	/* return sign(x)*0 */
+				if (n < 31)
+					m <<= 1 + n;	/* remaining quotient bits are 0 */
+				else
+					m = 0;
+				m &= 0x7fffffff;
+				*quo = sq >= 0 ? m : -m;
+				return (Zero[(unsigned) sx >> 31]);
+			}
+			hx = hz + hz + (lz >> 31);	/* x = 2 * (x - y) */
+			lx = lz + lz;
+		}
+		m += m;	/* make room for the next quotient bit */
+	}
+	hz = hx - hy;	/* final step: subtract once more if it fits */
+	lz = lx - ly;
+	if (lx < ly)
+		hz -= 1;
+	if (hz >= 0) {
+		hx = hz;
+		lx = lz;
+		m += 1;
+	}
+	m &= 0x7fffffff;
+	*quo = sq >= 0 ? m : -m;	/* quotient takes the sign of x/y */
+
+	/* convert back to floating value and restore the sign */
+	if ((hx | lx) == 0) {	/* return sign(x)*0 */
+		return (Zero[(unsigned) sx >> 31]);
+	}
+	while (hx < 0x00100000) {	/* normalize x */
+		hx = hx + hx + (lx >> 31);
+		lx = lx + lx;
+		iy -= 1;
+	}
+	if (iy >= -1022) {	/* normalize output */
+		hx = (hx - 0x00100000) | ((iy + 1023) << 20);
+		__HI(x) = hx | sx;
+		__LO(x) = lx;
+	} else {			/* subnormal output */
+		n = -1022 - iy;	/* right shift needed to denormalize */
+		if (n <= 20) {
+			lx = (lx >> n) | ((unsigned) hx << (32 - n));
+			hx >>= n;
+		} else if (n <= 31) {
+			lx = (hx << (32 - n)) | (lx >> n);
+			hx = sx;
+		} else {
+			lx = hx >> (n - 32);
+			hx = sx;
+		}
+		__HI(x) = hx | sx;
+		__LO(x) = lx;
+		x *= one;	/* create necessary signal */
+	}
+	return (x);		/* exact output */
+}
+
+#define	zero	Zero[0]	/* positive zero */
+
+double
+remquo(double x, double y, int *quo) {	/* returns the remainder, quotient bits via quo */
+	int hx, hy, sx, sq;
+	double v;
+	unsigned ly;
+
+	hx = __HI(x);		/* high word of x */
+	hy = __HI(y);		/* high word of y */
+	ly = __LO(y);		/* low  word of y */
+	sx = hx & 0x80000000;	/* sign of x */
+	sq = (hx ^ hy) & 0x80000000;	/* sign of x/y */
+	hx ^= sx;		/* |x| */
+	hy &= 0x7fffffff;	/* |y| */
+
+	/* purge off exception values */
+	*quo = 0;
+	if ((hy | ly) == 0 || hx >= 0x7ff00000 ||	/* y=0, or x !finite */
+	    (hy | ((ly | -ly) >> 31)) > 0x7ff00000)	/* or y is NaN */
+		return ((x * y) / (x * y));
+
+	y = fabs(y);	/* work on magnitudes; signs are reapplied at the end */
+	x = fabs(x);
+	if (hy <= 0x7fdfffff) {	/* skipped when |y| has the largest finite exponent */
+		x = fmodquo(x, y + y, quo);	/* reduce modulo 2|y| */
+		*quo = ((*quo) & 0x3fffffff) << 1;	/* quotient by 2|y| -> by |y| */
+	}
+	if (hy < 0x00200000) {	/* small |y|: compare x+x with y, not x with y/2 */
+		if (x + x > y) {
+			*quo += 1;
+			if (x == y)
+				x = zero;
+			else
+				x -= y;
+			if (x + x >= y) {
+				x -= y;
+				*quo += 1;
+			}
+		}
+	} else {
+		v = 0.5 * y;
+		if (x > v) {	/* x lies above half of |y| */
+			*quo += 1;
+			if (x == y)
+				x = zero;
+			else
+				x -= y;
+			if (x >= v) {
+				x -= y;
+				*quo += 1;
+			}
+		}
+	}
+	if (sq != 0)
+		*quo = -(*quo);	/* quotient takes the sign of x/y */
+	return (sx == 0 ? x : -x);	/* remainder takes the sign of x */
+}
diff --git a/usr/src/libm/src/m9x/remquof.c b/usr/src/libm/src/m9x/remquof.c
new file mode 100644
index 0000000..14a2f73
--- /dev/null
+++ b/usr/src/libm/src/m9x/remquof.c
@@ -0,0 +1,267 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"@(#)remquof.c	1.10	06/01/31 SMI"
+
+#pragma weak remquof = __remquof
+
+/* INDENT OFF */
+/*
+ * float remquof(float x, float y, int *quo) returns remainderf(x,y) and
+ * stores through quo an integer such that *quo = N mod (2**31), where N is
+ * the exact integral part of x/y rounded to nearest even.
+ *
+ * remquof calls the internal function fmodquof.
+ */
+
+#include "libm.h"
+#include "libm_synonyms.h"
+#include "libm_protos.h"
+#include <math.h>
+extern float fabsf(float);
+
+static const int
+	is = (int) 0x80000000,
+	im = 0x007fffff,
+	ii = 0x7f800000,
+	iu = 0x00800000;
+
+static const float zero = 0.0F, half = 0.5F;
+/* INDENT ON */
+
+static float	/* fmodf(x, y); the quotient's low bits, signed like x/y, go to *quo */
+fmodquof(float x, float y, int *quo) {
+	float w;
+	int hx, ix, iy, iz, k, ny, nd, m, sq;
+
+	hx = *(int *) &x;	/* bit pattern of x */
+	ix = hx & 0x7fffffff;	/* |x| */
+	iy = *(int *) &y;	/* bit pattern of y */
+	sq = (iy ^ hx) & is;	/* sign of x/y */
+	iy &= 0x7fffffff;	/* |y| */
+
+	/* purge off exception values */
+	*quo = 0;
+	if (ix >= ii || iy > ii || iy == 0) {	/* x Inf/NaN, y NaN, or y zero */
+		w = x * y;
+		w = w / w;
+	} else if (ix <= iy) {
+		if (ix < iy)
+			w = x;	/* return x if |x|<|y| */
+		else {
+			*quo = 1 + (sq >> 30);	/* |x|==|y|: 1, presumably -1 if sq set */
+			w = zero * x;	/* return sign(x)*0.0  */
+		}
+	} else {
+		/* INDENT OFF */
+		/*
+		 * scale x,y to "normal" with
+		 *	ny = exponent of y
+		 *	nd = exponent of x minus exponent of y
+		 */
+		/* INDENT ON */
+		ny = iy >> 23;	/* biased exponent field of y */
+		k = ix >> 23;	/* biased exponent field of x */
+
+		/* special case for subnormal y or x */
+		if (ny == 0) {
+			ny = 1;
+			while (iy < iu) {	/* shift y up until bit 23 is set */
+				ny -= 1;
+				iy += iy;
+			}
+			nd = k - ny;
+			if (k == 0) {	/* x is subnormal as well */
+				nd += 1;
+				while (ix < iu) {
+					nd -= 1;
+					ix += ix;
+				}
+			} else
+				ix = iu | (ix & im);	/* fraction of x with the leading 1 */
+		} else {
+			nd = k - ny;
+			ix = iu | (ix & im);	/* fraction of x with the leading 1 */
+			iy = iu | (iy & im);	/* fraction of y with the leading 1 */
+		}
+		/* INDENT OFF */
+		/* fix point fmod for normalized ix and iy */
+		/*
+		 * while (nd--) {
+		 *	iz = ix - iy;
+		 *	if (iz < 0)
+		 *		ix = ix + ix;
+		 *	else if (iz == 0) {
+		 *		*(int *) &w = is & hx;
+		 *		return w;
+		 *	} else
+		 *		ix = iz + iz;
+		 * }
+		 */
+		/* INDENT ON */
+		/* unroll the above loop 4 times to gain performance */
+		m = 0;		/* quotient bits, one new bit per step */
+		k = nd >> 2;	/* number of 4-step groups */
+		nd -= (k << 2);	/* steps left over for the tail loop */
+		while (k--) {
+			iz = ix - iy;
+			if (iz >= 0) {	/* y fits: quotient bit 1, keep the difference */
+				m += 1;
+				ix = iz + iz;
+			} else
+				ix += ix;
+			m += m;
+			iz = ix - iy;
+			if (iz >= 0) {
+				m += 1;
+				ix = iz + iz;
+			} else
+				ix += ix;
+			m += m;
+			iz = ix - iy;
+			if (iz >= 0) {
+				m += 1;
+				ix = iz + iz;
+			} else
+				ix += ix;
+			m += m;
+			iz = ix - iy;
+			if (iz >= 0) {
+				m += 1;
+				ix = iz + iz;
+			} else
+				ix += ix;
+			m += m;
+			if (iz == 0) {	/* remainder is exactly zero: stop early */
+				iz = (k << 2) + nd;	/* steps that were still to come */
+				if (iz < 32)
+					m <<= iz;	/* their quotient bits are all 0 */
+				else
+					m = 0;
+				m &= 0x7fffffff;
+				*quo = sq >= 0 ? m : -m;
+				*(int *) &w = is & hx;	/* zero with the sign of x */
+				return (w);
+			}
+		}
+		while (nd--) {	/* the remaining 0..3 steps */
+			iz = ix - iy;
+			if (iz >= 0) {
+				m += 1;
+				ix = iz + iz;
+			} else
+				ix += ix;
+			m += m;
+		}
+		/* end of unrolling */
+
+		iz = ix - iy;	/* last step: no doubling afterwards */
+		if (iz >= 0) {
+			m += 1;
+			ix = iz;
+		}
+		m &= 0x7fffffff;
+		*quo = sq >= 0 ? m : -m;
+
+		/* convert back to floating value and restore the sign */
+		if (ix == 0) {
+			*(int *) &w = is & hx;	/* zero with the sign of x */
+			return (w);
+		}
+		while (ix < iu) {	/* renormalize so that bit 23 is set */
+			ix += ix;
+			ny -= 1;
+		}
+		while (ix > (iu + iu)) {
+			ny += 1;
+			ix >>= 1;
+		}
+		if (ny > 0)	/* normal output: sign | fraction | exponent */
+			*(int *) &w = (is & hx) | (ix & im) | (ny << 23);
+		else {		/* subnormal output */
+			k = -ny + 1;
+			ix >>= k;
+			*(int *) &w = (is & hx) | ix;
+		}
+	}
+	return (w);
+}
+
+float		/* remainder of x/y; *quo gets the sign and low bits of the quotient */
+remquof(float x, float y, int *quo) {
+	int hx, hy, sx, sq;
+	float v;
+
+	hx = *(int *) &x;	/* bit pattern of x */
+	hy = *(int *) &y;	/* bit pattern of y */
+	sx = hx & is;		/* sign of x */
+	sq = (hx ^ hy) & is;	/* sign of x/y */
+	hx ^= sx;		/* |x| */
+	hy &= 0x7fffffff;	/* |y| */
+
+	/* purge off exception values: y is 0 or NaN, x is Inf or NaN */
+	*quo = 0;
+	if (hx >= ii || hy > ii || hy == 0) {
+		v = x * y;
+		return (v / v);
+	}
+
+	y = fabsf(y);
+	x = fabsf(x);
+	/*
+	 * Reduce x modulo 2y, but only while y + y is finite, i.e. y < 2^127
+	 * (hy <= 0x7effffff).  For larger y the sum would overflow to Inf
+	 * and raise a spurious overflow; x is then already below 2y.
+	 */
+	if (hy <= 0x7effffff) {
+		x = fmodquof(x, y + y, quo);
+		*quo = ((*quo) & 0x3fffffff) << 1;	/* quotient by y = 2 * quotient by 2y */
+	}
+	/* at most two subtractions of y now bring x into [-y/2, y/2] */
+	if (hy < 0x01000000) {	/* y < 2^-125: compare 2x with y, not x with y/2 */
+		if (x + x > y) {
+			*quo += 1;
+			x = (x == y) ? zero : x - y;
+			if (x + x >= y) {
+				x -= y;
+				*quo += 1;
+			}
+		}
+	} else {
+		v = half * y;
+		if (x > v) {
+			*quo += 1;
+			x = (x == y) ? zero : x - y;
+			if (x >= v) {
+				x -= y;
+				*quo += 1;
+			}
+		}
+	}
+	if (sq != 0)		/* give the quotient the sign of x/y */
+		*quo = -(*quo);
+	return (sx == 0 ? x : -x);	/* the remainder was computed for |x| */
+}
diff --git a/usr/src/libm/src/m9x/remquol.c b/usr/src/libm/src/m9x/remquol.c
new file mode 100644
index 0000000..5d24a86
--- /dev/null
+++ b/usr/src/libm/src/m9x/remquol.c
@@ -0,0 +1,344 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"@(#)remquol.c	1.8	06/01/31 SMI"
+
+#pragma weak remquol = __remquol
+
+#include "libm.h"
+#include "libm_synonyms.h"
+#include <sunmath.h>			/* fabsl */
+/* INDENT OFF */
+static const int
+	is = -0x7fffffff - 1,
+	im = 0x0000ffff,
+	iu = 0x00010000;
+
+static const long double zero = 0.0L, one = 1.0L;
+/* INDENT ON */
+
+#if defined(__sparc)
+#define	__H0(x)	((int *) &x)[0]
+#define	__H1(x)	((int *) &x)[1]
+#define	__H2(x)	((int *) &x)[2]
+#define	__H3(x)	((int *) &x)[3]
+#else
+#error Unsupported architecture
+#endif
+
+/*
+ * fmodl(x, y) by shift-and-subtract on the 128-bit bit patterns; the low
+ * bits of the integer quotient, signed like x/y, are stored in *quo.
+ * On entrance: *quo is initialized to 0, x finite and y non-zero & ordered
+ */
+static long double
+fmodquol(long double x, long double y, int *quo) {
+	long double a, b;
+	int n, ix, iy, k, sx, sq, m;
+	int hx;
+	int x0, y0, z0, carry;
+	unsigned x1, x2, x3, y1, y2, y3, z1, z2, z3;
+
+	hx = __H0(x);		/* word 0 carries the sign and exponent */
+	x1 = __H1(x);
+	x2 = __H2(x);
+	x3 = __H3(x);
+	y0 = __H0(y);
+	y1 = __H1(y);
+	y2 = __H2(y);
+	y3 = __H3(y);
+
+	sx = hx & is;		/* sign of x */
+	sq = (hx ^ y0) & is;	/* sign of x/y */
+	x0 = hx ^ sx;		/* word 0 of |x| */
+	y0 &= ~0x80000000;	/* word 0 of |y| */
+
+	a = fabsl(x);
+	b = fabsl(y);
+	if (a <= b) {
+		if (a < b)
+			return (x);	/* |x| < |y|: x itself, quotient stays 0 */
+		else {
+			*quo = 1 + (sq >> 30);	/* |x| == |y| */
+			return (zero * x);	/* zero with the sign of x */
+		}
+	}
+	/* determine ix = ilogbl(x) */
+	if (x0 < iu) {		/* subnormal x */
+		ix = -16382;
+		while (x0 == 0) {	/* shift up 16 bits at a time */
+			ix -= 16;
+			x0 = x1 >> 16;
+			x1 = (x1 << 16) | (x2 >> 16);
+			x2 = (x2 << 16) | (x3 >> 16);
+			x3 = (x3 << 16);
+		}
+		while (x0 < iu) {	/* then one bit at a time */
+			ix -= 1;
+			x0 = (x0 << 1) | (x1 >> 31);
+			x1 = (x1 << 1) | (x2 >> 31);
+			x2 = (x2 << 1) | (x3 >> 31);
+			x3 <<= 1;
+		}
+	} else {
+		ix = (x0 >> 16) - 16383;
+		x0 = iu | (x0 & im);	/* fraction with the leading 1 made explicit */
+	}
+
+	/* determine iy = ilogbl(y) */
+	if (y0 < iu) {		/* subnormal y */
+		iy = -16382;
+		while (y0 == 0) {
+			iy -= 16;
+			y0 = y1 >> 16;
+			y1 = (y1 << 16) | (y2 >> 16);
+			y2 = (y2 << 16) | (y3 >> 16);
+			y3 = (y3 << 16);
+		}
+		while (y0 < iu) {
+			iy -= 1;
+			y0 = (y0 << 1) | (y1 >> 31);
+			y1 = (y1 << 1) | (y2 >> 31);
+			y2 = (y2 << 1) | (y3 >> 31);
+			y3 <<= 1;
+		}
+	} else {
+		iy = (y0 >> 16) - 16383;
+		y0 = iu | (y0 & im);
+	}
+
+	/* fix point fmod */
+	n = ix - iy;		/* number of shift-and-subtract steps */
+	m = 0;			/* quotient bits collected so far */
+	while (n--) {
+		while (x0 == 0 && n >= 16) {	/* skip 16 zero quotient bits at once */
+			m <<= 16;
+			n -= 16;
+			x0 = x1 >> 16;
+			x1 = (x1 << 16) | (x2 >> 16);
+			x2 = (x2 << 16) | (x3 >> 16);
+			x3 = (x3 << 16);
+		}
+		while (x0 < iu && n >= 1) {	/* skip single zero quotient bits */
+			m += m;
+			n -= 1;
+			x0 = (x0 << 1) | (x1 >> 31);
+			x1 = (x1 << 1) | (x2 >> 31);
+			x2 = (x2 << 1) | (x3 >> 31);
+			x3 = (x3 << 1);
+		}
+		carry = 0;
+		z3 = x3 - y3;		/* z = x - y, word by word with borrow */
+		carry = z3 > x3;
+		if (carry == 0) {
+			z2 = x2 - y2;
+			carry = z2 > x2;
+		} else {
+			z2 = x2 - y2 - 1;
+			carry = z2 >= x2;
+		}
+		if (carry == 0) {
+			z1 = x1 - y1;
+			carry = z1 > x1;
+		} else {
+			z1 = x1 - y1 - 1;
+			carry = z1 >= x1;
+		}
+		z0 = x0 - y0 - carry;
+		if (z0 < 0) {	/* double x */
+			x0 = x0 + x0 + ((x1 & is) != 0);
+			x1 = x1 + x1 + ((x2 & is) != 0);
+			x2 = x2 + x2 + ((x3 & is) != 0);
+			x3 = x3 + x3;
+			m += m;
+		} else {
+			m += 1;
+			if (z0 == 0) {
+				if ((z1 | z2 | z3) == 0) {
+					/* 0: we are done */
+					if (n < 31)
+						m <<= (1 + n);	/* remaining bits are 0 */
+					else
+						m = 0;
+					m &= ~0x80000000;
+					*quo = sq >= 0 ? m : -m;
+					__H0(a) = hx & is;	/* zero, sign of x */
+					__H1(a) = __H2(a) = __H3(a) = 0;
+					return (a);
+				}
+			}
+			/* x = z << 1 */
+			z0 = z0 + z0 + ((z1 & is) != 0);
+			z1 = z1 + z1 + ((z2 & is) != 0);
+			z2 = z2 + z2 + ((z3 & is) != 0);
+			z3 = z3 + z3;
+			x0 = z0;
+			x1 = z1;
+			x2 = z2;
+			x3 = z3;
+			m += m;
+		}
+	}
+	carry = 0;
+	z3 = x3 - y3;		/* final step: subtract once more, no doubling */
+	carry = z3 > x3;
+	if (carry == 0) {
+		z2 = x2 - y2;
+		carry = z2 > x2;
+	} else {
+		z2 = x2 - y2 - 1;
+		carry = z2 >= x2;
+	}
+	if (carry == 0) {
+		z1 = x1 - y1;
+		carry = z1 > x1;
+	} else {
+		z1 = x1 - y1 - 1;
+		carry = z1 >= x1;
+	}
+	z0 = x0 - y0 - carry;
+	if (z0 >= 0) {
+		x0 = z0;
+		x1 = z1;
+		x2 = z2;
+		x3 = z3;
+		m += 1;
+	}
+	m &= ~0x80000000;
+	*quo = sq >= 0 ? m : -m;
+
+	/* convert back to floating value and restore the sign */
+	if ((x0 | x1 | x2 | x3) == 0) {
+		__H0(a) = hx & is;
+		__H1(a) = __H2(a) = __H3(a) = 0;
+		return (a);
+	}
+	while (x0 < iu) {	/* renormalize; iy tracks the exponent of the result */
+		if (x0 == 0) {
+			iy -= 16;
+			x0 = x1 >> 16;
+			x1 = (x1 << 16) | (x2 >> 16);
+			x2 = (x2 << 16) | (x3 >> 16);
+			x3 = (x3 << 16);
+		} else {
+			x0 = x0 + x0 + ((x1 & is) != 0);
+			x1 = x1 + x1 + ((x2 & is) != 0);
+			x2 = x2 + x2 + ((x3 & is) != 0);
+			x3 = x3 + x3;
+			iy -= 1;
+		}
+	}
+
+	/* normalize output */
+	if (iy >= -16382) {
+		__H0(a) = sx | (x0 - iu) | ((iy + 16383) << 16);
+		__H1(a) = x1;
+		__H2(a) = x2;
+		__H3(a) = x3;
+	} else {		/* subnormal output */
+		/*
+		 * Shift right by n bits: first by k = n mod 32 bits, then by
+		 * whole words.  When k is 0 the bit shift must be skipped,
+		 * because a shift count of 32 - k = 32 is undefined in C.
+		 */
+		n = -16382 - iy;
+		k = n & 31;
+		if (k != 0) {
+			x3 = (x2 << (32 - k)) | (x3 >> k);
+			x2 = (x1 << (32 - k)) | (x2 >> k);
+			x1 = ((unsigned) x0 << (32 - k)) | (x1 >> k);
+			x0 >>= k;	/* x0 < 2^17, so this is 0 once k > 16 */
+		}
+		while (n >= 32) {	/* whole-word part of the shift */
+			n -= 32;
+			x3 = x2;
+			x2 = x1;
+			x1 = x0;
+			x0 = 0;
+		}
+		__H0(a) = x0 | sx;
+		__H1(a) = x1;
+		__H2(a) = x2;
+		__H3(a) = x3;
+		a *= one;
+	}
+	return (a);
+}
+
+long double	/* remainder of x/y; *quo gets the sign and low bits of the quotient */
+remquol(long double x, long double y, int *quo) {
+	int xhi = __H0(x);		/* high word of x */
+	int yhi = __H0(y);		/* high word of y */
+	int xsign = xhi & is;		/* sign of x */
+	int qsign = (xhi ^ yhi) & is;	/* sign of x/y */
+	int tiny;		/* selects which form of the half-y test is used */
+	long double halfy;
+
+	xhi ^= xsign;			/* |x| */
+	yhi &= ~0x80000000;		/* |y| */
+
+	/*
+	 * Exceptional operands: y is zero, y is NaN, or x is NaN or infinite.
+	 * The quotient is reported as 0 and the value is (x*y)/(x*y).
+	 */
+	*quo = 0;
+	if (y == 0.0L || y != y || xhi >= 0x7fff0000)
+		return ((x * y) / (x * y));
+
+	/* from here on work with the magnitudes only */
+	x = fabsl(x);
+	y = fabsl(y);
+
+	/* reduce modulo 2y, but only while y + y stays finite */
+	if (yhi <= 0x7ffdffff) {
+		x = fmodquol(x, y + y, quo);
+		*quo = ((*quo) & 0x3fffffff) << 1;
+	}
+
+	/*
+	 * Subtract y up to twice more, adding 1 to *quo for each subtraction
+	 * (with 0 <= x < 2y on entry this leaves |x| <= y/2).  When the
+	 * biased exponent of y is below 2 the tests compare 2x with y;
+	 * otherwise they compare x with y/2.
+	 */
+	tiny = (yhi < 0x00020000);
+	halfy = tiny ? y : 0.5L * y;	/* read only when tiny is 0 */
+	if (tiny ? (x + x > y) : (x > halfy)) {
+		*quo += 1;
+		x = (x == y) ? zero : x - y;
+		if (tiny ? (x + x >= y) : (x >= halfy)) {
+			x -= y;
+			*quo += 1;
+		}
+	}
+
+	/* restore the signs: quotient takes that of x/y, remainder that of x */
+	if (qsign != 0)
+		*quo = -(*quo);
+	if (xsign != 0)
+		x = -x;
+	return (x);
+}
diff --git a/usr/src/libm/src/m9x/round.c b/usr/src/libm/src/m9x/round.c
new file mode 100644
index 0000000..f635830
--- /dev/null
+++ b/usr/src/libm/src/m9x/round.c
@@ -0,0 +1,75 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"@(#)round.c	1.4	06/01/31 SMI"
+
+#if defined(ELFOBJ)
+#pragma weak round = __round
+#endif
+
+#include "libm.h"
+
+double		/* x rounded to the nearest integer, halfway cases away from zero */
+round(double x) {
+	union {
+		unsigned i[2];
+		double d;
+	} xx;
+	unsigned hx, sx, i;
+
+	xx.d = x;
+	hx = xx.i[HIWORD] & ~0x80000000;	/* high word of |x| */
+	sx = xx.i[HIWORD] & 0x80000000;		/* sign bit of x */
+	if (hx < 0x43300000) {	/* |x| < 2^52 */
+		if (hx < 0x3ff00000) {	/* |x| < 1 */
+			if (hx >= 0x3fe00000)	/* |x| >= 1/2 */
+				return (sx ? -1.0 : 1.0);
+			return (sx ? -0.0 : 0.0);
+		}
+
+		/* round x at the integer bit: add the 1/2 bit i, then clear i and below */
+		if (hx < 0x41300000) {	/* |x| < 2^20: the 1/2 bit is in the high word */
+			i = 1U << (0x412 - (hx >> 20));
+			xx.i[HIWORD] = (xx.i[HIWORD] + i) & ~(i | (i - 1));
+			xx.i[LOWORD] = 0;
+		} else {		/* the 1/2 bit is in the low word */
+			i = 1U << (0x432 - (hx >> 20));	/* count is 31 for 2^20 <= |x| < 2^21 */
+			xx.i[LOWORD] += i;
+			if (xx.i[LOWORD] < i)	/* carry out of the low word */
+				xx.i[HIWORD]++;
+			xx.i[LOWORD] &= ~(i | (i - 1));
+		}
+		return (xx.d);
+	} else if (hx < 0x7ff00000)	/* finite and |x| >= 2^52: already integral */
+		return (x);
+	else				/* Inf or NaN */
+#if defined(FPADD_TRAPS_INCOMPLETE_ON_NAN)
+		return (hx >= 0x7ff80000 ? x : x + x);
+		/* assumes sparc-like QNaN */
+#else
+		return (x + x);
+#endif
+}
diff --git a/usr/src/libm/src/m9x/roundf.c b/usr/src/libm/src/m9x/roundf.c
new file mode 100644
index 0000000..6d6adb7
--- /dev/null
+++ b/usr/src/libm/src/m9x/roundf.c
@@ -0,0 +1,65 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"@(#)roundf.c	1.3	06/01/31 SMI"
+
+#if defined(ELFOBJ)
+#pragma weak roundf = __roundf
+#endif
+
+#include "libm.h"
+
+float		/* nearest integer to x, halfway cases rounded away from zero */
+roundf(float x) {
+	union {
+		unsigned bits;
+		float val;
+	} u;
+	unsigned mag, neg, halfbit;
+
+	u.val = x;
+	neg = u.bits & 0x80000000;	/* sign bit */
+	mag = u.bits & ~0x80000000;	/* bit pattern of |x| */
+
+	if (mag >= 0x7f800000) {	/* Inf or NaN */
+#if defined(FPADD_TRAPS_INCOMPLETE_ON_NAN)
+		return (mag > 0x7f800000 ? x * x : x + x);
+#else
+		return (x + x);
+#endif
+	}
+	if (mag >= 0x4b000000)		/* |x| >= 2^23: already integral */
+		return (x);
+	if (mag < 0x3f000000)		/* |x| < 1/2: zero with the sign of x */
+		return (neg ? -0.0F : 0.0F);
+	if (mag < 0x3f800000)		/* 1/2 <= |x| < 1: one with the sign of x */
+		return (neg ? -1.0F : 1.0F);
+
+	/* add one half in the bit pattern, then drop all fraction bits */
+	halfbit = 1 << (0x95 - (mag >> 23));
+	u.bits = (u.bits + halfbit) & ~((halfbit << 1) - 1);
+	return (u.val);
+}
diff --git a/usr/src/libm/src/m9x/roundl.c b/usr/src/libm/src/m9x/roundl.c
new file mode 100644
index 0000000..c4859b2
--- /dev/null
+++ b/usr/src/libm/src/m9x/roundl.c
@@ -0,0 +1,165 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"@(#)roundl.c	1.4	06/01/31 SMI"
+
+#if defined(ELFOBJ)
+#pragma weak roundl = __roundl
+#endif
+
+#include "libm.h"
+
+#if defined(__sparc)
+long double	/* x rounded to the nearest integer, halfway cases away from zero */
+roundl(long double x) {
+	union {
+		unsigned i[4];
+		long double q;
+	} xx;
+	unsigned hx, sx, v;
+	int j;
+
+	xx.q = x;
+	sx = xx.i[0] & 0x80000000;	/* sign bit of x */
+	hx = xx.i[0] & ~0x80000000;	/* word 0 of |x|: exponent and top fraction bits */
+
+	/* handle trivial cases */
+	if (hx >= 0x406f0000) /* |x| >= 2^112 + ... or x is nan */
+		return (hx >= 0x7fff0000 ? x + x : x);
+
+	/* handle |x| < 1 */
+	if (hx < 0x3fff0000) {
+		if (hx >= 0x3ffe0000)	/* |x| >= 1/2 */
+			return (sx ? -1.0L : 1.0L);
+		return (sx ? -0.0L : 0.0L);
+	}
+
+	xx.i[0] = hx;	/* work on |x|; the sign is put back at the end */
+	j = 0x406f - (hx >> 16);		/* 1 <= j <= 112 */
+	if (j >= 96) {				/* 96 <= j <= 112 */
+		v = (1U << (j - 96)) >> 1;	/* the 1/2 bit in word 0; 0 if it is in word 1 */
+		if (v) {
+			if (xx.i[0] & v)	/* fraction >= 1/2: round up */
+				xx.i[0] += v;
+			xx.i[0] &= ~(v - 1);	/* clear the bits below the 1/2 bit */
+		} else if (xx.i[1] & 0x80000000)	/* 1/2 bit is the top bit of word 1 */
+				++xx.i[0];
+		xx.i[1] = xx.i[2] = xx.i[3] = 0;
+	} else if (j >= 64) {			/* 64 <= j <= 95 */
+		v = (1U << (j - 64)) >> 1;	/* the 1/2 bit in word 1; 0 if it is in word 2 */
+		if (v) {
+			if (xx.i[1] & v) {
+				xx.i[1] += v;
+				if (xx.i[1] < v)	/* carry into word 0 */
+					++xx.i[0];
+			}
+			xx.i[1] &= ~(v - 1);
+		} else if (xx.i[2] & 0x80000000) {
+				if (++xx.i[1] == 0)	/* carry into word 0 */
+					++xx.i[0];
+		}
+		xx.i[2] = xx.i[3] = 0;
+	} else if (j >= 32) {			/* 32 <= j <= 63 */
+		v = (1U << (j - 32)) >> 1;	/* the 1/2 bit in word 2; 0 if it is in word 3 */
+		if (v) {
+			if (xx.i[2] & v) {
+				xx.i[2] += v;
+				if (xx.i[2] < v) {	/* carry into word 1, then word 0 */
+					if (++xx.i[1] == 0)
+						++xx.i[0];
+				}
+			}
+			xx.i[2] &= ~(v - 1);
+		} else if (xx.i[3] & 0x80000000) {
+				if (++xx.i[2] == 0) {
+					if (++xx.i[1] == 0)
+						++xx.i[0];
+				}
+		}
+		xx.i[3] = 0;
+	} else {				/* 1 <= j <= 31 */
+		v = 1U << (j - 1);	/* the 1/2 bit in word 3 */
+		if (xx.i[3] & v) {
+			xx.i[3] += v;
+			if (xx.i[3] < v) {	/* carry through words 2, 1 and 0 */
+				if (++xx.i[2] == 0) {
+					if (++xx.i[1] == 0)
+						++xx.i[0];
+				}
+			}
+		}
+		xx.i[3] &= ~(v - 1);
+	}
+
+	/* negate result if need be */
+	if (sx)
+		xx.i[0] |= 0x80000000;
+	return (xx.q);
+}
+#elif defined(__i386)
+long double	/* x rounded to the nearest integer, halfway cases away from zero */
+roundl(long double x) {
+	union {
+		unsigned i[3];
+		long double e;
+	} xx;
+	unsigned ex, sx, i;	/* unsigned: i reaches 2^31 and i - 1 must not overflow */
+
+	xx.e = x;
+	ex = xx.i[2] & 0x7fff;	/* biased exponent */
+	sx = xx.i[2] & 0x8000;	/* sign bit */
+	if (ex < 0x403e) {	/* |x| < 2^63 */
+		if (ex < 0x3fff) {	/* |x| < 1 */
+			if (ex >= 0x3ffe)	/* |x| >= 1/2 */
+				return (sx ? -1.0L : 1.0L);
+			return (sx ? -0.0L : 0.0L);
+		}
+
+		/* round x at the integer bit: add the 1/2 bit i, then clear i and below */
+		if (ex < 0x401e) {	/* the 1/2 bit lies in xx.i[1] */
+			i = 1U << (0x401d - ex);
+			xx.i[1] = (xx.i[1] + i) & ~(i | (i - 1));
+			xx.i[0] = 0;
+		} else {		/* the 1/2 bit lies in xx.i[0] */
+			i = 1U << (0x403d - ex);	/* count is 31 when ex == 0x401e */
+			xx.i[0] += i;
+			if (xx.i[0] < i)	/* carry out of the low word */
+				xx.i[1]++;
+			xx.i[0] &= ~(i | (i - 1));
+		}
+		if (xx.i[1] == 0) {	/* significand wrapped: bump the exponent */
+			xx.i[2] = sx | ++ex;
+			xx.i[1] = 0x80000000U;
+		}
+		return (xx.e);
+	} else if (ex < 0x7fff)	/* x is integral */
+		return (x);
+	else			/* inf or nan */
+		return (x + x);
+}
+#else
+#error Unknown architecture
+#endif	/* defined(__sparc) || defined(__i386) */
diff --git a/usr/src/libm/src/m9x/scalbln.c b/usr/src/libm/src/m9x/scalbln.c
new file mode 100644
index 0000000..731d531
--- /dev/null
+++ b/usr/src/libm/src/m9x/scalbln.c
@@ -0,0 +1,108 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"@(#)scalbln.c	1.3	06/01/31 SMI"
+
+#if defined(ELFOBJ)
+#pragma weak scalbln = __scalbln
+#endif
+
+#include "libm.h"
+#include <float.h>		/* DBL_MAX, DBL_MIN */
+
+static const double twom54 = 5.5511151231257827021181583404541015625e-17;
+#if defined(USE_FPSCALE) || defined(__i386)
+static const double two52 = 4503599627370496.0;
+#else
+/*
+ * Normalize the non-zero subnormal whose two words are at px, in place,
+ * and return its biased exponent, a value in [-51,0].
+ */
+static int
+ilogb_biased(unsigned *px) {
+	unsigned hi = px[HIWORD] & ~0x80000000;	/* high word without the sign */
+	unsigned lo = px[LOWORD];
+	unsigned top = hi ? hi : lo;		/* word holding the leading 1 */
+	int shift = hi ? 20 : 52, step;	/* shift: left shift that normalizes */
+
+	/* binary search for the leading 1, narrowing top to 4 bits */
+	for (step = 16; step >= 4; step >>= 1) {
+		if (top >> step) {
+			shift -= step;
+			top >>= step;
+		}
+	}
+	/* 0xffffaa50 packs floor(log2(top)) for top = 1..15, two bits each */
+	shift -= (0xffffaa50 >> (top << 1)) & 0x3;
+	if (shift < 32) {
+		hi = (hi << shift) | lo >> (32 - shift);
+		lo <<= shift;
+	} else {
+		hi = lo << (shift - 32);
+		lo = 0;
+	}
+	px[HIWORD] = (px[HIWORD] & 0x80000000) | hi;
+	px[LOWORD] = lo;
+	return (1 - shift);
+}
+#endif	/* defined(USE_FPSCALE) */
+
+double		/* x * 2^n, computed by adjusting the exponent field of x */
+scalbln(double x, long n) {
+	int *px = (int *) &x, ix, k;
+
+	ix = px[HIWORD] & ~0x80000000;	/* high word of |x| */
+	k = ix >> 20;			/* biased exponent */
+	if (k == 0x7ff)			/* Inf or NaN */
+#if defined(FPADD_TRAPS_INCOMPLETE_ON_NAN)
+		return ((px[HIWORD] & 0x80000) != 0 ? x : x + x);
+		/* assumes sparc-like QNaN */
+#else
+		return (x + x);
+#endif
+	if ((px[LOWORD] | ix) == 0 || n == 0)	/* x is zero, or nothing to scale */
+		return (x);
+	if (k == 0) {			/* subnormal: normalize, k becomes <= 0 */
+#if defined(USE_FPSCALE) || defined(__i386)
+		x *= two52;
+		k = ((px[HIWORD] & ~0x80000000) >> 20) - 52;
+#else
+		k = ilogb_biased((unsigned *) px);
+#endif
+	}
+	k += (n > 5000) ? 5000 : (n < -5000) ? -5000 : (int) n;	/* clamped: no int overflow */
+	if (k > 0x7fe)		/* overflow; always taken when n > 5000 */
+		return (DBL_MAX * copysign(DBL_MAX, x));
+	if (k <= -54)		/* underflow; always taken when n < -5000 */
+		return (DBL_MIN * copysign(DBL_MIN, x));
+	if (k > 0) {		/* normal result: store the new exponent */
+		px[HIWORD] = (px[HIWORD] & ~0x7ff00000) | (k << 20);
+		return (x);
+	}
+	k += 54;		/* subnormal result: build x * 2^54, then scale down */
+	px[HIWORD] = (px[HIWORD] & ~0x7ff00000) | (k << 20);
+	return (x * twom54);
+}
diff --git a/usr/src/libm/src/m9x/scalblnf.c b/usr/src/libm/src/m9x/scalblnf.c
new file mode 100644
index 0000000..ae69036
--- /dev/null
+++ b/usr/src/libm/src/m9x/scalblnf.c
@@ -0,0 +1,93 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"@(#)scalblnf.c	1.3	06/01/31 SMI"
+
+#if defined(ELFOBJ)
+#pragma weak scalblnf = __scalblnf
+#endif
+
+#include "libm.h"
+#include <float.h>		/* FLT_MAX, FLT_MIN */
+
+static const float twom25f = 2.98023223876953125e-8F;
+#if defined(USE_FPSCALE) || defined(__i386)
+static const float two23f = 8388608.0F;
+#else
+/*
+ * v: bit pattern of a non-zero subnormal |x|; returns floor(log2(v)) - 22
+ */
+static int
+ilogbf_biased(unsigned v) {
+	int r = -22, step;
+
+	/* binary search for the leading 1, narrowing v to 4 bits */
+	for (step = 16; step >= 4; step >>= 1) {
+		if (v >> step) {
+			r += step;
+			v >>= step;
+		}
+	}
+	return (r + ((0xffffaa50 >> (v << 1)) & 0x3));	/* 2-bit log2 table */
+}
+#endif	/* defined(USE_FPSCALE) */
+
+float		/* x * 2^n, computed by adjusting the exponent field of x */
+scalblnf(float x, long n) {
+	int *px = (int *) &x, ix, k;
+
+	ix = *px & ~0x80000000;		/* bit pattern of |x| */
+	k = ix >> 23;			/* biased exponent */
+	if (k == 0xff)			/* Inf or NaN */
+#if defined(FPADD_TRAPS_INCOMPLETE_ON_NAN)
+		return (ix > 0x7f800000 ? x * x : x);
+#else
+		return (x + x);
+#endif
+	if (ix == 0 || n == 0)		/* x is zero, or nothing to scale */
+		return (x);
+	if (k == 0) {			/* subnormal: normalize, k becomes <= 0 */
+#if defined(USE_FPSCALE) || defined(__i386)
+		x *= two23f;
+		k = ((*px & ~0x80000000) >> 23) - 23;
+#else
+		k = ilogbf_biased(ix);
+		*px = (*px & 0x80000000) | (ix << (-k + 1));
+#endif
+	}
+	k += (n > 5000) ? 5000 : (n < -5000) ? -5000 : (int) n;	/* clamped: no int overflow */
+	if (k > 0xfe)		/* overflow; always taken when n > 5000 */
+		return (FLT_MAX * copysignf(FLT_MAX, x));
+	if (k <= -25)		/* underflow; always taken when n < -5000 */
+		return (FLT_MIN * copysignf(FLT_MIN, x));
+	if (k > 0) {		/* normal result: store the new exponent */
+		*px = (*px & ~0x7f800000) | (k << 23);
+		return (x);
+	}
+	k += 25;		/* subnormal result: build x * 2^25, then scale down */
+	*px = (*px & ~0x7f800000) | (k << 23);
+	return (x * twom25f);
+}
diff --git a/usr/src/libm/src/m9x/scalblnl.c b/usr/src/libm/src/m9x/scalblnl.c
new file mode 100644
index 0000000..f017495
--- /dev/null
+++ b/usr/src/libm/src/m9x/scalblnl.c
@@ -0,0 +1,81 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"@(#)scalblnl.c	1.3	06/01/31 SMI"
+
+#if defined(ELFOBJ)
+#pragma weak scalblnl = __scalblnl
+#endif
+
+#include "libm.h"
+#include <float.h>		/* LDBL_MAX, LDBL_MIN */
+
+#if defined(__sparc)
+#define	XSET_EXP(k, x)	((int *) &x)[0] = (((int *) &x)[0] & ~0x7fff0000) | \
+				(k << 16)
+#define	ISINFNANL(k, x)	(k == 0x7fff)
+#define	XTWOT_OFFSET	113
+static const long double xtwot = 10384593717069655257060992658440192.0L,
+								/* 2^113 */
+	twomtm1 = 4.814824860968089632639944856462318296E-35L;	/* 2^-114 */
+#elif defined(__i386)
+#define	XSET_EXP(k, x)	((int *) &x)[2] = (((int *) &x)[2] & ~0x7fff) | k
+#if defined(HANDLE_UNSUPPORTED)
+#define	ISINFNANL(k, x)	(k == 0x7fff || k != 0 && \
+				(((int *) &x)[1] & 0x80000000) == 0)
+#else
+#define	ISINFNANL(k, x)	(k == 0x7fff)
+#endif
+#define	XTWOT_OFFSET	64
+static const long double xtwot = 18446744073709551616.0L,	/* 2^64 */
+	twomtm1 = 2.7105054312137610850186E-20L;		/* 2^-65 */
+#endif
+
+long double	/* x * 2^n, computed by adjusting the exponent field of x */
+scalblnl(long double x, long n) {
+	int k = XBIASED_EXP(x);
+
+	if (ISINFNANL(k, x))
+		return (x + x);
+	if (ISZEROL(x) || n == 0)	/* x is zero, or nothing to scale */
+		return (x);
+	if (k == 0) {			/* subnormal: scale up by xtwot first */
+		x *= xtwot;
+		k = XBIASED_EXP(x) - XTWOT_OFFSET;
+	}
+	k += (n > 50000) ? 50000 : (n < -50000) ? -50000 : (int) n;	/* clamped: no int overflow */
+	if (k > 0x7ffe)		/* overflow; always taken when n > 50000 */
+		return (LDBL_MAX * copysignl(LDBL_MAX, x));
+	if (k <= -XTWOT_OFFSET - 1)	/* underflow; always taken when n < -50000 */
+		return (LDBL_MIN * copysignl(LDBL_MIN, x));
+	if (k > 0) {		/* normal result: store the new exponent */
+		XSET_EXP(k, x);
+		return (x);
+	}
+	k += XTWOT_OFFSET + 1;	/* subnormal result: build a scaled-up x, then scale down */
+	XSET_EXP(k, x);
+	return (x * twomtm1);
+}
diff --git a/usr/src/libm/src/m9x/tgamma.c b/usr/src/libm/src/m9x/tgamma.c
new file mode 100644
index 0000000..4e5253f
--- /dev/null
+++ b/usr/src/libm/src/m9x/tgamma.c
@@ -0,0 +1,1703 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"@(#)tgamma.c	1.13	06/01/31 SMI"
+
+#if defined(ELFOBJ)
+#pragma weak tgamma = __tgamma
+#endif
+
+/* INDENT OFF */
+/*
+ * True gamma function
+ * double tgamma(double x)
+ *
+ * Error:
+ * ------
+ *  	Less than one ulp for both positive and negative arguments.
+ *
+ * Algorithm:
+ * ---------
+ *	A: For negative argument
+ *		(1) gamma(-n or -inf) is NaN
+ *		(2) Underflow Threshold
+ *		(3) Reduction to gamma(1+x)
+ *	B: For x between 1 and 2
+ * 	C: For x between 0 and 1
+ *	D: For x between 2 and 8
+ *	E: Overflow threshold {see over.c}
+ *	F: For overflow_threshold >= x >= 8
+ *
+ * Implementation details
+ * -----------------------
+ *							-pi
+ * (A) For negative argument, use gamma(-x) = ------------------------.
+ *                                            (sin(pi*x)*gamma(1+x))
+ *
+ *   (1) gamma(-n or -inf) is NaN with invalid signal by SUSv3 spec.
+ *	 (Ideally, gamma(-n) = 1/sinpi(n) = (-1)**(n+1) * inf.)
+ *
+ *   (2) Underflow Threshold. For each precision, there is a value T
+ *	such that when x>T and when x is not an integer, gamma(-x) will
+ *       always underflow. A table of the underflow threshold value is given
+ *	below. For proof, see file "under.c".
+ *
+ *	Precision	underflow threshold T =
+ *	----------------------------------------------------------------------
+ *	single	41.000041962					= 41  + 11 ULP
+ *		(machine format) 4224000B
+ *	double	183.000000000000312639				= 183 + 11 ULP
+ *		(machine format) 4066E000 0000000B
+ *	quad	1774.0000000000000000000000000000017749370	= 1774 + 9 ULP
+ *		(machine format) 4009BB80000000000000000000000009
+ *	----------------------------------------------------------------------
+ *
+ *   (3) Reduction to gamma(1+x).
+ *	Because of (1) and (2), we need only consider non-integral x
+ *	such that 0<x<T. Let k = [x] and z = x-[x]. Define
+ *                  sin(x*pi)                cos(x*pi)
+ *	kpsin(x) = --------- and kpcos(x) = --------- . Then
+ *                     pi                       pi
+ *                                    1
+ *		gamma(-x) = --------------------.
+ *		            -kpsin(x)*gamma(1+x)
+ *	Since x = k+z,
+ *                                                  k+1
+ *		-sin(x*pi) = -sin(k*pi+z*pi) = (-1)   *sin(z*pi),
+ *                               k+1
+ *	we have -kpsin(x) = (-1)   * kpsin(z).  We can further
+ *	reduce z to t by
+ *	   (I)   t = z	     when 0.00000     <= z < 0.31830...
+ *	   (II)  t = 0.5-z   when 0.31830...  <= z < 0.681690...
+ *	   (III) t = 1-z     when 0.681690... <= z < 1.00000
+ *	and correspondingly
+ *	   (I)   kpsin(z) = kpsin(t)  	... 0<= z < 0.3184
+ *	   (II)  kpsin(z) = kpcos(t) 	... |t|   < 0.182
+ *	   (III) kpsin(z) = kpsin(t) 	... 0<= t < 0.3184
+ *
+ *	Using a special Remez algorithm, we obtain the following polynomial
+ *	approximation for kpsin(t) for 0<=t<0.3184:
+ *
+ *	Computation note: in simulating higher precision arithmetic, kpsin
+ *	returns head = t and tail = ks[0]*t^3 + (...) to maintain extra bits.
+ *
+ *	Quad precision, remez error <= 2**(-129.74)
+ *                                   3            5                   27
+ *	    kpsin(t) = t + ks[0] * t  + ks[1] * t  + ... + ks[12] * t
+ *
+ *       ks[ 0] =  -1.64493406684822643647241516664602518705158902870e+0000
+ *       ks[ 1] =   8.11742425283353643637002772405874238094995726160e-0001
+ *       ks[ 2] =  -1.90751824122084213696472111835337366232282723933e-0001
+ *       ks[ 3] =   2.61478478176548005046532613563241288115395517084e-0002
+ *       ks[ 4] =  -2.34608103545582363750893072647117829448016479971e-0003
+ *       ks[ 5] =   1.48428793031071003684606647212534027556262040158e-0004
+ *       ks[ 6] =  -6.97587366165638046518462722252768122615952898698e-0006
+ *       ks[ 7] =   2.53121740413702536928659271747187500934840057929e-0007
+ *       ks[ 8] =  -7.30471182221385990397683641695766121301933621956e-0009
+ *       ks[ 9] =   1.71653847451163495739958249695549313987973589884e-0010
+ *       ks[10] =  -3.34813314714560776122245796929054813458341420565e-0012
+ *       ks[11] =   5.50724992262622033449487808306969135431411753047e-0014
+ *       ks[12] =  -7.67678132753577998601234393215802221104236979928e-0016
+ *
+ *	Double precision, Remez error <= 2**(-62.9)
+ *                                  3            5                  15
+ *	    kpsin(t) = t + ks[0] * t  + ks[1] * t  + ... + ks[6] * t
+ *
+ *       ks[0] =  -1.644934066848226406065691	(0x3ffa51a6 625307d3)
+ *       ks[1] =   8.11742425283341655883668741874008920850698590621e-0001
+ *       ks[2] =  -1.90751824120862873825597279118304943994042258291e-0001
+ *       ks[3] =   2.61478477632554278317289628332654539353521911570e-0002
+ *       ks[4] =  -2.34607978510202710377617190278735525354347705866e-0003
+ *       ks[5] =   1.48413292290051695897242899977121846763824221705e-0004
+ *       ks[6] =  -6.87730769637543488108688726777687262485357072242e-0006
+ *
+ *	Single precision, Remez error <= 2**(-34.09)
+ *                                  3            5                  9
+ *	    kpsin(t) = t + ks[0] * t  + ks[1] * t  + ... + ks[3] * t
+ *
+ *       ks[0] =  -1.64493404985645811354476665052005342839447790544e+0000
+ *       ks[1] =   8.11740794458351064092797249069438269367389272270e-0001
+ *       ks[2] =  -1.90703144603551216933075809162889536878854055202e-0001
+ *       ks[3] =   2.55742333994264563281155312271481108635575331201e-0002
+ *
+ *	Computation note: in simulating higher precision arithmetic, kpsin
+ *	returns head = t and tail = ks[0]*t^3 + (...) to maintain extra bits
+ *   	of precision.
+ *
+ *	And for kpcos(t) for |t|< 0.183:
+ *
+ *	Quad precision, remez <= 2**(-122.48)
+ *                                     2            4                  22
+ *	    kpcos(t) = 1/pi -  pi/2 * t  + kc[2] * t + ... + kc[11] * t
+ *
+ *       kc[2] =   1.29192819501249250731151312779548918765320728489e+0000
+ *       kc[3] =  -4.25027339979557573976029596929319207009444090366e-0001
+ *       kc[4] =   7.49080661650990096109672954618317623888421628613e-0002
+ *       kc[5] =  -8.21458866111282287985539464173976555436050215120e-0003
+ *       kc[6] =   6.14202578809529228503205255165761204750211603402e-0004
+ *       kc[7] =  -3.33073432691149607007217330302595267179545908740e-0005
+ *       kc[8] =   1.36970959047832085796809745461530865597993680204e-0006
+ *       kc[9] =  -4.41780774262583514450246512727201806217271097336e-0008
+ *       kc[10]=   1.14741409212381858820016567664488123478660705759e-0009
+ *       kc[11]=  -2.44261236114707374558437500654381006300502749632e-0011
+ *
+ *	Double precision, remez < 2**(-61.91)
+ *                                   2            4                  12
+ *	    kpcos(t) = 1/pi - pi/2 *t +  kc[2] * t  + ... + kc[6] * t
+ *
+ *       kc[2] =   1.29192819501230224953283586722575766189551966008e+0000
+ *       kc[3] =  -4.25027339940149518500158850753393173519732149213e-0001
+ *       kc[4] =   7.49080625187015312373925142219429422375556727752e-0002
+ *       kc[5] =  -8.21442040906099210866977352284054849051348692715e-0003
+ *       kc[6] =   6.10411356829515414575566564733632532333904115968e-0004
+ *
+ *	Single precision, remez < 2**(-30.13)
+ *                                       2                  6
+ *	    kpcos(t) = kc[0] +  kc[1] * t  + ... + kc[3] * t
+ *
+ *       kc[0] =   3.18309886183790671537767526745028724068919291480e-0001
+ *       kc[1] =  -1.57079581447762568199467875065854538626594937791e+0000
+ *       kc[2] =   1.29183528092558692844073004029568674027807393862e+0000
+ *       kc[3] =  -4.20232949771307685981015914425195471602739075537e-0001
+ *
+ *	Computation note: in simulating higher precision arithmetic, kpcos
+ *	returns head = 1/pi chopped, and tail = pi/2 *t^2 + (tail part of 1/pi
+ *	+ ...) to maintain extra bits of precision. In particular, pi/2 * t^2
+ *	is calculated with great care.
+ *
+ *	Thus, the computation of gamma(-x), x>0, is:
+ *	Let k = int(x), z = x-k.
+ *	For z in (I)
+ *                                    k+1
+ *			          (-1)
+ * 		gamma(-x) = ------------------- ;
+ *		            kpsin(z)*gamma(1+x)
+ *
+ *	otherwise, for z in (II),
+ *                                      k+1
+ *			            (-1)
+ * 		gamma(-x) = ----------------------- ;
+ *			    kpcos(0.5-z)*gamma(1+x)
+ *
+ *	otherwise, for z in (III),
+ *                                      k+1
+ *			            (-1)
+ * 		gamma(-x) = --------------------- .
+ *		            kpsin(1-z)*gamma(1+x)
+ *
+ *	Thus, the computation of gamma(-x) reduced to the computation of
+ *	gamma(1+x) and kpsin(), kpcos().
+ *
+ * (B) For x between 1 and 2.  We break [1,2] into three parts:
+ *	GT1 = [1.0000, 1.2845]
+ * 	GT2 = [1.2844, 1.6374]
+ * 	GT3 = [1.6373, 2.0000]
+ *
+ *    For x in GTi, i=1,2,3, let
+ * 	z1  =  1.134861805732790769689793935774652917006
+ *	gz1 = gamma(z1)  =   0.9382046279096824494097535615803269576988
+ *	tz1 = gamma'(z1) =  -0.3517214357852935791015625000000000000000
+ *
+ *	z2  =  1.461632144968362341262659542325721328468e+0000
+ *	gz2 = gamma(z2)  = 0.8856031944108887002788159005825887332080
+ *	tz2 = gamma'(z2) = 0.00
+ *
+ *	z3  =  1.819773101100500601787868704921606996312e+0000
+ *	gz3 = gamma(z3)  = 0.9367814114636523216188468970808378497426
+ *	tz3 = gamma'(z3) = 0.2805306315422058105468750000000000000000
+ *
+ *    and
+ *	y = x-zi	... for extra precision, write y = y.h + y.l
+ *    Then
+ *	gamma(x) = gzi + tzi*(y.h+y.l) + y*y*Ri(y),
+ *		 = gzi.h + (tzi*y.h + ((tzi*y.l+gzi.l) +  y*y*Ri(y)))
+ *		 = gy.h + gy.l
+ *    where
+ *	(I) For double precision
+ *
+ *		Ri(y) = Pi(y)/Qi(y), i=1,2,3;
+ *
+ *		P1(y) = p1[0] + p1[1]*y + ... + p1[4]*y^4
+ *		Q1(y) = q1[0] + q1[1]*y + ... + q1[5]*y^5
+ *
+ *		P2(y) = p2[0] + p2[1]*y + ... + p2[3]*y^3
+ *		Q2(y) = q2[0] + q2[1]*y + ... + q2[6]*y^6
+ *
+ *		P3(y) = p3[0] + p3[1]*y + ... + p3[4]*y^4
+ *		Q3(y) = q3[0] + q3[1]*y + ... + q3[5]*y^5
+ *
+ *		Remez precision of Ri(y):
+ *		|gamma(x)-(gzi+tzi*y) - y*y*Ri(y)|  <= 2**-62.3	... for i = 1
+ *					            <= 2**-59.4	... for i = 2
+ *					            <= 2**-62.1	... for i = 3
+ *
+ *	(II) For quad precision
+ *
+ *		Ri(y) = Pi(y)/Qi(y), i=1,2,3;
+ *
+ *		P1(y) = p1[0] + p1[1]*y + ... + p1[9]*y^9
+ *		Q1(y) = q1[0] + q1[1]*y + ... + q1[8]*y^8
+ *
+ *		P2(y) = p2[0] + p2[1]*y + ... + p2[9]*y^9
+ *		Q2(y) = q2[0] + q2[1]*y + ... + q2[9]*y^9
+ *
+ *		P3(y) = p3[0] + p3[1]*y + ... + p3[9]*y^9
+ *		Q3(y) = q3[0] + q3[1]*y + ... + q3[9]*y^9
+ *
+ *		Remez precision of Ri(y):
+ *		|gamma(x)-(gzi+tzi*y) - y*y*Ri(y)|  <= 2**-118.2 ... for i = 1
+ *					            <= 2**-126.8 ... for i = 2
+ *					            <= 2**-119.5 ... for i = 3
+ *
+ *	(III) For single precision
+ *
+ *		Ri(y) = Pi(y), i=1,2,3;
+ *
+ *		P1(y) = p1[0] + p1[1]*y + ... + p1[5]*y^5
+ *
+ *		P2(y) = p2[0] + p2[1]*y + ... + p2[5]*y^5
+ *
+ *		P3(y) = p3[0] + p3[1]*y + ... + p3[4]*y^4
+ *
+ *		Remez precision of Ri(y):
+ *		|gamma(x)-(gzi+tzi*y) - y*y*Ri(y)|  <= 2**-30.8	... for i = 1
+ *					            <= 2**-31.6	... for i = 2
+ *					            <= 2**-29.5	... for i = 3
+ *
+ *    Notes. (1) GTi and zi are chosen to balance the interval width and
+ *		minimize the distance between gamma(x) and the tangent line at
+ *		zi. In particular, we have
+ *		|gamma(x)-(gzi+tzi*(x-zi))|  <=   0.01436... for x in [1,z2]
+ *					     <=   0.01265... for x in [z2,2]
+ *
+ *           (2) zi are slightly adjusted so that tzi=gamma'(zi) is very
+ *		close to a single precision value.
+ *
+ *    Coefficients: Single precision
+ *	i= 1:
+ *       P1[0] =   7.09087253435088360271451613398019280077561279443e-0001
+ *       P1[1] =  -5.17229560788652108545141978238701790105241761089e-0001
+ *       P1[2] =   5.23403394528150789405825222323770647162337764327e-0001
+ *       P1[3] =  -4.54586308717075010784041566069480411732634814899e-0001
+ *       P1[4] =   4.20596490915239085459964590559256913498190955233e-0001
+ *	P1[5] =  -3.57307589712377520978332185838241458642142185789e-0001
+ *
+ *	i = 2:
+ *       p2[0] =   4.28486983980295198166056119223984284434264344578e-0001
+ *       p2[1] =  -1.30704539487709138528680121627899735386650103914e-0001
+ *       p2[2] =   1.60856285038051955072861219352655851542955430871e-0001
+ *       p2[3] =  -9.22285161346010583774458802067371182158937943507e-0002
+ *       p2[4] =   7.19240511767225260740890292605070595560626179357e-0002
+ *       p2[5] =  -4.88158265593355093703112238534484636193260459574e-0002
+ *
+ *	i = 3
+ *       p3[0] =   3.82409531118807759081121479786092134814808872880e-0001
+ *       p3[1] =   2.65309888180188647956400403013495759365167853426e-0002
+ *       p3[2] =   8.06815109775079171923561169415370309376296739835e-0002
+ *       p3[3] =  -1.54821591666137613928840890835174351674007764799e-0002
+ *       p3[4] =   1.76308239242717268530498313416899188157165183405e-0002
+ *
+ *    Coefficients: Double precision
+ * 	i = 1:
+ *       p1[0]   =   0.70908683619977797008004927192814648151397705078125000
+ *       p1[1]   =   1.71987061393048558089579513384356441668351720061e-0001
+ *       p1[2]   =  -3.19273345791990970293320316122813960527705450671e-0002
+ *       p1[3]   =   8.36172645419110036267169600390549973563534476989e-0003
+ *       p1[4]   =   1.13745336648572838333152213474277971244629758101e-0003
+ *	 q1[0]   =   1.0
+ *       q1[1]   =   9.71980217826032937526460731778472389791321968082e-0001
+ *       q1[2]   =  -7.43576743326756176594084137256042653497087666030e-0002
+ *       q1[3]   =  -1.19345944932265559769719470515102012246995255372e-0001
+ *       q1[4]   =   1.59913445751425002620935120470781382215050284762e-0002
+ *	 q1[5]   =   1.12601136853374984566572691306402321911547550783e-0003
+ * 	i = 2:
+ *       p2[0]   =   0.42848681585558601181418225678498856723308563232421875
+ *       p2[1]   =   6.53596762668970816023718845105667418483122103629e-0002
+ *       p2[2]   =  -6.97280829631212931321050770925128264272768936731e-0003
+ *       p2[3]   =   6.46342359021981718947208605674813260166116632899e-0003
+ *	 q2[0]   =   1.0
+ *       q2[1]   =   4.57572620560506047062553957454062012327519313936e-0001
+ *       q2[2]   =  -2.52182594886075452859655003407796103083422572036e-0001
+ *       q2[3]   =  -1.82970945407778594681348166040103197178711552827e-0002
+ *       q2[4]   =   2.43574726993169566475227642128830141304953840502e-0002
+ *       q2[5]   =  -5.20390406466942525358645957564897411258667085501e-0003
+ *       q2[6]   =   4.79520251383279837635552431988023256031951133885e-0004
+ * 	i = 3:
+ *	 p3[0]   =   0.382409479734567459008331979930517263710498809814453125
+ *       p3[1]   =   1.42876048697668161599069814043449301572928034140e-0001
+ *       p3[2]   =   3.42157571052250536817923866013561760785748899071e-0003
+ *       p3[3]   =  -5.01542621710067521405087887856991700987709272937e-0004
+ *       p3[4]   =   8.89285814866740910123834688163838287618332122670e-0004
+ *	 q3[0]   =   1.0
+ *       q3[1]   =   3.04253086629444201002215640948957897906299633168e-0001
+ *       q3[2]   =  -2.23162407379999477282555672834881213873185520006e-0001
+ *       q3[3]   =  -1.05060867741952065921809811933670131427552903636e-0002
+ *       q3[4]   =   1.70511763916186982473301861980856352005926669320e-0002
+ *       q3[5]   =  -2.12950201683609187927899416700094630764182477464e-0003
+ *
+ *    Note that all pi0 are exact in double, which is obtained by a
+ *    special Remez Algorithm.
+ *
+ *    Coefficients: Quad precision
+ * 	i = 1:
+ *       p1[0] =   0.709086836199777919037185741507610124611513720557
+ *       p1[1] =   4.45754781206489035827915969367354835667391606951e-0001
+ *       p1[2] =   3.21049298735832382311662273882632210062918153852e-0002
+ *       p1[3] =  -5.71296796342106617651765245858289197369688864350e-0003
+ *       p1[4] =   6.04666892891998977081619174969855831606965352773e-0003
+ *       p1[5] =   8.99106186996888711939627812174765258822658645168e-0004
+ *       p1[6] =  -6.96496846144407741431207008527018441810175568949e-0005
+ *       p1[7] =   1.52597046118984020814225409300131445070213882429e-0005
+ *       p1[8] =   5.68521076168495673844711465407432189190681541547e-0007
+ *       p1[9] =   3.30749673519634895220582062520286565610418952979e-0008
+ *       q1[0] =   1.0+0000
+ *       q1[1] =   1.35806511721671070408570853537257079579490650668e+0000
+ *       q1[2] =   2.97567810153429553405327140096063086994072952961e-0001
+ *       q1[3] =  -1.52956835982588571502954372821681851681118097870e-0001
+ *       q1[4] =  -2.88248519561420109768781615289082053597954521218e-0002
+ *       q1[5] =   1.03475311719937405219789948456313936302378395955e-0002
+ *       q1[6] =   4.12310203243891222368965360124391297374822742313e-0004
+ *       q1[7] =  -3.12653708152290867248931925120380729518332507388e-0004
+ *       q1[8] =   2.36672170850409745237358105667757760527014332458e-0005
+ *
+ * 	i = 2:
+ *       p2[0] =   0.428486815855585429730209907810650616737756697477
+ *       p2[1] =   2.63622124067885222919192651151581541943362617352e-0001
+ *       p2[2] =   3.85520683670028865731877276741390421744971446855e-0002
+ *       p2[3] =   3.05065978278128549958897133190295325258023525862e-0003
+ *       p2[4] =   2.48232934951723128892080415054084339152450445081e-0003
+ *       p2[5] =   3.67092777065632360693313762221411547741550105407e-0004
+ *       p2[6] =   3.81228045616085789674530902563145250532194518946e-0006
+ *       p2[7] =   4.61677225867087554059531455133839175822537617677e-0006
+ *       p2[8] =   2.18209052385703200438239200991201916609364872993e-0007
+ *       p2[9] =   1.00490538985245846460006244065624754421022542454e-0008
+ *       q2[0] =   1.0
+ *       q2[1] =   9.20276350207639290567783725273128544224570775056e-0001
+ *       q2[2] =  -4.79533683654165107448020515733883781138947771495e-0003
+ *       q2[3] =  -1.24538337585899300494444600248687901947684291683e-0001
+ *       q2[4] =   4.49866050763472358547524708431719114204535491412e-0003
+ *       q2[5] =   7.20715455697920560621638325356292640604078591907e-0003
+ *       q2[6] =  -8.68513169029126780280798337091982780598228096116e-0004
+ *       q2[7] =  -1.25104431629401181525027098222745544809974229874e-0004
+ *       q2[8] =   3.10558344839000038489191304550998047521253437464e-0005
+ *       q2[9] =  -1.76829227852852176018537139573609433652506765712e-0006
+ *
+ *	i = 3
+ *       p3[0] =   0.3824094797345675048502747661075355640070439388902
+ *       p3[1] =   3.42198093076618495415854906335908427159833377774e-0001
+ *       p3[2] =   9.63828189500585568303961406863153237440702754858e-0002
+ *       p3[3] =   8.76069421042696384852462044188520252156846768667e-0003
+ *       p3[4] =   1.86477890389161491224872014149309015261897537488e-0003
+ *       p3[5] =   8.16871354540309895879974742853701311541286944191e-0004
+ *       p3[6] =   6.83783483674600322518695090864659381650125625216e-0005
+ *       p3[7] =  -1.10168269719261574708565935172719209272190828456e-0006
+ *       p3[8] =   9.66243228508380420159234853278906717065629721016e-0007
+ *       p3[9] =   2.31858885579177250541163820671121664974334728142e-0008
+ *       q3[0] =   1.0
+ *       q3[1] =   8.25479821168813634632437430090376252512793067339e-0001
+ *       q3[2] =  -1.62251363073937769739639623669295110346015576320e-0002
+ *       q3[3] =  -1.10621286905916732758745130629426559691187579852e-0001
+ *       q3[4] =   3.48309693970985612644446415789230015515365291459e-0003
+ *       q3[5] =   6.73553737487488333032431261131289672347043401328e-0003
+ *       q3[6] =  -7.63222008393372630162743587811004613050245128051e-0004
+ *       q3[7] =  -1.35792670669190631476784768961953711773073251336e-0004
+ *       q3[8] =   3.19610150954223587006220730065608156460205690618e-0005
+ *       q3[9] =  -1.82096553862822346610109522015129585693354348322e-0006
+ *
+ * (C) For x between 0 and 1.
+ *     Let P stand for the number of significant bits in the working precision.
+ *                      -P                            1
+ *    (1)For 0 <= x <= 2   , gamma(x) is computed by --- rounded to nearest.
+ *                                                    x
+ *       The error is bound by 0.739 ulp(gamma(x)) in IEEE double precision.
+ *	Proof.
+ *                1                       2
+ *	Since  --------  ~  x + 0.577...*x  - ...,  we have, for small x,
+ *              gamma(x)
+ *           1                    1
+ *	----------- < gamma(x) < --- and
+ *      x(1+0.578x)               x
+ *              1                 1           1
+ *	  0 <  --- - gamma(x) <= ---  -  ----------- < 0.578
+ *              x                 x      x(1+0.578x)
+ *                                     1       1                        -P
+ * 	The error is thus bounded by --- ulp(---) + 0.578. Since x <= 2   ,
+ *                                     2       x
+ *       1      P       1           P                                      1
+ *	--- >= 2 , ulp(---) >= ulp(2  ) >= 2. Thus 0.578=0.289*2<=0.289ulp(-)
+ *       x              x                                                  x
+ *       Thus
+ *                             1                                 1
+ *		| gamma(x) - [---] rounded | <= (0.5+0.289)*ulp(---).
+ *			       x	                         x
+ *                         -P                              1
+ *	Note that for x<= 2  , it is easy to see that ulp(---)=ulp(gamma(x))
+ *                                                         x
+ *                            n                             1
+ *	except only when x = 2 , (n<= -53). In such cases, --- is exact
+ *                                                          x
+ * 	and therefore the error is bounded by
+ *                         1
+ *		0.289*ulp(---) = 0.289*2*ulp(gamma(x)) = 0.578ulp(gamma(x)).
+ *                         x
+ *	Thus we conclude that the error in gamma is less than 0.739 ulp.
+ *
+ *    (2)Otherwise, for x in GTi-1 (see B), let y = x-(zi-1). From (B) we obtain
+ *                                                          gamma(1+x)
+ *	gamma(1+x) = gy.h + gy.l,  then compute gamma(x) by -----------.
+ *                                                               x
+ *                                                          gy.h
+ *	Implementation note. Write x = x.h+x.l and let th = ----- chopped to
+ *                                                            x
+ *	20 bits, then
+ *                                gy.h+gy.l
+ *		gamma(x) = th + (----------  - th )
+ *                                    x
+ *                               1
+ *			 = th + ---*(gy.h-th*x.h+gy.l-th*x.l)
+ *	                         x
+ *
+ * (D) For x between 2 and 8. Let n = x-1 chopped to an integer. Then
+ *
+ *               gamma(x)=(x-1)*(x-2)*...*(x-n)*gamma(x-n)
+ *
+ *     Since x-n is between 1 and 2, we can apply (B) to compute gamma(x).
+ *
+ *     Implementation detail. The computation of (x-1)(x-2)...(x-n) in simulated
+ *     higher precision arithmetic can be somewhat optimized.  For example, in
+ *     computing (x-1)*(x-2)*(x-3)*(x-4), if we compute (x-1)*(x-4) = z.h+z.l,
+ *     then (x-2)(x-3) = z.h+2+z.l readily. In below, we list the expression
+ *     of the formula to compute gamma(x).
+ *
+ *     Assume x-n is in GTi (i=1,2, or 3, see B for detail). Let y = x - n - zi.
+ *     By (B) we have gamma(x-n) = gy.h+gy.l. If x = x.h+x.l, then we have
+ *      n=1 (x in [2,3]):
+ *	 gamma(x) = (x-1)*gamma(x-1) = (x-1)*(gy.h+gy.l)
+ *                 = [(x.h-1)+x.l]*(gy.h+gy.l)
+ *      n=2 (x in [3,4]):
+ *        gamma(x) = (x-1)(x-2)*gamma(x-2) = (x-1)*(x-2)*(gy.h+gy.l)
+ *                 = ((x.h-2)+x.l)*((x.h-1)+x.l)*(gy.h+gy.l)
+ *                 = [x.h*(x.h-3)+2+x.l*(x+(x.h-3))]*(gy.h+gy.l)
+ *      n=3 (x in [4,5])
+ *	 gamma(x) = (x-1)(x-2)(x-3)*(gy.h+gy.l)
+ *                 = (x.h*(x.h-3)+2+x.l*(x+(x.h-3)))*[((x.h-3)+x.l)(gy.h+gy.l)]
+ *      n=4 (x in [5,6])
+ *	 gamma(x) = [(x-1)(x-4)]*[(x-2)(x-3)]*(gy.h+gy.l)
+ *                 = [(x.h*(x.h-5)+4+x.l(x+(x.h-5)))]*[(x-2)*(x-3)]*(gy.h+gy.l)
+ *                 = (y.h+y.l)*(y.h+1+y.l)*(gy.h+gy.l)
+ *      n=5 (x in [6,7])
+ *	 gamma(x) = [(x-1)(x-4)]*[(x-2)(x-3)]*[(x-5)*(gy.h+gy.l)]
+ *      n=6 (x in [7,8])
+ *	 gamma(x) = [(x-1)(x-6)]*[(x-2)(x-5)]*[(x-3)(x-4)]*(gy.h+gy.l)]
+ *		  = [(y.h+y.l)(y.h+4+y.l)][(y.h+6+y.l)(gy.h+gy.l)]
+ *
+ * (E)Overflow Threshold. For x > Overflow threshold of gamma,
+ *    return huge*huge (overflow).
+ *
+ *    By checking whether lgamma(x) >= 2**{128,1024,16384}, one can
+ *    determine the overflow threshold for x in single, double, and
+ *    quad precision. See over.c for details.
+ *
+ *    The overflow threshold of gamma(x) are
+ *
+ *    single: x = 3.5040096283e+01
+ *              = 0x420C290F (IEEE single)
+ *    double: x = 1.71624376956302711505e+02
+ *              = 0x406573FAE561F647 (IEEE double)
+ *    quad:   x = 1.7555483429044629170038892160702032034177e+03
+ *              = 0x4009B6E3180CD66A5C4206F128BA77F4  (quad)
+ *
+ * (F)For overflow_threshold >= x >= 8, we use asymptotic approximation.
+ *    (1) Stirling's formula
+ *
+ *      log(G(x)) ~= (x-.5)*(log(x)-1) + .5(log(2*pi)-1) + (1/x)*P(1/(x*x))
+ *		  = L1 + L2 + L3,
+ *    where
+ *		L1(x) = (x-.5)*(log(x)-1),
+ *		L2    = .5(log(2pi)-1) = 0.41893853....,
+ *		L3(x) = (1/x)P(1/(x*x)),
+ *
+ *    The range of L1,L2, and L3 are as follows:
+ *
+ *	------------------------------------------------------------------
+ *  	Range(L1) =  (single) [8.09..,88.30..]	 =[2** 3.01..,2**  6.46..]
+ *                   (double) [8.09..,709.3..]   =[2** 3.01..,2**  9.47..]
+ *		     (quad)   [8.09..,11356.10..]=[2** 3.01..,2** 13.47..]
+ *  	Range(L2) = 0.41893853.....
+ *	Range(L3) = [0.0104...., 0.00048....]	 =[2**-6.58..,2**-11.02..]
+ *	------------------------------------------------------------------
+ *
+ *    Gamma(x) is then computed by exp(L1+L2+L3).
+ *
+ *    (2) Error analysis of (F):
+ *    --------------------------
+ *    The error in Gamma(x) depends on the error inherited in the computation
+ *    of L= L1+L2+L3. Let L' be the computed value of L. The absolute error
+ *    in L' is t = L-L'. Since exp(L') = exp(L-t) = exp(L)*exp(t) ~
+ *    (1+t)*exp(L), the relative error in exp(L') is approximately t.
+ *
+ *    To guarantee the relative accuracy in exp(L'), we would like
+ *    |t| < 2**(-P-5) where P denotes the number of significant bits
+ *    of the working precision. Consequently, each of the L1,L2, and L3
+ *    must be computed with absolute error bounded by 2**(-P-5) in absolute
+ *    value.
+ *
+ *    Since L2 is a constant, it can be pre-computed to the desired accuracy.
+ *    Also |L3| < 2**-6; therefore, it suffices to compute L3 with the
+ *    working precision.  That is,
+ *	L3(x) approximates log(G(x))-(x-.5)(log(x)-1)-.5(log(2pi)-1)
+ *    to a precision bounded by 2**(-P-5).
+ *
+ *                                   2**(-6)
+ *			    _________V___________________
+ *		L1(x):	   |_________|___________________|
+ *			           __ ________________________
+ *		L2:	          |__|________________________|
+ *			              __________________________
+ *         +    L3(x):               |__________________________|
+ *                       -------------------------------------------
+ *                         [leading] + [Trailing]
+ *
+ *    For L1(x)=(x-0.5)*(log(x)-1), we need ilogb(L1(x))+5 extra bits for
+ *    both multiplicands to guarantee L1(x)'s absolute error is bounded by
+ *    2**(-P-5) in absolute value. Here ilogb(y) is defined to be the unbiased
+ *    binary exponent of y in IEEE format.  We can get x-0.5 to the desired
+ *    accuracy easily. It remains to compute log(x)-1 with ilogb(L1(x))+5
+ *    extra bits accuracy. Note that the range of L1 is 88.30.., 709.3.., and
+ *    11356.10... for single, double, and quadruple precision, we have
+ *
+ *                           single     double      quadruple
+ *                         ------------------------------------
+ *	ilogb(L1(x))+5 <=     11	  14	       18
+ *                         ------------------------------------
+ *
+ *    (3) Table Driven Method for log(x)-1:
+ *    --------------------------------------
+ *    Let x = 2**n * y, where 1 <= y < 2. Let Z={z(i),i=1,...,m}
+ *    be a set of predetermined evenly distributed floating point numbers
+ *    in [1, 2]. Let z(j) be the closest one to y, then
+ *	log(x)-1 = n*log(2)-1  +  log(y)
+ *		 = n*log(2)-1  +  log(z(j)*y/z(j))
+ *		 = n*log(2)-1  +  log(z(j))  +  log(y/z(j))
+ *		 = T1(n)       +  T2(j)      +  T3,
+ *
+ *    where T1(n) = n*log(2)-1 and T2(j) = log(z(j)). Both T1 and T2 can be
+ *    pre-calculated and be looked-up in a table. Note that 8 <= x < 1756
+ *    implies 3<=n<=10 implies 1.079.. < T1(n) < 6.931.
+ *
+ *
+ *                     y-z(i)          y       1+s
+ *    For T3, let s = --------; then ----- =  ----- and
+ *                     y+z(i)         z(i)     1-s
+ *                1+s           2   3    2   5
+ *    	T3 = log(-----) = 2s + --- s  + --- s  + ....
+ *                1-s           3        5
+ *
+ *    Suppose the first term 2s is computed in extra precision. The
+ *    dominating error in T3 would then be the rounding error of the
+ *    second term 2/3*s**3. To force the rounding bounded by
+ *    the required accuracy, we have
+ *        single:  |2/3*s**3| < 2**-11  ==> |s|<0.09014...
+ *        double:  |2/3*s**3| < 2**-14  ==> |s|<0.04507...
+ *        quad  :  |2/3*s**3| < 2**-18  ==> |s|<0.01788... = 2**(-5.80..)
+ *
+ *    Based on this analysis, we choose Z = {z(i)|z(i)=1+i/64+1/128, 0<=i<=63}.
+ *    For any y in [1,2), let j = [64*y] chopped to integer, then z(j) is
+ *    the closest to y, and it is not difficult to see that |s| < 2**(-8).
+ *    Please note that the polynomial approximation of T3 must be accurate
+ *        -24-11   -35    -53-14    -67         -113-18   -131
+ *    to 2       =2   ,  2       = 2   ,  and  2        =2
+ *    for single, double, and quadruple precision respectively.
+ *
+ *    Implementation notes.
+ *    (1) Table look-up entries for T1(n) and T2(j), as well as the calculation
+ *        of the leading term 2s in T3,  are broken up into leading and trailing
+ *        part such that (leading part)* 2**24 will always be an integer. That
+ *        will guarantee the addition of the leading parts will be exact.
+ *
+ *                                   2**(-24)
+ *			    _________V___________________
+ *		T1(n):	   |_________|___________________|
+ *			      _______ ______________________
+ *		T2(j):	     |_______|______________________|
+ *			         ____ _______________________
+ *		2s:	        |____|_______________________|
+ *			             __________________________
+ *         +    T3(s)-2s:           |__________________________|
+ *                       -------------------------------------------
+ *                         [leading] + [Trailing]
+ *
+ *    (2) How to compute 2s accurately.
+ *        (A) Compute v = 2s to the working precision. If |v| < 2**(-18),
+ *            stop.
+ *        (B) chop v to 2**(-24): v = ((int)(v*2**24))/2**24
+ *	 (C) 2s = v + (2s - v), where
+ *                        1
+ *		2s - v = --- * (2(y-z) - v*(y+z) )
+ *                       y+z
+ *                         1
+ *                      = --- * ( [2(y-z) - v*(y+z)_h ]  - v*(y+z)_l  )
+ *                        y+z
+ *           where (y+z)_h = (y+z) rounded to 24 bits by (double)(float),
+ *	    and (y+z)_l = ((z+z)-(y+z)_h)+(y-z).  Note that the quantity
+ *	    in [] is exact.
+ *                                                      2         4
+ *    (3) Remez approximation for (T3(s)-2s)/s = T3[0]*s + T3[1]*s + ...:
+ *	 Single precision: 1 term (compute in double precision arithmetic)
+ *	    T3(s) = 2s + S1*s^3, S1 = 0.6666717231848518054693623697539230
+ *	    Remez error: |T3(s)/s - (2s+S1*s^3)| < 2**(-35.87)
+ *	 Double precision: 3 terms, Remez error is bounded by 2**(-72.40),
+ *	    see "tgamma_log"
+ *	 Quad precision: 7 terms, Remez error is bounded by 2**(-136.54),
+ *	    see "tgammal_log"
+ *
+ *   The computation of 0.5*(ln(2pi)-1):
+ *   	0.5*(ln(2pi)-1) =  0.4189385332046727417803297364056176398614...
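+ *	split 0.5*(ln(2pi)-1) to hln2pi_h + hln2pi_l, where hln2pi_h is the
+ *	leading 21 bits of the constant.
+ *	    hln2pi_h= 0.4189383983612060546875
+ *	    hln2pi_l= 1.348434666870928297364056176398612173648e-07
+ *	(Note: the table c[] in this file stores a different split of the
+ *	same constant, hln2pi_h = 4.18937683105468750000e-01 and
+ *	hln2pi_l = 8.50099203991780279640e-07.)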
+ *
+ *   The computation of 1/x*P(1/x^2) = log(G(x))-(x-.5)(ln(x)-1)-.5(ln(2pi)-1):
+ *	Let s = 1/x < 1/8 = 0.125. We have
+ *	quad precision
+ *	    |GP(s) - s*P(s^2)| <= 2**(-120.6), where
+ *			       3      5            39
+ *	    GP(s) = GP0*s+GP1*s +GP2*s +... +GP19*s    ,
+ *       GP0  =   0.083333333333333333333333333333333172839171301
+ *			hex 0x3ffe5555 55555555 55555555 55555548
+ *       GP1  =  -2.77777777777777777777777777492501211999399424104e-0003
+ *       GP2  =   7.93650793650793650793635650541638236350020883243e-0004
+ *       GP3  =  -5.95238095238095238057299772679324503339241961704e-0004
+ *       GP4  =   8.41750841750841696138422987977683524926142600321e-0004
+ *       GP5  =  -1.91752691752686682825032547823699662178842123308e-0003
+ *       GP6  =   6.41025641022403480921891559356473451161279359322e-0003
+ *       GP7  =  -2.95506535798414019189819587455577003732808185071e-0002
+ *       GP8  =   1.79644367229970031486079180060923073476568732136e-0001
+ *       GP9  =  -1.39243086487274662174562872567057200255649290646e+0000
+ *       GP10 =   1.34025874044417962188677816477842265259608269775e+0001
+ *       GP11 =  -1.56803713480127469414495545399982508700748274318e+0002
+ *       GP12 =   2.18739841656201561694927630335099313968924493891e+0003
+ *       GP13 =  -3.55249848644100338419187038090925410976237921269e+0004
+ *       GP14 =   6.43464880437835286216768959439484376449179576452e+0005
+ *       GP15 =  -1.20459154385577014992600342782821389605893904624e+0007
+ *       GP16 =   2.09263249637351298563934942349749718491071093210e+0008
+ *       GP17 =  -2.96247483183169219343745316433899599834685703457e+0009
+ *       GP18 =   2.88984933605896033154727626086506756972327292981e+0010
+ *       GP19 =  -1.40960434146030007732838382416230610302678063984e+0011
+ *
+ *       double precision
+ *	    |GP(s) - s*P(s^2)| <= 2**(-63.5), where
+ *			       3      5      7      9      11      13      15
+ *	    GP(s) = GP0*s+GP1*s +GP2*s +GP3*s +GP4*s +GP5*s  +GP6*s  +GP7*s  ,
+ *
+ *		GP0=  0.0833333333333333287074040640618477 (3FB55555 55555555)
+ *		GP1= -2.77777777776649355200565611114627670089130772843e-0003
+ *		GP2=  7.93650787486083724805476194170211775784158551509e-0004
+ *		GP3= -5.95236628558314928757811419580281294593903582971e-0004
+ *		GP4=  8.41566473999853451983137162780427812781178932540e-0004
+ *		GP5= -1.90424776670441373564512942038926168175921303212e-0003
+ *		GP6=  5.84933161530949666312333949534482303007354299178e-0003
+ *		GP7= -1.59453228931082030262124832506144392496561694550e-0002
+ *       single precision
+ *	    |GP(s) - s*P(s^2)| <= 2**(-37.78), where
+ *			       3      5
+ *	    GP(s) = GP0*s+GP1*s +GP2*s
+ *        GP0 =   8.33333330959694065245736888749042811909994573178e-0002
+ *        GP1 =  -2.77765545601667179767706600890361535225507762168e-0003
+ *        GP2 =   7.77830853479775281781085278324621033523037489883e-0004
+ *
+ *
+ *	Implementation note:
+ *	z = (1/x), z2 = z*z, z4 = z2*z2;
+ *	p = z*(GP0+z2*(GP1+....+z2*GP7))
+ *	  = z*(GP0+(z4*(GP2+z4*(GP4+z4*GP6))+z2*(GP1+z4*(GP3+z4*(GP5+z4*GP7)))))
+ *
+ *   Adding everything up:
+ *	t = rr.h*ww.h+hln2pi_h      		... exact
+ *	w = (hln2pi_l + ((x-0.5)*ww.l+rr.l*ww.h)) + p
+ *
+ *   Computing exp(t+w):
+ *	s = t+w; write s = (n+j/32)*ln2+r, |r|<=(1/64)*ln2, then
+ *	exp(s) = 2**n * (2**(j/32) + 2**(j/32)*expm1(r)), where
+ *	expm1(r) = r + Et1*r^2 + Et2*r^3 + ... + Et5*r^6, and
+ *	2**(j/32) is obtained by table look-up S[j]+S_trail[j].
+ *	Remez error bound:
+ *	|exp(r) - (1+r+Et1*r^2+...+Et5*r^6)| <= 2^(-63).
+ */
+
+#include "libm.h"
+
+/*
+ * 32-bit word views of a double: __HI is read/written as a signed int,
+ * __LO as an unsigned int.  HIWORD/LOWORD pick the word index
+ * (presumably supplied by libm.h according to byte order).
+ */
+#define	__HI(x)	(((int *) &(x))[HIWORD])
+#define	__LO(x)	(((unsigned *) &(x))[LOWORD])
+
+/*
+ * A value carried as the unevaluated sum h + l of two doubles, used by
+ * the helpers in this file to return results with extra precision.
+ */
+struct Double {
+	double h;	/* leading (high) part */
+	double l;	/* trailing (low) part */
+};
+
+/*
+ * Scalar constants for the double precision tgamma; each entry is given
+ * a symbolic name by the #define list that follows the table.
+ * Hex value of GP0 should be 3FB55555 55555555
+ */
+static const double c[] = {
+	+1.0,							/* one */
+	+2.0,							/* two */
+	+0.5,							/* half */
+	+1.0e-300,						/* tiny */
+	+6.66666666666666740682e-01,				/* A1=T3[0] */
+	+3.99999999955626478023093908674902212920e-01,		/* A2=T3[1] */
+	+2.85720221533145659809237398709372330980e-01,		/* A3=T3[2] */
+	+0.0833333333333333287074040640618477,			/* GP[0] */
+	-2.77777777776649355200565611114627670089130772843e-03,	/* GP[1] */
+	+7.93650787486083724805476194170211775784158551509e-04,	/* GP[2] */
+	-5.95236628558314928757811419580281294593903582971e-04,	/* GP[3] */
+	+8.41566473999853451983137162780427812781178932540e-04,	/* GP[4] */
+	-1.90424776670441373564512942038926168175921303212e-03,	/* GP[5] */
+	+5.84933161530949666312333949534482303007354299178e-03,	/* GP[6] */
+	-1.59453228931082030262124832506144392496561694550e-02,	/* GP[7] */
+	+4.18937683105468750000e-01,				/* hln2pi_h */
+	+8.50099203991780279640e-07,				/* hln2pi_l */
+	+4.18938533204672741744150788368695779923320328369e-01,	/* hln2pi */
+	+2.16608493865351192653e-02,				/* ln2_32hi */
+	+5.96317165397058656257e-12,				/* ln2_32lo */
+	+4.61662413084468283841e+01,				/* invln2_32 */
+	+5.0000000000000000000e-1,				/* Et1 */
+	+1.66666666665223585560605991943703896196054020060e-01,	/* Et2 */
+	+4.16666666665895103520154073534275286743788421687e-02,	/* Et3 */
+	+8.33336844093536520775865096538773197505523826029e-03,	/* Et4 */
+	+1.38889201930843436040204096950052984793587640227e-03,	/* Et5 */
+};
+
+/* Symbolic names for the entries of c[]; the index is the table position. */
+#define	one	  c[0]
+#define	two	  c[1]
+#define	half	  c[2]
+#define	tiny	  c[3]
+#define	A1	  c[4]
+#define	A2	  c[5]
+#define	A3	  c[6]
+#define	GP0	  c[7]
+#define	GP1	  c[8]
+#define	GP2	  c[9]
+#define	GP3	  c[10]
+#define	GP4	  c[11]
+#define	GP5	  c[12]
+#define	GP6	  c[13]
+#define	GP7	  c[14]
+#define	hln2pi_h  c[15]
+#define	hln2pi_l  c[16]
+#define	hln2pi	  c[17]
+#define	ln2_32hi  c[18]
+#define	ln2_32lo  c[19]
+#define	invln2_32 c[20]
+#define	Et1	  c[21]
+#define	Et2	  c[22]
+#define	Et3	  c[23]
+#define	Et4	  c[24]
+#define	Et5	  c[25]
+
+/*
+ * double precision coefficients for computing log(x)-1 in tgamma.
+ *  See "algorithm" for details
+ *
+ *  log(x) - 1 = T1(n) + T2(j) + T3(s), where x = 2**n * y,  1<=y<2,
+ *  j=[64*y], z[j]=1+j/64+1/128, s = (y-z[j])/(y+z[j]), and
+ *       T1(n) = T1[2n,2n+1] = n*log(2)-1,
+ *       T2(j) = T2[2j,2j+1] = log(z[j]),
+ *       T3(s) = 2s + T3[0]s^3 + T3[1]s^5 + T3[2]s^7
+ *	       = 2s + A1*s^3 + A2*s^5 + A3*s^7  (see const A1,A2,A3)
+ *  Note
+ *  (1) the leading entries are truncated to 24 binary point.
+ *      See Remezpak/sun/tgamma_log_64.c
+ *  (2) Remez error for T3(s) is bounded by 2**(-72.4)
+ *      See mpremez/work/Log/tgamma_log_4_outr2
+ */
+
+/*
+ * T1[2n] is the leading part and T1[2n+1] the trailing part of
+ * n*log(2)-1, for n = 0..10 (11 pairs).  large_gam() indexes this table
+ * with n2 = 2n.
+ */
+static const double T1[] = {
+	-1.00000000000000000000e+00,	/* 0xBFF00000 0x00000000 */
+	+0.00000000000000000000e+00,	/* 0x00000000 0x00000000 */
+	-3.06852817535400390625e-01,	/* 0xBFD3A37A 0x00000000 */
+	-1.90465429995776763166e-09,	/* 0xBE205C61 0x0CA86C38 */
+	+3.86294305324554443359e-01,	/* 0x3FD8B90B 0xC0000000 */
+	+5.57953361754750897367e-08,	/* 0x3E6DF473 0xDE6AF279 */
+	+1.07944148778915405273e+00,	/* 0x3FF14564 0x70000000 */
+	+5.38906818755173187963e-08,	/* 0x3E6CEEAD 0xCDA06BB5 */
+	+1.77258867025375366211e+00,	/* 0x3FFC5C85 0xF0000000 */
+	+5.19860275755595544734e-08,	/* 0x3E6BE8E7 0xBCD5E4F2 */
+	+2.46573585271835327148e+00,	/* 0x4003B9D3 0xB8000000 */
+	+5.00813732756017835330e-08,	/* 0x3E6AE321 0xAC0B5E2E */
+	+3.15888303518295288086e+00,	/* 0x40094564 0x78000000 */
+	+4.81767189756440192100e-08,	/* 0x3E69DD5B 0x9B40D76B */
+	+3.85203021764755249023e+00,	/* 0x400ED0F5 0x38000000 */
+	+4.62720646756862482697e-08,	/* 0x3E68D795 0x8A7650A7 */
+	+4.54517740011215209961e+00,	/* 0x40122E42 0xFC000000 */
+	+4.43674103757284839467e-08,	/* 0x3E67D1CF 0x79ABC9E4 */
+	+5.23832458257675170898e+00,	/* 0x4014F40B 0x5C000000 */
+	+4.24627560757707130063e-08,	/* 0x3E66CC09 0x68E14320 */
+	+5.93147176504135131836e+00,	/* 0x4017B9D3 0xBC000000 */
+	+4.05581017758129486834e-08,	/* 0x3E65C643 0x5816BC5D */
+};
+
+/*
+ * T2[2j] is the leading part and T2[2j+1] the trailing part of
+ * log(z[j]), z[j] = 1+j/64+1/128, for j = 0..63 (64 pairs).
+ * large_gam() indexes this table with j2 = 2j.
+ */
+static const double T2[] = {
+	+7.78210163116455078125e-03,	/* 0x3F7FE020 0x00000000 */
+	+3.88108903981662140884e-08,	/* 0x3E64D620 0xCF11F86F */
+	+2.31670141220092773438e-02,	/* 0x3F97B918 0x00000000 */
+	+4.51595251008850513740e-08,	/* 0x3E683EAD 0x88D54940 */
+	+3.83188128471374511719e-02,	/* 0x3FA39E86 0x00000000 */
+	+5.14549991480218823411e-08,	/* 0x3E6B9FEB 0xD5FA9016 */
+	+5.32444715499877929688e-02,	/* 0x3FAB42DC 0x00000000 */
+	+4.29688244898971182165e-08,	/* 0x3E671197 0x1BEC28D1 */
+	+6.79506063461303710938e-02,	/* 0x3FB16536 0x00000000 */
+	+5.55623773783008185114e-08,	/* 0x3E6DD46F 0x5C1D0C4C */
+	+8.24436545372009277344e-02,	/* 0x3FB51B07 0x00000000 */
+	+1.46738736635337847313e-08,	/* 0x3E4F830C 0x1FB493C7 */
+	+9.67295765876770019531e-02,	/* 0x3FB8C345 0x00000000 */
+	+4.98708741103424492282e-08,	/* 0x3E6AC633 0x641EB597 */
+	+1.10814332962036132812e-01,	/* 0x3FBC5E54 0x00000000 */
+	+3.33782539813823062226e-08,	/* 0x3E61EB78 0xE862BAC3 */
+	+1.24703466892242431641e-01,	/* 0x3FBFEC91 0x00000000 */
+	+1.16087148042227818450e-08,	/* 0x3E48EDF5 0x5D551729 */
+	+1.38402283191680908203e-01,	/* 0x3FC1B72A 0x80000000 */
+	+3.96674382274822001957e-08,	/* 0x3E654BD9 0xE80A4181 */
+	+1.51916027069091796875e-01,	/* 0x3FC371FC 0x00000000 */
+	+1.49567501781968021494e-08,	/* 0x3E500F47 0xBA1DE6CB */
+	+1.65249526500701904297e-01,	/* 0x3FC526E5 0x80000000 */
+	+4.63946052585787334062e-08,	/* 0x3E68E86D 0x0DE8B900 */
+	+1.78407609462738037109e-01,	/* 0x3FC6D60F 0x80000000 */
+	+4.80100802600100279538e-08,	/* 0x3E69C674 0x8723551E */
+	+1.91394805908203125000e-01,	/* 0x3FC87FA0 0x00000000 */
+	+4.70914263296092971436e-08,	/* 0x3E694832 0x44240802 */
+	+2.04215526580810546875e-01,	/* 0x3FCA23BC 0x00000000 */
+	+1.48478803446288209001e-08,	/* 0x3E4FE2B5 0x63193712 */
+	+2.16873884201049804688e-01,	/* 0x3FCBC286 0x00000000 */
+	+5.40995645549315919488e-08,	/* 0x3E6D0B63 0x358A7E74 */
+	+2.29374051094055175781e-01,	/* 0x3FCD5C21 0x00000000 */
+	+4.99707906542102284117e-08,	/* 0x3E6AD3EE 0xE456E443 */
+	+2.41719901561737060547e-01,	/* 0x3FCEF0AD 0x80000000 */
+	+3.53254081075974352804e-08,	/* 0x3E62F716 0x4D948638 */
+	+2.53915190696716308594e-01,	/* 0x3FD04025 0x80000000 */
+	+1.92842471355435739091e-08,	/* 0x3E54B4D0 0x40DAE27C */
+	+2.65963494777679443359e-01,	/* 0x3FD1058B 0xC0000000 */
+	+5.37194584979797487125e-08,	/* 0x3E6CD725 0x6A8C4FD0 */
+	+2.77868449687957763672e-01,	/* 0x3FD1C898 0xC0000000 */
+	+1.31549854251447496506e-09,	/* 0x3E16999F 0xAFBC68E7 */
+	+2.89633274078369140625e-01,	/* 0x3FD2895A 0x00000000 */
+	+1.85046735362538929911e-08,	/* 0x3E53DE86 0xA35EB493 */
+	+3.01261305809020996094e-01,	/* 0x3FD347DD 0x80000000 */
+	+2.47691407849191245052e-08,	/* 0x3E5A987D 0x54D64567 */
+	+3.12755703926086425781e-01,	/* 0x3FD40430 0x80000000 */
+	+6.07781046260499658610e-09,	/* 0x3E3A1A9F 0x8EF4304A */
+	+3.24119448661804199219e-01,	/* 0x3FD4BE5F 0x80000000 */
+	+1.99924077768719198045e-08,	/* 0x3E557778 0xA0DB4C99 */
+	+3.35355520248413085938e-01,	/* 0x3FD57677 0x00000000 */
+	+2.16727247443196802771e-08,	/* 0x3E57455A 0x6C549AB7 */
+	+3.46466720104217529297e-01,	/* 0x3FD62C82 0xC0000000 */
+	+4.72419910516215900493e-08,	/* 0x3E695CE3 0xCA97B7B0 */
+	+3.57455849647521972656e-01,	/* 0x3FD6E08E 0x80000000 */
+	+3.92742818015697624778e-08,	/* 0x3E6515D0 0xF1C609CA */
+	+3.68325531482696533203e-01,	/* 0x3FD792A5 0x40000000 */
+	+2.96760111198451042238e-08,	/* 0x3E5FDD47 0xA27C15DA */
+	+3.79078328609466552734e-01,	/* 0x3FD842D1 0xC0000000 */
+	+2.43255029056564770289e-08,	/* 0x3E5A1E8B 0x17493B14 */
+	+3.89716744422912597656e-01,	/* 0x3FD8F11E 0x80000000 */
+	+6.71711261571421332726e-09,	/* 0x3E3CD98B 0x1DF85DA7 */
+	+4.00243163108825683594e-01,	/* 0x3FD99D95 0x80000000 */
+	+1.01818702333557515008e-09,	/* 0x3E117E08 0xACBA92EF */
+	+4.10659909248352050781e-01,	/* 0x3FDA4840 0x80000000 */
+	+1.57369163351530571459e-08,	/* 0x3E50E5BB 0x0A2BFCA7 */
+	+4.20969247817993164062e-01,	/* 0x3FDAF129 0x00000000 */
+	+4.68261364720663662040e-08,	/* 0x3E6923BC 0x358899C2 */
+	+4.31173443794250488281e-01,	/* 0x3FDB9858 0x80000000 */
+	+2.10241208525779214510e-08,	/* 0x3E569310 0xFB598FB1 */
+	+4.41274523735046386719e-01,	/* 0x3FDC3DD7 0x80000000 */
+	+3.70698288427707487748e-08,	/* 0x3E63E6D6 0xA6B9D9E1 */
+	+4.51274633407592773438e-01,	/* 0x3FDCE1AF 0x00000000 */
+	+1.07318658117071930723e-08,	/* 0x3E470BE7 0xD6F6FA58 */
+	+4.61175680160522460938e-01,	/* 0x3FDD83E7 0x00000000 */
+	+3.49616477054305011286e-08,	/* 0x3E62C517 0x9F2828AE */
+	+4.70979690551757812500e-01,	/* 0x3FDE2488 0x00000000 */
+	+2.46670332000468969567e-08,	/* 0x3E5A7C6C 0x261CBD8F */
+	+4.80688512325286865234e-01,	/* 0x3FDEC399 0xC0000000 */
+	+1.70204650424422423704e-08,	/* 0x3E52468C 0xC0175CEE */
+	+4.90303933620452880859e-01,	/* 0x3FDF6123 0xC0000000 */
+	+5.44247409572909703749e-08,	/* 0x3E6D3814 0x5630A2B6 */
+	+4.99827861785888671875e-01,	/* 0x3FDFFD2E 0x00000000 */
+	+7.77056065794633071345e-09,	/* 0x3E40AFE9 0x30AB2FA0 */
+	+5.09261846542358398438e-01,	/* 0x3FE04BDF 0x80000000 */
+	+5.52474495483665749052e-08,	/* 0x3E6DA926 0xD265FCC1 */
+	+5.18607735633850097656e-01,	/* 0x3FE0986F 0x40000000 */
+	+2.85741955344967264536e-08,	/* 0x3E5EAE6A 0x41723FB5 */
+	+5.27867078781127929688e-01,	/* 0x3FE0E449 0x80000000 */
+	+1.08397144554263914271e-08,	/* 0x3E474732 0x2FDBAB97 */
+	+5.37041425704956054688e-01,	/* 0x3FE12F71 0x80000000 */
+	+4.01919275998792285777e-08,	/* 0x3E6593EF 0xBC530123 */
+	+5.46132385730743408203e-01,	/* 0x3FE179EA 0xA0000000 */
+	+5.18673922421792693237e-08,	/* 0x3E6BD899 0xA0BFC60E */
+	+5.55141448974609375000e-01,	/* 0x3FE1C3B8 0x00000000 */
+	+5.85658922177154808539e-08,	/* 0x3E6F713C 0x24BC94F9 */
+	+5.64070105552673339844e-01,	/* 0x3FE20CDC 0xC0000000 */
+	+3.27321296262276338905e-08,	/* 0x3E6192AB 0x6D93503D */
+	+5.72919726371765136719e-01,	/* 0x3FE2555B 0xC0000000 */
+	+2.71900203723740076878e-08,	/* 0x3E5D31EF 0x96780876 */
+	+5.81691682338714599609e-01,	/* 0x3FE29D37 0xE0000000 */
+	+5.72959078829112371070e-08,	/* 0x3E6EC2B0 0x8AC85CD7 */
+	+5.90387403964996337891e-01,	/* 0x3FE2E474 0x20000000 */
+	+4.26371800367512948470e-08,	/* 0x3E66E402 0x68405422 */
+	+5.99008142948150634766e-01,	/* 0x3FE32B13 0x20000000 */
+	+4.66979327646159769249e-08,	/* 0x3E69121D 0x71320557 */
+	+6.07555210590362548828e-01,	/* 0x3FE37117 0xA0000000 */
+	+3.96341792466729582847e-08,	/* 0x3E654747 0xB5C5DD02 */
+	+6.16029858589172363281e-01,	/* 0x3FE3B684 0x40000000 */
+	+1.86263416563663175432e-08,	/* 0x3E53FFF8 0x455F1DBE */
+	+6.24433279037475585938e-01,	/* 0x3FE3FB5B 0x80000000 */
+	+8.97441791510503832111e-09,	/* 0x3E4345BD 0x096D3A75 */
+	+6.32766664028167724609e-01,	/* 0x3FE43F9F 0xE0000000 */
+	+5.54287010493641158796e-09,	/* 0x3E37CE73 0x3BD393DD */
+	+6.41031146049499511719e-01,	/* 0x3FE48353 0xC0000000 */
+	+3.33714317793368531132e-08,	/* 0x3E61EA88 0xDF73D5E9 */
+	+6.49227917194366455078e-01,	/* 0x3FE4C679 0xA0000000 */
+	+2.94307433638127158696e-08,	/* 0x3E5F99DC 0x7362D1DA */
+	+6.57358050346374511719e-01,	/* 0x3FE50913 0xC0000000 */
+	+2.23619855184231409785e-08,	/* 0x3E5802D0 0xD6979675 */
+	+6.65422618389129638672e-01,	/* 0x3FE54B24 0x60000000 */
+	+1.41559608102782173188e-08,	/* 0x3E4E6652 0x5EA4550A */
+	+6.73422634601593017578e-01,	/* 0x3FE58CAD 0xA0000000 */
+	+4.06105737027198329700e-08,	/* 0x3E65CD79 0x893092F2 */
+	+6.81359171867370605469e-01,	/* 0x3FE5CDB1 0xC0000000 */
+	+5.29405324634793230630e-08,	/* 0x3E6C6C17 0x648CF6E4 */
+	+6.89233243465423583984e-01,	/* 0x3FE60E32 0xE0000000 */
+	+3.77733853963405370102e-08,	/* 0x3E644788 0xD8CA7C89 */
+};
+
+/*
+ * S[j],S_trail[j] = 2**(j/32.) for the final computation of exp(t+w).
+ * S[j] is the leading double for j = 0..31; the matching correction is
+ * S_trail[j].  large_gam() indexes both with j = k & 0x1f.
+ */
+static const double S[] = {
+	+1.00000000000000000000e+00,	/* 3FF0000000000000 */
+	+1.02189714865411662714e+00,	/* 3FF059B0D3158574 */
+	+1.04427378242741375480e+00,	/* 3FF0B5586CF9890F */
+	+1.06714040067682369717e+00,	/* 3FF11301D0125B51 */
+	+1.09050773266525768967e+00,	/* 3FF172B83C7D517B */
+	+1.11438674259589243221e+00,	/* 3FF1D4873168B9AA */
+	+1.13878863475669156458e+00,	/* 3FF2387A6E756238 */
+	+1.16372485877757747552e+00,	/* 3FF29E9DF51FDEE1 */
+	+1.18920711500272102690e+00,	/* 3FF306FE0A31B715 */
+	+1.21524735998046895524e+00,	/* 3FF371A7373AA9CB */
+	+1.24185781207348400201e+00,	/* 3FF3DEA64C123422 */
+	+1.26905095719173321989e+00,	/* 3FF44E086061892D */
+	+1.29683955465100964055e+00,	/* 3FF4BFDAD5362A27 */
+	+1.32523664315974132322e+00,	/* 3FF5342B569D4F82 */
+	+1.35425554693689265129e+00,	/* 3FF5AB07DD485429 */
+	+1.38390988196383202258e+00,	/* 3FF6247EB03A5585 */
+	+1.41421356237309514547e+00,	/* 3FF6A09E667F3BCD */
+	+1.44518080697704665027e+00,	/* 3FF71F75E8EC5F74 */
+	+1.47682614593949934623e+00,	/* 3FF7A11473EB0187 */
+	+1.50916442759342284141e+00,	/* 3FF82589994CCE13 */
+	+1.54221082540794074411e+00,	/* 3FF8ACE5422AA0DB */
+	+1.57598084510788649659e+00,	/* 3FF93737B0CDC5E5 */
+	+1.61049033194925428347e+00,	/* 3FF9C49182A3F090 */
+	+1.64575547815396494578e+00,	/* 3FFA5503B23E255D */
+	+1.68179283050742900407e+00,	/* 3FFAE89F995AD3AD */
+	+1.71861929812247793414e+00,	/* 3FFB7F76F2FB5E47 */
+	+1.75625216037329945351e+00,	/* 3FFC199BDD85529C */
+	+1.79470907500310716820e+00,	/* 3FFCB720DCEF9069 */
+	+1.83400808640934243066e+00,	/* 3FFD5818DCFBA487 */
+	+1.87416763411029996256e+00,	/* 3FFDFC97337B9B5F */
+	+1.91520656139714740007e+00,	/* 3FFEA4AFA2A490DA */
+	+1.95714412417540017941e+00,	/* 3FFF50765B6E4540 */
+};
+
+/*
+ * Trailing parts that pair with S[]: S[j] + S_trail[j] represents
+ * 2**(j/32) for j = 0..31.  Used by large_gam() when forming the low
+ * part of its result.
+ */
+static const double S_trail[] = {
+	+0.00000000000000000000e+00,
+	+5.10922502897344389359e-17,	/* 3C8D73E2A475B465 */
+	+8.55188970553796365958e-17,	/* 3C98A62E4ADC610A */
+	-7.89985396684158212226e-17,	/* BC96C51039449B3A */
+	-3.04678207981247114697e-17,	/* BC819041B9D78A76 */
+	+1.04102784568455709549e-16,	/* 3C9E016E00A2643C */
+	+8.91281267602540777782e-17,	/* 3C99B07EB6C70573 */
+	+3.82920483692409349872e-17,	/* 3C8612E8AFAD1255 */
+	+3.98201523146564611098e-17,	/* 3C86F46AD23182E4 */
+	-7.71263069268148813091e-17,	/* BC963AEABF42EAE2 */
+	+4.65802759183693679123e-17,	/* 3C8ADA0911F09EBC */
+	+2.66793213134218609523e-18,	/* 3C489B7A04EF80D0 */
+	+2.53825027948883149593e-17,	/* 3C7D4397AFEC42E2 */
+	-2.85873121003886075697e-17,	/* BC807ABE1DB13CAC */
+	+7.70094837980298946162e-17,	/* 3C96324C054647AD */
+	-6.77051165879478628716e-17,	/* BC9383C17E40B497 */
+	-9.66729331345291345105e-17,	/* BC9BDD3413B26456 */
+	-3.02375813499398731940e-17,	/* BC816E4786887A99 */
+	-3.48399455689279579579e-17,	/* BC841577EE04992F */
+	-1.01645532775429503911e-16,	/* BC9D4C1DD41532D8 */
+	+7.94983480969762085616e-17,	/* 3C96E9F156864B27 */
+	-1.01369164712783039808e-17,	/* BC675FC781B57EBC */
+	+2.47071925697978878522e-17,	/* 3C7C7C46B071F2BE */
+	-1.01256799136747726038e-16,	/* BC9D2F6EDB8D41E1 */
+	+8.19901002058149652013e-17,	/* 3C97A1CD345DCC81 */
+	-1.85138041826311098821e-17,	/* BC75584F7E54AC3B */
+	+2.96014069544887330703e-17,	/* 3C811065895048DD */
+	+1.82274584279120867698e-17,	/* 3C7503CBD1E949DB */
+	+3.28310722424562658722e-17,	/* 3C82ED02D75B3706 */
+	-6.12276341300414256164e-17,	/* BC91A5CD4F184B5C */
+	-1.06199460561959626376e-16,	/* BC9E9C23179C2893 */
+	+8.96076779103666776760e-17,	/* 3C99D3E12DD8A18B */
+};
+
+/*
+ * Primary interval GTi(): numerator (Pik) and denominator (Qik)
+ * coefficients of the rational functions evaluated by GT1(), GT2() and
+ * GT3().  The #define list after the table names each entry.
+ */
+static const double cr[] = {
+/* p1, q1 */
+	/* P10..P14 */
+	+0.70908683619977797008004927192814648151397705078125000,
+	+1.71987061393048558089579513384356441668351720061e-0001,
+	-3.19273345791990970293320316122813960527705450671e-0002,
+	+8.36172645419110036267169600390549973563534476989e-0003,
+	+1.13745336648572838333152213474277971244629758101e-0003,
+	/* Q10..Q15 */
+	+1.0,
+	+9.71980217826032937526460731778472389791321968082e-0001,
+	-7.43576743326756176594084137256042653497087666030e-0002,
+	-1.19345944932265559769719470515102012246995255372e-0001,
+	+1.59913445751425002620935120470781382215050284762e-0002,
+	+1.12601136853374984566572691306402321911547550783e-0003,
+/* p2, q2 */
+	/* P20..P23 */
+	+0.42848681585558601181418225678498856723308563232421875,
+	+6.53596762668970816023718845105667418483122103629e-0002,
+	-6.97280829631212931321050770925128264272768936731e-0003,
+	+6.46342359021981718947208605674813260166116632899e-0003,
+	/* Q20..Q26 */
+	+1.0,
+	+4.57572620560506047062553957454062012327519313936e-0001,
+	-2.52182594886075452859655003407796103083422572036e-0001,
+	-1.82970945407778594681348166040103197178711552827e-0002,
+	+2.43574726993169566475227642128830141304953840502e-0002,
+	-5.20390406466942525358645957564897411258667085501e-0003,
+	+4.79520251383279837635552431988023256031951133885e-0004,
+/* p3, q3 */
+	/* P30..P34 */
+	+0.382409479734567459008331979930517263710498809814453125,
+	+1.42876048697668161599069814043449301572928034140e-0001,
+	+3.42157571052250536817923866013561760785748899071e-0003,
+	-5.01542621710067521405087887856991700987709272937e-0004,
+	+8.89285814866740910123834688163838287618332122670e-0004,
+	/* Q30..Q35 */
+	+1.0,
+	+3.04253086629444201002215640948957897906299633168e-0001,
+	-2.23162407379999477282555672834881213873185520006e-0001,
+	-1.05060867741952065921809811933670131427552903636e-0002,
+	+1.70511763916186982473301861980856352005926669320e-0002,
+	-2.12950201683609187927899416700094630764182477464e-0003,
+};
+
+/* Symbolic names for the entries of cr[]; Pik/Qik belong to GTi(). */
+#define	P10   cr[0]
+#define	P11   cr[1]
+#define	P12   cr[2]
+#define	P13   cr[3]
+#define	P14   cr[4]
+#define	Q10   cr[5]
+#define	Q11   cr[6]
+#define	Q12   cr[7]
+#define	Q13   cr[8]
+#define	Q14   cr[9]
+#define	Q15   cr[10]
+#define	P20   cr[11]
+#define	P21   cr[12]
+#define	P22   cr[13]
+#define	P23   cr[14]
+#define	Q20   cr[15]
+#define	Q21   cr[16]
+#define	Q22   cr[17]
+#define	Q23   cr[18]
+#define	Q24   cr[19]
+#define	Q25   cr[20]
+#define	Q26   cr[21]
+#define	P30   cr[22]
+#define	P31   cr[23]
+#define	P32   cr[24]
+#define	P33   cr[25]
+#define	P34   cr[26]
+#define	Q30   cr[27]
+#define	Q31   cr[28]
+#define	Q32   cr[29]
+#define	Q33   cr[30]
+#define	Q34   cr[31]
+#define	Q35   cr[32]
+
+/*
+ * Constant and linear terms used by GT1(), GT2() and GT3():
+ *   GZi_h + GZi_l  head/tail of the constant term added in GTi();
+ *   TZ1, TZ3       coefficients of the linear term in GT1() and GT3()
+ *                  (GT2() has no linear term).
+ */
+static const double
+	GZ1_h = +0.938204627909682398190,
+	GZ1_l = +5.121952600248205157935e-17,
+	GZ2_h = +0.885603194410888749921,
+	GZ2_l = -4.964236872556339810692e-17,
+	GZ3_h = +0.936781411463652347038,
+	GZ3_l = -2.541923110834479415023e-17,
+	TZ1 = -0.3517214357852935791015625,
+	TZ3 = +0.280530631542205810546875;
+/* INDENT ON */
+
+/*
+ * GT1(yh, yl): gamma kernel for the primary sub-interval
+ * GT1 = [1.0000, 1.2845], with the argument supplied as y = yh + yl
+ * (yh is assumed to carry about 20 significant bits).
+ *
+ * Evaluates  (GZ1_h + GZ1_l) + TZ1*y + y^2 * P1(y)/Q1(y)  and returns it
+ * as a head/tail pair: .h is rounded through float, .l holds the rest.
+ */
+static struct Double
+GT1(double yh, double yl) {
+	struct Double g;
+	double s, s2, lin, tail;
+
+	s = yh + yl;
+	s2 = s * s;
+	lin = TZ1 * yh;		/* leading piece of the linear term */
+	tail = (s2 * (P10 + s * ((P11 + s * P12) + s2 * (P13 + s * P14)))) /
+		(Q10 + s * ((Q11 + s * Q12) + s2 * ((Q13 + Q14 * s) + s2 * Q15)));
+	tail = tail + (TZ1 * yl + GZ1_l);
+	g.h = (double) ((float) (lin + GZ1_h + tail));
+	/* fold whatever the float rounding dropped back into the tail */
+	g.l = tail + (lin - (g.h - GZ1_h));
+	return (g);
+}
+
+/*
+ * GT2(yh, yl): gamma kernel for the primary sub-interval
+ * GT2 = [1.2844, 1.6374], with the argument supplied as y = yh + yl
+ * (yh is assumed to carry about 20 significant bits).
+ *
+ * Evaluates  (GZ2_h + GZ2_l) + y^2 * P2(y)/Q2(y)  and returns it as a
+ * head/tail pair: .h is rounded through float, .l holds the rest.
+ */
+static struct Double
+GT2(double yh, double yl) {
+	struct Double g;
+	double s, s2, tail;
+
+	s = yh + yl;
+	s2 = s * s;
+	tail = (s2 * (P20 + s * P21 + s2 * (P22 + s * P23))) /
+		(Q20 + (s * ((Q21 + Q22 * s) + s2 * Q23) +
+		(s2 * s2) * ((Q24 + Q25 * s) + s2 * Q26))) + GZ2_l;
+	g.h = (double) ((float) (GZ2_h + tail));
+	/* remainder after the head was rounded to float precision */
+	g.l = tail - (g.h - GZ2_h);
+	return (g);
+}
+
+/*
+ * GT3(yh, yl): gamma kernel for the primary sub-interval
+ * GT3 = [1.6373, 2.0000], with the argument supplied as y = yh + yl
+ * (yh is assumed to carry about 20 significant bits).
+ *
+ * Evaluates  (GZ3_h + GZ3_l) + TZ3*y + y^2 * P3(y)/Q3(y)  and returns it
+ * as a head/tail pair: .h is rounded through float, .l holds the rest.
+ */
+static struct Double
+GT3(double yh, double yl) {
+	struct Double g;
+	double s, s2, lin, tail;
+
+	s = yh + yl;
+	s2 = s * s;
+	lin = TZ3 * yh;		/* leading piece of the linear term */
+	tail = (s2 * (P30 + s * ((P31 + s * P32) + s2 * (P33 + s * P34)))) /
+		(Q30 + s * ((Q31 + s * Q32) + s2 * ((Q33 + Q34 * s) + s2 * Q35)));
+	tail = tail + (TZ3 * yl + GZ3_l);
+	g.h = (double) ((float) (lin + GZ3_h + tail));
+	/* fold whatever the float rounding dropped back into the tail */
+	g.l = tail + (lin - (g.h - GZ3_h));
+	return (g);
+}
+
+/* INDENT OFF */
+/*
+ * return tgamma(x) scaled by 2**-m for 8<x<=171.62... using Stirling's formula
+ *     log(G(x)) ~= (x-.5)*(log(x)-1) + .5(log(2*pi)-1) + (1/x)*P(1/(x*x))
+ *                = L1 + L2 + L3,
+ *
+ * Parameters:
+ *	x	argument; the caller is expected to keep it in the range above
+ *	m	output: receives the power of two (k >> 5) that was split
+ *		off from the result
+ * Returns a struct Double zz where zz.h is the table value S[j] and zz.l
+ * is the matching correction, so that zz.h + zz.l is the scaled result.
+ */
+/* INDENT ON */
+static struct Double
+large_gam(double x, int *m) {
+	double z, t1, t2, t3, z2, t5, w, y, u, r, z4, v, t24 = 16777216.0,
+		p24 = 1.0 / 16777216.0;
+	int n2, j2, k, ix, j;
+	unsigned lx;
+	struct Double zz;
+	double u2, ss_h, ss_l, r_h, w_h, w_l, t4;
+
+/* INDENT OFF */
+/*
+ * compute ss = ss.h+ss.l = log(x)-1 (see tgamma_log.h for details)
+ *
+ *  log(x) - 1 = T1(n) + T2(j) + T3(s), where x = 2**n * y,  1<=y<2,
+ *  j=[64*y], z[j]=1+j/64+1/128, s = (y-z[j])/(y+z[j]), and
+ *       T1(n) = T1[2n,2n+1] = n*log(2)-1,
+ *       T2(j) = T2[2j,2j+1] = log(z[j]),
+ *       T3(s) = 2s + A1*s^3 + A2*s^5 + A3*s^7
+ *  Note
+ *  (1) the leading entries are truncated to 24 binary point.
+ *  (2) Remez error for T3(s) is bounded by 2**(-72.4)
+ *                                   2**(-24)
+ *                           _________V___________________
+ *               T1(n):     |_________|___________________|
+ *                             _______ ______________________
+ *               T2(j):       |_______|______________________|
+ *                                ____ _______________________
+ *               2s:             |____|_______________________|
+ *                                    __________________________
+ *          +    T3(s)-2s:           |__________________________|
+ *                       -------------------------------------------
+ *                          [leading] + [Trailing]
+ */
+/* INDENT ON */
+	ix = __HI(x);
+	lx = __LO(x);
+	n2 = (ix >> 20) - 0x3ff;	/* exponent of x, range:3-7 */
+	n2 += n2;			/* 2n */
+	ix = (ix & 0x000fffff) | 0x3ff00000;	/* y = scale x to [1,2] */
+	__HI(y) = ix;
+	__LO(y) = lx;
+	__HI(z) = (ix & 0xffffc000) | 0x2000;	/* z[j]=1+j/64+1/128 */
+	__LO(z) = 0;
+	j2 = (ix >> 13) & 0x7e;	/* 2j */
+	t1 = y + z;
+	t2 = y - z;
+	r = one / t1;
+	t1 = (double) ((float) t1);	/* (y+z) rounded to float precision */
+	u = r * t2;		/* u = (y-z)/(y+z) */
+	t4 = T2[j2 + 1] + T1[n2 + 1];	/* sum of the trailing table parts */
+	z2 = u * u;
+	k = __HI(u) & 0x7fffffff;	/* high word of |u| */
+	t3 = T2[j2] + T1[n2];		/* sum of the leading table parts */
+	if ((k >> 20) < 0x3ec) {	/* |u|<2**-19 */
+		/* small u: evaluate all of T3(u), including the 2u term */
+		t2 = t4 + u * ((two + z2 * A1) + (z2 * z2) * (A2 + z2 * A3));
+	} else {
+		/* T3(u)-2u, then add 2u as v (chopped to 2**-24) + correction */
+		t5 = t4 + u * (z2 * A1 + (z2 * z2) * (A2 + z2 * A3));
+		u2 = u + u;
+		v = (double) ((int) (u2 * t24)) * p24;
+		t2 = t5 + r * ((two * t2 - v * t1) - v * (y - (t1 - z)));
+		t3 += v;
+	}
+	ss_h = (double) ((float) (t2 + t3));
+	ss_l = t2 - (ss_h - t3);
+
+	/*
+	 * compute ww = (x-.5)*(log(x)-1) + .5*(log(2pi)-1) + 1/x*(P(1/x^2)))
+	 * where ss = log(x) - 1 is already in extra precision
+	 */
+	z = one / x;
+	r = x - half;
+	r_h = (double) ((float) r);
+	w_h = r_h * ss_h + hln2pi_h;
+	z2 = z * z;
+	w = (r - r_h) * ss_h + r * ss_l;
+	z4 = z2 * z2;
+	/* odd-index and even-index GP terms are summed separately */
+	t1 = z2 * (GP1 + z4 * (GP3 + z4 * (GP5 + z4 * GP7)));
+	t2 = z4 * (GP2 + z4 * (GP4 + z4 * GP6));
+	t1 += t2;
+	w += hln2pi_l;
+	w_l = z * (GP0 + t1) + w;
+	k = (int) ((w_h + w_l) * invln2_32 + half);
+
+	/* compute the exponential of w_h+w_l */
+	j = k & 0x1f;		/* index into S[] / S_trail[] */
+	*m = (k >> 5);		/* power of two handed back to the caller */
+	t3 = (double) k;
+
+	/* perform w - k*ln2_32 (represent as w_h - w_l) */
+	t1 = w_h - t3 * ln2_32hi;
+	t2 = t3 * ln2_32lo;
+	/* NOTE(review): w is not read again after this assignment */
+	w = w_l - t2;
+	w_h = t1 + w_l;
+	w_l = t2 - (w_l - (w_h - t1));
+
+	/* compute exp(w_h+w_l) */
+	z = w_h - w_l;
+	z2 = z * z;
+	t1 = z2 * (Et1 + z2 * (Et3 + z2 * Et5));
+	t2 = z2 * (Et2 + z2 * Et4);
+	t3 = w_h - (w_l - (t1 + z * t2));
+	zz.l = S_trail[j] * (one + t3) + S[j] * t3;
+	zz.h = S[j];
+	return (zz);
+}
+
+/* INDENT OFF */
+/*
+ * kpsin(x)= sin(pi*x)/pi
+ *                 3        5        7        9        11        13        15
+ *	= x+ks[0]*x +ks[1]*x +ks[2]*x +ks[3]*x +ks[4]*x  +ks[5]*x  +ks[6]*x
+ *
+ * ks[] holds the seven coefficients of the odd powers x^3 .. x^15 used
+ * by kpsin(); the leading x term is not stored.
+ */
+static const double ks[] = {
+	-1.64493406684822640606569,
+	+8.11742425283341655883668741874008920850698590621e-0001,
+	-1.90751824120862873825597279118304943994042258291e-0001,
+	+2.61478477632554278317289628332654539353521911570e-0002,
+	-2.34607978510202710377617190278735525354347705866e-0003,
+	+1.48413292290051695897242899977121846763824221705e-0004,
+	-6.87730769637543488108688726777687262485357072242e-0006,
+};
+/* INDENT ON */
+
+/*
+ * kpsin(x): sin(pi*x)/pi returned as a head/tail pair.  The head is x
+ * itself; the tail is the polynomial ks[0]*x^3 + ... + ks[6]*x^15.
+ * assume x is not tiny and positive
+ */
+static struct Double
+kpsin(double x) {
+	struct Double res;
+	double sq, cube, quad, lead, rest;
+
+	sq = x * x;
+	cube = sq * x;
+	quad = sq * sq;
+	res.h = x;
+	lead = cube * ks[0];	/* x^3 term, kept apart from the higher terms */
+	rest = (cube * sq) * ((ks[1] + sq * ks[2] + quad * ks[3]) + (sq * quad) *
+		(ks[4] + sq * ks[5] + quad * ks[6]));
+	res.l = lead + rest;
+	return (res);
+}
+
+/* INDENT OFF */
+/*
+ * kpcos(x)= cos(pi*x)/pi
+ *                     2        4        6        8        10        12
+ *	= 1/pi +kc[0]*x +kc[1]*x +kc[2]*x +kc[3]*x +kc[4]*x  +kc[5]*x
+ *
+ * one_pi_h + one_pi_l is the head/tail split of the constant term 1/pi.
+ * npi_2_h + npi_2_l is a head/tail split of the x^2 coefficient (same
+ * value as kc[0]); kpcos() uses this split rather than kc[0] itself.
+ */
+
+static const double one_pi_h = 0.318309886183790635705292970,
+		one_pi_l = 3.583247455607534006714276420e-17;
+static const double npi_2_h = -1.5625,
+		npi_2_l = -0.00829632679489661923132169163975055099555883223;
+static const double kc[] = {
+	-1.57079632679489661923132169163975055099555883223e+0000,
+	+1.29192819501230224953283586722575766189551966008e+0000,
+	-4.25027339940149518500158850753393173519732149213e-0001,
+	+7.49080625187015312373925142219429422375556727752e-0002,
+	-8.21442040906099210866977352284054849051348692715e-0003,
+	+6.10411356829515414575566564733632532333904115968e-0004,
+};
+/* INDENT ON */
+
+/*
+ * kpcos(x): cos(pi*x)/pi returned as a head/tail pair.  The head is
+ * one_pi_h; the tail gathers one_pi_l, the x^2 term (computed from the
+ * npi_2_h/npi_2_l split) and the higher terms kc[1]*x^4 .. kc[5]*x^12.
+ * assume x is not tiny and positive
+ */
+static struct Double
+kpcos(double x) {
+	struct Double res;
+	double sq, xf, quad, low2, poly, lead, sum;
+
+	sq = x * x;
+	xf = (double) ((float) x);	/* x rounded to float precision */
+	quad = sq * sq;
+	res.h = one_pi_h;
+	/* x^2 term except for npi_2_h*xf*xf, which is added last */
+	low2 = npi_2_l * sq + npi_2_h * (x + xf) * (x - xf);
+	poly = one_pi_l + quad * ((kc[1] + sq * kc[2]) + quad * (kc[3] + sq *
+		kc[4] + quad * kc[5]));
+	lead = xf * xf;		/* 48 bits mantissa */
+	sum = low2 + poly;
+	lead *= npi_2_h;	/* npi_2_h is a 5-bit constant, so the product is exact */
+	res.l = sum + lead;	/* adding the exact product last minimizes rounding error */
+	return (res);
+}
+
+/* INDENT OFF */
+/*
+ * Head/tail pairs t0zN + t0zN_l.  gam_n() subtracts the pair selected by
+ * the range of x before calling GT1(), GT2() or GT3(); the full decimal
+ * value of each pair is quoted in the comment above it.
+ */
+static const double
+	/* 0.134861805732790769689793935774652917006 */
+	t0z1   =  0.1348618057327907737708,
+	t0z1_l = -4.0810077708578299022531e-18,
+	/* 0.461632144968362341262659542325721328468 */
+	t0z2   =  0.4616321449683623567850,
+	t0z2_l = -1.5522348162858676890521e-17,
+	/* 0.819773101100500601787868704921606996312 */
+	t0z3   =  0.8197731011005006118708,
+	t0z3_l = -1.0082945122487103498325e-17;
+	/* 1.134861805732790769689793935774652917006 */
+/* INDENT ON */
+
+/* gamma(x+i) for 0 <= x < 1, i = 0..7, as a head/tail pair rr.h + rr.l */
+static struct Double
+gam_n(int i, double x) {
+	struct Double rr, yy;
+	double r1, r2, t2, z, xh, xl, yh, yl, zh, z1, z2, zl, x5, wh, wl;
+
+	/* compute yy = gamma(x+1) */
+	if (x > 0.2845) {
+		if (x > 0.6374) {
+			r1 = x - t0z3;
+			r2 = (double) ((float) (r1 - t0z3_l));
+			t2 = r1 - r2;	/* r2 + (t2 - t0z3_l) = r1 - t0z3_l */
+			yy = GT3(r2, t2 - t0z3_l);
+		} else {
+			r1 = x - t0z2;
+			r2 = (double) ((float) (r1 - t0z2_l));
+			t2 = r1 - r2;	/* r2 + (t2 - t0z2_l) = r1 - t0z2_l */
+			yy = GT2(r2, t2 - t0z2_l);
+		}
+	} else {
+		r1 = x - t0z1;
+		r2 = (double) ((float) (r1 - t0z1_l));
+		t2 = r1 - r2;		/* r2 + (t2 - t0z1_l) = r1 - t0z1_l */
+		yy = GT1(r2, t2 - t0z1_l);
+	}
+
+	/* gamma(x+i) = (x+i-1)*...*(x+1)*yy for 0<i<8; i==0 gives yy/x */
+	switch (i) {
+	case 0:		/* yy/x */
+		r1 = one / x;
+		xh = (double) ((float) x);	/* x is not tiny */
+		rr.h = (double) ((float) ((yy.h + yy.l) * r1));
+		rr.l = r1 * (yy.h - rr.h * xh) -
+			((r1 * rr.h) * (x - xh) - r1 * yy.l);
+		break;
+	case 1:		/* yy */
+		rr.h = yy.h;
+		rr.l = yy.l;
+		break;
+	case 2:		/* (x+1)*yy */
+		z = x + one;	/* may not be exact */
+		zh = (double) ((float) z);
+		rr.h = zh * yy.h;
+		rr.l = z * yy.l + (x - (zh - one)) * yy.h;
+		break;
+	case 3:		/* (x+2)*(x+1)*yy */
+		z1 = x + one;
+		z2 = x + 2.0;
+		z = z1 * z2;
+		xh = (double) ((float) z);
+		zh = (double) ((float) z1);
+		xl = (x - (zh - one)) * (z2 + zh) - (xh - zh * (zh + one));
+		rr.h = xh * yy.h;
+		rr.l = z * yy.l + xl * yy.h;
+		break;
+
+	case 4:		/* (x+1)*(x+3)*(x+2)*yy */
+		z1 = x + 2.0;
+		z2 = (x + one) * (x + 3.0);
+		zh = z1;
+		__LO(zh) = 0;
+		__HI(zh) &= 0xfffffff8;	/* zh 18 bits mantissa */
+		zl = x - (zh - 2.0);
+		z = z1 * z2;
+		xh = (double) ((float) z);
+		xl = zl * (z2 + zh * (z1 + zh)) - (xh - zh * (zh * zh - one));
+		rr.h = xh * yy.h;
+		rr.l = z * yy.l + xl * yy.h;
+		break;
+	case 5:		/* ((x+1)*(x+4)*(x+2)*(x+3))*yy */
+		z1 = x + 2.0;
+		z2 = x + 3.0;
+		z = z1 * z2;
+		zh = (double) ((float) z1);
+		yh = (double) ((float) z);
+		yl = (x - (zh - 2.0)) * (z2 + zh) - (yh - zh * (zh + one));
+		z2 = z - 2.0;	/* (x+2)(x+3) - 2 = (x+1)(x+4) */
+		z *= z2;
+		xh = (double) ((float) z);
+		xl = yl * (z2 + yh) - (xh - yh * (yh - 2.0));
+		rr.h = xh * yy.h;
+		rr.l = z * yy.l + xl * yy.h;
+		break;
+	case 6:		/* ((x+1)*(x+2)*(x+3)*(x+4)*(x+5))*yy */
+		z1 = x + 2.0;
+		z2 = x + 3.0;
+		z = z1 * z2;
+		zh = (double) ((float) z1);
+		yh = (double) ((float) z);
+		z1 = x - (zh - 2.0);
+		yl = z1 * (z2 + zh) - (yh - zh * (zh + one));
+		z2 = z - 2.0;	/* (x+2)(x+3) - 2 = (x+1)(x+4) */
+		x5 = x + 5.0;
+		z *= z2;
+		xh = (double) ((float) z);
+		zh += 3.0;
+		xl = yl * (z2 + yh) - (xh - yh * (yh - 2.0));
+						/* xh+xl=(x+1)*...*(x+4) */
+		/* wh+wl=(x+5)*yy */
+		wh = (double) ((float) (x5 * (yy.h + yy.l)));
+		wl = (z1 * yy.h + x5 * yy.l) - (wh - zh * yy.h);
+		rr.h = wh * xh;
+		rr.l = z * wl + xl * wh;
+		break;
+	case 7:		/* ((x+1)*(x+2)*(x+3)*(x+4)*(x+5)*(x+6))*yy */
+		z1 = x + 3.0;
+		z2 = x + 4.0;
+		z = z2 * z1;
+		zh = (double) ((float) z1);
+		yh = (double) ((float) z);	/* yh+yl = (x+3)(x+4) */
+		yl = (x - (zh - 3.0)) * (z2 + zh) - (yh - (zh * (zh + one)));
+		z1 = x + 6.0;
+		z2 = z - 2.0;	/* z2 = (x+2)*(x+5) */
+		z *= z2;
+		xh = (double) ((float) z);
+		xl = yl * (z2 + yh) - (xh - yh * (yh - 2.0));
+						/* xh+xl=(x+2)*...*(x+5) */
+		/* wh+wl=(x+1)(x+6)*yy */
+		z2 -= 4.0;	/* z2 = (x+1)(x+6) */
+		wh = (double) ((float) (z2 * (yy.h + yy.l)));
+		wl = (z2 * yy.l + yl * yy.h) - (wh - (yh - 6.0) * yy.h);
+		rr.h = wh * xh;
+		rr.l = z * wl + xl * wh;
+	}	/* no default: rr stays unset unless 0 <= i <= 7 */
+	return (rr);
+}
+
+double
+tgamma(double x) {
+	struct Double ss, ww;
+	double t, t1, t2, t3, t4, t5, w, y, z, z1, z2, z3, z5;
+	int i, j, k, m, ix, hx, xk;
+	unsigned lx;
+	/* hx/lx: high and low 32-bit words of x; ix: hx with the sign cleared */
+	hx = __HI(x);
+	lx = __LO(x);
+	ix = hx & 0x7fffffff;
+	y = x;
+
+	if (ix < 0x3ca00000)
+		return (one / x);	/* |x| < 2**-53 */
+	if (ix >= 0x7ff00000)
+			/* +Inf -> +Inf, -Inf or NaN -> NaN */
+		return (x * ((hx < 0)? 0.0 : x));
+	if (hx > 0x406573fa ||	/* x > 171.62... overflow to +inf */
+	    (hx == 0x406573fa && lx > 0xE561F647)) {
+		z = x / tiny;
+		return (z * z);
+	}
+	if (hx >= 0x40200000) {	/* x >= 8 */
+		ww = large_gam(x, &m);
+		w = ww.h + ww.l;
+		__HI(w) += m << 20;	/* add m to the exponent field of w */
+		return (w);
+	}
+	if (hx > 0) {		/* x from 0 to 8 */
+		i = (int) x;
+		ww = gam_n(i, x - (double) i);	/* integer and fraction parts */
+		return (ww.h + ww.l);
+	}
+
+	/* negative x */
+	/* INDENT OFF */
+	/*
+	 * compute: xk =
+	 *	-2 ... x is an even int (-inf is even)
+	 *	-1 ... x is an odd int
+	 *	+0 ... x is not an int but chopped to an even int
+	 *	+1 ... x is not an int but chopped to an odd int
+	 */
+	/* INDENT ON */
+	xk = 0;
+	if (ix >= 0x43300000) {		/* |x| >= 2**52: x is an integer */
+		if (ix >= 0x43400000)	/* |x| >= 2**53: always even */
+			xk = -2;
+		else
+			xk = -2 + (lx & 1);
+	} else if (ix >= 0x3ff00000) {	/* 1 <= |x| < 2**52 */
+		k = (ix >> 20) - 0x3ff;	/* unbiased exponent */
+		if (k > 20) {		/* units bit lies in the low word */
+			j = lx >> (52 - k);
+			if ((j << (52 - k)) == lx)
+				xk = -2 + (j & 1);
+			else
+				xk = j & 1;
+		} else {		/* units bit lies in the high word */
+			j = ix >> (20 - k);
+			if ((j << (20 - k)) == ix && lx == 0)
+				xk = -2 + (j & 1);
+			else
+				xk = j & 1;
+		}
+	}
+	if (xk < 0)
+		/* ideally gamma(-n)= (-1)**(n+1) * inf, but c99 expect NaN */
+		return ((x - x) / (x - x));		/* 0/0 = NaN */
+
+
+	/* negative underflow threshold */
+	if (ix > 0x4066e000 || (ix == 0x4066e000 && lx > 11)) {
+		/* x < -183.0 - 11ulp */
+		z = tiny / x;
+		if (xk == 1)
+			z = -z;
+		return (z * tiny);
+	}
+
+	/* now compute gamma(x) by  -1/((sin(pi*y)/pi)*gamma(1+y)), y = -x */
+
+	/*
+	 * First compute ss = -sin(pi*y)/pi , so that
+	 * gamma(x) = 1/(ss*gamma(1+y))
+	 */
+	y = -x;
+	j = (int) y;
+	z = y - (double) j;	/* fractional part of y */
+	if (z > 0.3183098861837906715377675)
+		if (z > 0.6816901138162093284622325)
+			ss = kpsin(one - z);
+		else
+			ss = kpcos(0.5 - z);
+	else
+		ss = kpsin(z);
+	if (xk == 0) {
+		ss.h = -ss.h;
+		ss.l = -ss.l;
+	}
+
+	/* Then compute ww = gamma(1+y), note that result scale to 2**m */
+	m = 0;
+	if (j < 7) {
+		ww = gam_n(j + 1, z);
+	} else {
+		w = y + one;
+		if ((lx & 1) == 0) {	/* y+1 exact (note that y<184) */
+			ww = large_gam(w, &m);
+		} else {
+			t = w - one;
+			if (t == y) {	/* y+one exact */
+				ww = large_gam(w, &m);
+			} else {	/* use y*gamma(y) */
+				if (j == 7)
+					ww = gam_n(j, z);
+				else
+					ww = large_gam(y, &m);
+				t4 = ww.h + ww.l;
+				t1 = (double) ((float) y);
+				t2 = (double) ((float) t4);
+						/* t4 will not be too large */
+				ww.l = y * (ww.l - (t2 - ww.h)) + (y - t1) * t2;
+				ww.h = t1 * t2;
+			}
+		}
+	}
+
+	/* compute 1/(ss*ww) */
+	t3 = ss.h + ss.l;
+	t4 = ww.h + ww.l;
+	t1 = (double) ((float) t3);
+	t2 = (double) ((float) t4);
+	z1 = ss.l - (t1 - ss.h);	/* (t1,z1) = ss */
+	z2 = ww.l - (t2 - ww.h);	/* (t2,z2) = ww */
+	t3 = t3 * t4;			/* t3 = ss*ww */
+	z3 = one / t3;			/* z3 = 1/(ss*ww) */
+	t5 = t1 * t2;
+	z5 = z1 * t4 + t1 * z2;		/* (t5,z5) = ss*ww */
+	t1 = (double) ((float) t3);	/* (t1,z1) = ss*ww */
+	z1 = z5 - (t1 - t5);
+	t2 = (double) ((float) z3);	/* leading 1/(ss*ww) */
+	z2 = z3 * (t2 * z1 - (one - t2 * t1));
+	z = t2 - z2;
+
+	/* check whether z*2**-m underflow */
+	if (m != 0) {
+		hx = __HI(z);
+		i = hx & 0x80000000;	/* sign bit of z */
+		ix = hx ^ i;		/* high word of |z| */
+		j = ix >> 20;		/* biased exponent of z */
+		if (j > m) {
+			ix -= m << 20;
+			__HI(z) = ix ^ i;
+		} else if ((m - j) > 52) {
+			/* underflow */
+			if (xk == 0)
+				z = -tiny * tiny;
+			else
+				z = tiny * tiny;
+		} else {
+			/* subnormal */
+			m -= 60;
+			t = one;
+			__HI(t) -= 60 << 20;	/* t = 2**-60 */
+			ix -= m << 20;
+			__HI(z) = ix ^ i;
+			z *= t;
+		}
+	}
+	return (z);
+}
diff --git a/usr/src/libm/src/m9x/tgammaf.c b/usr/src/libm/src/m9x/tgammaf.c
new file mode 100644
index 0000000..538cf89
--- /dev/null
+++ b/usr/src/libm/src/m9x/tgammaf.c
@@ -0,0 +1,545 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"@(#)tgammaf.c	1.10	06/01/31 SMI"
+
+#pragma weak tgammaf = __tgammaf
+
+/*
+ * True gamma function
+ *
+ * float tgammaf(float x)
+ *
+ * Algorithm: see tgamma.c
+ *
+ * Maximum error observed: 0.87ulp (both positive and negative arguments)
+ */
+
+#include "libm.h"
+#include "libm_synonyms.h"
+#include <math.h>
+#include <sunmath.h>
+
+#if defined(__sparc)
+#define	HIWORD	0
+#define	LOWORD	1
+#elif defined(__i386)
+#define	HIWORD	1
+#define	LOWORD	0
+#else
+#error Unknown architecture
+#endif
+#define	__HI(x)	((int *) &x)[HIWORD]
+#define	__LO(x)	((unsigned *) &x)[LOWORD]
+
+/* Coefficients for primary intervals GTi(); named by the macros that follow */
+static const double cr[] = {
+	/* p1: P10..P15, used by GT1() */
+	+7.09087253435088360271451613398019280077561279443e-0001,
+	-5.17229560788652108545141978238701790105241761089e-0001,
+	+5.23403394528150789405825222323770647162337764327e-0001,
+	-4.54586308717075010784041566069480411732634814899e-0001,
+	+4.20596490915239085459964590559256913498190955233e-0001,
+	-3.57307589712377520978332185838241458642142185789e-0001,
+
+	/* p2: P20..P25, used by GT2() */
+	+4.28486983980295198166056119223984284434264344578e-0001,
+	-1.30704539487709138528680121627899735386650103914e-0001,
+	+1.60856285038051955072861219352655851542955430871e-0001,
+	-9.22285161346010583774458802067371182158937943507e-0002,
+	+7.19240511767225260740890292605070595560626179357e-0002,
+	-4.88158265593355093703112238534484636193260459574e-0002,
+
+	/* p3: P30..P34 (only five terms), used by GT3() */
+	+3.82409531118807759081121479786092134814808872880e-0001,
+	+2.65309888180188647956400403013495759365167853426e-0002,
+	+8.06815109775079171923561169415370309376296739835e-0002,
+	-1.54821591666137613928840890835174351674007764799e-0002,
+	+1.76308239242717268530498313416899188157165183405e-0002,
+
+	/* GZi and TZi: constant and linear terms; there is no TZ2 entry */
+	+0.9382046279096824494097535615803269576988,	/* GZ1 */
+	+0.8856031944108887002788159005825887332080,	/* GZ2 */
+	+0.9367814114636523216188468970808378497426,	/* GZ3 */
+	-0.3517214357852935791015625,	/* TZ1 */
+	+0.280530631542205810546875,	/* TZ3 */
+};
+
+#define	P10	cr[0]
+#define	P11	cr[1]
+#define	P12	cr[2]
+#define	P13	cr[3]
+#define	P14	cr[4]
+#define	P15	cr[5]
+#define	P20	cr[6]
+#define	P21	cr[7]
+#define	P22	cr[8]
+#define	P23	cr[9]
+#define	P24	cr[10]
+#define	P25	cr[11]
+#define	P30	cr[12]
+#define	P31	cr[13]
+#define	P32	cr[14]
+#define	P33	cr[15]
+#define	P34	cr[16]
+#define	GZ1	cr[17]
+#define	GZ2	cr[18]
+#define	GZ3	cr[19]
+#define	TZ1	cr[20]
+#define	TZ3	cr[21]
+
+/* gamma over GT1 = [1.0000, 1.2845]; gam_n() passes y = x - t0z1 */
+static double
+GT1(double y) {
+	const double ysq = y * y;
+	double tail;
+
+	tail = TZ1 * y + ysq * ((P10 + y * P11 + ysq * P12) +
+	    (ysq * y) * (P13 + y * P14 + ysq * P15));
+	return (GZ1 + tail);
+}
+
+/* gamma over GT2 = [1.2844, 1.6374]; gam_n() passes y = x - t0z2 */
+static double
+GT2(double y) {
+	const double ysq = y * y;
+
+	/* constant GZ2 plus y*y times a polynomial: no linear term here */
+	return (GZ2 + ysq * ((P20 + y * P21 + ysq * P22) +
+	    (ysq * y) * (P23 + y * P24 + ysq * P25)));
+}
+
+/* gamma over GT3 = [1.6373, 2.0000]; gam_n() passes y = x - t0z3 */
+static double
+GT3(double y) {
+	const double ysq = y * y;
+	double tail;
+
+	tail = TZ3 * y + ysq * ((P30 + y * P31 + ysq * P32) +
+	    (ysq * y) * (P33 + y * P34));
+	return (GZ3 + tail);
+}
+
+/* INDENT OFF */
+static const double c[] = {	/* scalar constants, named by macros below */
++1.0,
++2.0,
++0.5,
++1.0e-300,							/* tiny */
++6.666717231848518054693623697539230e-0001,			/* A1=T3[0] */
++8.33333330959694065245736888749042811909994573178e-0002,	/* GP[0] */
+-2.77765545601667179767706600890361535225507762168e-0003,	/* GP[1] */
++7.77830853479775281781085278324621033523037489883e-0004,	/* GP[2] */
++4.18938533204672741744150788368695779923320328369e-0001,	/* hln2pi   */
++2.16608493924982901946e-02,					/* ln2_32 */
++4.61662413084468283841e+01,					/* invln2_32 */
++5.00004103388988968841156421415669985414073453720e-0001,	/* Et1 */
++1.66667656752800761782778277828110208108687545908e-0001,	/* Et2 */
+};
+
+#define	one		c[0]
+#define	two		c[1]
+#define	half		c[2]
+#define	tiny		c[3]
+#define	A1		c[4]
+#define	GP0		c[5]
+#define	GP1		c[6]
+#define	GP2		c[7]
+#define	hln2pi		c[8]
+#define	ln2_32		c[9]
+#define	invln2_32	c[10]
+#define	Et1		c[11]
+#define	Et2		c[12]
+
+/* S[j] = 2**(j/32.), j = 0..31, for the final computation of exp(w) */
+static const double S[] = {
++1.00000000000000000000e+00,	/* 3FF0000000000000 */
++1.02189714865411662714e+00,	/* 3FF059B0D3158574 */
++1.04427378242741375480e+00,	/* 3FF0B5586CF9890F */
++1.06714040067682369717e+00,	/* 3FF11301D0125B51 */
++1.09050773266525768967e+00,	/* 3FF172B83C7D517B */
++1.11438674259589243221e+00,	/* 3FF1D4873168B9AA */
++1.13878863475669156458e+00,	/* 3FF2387A6E756238 */
++1.16372485877757747552e+00,	/* 3FF29E9DF51FDEE1 */
++1.18920711500272102690e+00,	/* 3FF306FE0A31B715 */
++1.21524735998046895524e+00,	/* 3FF371A7373AA9CB */
++1.24185781207348400201e+00,	/* 3FF3DEA64C123422 */
++1.26905095719173321989e+00,	/* 3FF44E086061892D */
++1.29683955465100964055e+00,	/* 3FF4BFDAD5362A27 */
++1.32523664315974132322e+00,	/* 3FF5342B569D4F82 */
++1.35425554693689265129e+00,	/* 3FF5AB07DD485429 */
++1.38390988196383202258e+00,	/* 3FF6247EB03A5585 */
++1.41421356237309514547e+00,	/* 3FF6A09E667F3BCD */
++1.44518080697704665027e+00,	/* 3FF71F75E8EC5F74 */
++1.47682614593949934623e+00,	/* 3FF7A11473EB0187 */
++1.50916442759342284141e+00,	/* 3FF82589994CCE13 */
++1.54221082540794074411e+00,	/* 3FF8ACE5422AA0DB */
++1.57598084510788649659e+00,	/* 3FF93737B0CDC5E5 */
++1.61049033194925428347e+00,	/* 3FF9C49182A3F090 */
++1.64575547815396494578e+00,	/* 3FFA5503B23E255D */
++1.68179283050742900407e+00,	/* 3FFAE89F995AD3AD */
++1.71861929812247793414e+00,	/* 3FFB7F76F2FB5E47 */
++1.75625216037329945351e+00,	/* 3FFC199BDD85529C */
++1.79470907500310716820e+00,	/* 3FFCB720DCEF9069 */
++1.83400808640934243066e+00,	/* 3FFD5818DCFBA487 */
++1.87416763411029996256e+00,	/* 3FFDFC97337B9B5F */
++1.91520656139714740007e+00,	/* 3FFEA4AFA2A490DA */
++1.95714412417540017941e+00,	/* 3FFF50765B6E4540 */
+};
+/* INDENT ON */
+
+/* INDENT OFF */
+/*
+ * return tgammaf(x) in double for 8<x<=35.040096283... using Stirling's formula
+ *     log(G(x)) ~= (x-.5)*(log(x)-1) + .5(log(2*pi)-1) + (1/x)*P(1/(x*x))
+ */
+/*
+ * compute ss = log(x)-1
+ *
+ *  log(x) - 1 = T1(n) + T2(j) + T3(s), where x = 2**n * y,  1<=y<2,
+ *  j=[64*y], z[j]=1+j/64+1/128, s = (y-z[j])/(y+z[j]), and
+ *       T1(n-3) = n*log(2)-1,  n=3,4,5
+ *       T2(j) = log(z[j]),
+ *       T3(s) = 2s + A1*s^3
+ *  Note
+ *  (1) Remez error for T3(s) is bounded by 2**(-35.8)
+ *	(see mpremez/work/Log/tgamma_log_2_outr1)
+ */
+
+static const double T1[] = { /* T1[j]=(j+3)*log(2)-1, j=0..2; see large_gam */
++1.079441541679835928251696364375e+00,
++1.772588722239781237668928485833e+00,
++2.465735902799726547086160607291e+00,
+};
+
+static const double T2[] = {   /* T2[j]=log(1+j/64+1/128), j=0..63 */
++7.782140442054948947462900061137e-03,
++2.316705928153437822879916096229e-02,
++3.831886430213659919375532512380e-02,
++5.324451451881228286587019378653e-02,
++6.795066190850774939456527777263e-02,
++8.244366921107459126816006866831e-02,
++9.672962645855111229557105648746e-02,
++1.108143663402901141948061693232e-01,
++1.247034785009572358634065153809e-01,
++1.384023228591191356853258736016e-01,
++1.519160420258419750718034248969e-01,
++1.652495728953071628756114492772e-01,
++1.784076574728182971194002415109e-01,
++1.913948529996294546092988075613e-01,
++2.042155414286908915038203861962e-01,
++2.168739383006143596190895257443e-01,
++2.293741010648458299914807250461e-01,
++2.417199368871451681443075159135e-01,
++2.539152099809634441373232979066e-01,
++2.659635484971379413391259265375e-01,
++2.778684510034563061863500329234e-01,
++2.896332925830426768788930555257e-01,
++3.012613305781617810128755382338e-01,
++3.127557100038968883862465596883e-01,
++3.241194686542119760906707604350e-01,
++3.353555419211378302571795798142e-01,
++3.464667673462085809184621884258e-01,
++3.574558889218037742260094901409e-01,
++3.683255611587076530482301540504e-01,
++3.790783529349694583908533456310e-01,
++3.897167511400252133704636040035e-01,
++4.002431641270127069293251019951e-01,
++4.106599249852683859343062031758e-01,
++4.209692946441296361288671615068e-01,
++4.311734648183713408591724789556e-01,
++4.412745608048752294894964416613e-01,
++4.512746441394585851446923830790e-01,
++4.611757151221701663679999255979e-01,
++4.709797152187910125468978560564e-01,
++4.806885293457519076766184554480e-01,
++4.903039880451938381503461596457e-01,
++4.998278695564493298213314152470e-01,
++5.092619017898079468040749192283e-01,
++5.186077642080456321529769963648e-01,
++5.278670896208423851138922177783e-01,
++5.370414658968836545667292441538e-01,
++5.461324375981356503823972092312e-01,
++5.551415075405015927154803595159e-01,
++5.640701382848029660713842900902e-01,
++5.729197535617855090927567266263e-01,
++5.816917396346224825206107537254e-01,
++5.903874466021763746419167081236e-01,
++5.990081896460833993816000244617e-01,
++6.075552502245417955010851527911e-01,
++6.160298772155140196475659281967e-01,
++6.244332880118935010425387440547e-01,
++6.327666695710378295457864685036e-01,
++6.410311794209312910556013344054e-01,
++6.492279466251098188908399699053e-01,
++6.573580727083600301418900232459e-01,
++6.654226325450904489500926100067e-01,
++6.734226752121667202979603888010e-01,
++6.813592248079030689480715595681e-01,
++6.892332812388089803249143378146e-01,
+};
+/* INDENT ON */
+
+static double
+large_gam(double x) {	/* gamma(x) as a double; callers pass x >= 8 */
+	double ss, zz, z, t1, t2, w, y, u;
+	unsigned lx;
+	int k, ix, j, m;
+
+	ix = __HI(x);
+	lx = __LO(x);
+	m = (ix >> 20) - 0x3ff;			/* exponent of x, range:3-5 */
+	ix = (ix & 0x000fffff) | 0x3ff00000;	/* y = scale x to [1,2] */
+	__HI(y) = ix;
+	__LO(y) = lx;
+	__HI(z) = (ix & 0xffffc000) | 0x2000;	/* z[j]=1+j/64+1/128 */
+	__LO(z) = 0;
+	j = (ix >> 14) & 0x3f;		/* top 6 mantissa bits: index into T2 */
+	t1 = y + z;
+	t2 = y - z;
+	u = t2 / t1;			/* u = (y-z)/(y+z) */
+	ss = T1[m - 3] + T2[j] + u * (two + A1 * (u * u));
+							/* ss = log(x)-1 */
+	/*
+	 * compute ww = (x-.5)*(log(x)-1) + .5*(log(2pi)-1) + 1/x*(P(1/x^2)))
+	 * where ss = log(x) - 1
+	 */
+	z = one / x;
+	zz = z * z;
+	w = ((x - half) * ss + hln2pi) + z * (GP0 + zz * GP1 + (zz * zz) * GP2);
+	k = (int) (w * invln2_32 + half);	/* nearest int for w >= 0 */
+
+	/* compute the exponential of w */
+	j = k & 0x1f;			/* k = 32*m + j, 0 <= j < 32 */
+	m = k >> 5;
+	z = w - (double) k *ln2_32;	/* reduced argument */
+	zz = S[j] * (one + z + (z * z) * (Et1 + z * Et2));
+	__HI(zz) += m << 20;		/* add m to the exponent field */
+	return (zz);
+}
+/* INDENT OFF */
+/*
+ * kpsin(x)= sin(pi*x)/pi
+ *                 3        5        7        9
+ *	= x+ks[0]*x +ks[1]*x +ks[2]*x +ks[3]*x
+ */
+static const double ks[] = {	/* odd-power coefficients used by kpsin() */
+-1.64493404985645811354476665052005342839447790544e+0000,	/* x^3 */
++8.11740794458351064092797249069438269367389272270e-0001,	/* x^5 */
+-1.90703144603551216933075809162889536878854055202e-0001,	/* x^7 */
++2.55742333994264563281155312271481108635575331201e-0002,	/* x^9 */
+};
+/* INDENT ON */
+
+static double
+kpsin(double x) {
+	const double xsq = x * x;
+
+	/* x + x^3 * (ks[0] + ks[1]*x^2 + ks[2]*x^4 + ks[3]*x^6) */
+	return (x + (x * xsq) * ((ks[0] + xsq * ks[1]) +
+	    (xsq * xsq) * (ks[2] + xsq * ks[3])));
+}
+
+/* INDENT OFF */
+/*
+ * kpcos(x)= cos(pi*x)/pi
+ *                     2        4        6
+ *	= kc[0]+kc[1]*x +kc[2]*x +kc[3]*x
+ */
+static const double kc[] = {	/* even-power coefficients used by kpcos() */
++3.18309886183790671537767526745028724068919291480e-0001,	/* x^0 */
+-1.57079581447762568199467875065854538626594937791e+0000,	/* x^2 */
++1.29183528092558692844073004029568674027807393862e+0000,	/* x^4 */
+-4.20232949771307685981015914425195471602739075537e-0001,	/* x^6 */
+};
+/* INDENT ON */
+
+static double
+kpcos(double x) {
+	const double xsq = x * x;
+
+	/* kc[0] + kc[1]*x^2 + kc[2]*x^4 + kc[3]*x^6 */
+	return (kc[0] + xsq * (kc[1] + xsq * kc[2] + (xsq * xsq) * kc[3]));
+}
+
+/* INDENT OFF */
+static const double	/* offsets subtracted in gam_n() before GT1/GT2/GT3 */
+t0z1 = 0.134861805732790769689793935774652917006,
+t0z2 = 0.461632144968362341262659542325721328468,
+t0z3 = 0.819773101100500601787868704921606996312;
+	/* 1 + t0z1 = 1.134861805732790769689793935774652917006 */
+/* INDENT ON */
+
+/*
+ * gam_n(i, x): gamma(x+i) for 0 <= x < 1 and i = 0..7, built from
+ * gamma(x+1) on one of the three primary intervals.
+ */
+static double
+gam_n(int i, double x) {
+	double g1, prod;
+	double qa, qb;
+
+	/* g1 = gamma(x+1), from the primary interval that contains x+1 */
+	if (x > 0.6374)
+		g1 = GT3(x - t0z3);
+	else if (x > 0.2845)
+		g1 = GT2(x - t0z2);
+	else
+		g1 = GT1(x - t0z1);
+
+	/* gamma(x+i) = (x+i-1)*...*(x+1)*g1 for 0<i<8; i == 0 gives g1/x */
+	switch (i) {
+	case 0:		/* g1/x */
+		prod = g1 / x;
+		break;
+	case 1:		/* g1 */
+		prod = g1;
+		break;
+	case 2:		/* (x+1)*g1 */
+		prod = (x + one) * g1;
+		break;
+	case 3:		/* (x+1)*(x+2)*g1 */
+		prod = (x + one) * (x + two) * g1;
+		break;
+
+	case 4:		/* (x+1)*(x+2)*((x+3)*g1) */
+		prod = (x + one) * (x + two) * ((x + 3.0) * g1);
+		break;
+	case 5:		/* ((x+2)*(x+3)*g1) * ((x+1)*(x+4)) */
+		qa = (x + two) * (x + 3.0) * g1;
+		qb = (x + one) * (x + 4.0);
+		prod = qa * qb;
+		break;
+	case 6:		/* qa - 2 = (x+1)*(x+4), so this is (x+1)..(x+5)*g1 */
+		qa = (x + two) * (x + 3.0);
+		qb = (x + 5.0) * g1;
+		prod = qa * (qa - two) * qb;
+		break;
+	case 7:		/* qa - 2 = (x+1)*(x+4), so this is (x+1)..(x+6)*g1 */
+		qa = (x + two) * (x + 3.0);
+		qb = (x + 5.0) * (x + 6.0) * g1;
+		prod = qa * (qa - two) * qb;
+		break;
+	}
+	return (prod);
+}
+
+float
+tgammaf(float xf) {
+	float zf;
+	double ss, ww;
+	double x, y, z;
+	int i, j, k, ix, hx, xk;
+	/* hx = raw bits of xf, ix = bits of |xf|; the real work is in double */
+	hx = *(int *) &xf;
+	ix = hx & 0x7fffffff;
+
+	x = (double) xf;
+	if (ix < 0x33800000)
+		return (1.0F / xf);	/* |x| < 2**-24 */
+
+	if (ix >= 0x7f800000)
+		return (xf * ((hx < 0)? 0.0F : xf)); /* +-Inf or NaN */
+
+	if (hx > 0x420C290F)	/* x > 35.040096283... overflow */
+		return ((float) (x / tiny));
+
+	if (hx >= 0x41000000)	/* x >= 8 */
+		return ((float) large_gam(x));
+
+	if (hx > 0) {		/* x from 0 to 8 */
+		i = (int) xf;
+		return ((float) gam_n(i, x - (double) i));
+	}
+
+	/* negative x */
+	/* INDENT OFF */
+	/*
+	 * compute xk =
+	 *	-2 ... x is an even int (-inf is considered even)
+	 *	-1 ... x is an odd int
+	 *	+0 ... x is not an int but chopped to an even int
+	 *	+1 ... x is not an int but chopped to an odd int
+	 */
+	/* INDENT ON */
+	xk = 0;
+	if (ix >= 0x4b000000) {		/* |x| >= 2**23: x is an integer */
+		if (ix >= 0x4b800000)	/* |x| >= 2**24: always even */
+			xk = -2;
+		else			/* ulp is 1: low bit is the parity */
+			xk = -2 + (ix & 1);
+	} else if (ix >= 0x3f800000) {	/* 1 <= |x| < 2**23 */
+		k = (ix >> 23) - 0x7f;	/* unbiased exponent, 0..22 */
+		j = ix >> (23 - k);	/* drop the fraction bits */
+		if ((j << (23 - k)) == ix)
+			xk = -2 + (j & 1);
+		else
+			xk = j & 1;
+	}
+	if (xk < 0) {
+		/* 0/0 invalid NaN, ideally gamma(-n)= (-1)**(n+1) * inf */
+		zf = xf - xf;
+		return (zf / zf);
+	}
+
+	/* negative underflow threshold */
+	if (ix > 0x4224000B) {	/* x < -(41+11ulp) */
+		if (xk == 0)
+			z = -tiny;
+		else
+			z = tiny;
+		return ((float)z);	/* +-tiny underflows as a float */
+	}
+
+	/* INDENT OFF */
+	/* now compute gamma(x) by  -1/((sin(pi*y)/pi)*gamma(1+y)), y = -x */
+	/*
+	 * First compute ss = -sin(pi*y)/pi , so that
+	 * gamma(x) = 1/(ss*gamma(1+y))
+	 */
+	/* INDENT ON */
+	y = -x;
+	j = (int) y;
+	z = y - (double) j;	/* fractional part of y */
+	if (z > 0.3183098861837906715377675)
+		if (z > 0.6816901138162093284622325)
+			ss = kpsin(one - z);
+		else
+			ss = kpcos(0.5 - z);
+	else
+		ss = kpsin(z);
+	if (xk == 0)
+		ss = -ss;
+
+	/* Then compute ww = gamma(1+y)  */
+	if (j < 7)
+		ww = gam_n(j + 1, z);
+	else
+		ww = large_gam(y + one);
+
+	/* return 1/(ss*ww) */
+	return ((float) (one / (ww * ss)));
+}
diff --git a/usr/src/libm/src/m9x/tgammal.c b/usr/src/libm/src/m9x/tgammal.c
new file mode 100644
index 0000000..b0297de
--- /dev/null
+++ b/usr/src/libm/src/m9x/tgammal.c
@@ -0,0 +1,1166 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"@(#)tgammal.c	1.9	06/01/31 SMI"
+
+#if defined(ELFOBJ)
+#pragma weak tgammal = __tgammal
+#endif
+
+#include "libm.h"
+
+#if defined(__sparc)
+#define	H0_WORD(x)	((unsigned *) &x)[0]
+#define	H3_WORD(x)	((unsigned *) &x)[3]
+#define	CHOPPED(x)	(long double) ((double) (x))
+#elif defined(__i386)
+#define	H0_WORD(x)	((((int *) &x)[2] << 16) | \
+			(0x0000ffff & (((unsigned *) &x)[1] >> 15)))
+#define	H3_WORD(x)	((unsigned *) &x)[0]
+#define	CHOPPED(x)	(long double) ((float) (x))
+#else
+#error Unknown architecture
+#endif
+
+struct LDouble {	/* two-part long double value, as returned by GTi() */
+	long double h, l;	/* h: chopped head; l: low-order remainder */
+};
+
+/* INDENT OFF */
+/* Primary interval GTi() */
+static const long double P1[] = {	/* numerator for GT1(), P1[0] first */
+	+0.709086836199777919037185741507610124611513720557L,
+	+4.45754781206489035827915969367354835667391606951e-0001L,
+	+3.21049298735832382311662273882632210062918153852e-0002L,
+	-5.71296796342106617651765245858289197369688864350e-0003L,
+	+6.04666892891998977081619174969855831606965352773e-0003L,
+	+8.99106186996888711939627812174765258822658645168e-0004L,
+	-6.96496846144407741431207008527018441810175568949e-0005L,
+	+1.52597046118984020814225409300131445070213882429e-0005L,
+	+5.68521076168495673844711465407432189190681541547e-0007L,
+	+3.30749673519634895220582062520286565610418952979e-0008L,
+};
+static const long double Q1[] = {	/* denominator for GT1(), Q1[0] first */
+	+1.0L,
+	+1.35806511721671070408570853537257079579490650668e+0000L,
+	+2.97567810153429553405327140096063086994072952961e-0001L,
+	-1.52956835982588571502954372821681851681118097870e-0001L,
+	-2.88248519561420109768781615289082053597954521218e-0002L,
+	+1.03475311719937405219789948456313936302378395955e-0002L,
+	+4.12310203243891222368965360124391297374822742313e-0004L,
+	-3.12653708152290867248931925120380729518332507388e-0004L,
+	+2.36672170850409745237358105667757760527014332458e-0005L,
+};
+static const long double P2[] = {	/* numerator for GT2(), P2[0] first */
+	+0.428486815855585429730209907810650135255270600668084114L,
+	+2.62768479103809762805691743305424077975230551176e-0001L,
+	+3.81187532685392297608310837995193946591425896150e-0002L,
+	+3.00063075891811043820666846129131255948527925381e-0003L,
+	+2.47315407812279164228398470797498649142513408654e-0003L,
+	+3.62838199917848372586173483147214880464782938664e-0004L,
+	+3.43991105975492623982725644046473030098172692423e-0006L,
+	+4.56902151569603272237014240794257659159045432895e-0006L,
+	+2.13734755837595695602045100675540011352948958453e-0007L,
+	+9.74123440547918230781670266967882492234877125358e-0009L,
+};
+static const long double Q2[] = {	/* denominator for GT2(), Q2[0] first */
+	+1.0L,
+	+9.18284118632506842664645516830761489700556179701e-0001L,
+	-6.41430858837830766045202076965923776189154874947e-0003L,
+	-1.24400885809771073213345747437964149775410921376e-0001L,
+	+4.69803798146251757538856567522481979624746875964e-0003L,
+	+7.18309447069495315914284705109868696262662082731e-0003L,
+	-8.75812626987894695112722600697653425786166399105e-0004L,
+	-1.23539972377769277995959339188431498626674835169e-0004L,
+	+3.10019017590151598732360097849672925448587547746e-0005L,
+	-1.77260223349332617658921874288026777465782364070e-0006L,
+};
+static const long double P3[] = {	/* numerator for GT3(), P3[0] first */
+	+0.3824094797345675048502747661075355640070439388902L,
+	+3.42198093076618495415854906335908427159833377774e-0001L,
+	+9.63828189500585568303961406863153237440702754858e-0002L,
+	+8.76069421042696384852462044188520252156846768667e-0003L,
+	+1.86477890389161491224872014149309015261897537488e-0003L,
+	+8.16871354540309895879974742853701311541286944191e-0004L,
+	+6.83783483674600322518695090864659381650125625216e-0005L,
+	-1.10168269719261574708565935172719209272190828456e-0006L,
+	+9.66243228508380420159234853278906717065629721016e-0007L,
+	+2.31858885579177250541163820671121664974334728142e-0008L,
+};
+static const long double Q3[] = {	/* denominator for GT3(), Q3[0] first */
+	+1.0L,
+	+8.25479821168813634632437430090376252512793067339e-0001L,
+	-1.62251363073937769739639623669295110346015576320e-0002L,
+	-1.10621286905916732758745130629426559691187579852e-0001L,
+	+3.48309693970985612644446415789230015515365291459e-0003L,
+	+6.73553737487488333032431261131289672347043401328e-0003L,
+	-7.63222008393372630162743587811004613050245128051e-0004L,
+	-1.35792670669190631476784768961953711773073251336e-0004L,
+	+3.19610150954223587006220730065608156460205690618e-0005L,
+	-1.82096553862822346610109522015129585693354348322e-0006L,
+};
+
+static const long double	/* GZi_h/GZi_l: head and tail parts used by GTi() */
+#if defined(__i386)
+GZ1_h 	=  0.938204627909682449364570100414084663498215377L,
+GZ1_l   =  4.518346116624229420055327632718530617227944106e-20L,
+GZ2_h 	=  0.885603194410888700264725126309883762587560340L,
+GZ2_l   =  1.409077427270497062039119290776508217077297169e-20L,
+GZ3_h 	=  0.936781411463652321613537060640553022494714241L,
+GZ3_l   =  5.309836440284827247897772963887219035221996813e-21L,
+#else
+GZ1_h 	=  0.938204627909682449409753561580326910854647031L,
+GZ1_l   =  4.684412162199460089642452580902345976446297037e-35L,
+GZ2_h 	=  0.885603194410888700278815900582588658192658794L,
+GZ2_l   =  7.501529273890253789219935569758713534641074860e-35L,
+GZ3_h 	=  0.936781411463652321618846897080837818855399840L,
+GZ3_l   =  3.088721217404784363585591914529361687403776917e-35L,
+#endif
+TZ1	= -0.3517214357852935791015625L,	/* linear coefficient in GT1() */
+TZ3	=  0.280530631542205810546875L;		/* linear coefficient in GT3() */
+/* INDENT ON */
+
+/* INDENT OFF */
+/*
+ * compute gamma(y=yh+yl) for y in GT1 = [1.0000, 1.2845]
+ * ...assume yh got 53 or 24(i386) significant bits
+ */
+/* INDENT ON */
+static struct LDouble
+GT1(long double yh, long double yl) {
+	long double y = yh + yl;
+	long double num = P1[8] + y * P1[9], den = Q1[8]; /* Horner seeds */
+	long double lin;
+	struct LDouble res;
+	int k;
+
+	for (k = 7; k >= 0; k--) {
+		den = den * y + Q1[k];
+		num = num * y + P1[k];
+	}
+	num = (y * y) * num / den;
+	num += (TZ1 * yl + GZ1_l);	/* fold in the low-order pieces */
+	lin = TZ1 * yh;
+	res.h = CHOPPED((lin + GZ1_h + num));
+	res.l = num + (lin - (res.h - GZ1_h));	/* what the head left out */
+	return (res);
+}
+
+/* INDENT OFF */
+/*
+ * compute gamma(y=yh+yl) for y in GT2 = [1.2844, 1.6374]
+ * ...assume yh got 53 significant bits
+ */
+/* INDENT ON */
+static struct LDouble
+GT2(long double yh, long double yl) {
+	long double y = yh + yl;
+	long double num = P2[9], den = Q2[9];	/* Horner seeds */
+	struct LDouble res;
+	int k;
+
+	for (k = 8; k >= 0; k--) {
+		den = den * y + Q2[k];
+		num = num * y + P2[k];
+	}
+	num = GZ2_l + (y * y) * num / den;
+	res.h = CHOPPED((GZ2_h + num));
+	res.l = num - (res.h - GZ2_h);	/* what the head left out */
+	return (res);
+}
+
+/* INDENT OFF */
+/*
+ * compute gamma(y=yh+yl) for y in GT3 = [1.6373, 2.0000]
+ * ...assume yh got 53 significant bits
+ */
+/* INDENT ON */
+static struct LDouble
+GT3(long double yh, long double yl) {
+	long double y = yh + yl;
+	long double num = P3[9], den = Q3[9];	/* Horner seeds */
+	long double lin;
+	struct LDouble res;
+	int k;
+
+	for (k = 8; k >= 0; k--) {
+		den = den * y + Q3[k];
+		num = num * y + P3[k];
+	}
+	num = (y * y) * num / den;
+	num += (TZ3 * yl + GZ3_l);	/* fold in the low-order pieces */
+	lin = TZ3 * yh;
+	res.h = CHOPPED((lin + GZ3_h + num));
+	res.l = num + (lin - (res.h - GZ3_h));	/* what the head left out */
+	return (res);
+}
+
+/* INDENT OFF */
+/* Hex value of GP[0] should be 3FB55555 55555555 */
+static const long double GP[] = {	/* 20 entries, GP[0]..GP[19] */
+	+0.083333333333333333333333333333333172839171301L,
+	-2.77777777777777777777777777492501211999399424104e-0003L,
+	+7.93650793650793650793635650541638236350020883243e-0004L,
+	-5.95238095238095238057299772679324503339241961704e-0004L,
+	+8.41750841750841696138422987977683524926142600321e-0004L,
+	-1.91752691752686682825032547823699662178842123308e-0003L,
+	+6.41025641022403480921891559356473451161279359322e-0003L,
+	-2.95506535798414019189819587455577003732808185071e-0002L,
+	+1.79644367229970031486079180060923073476568732136e-0001L,
+	-1.39243086487274662174562872567057200255649290646e+0000L,
+	+1.34025874044417962188677816477842265259608269775e+0001L,
+	-1.56803713480127469414495545399982508700748274318e+0002L,
+	+2.18739841656201561694927630335099313968924493891e+0003L,
+	-3.55249848644100338419187038090925410976237921269e+0004L,
+	+6.43464880437835286216768959439484376449179576452e+0005L,
+	-1.20459154385577014992600342782821389605893904624e+0007L,
+	+2.09263249637351298563934942349749718491071093210e+0008L,
+	-2.96247483183169219343745316433899599834685703457e+0009L,
+	+2.88984933605896033154727626086506756972327292981e+0010L,
+	-1.40960434146030007732838382416230610302678063984e+0011L,	/* 19 */
+};
+
+static const long double T3[] = {	/* values close to 2/3, 2/5, 2/7, ... */
+	+0.666666666666666666666666666666666634567834260213L,	/* T3[0] */
+	+0.400000000000000000000000000040853636176634934140L,	/* T3[1] */
+	+0.285714285714285714285696975252753987869020263448L,	/* T3[2] */
+	+0.222222222222222225593221101192317258554772129875L,	/* T3[3] */
+	+0.181818181817850192105847183461778186703779262916L,	/* T3[4] */
+	+0.153846169861348633757101285952333369222567014596L,	/* T3[5] */
+	+0.133033462889260193922261296772841229985047571265L,	/* T3[6] */
+};
+
+static const long double c[] = {
+0.0L,
+1.0L,
+2.0L,
+0.5L,
+1.0e-4930L,							/* tiny */
+4.18937683105468750000e-01L,					/* hln2pim1_h */
+8.50099203991780329736405617639861397473637783412817152e-07L,	/* hln2pim1_l */
+0.418938533204672741780329736405617639861397473637783412817152L, /* hln2pim1 */
+2.16608493865351192653179168701171875e-02L,			/* ln2_32hi */
+5.96317165397058692545083025235937919875797669127130e-12L,	/* ln2_32lo */
+46.16624130844682903551758979206054839765267053289554989233L,	/* invln2_32 */
+#if defined(__i386)
+1.7555483429044629170023839037639845628291e+03L,		/* overflow */
+#else
+1.7555483429044629170038892160702032034177e+03L,		/* overflow */
+#endif
+};
+
+#define	zero		c[0]
+#define	one		c[1]
+#define	two		c[2]
+#define	half		c[3]
+#define	tiny		c[4]
+#define	hln2pim1_h	c[5]
+#define	hln2pim1_l	c[6]
+#define	hln2pim1	c[7]
+#define	ln2_32hi	c[8]
+#define	ln2_32lo	c[9]
+#define	invln2_32	c[10]
+#define	overflow	c[11]
+
+/*
+ * |exp(r) - (1+r+Et0*r^2+...+Et10*r^12)| <= 2^(-128.88) for |r|<=ln2/64
+ */
+static const long double Et[] = {
+	+5.0000000000000000000e-1L,
+	+1.66666666666666666666666666666828835166292152466e-0001L,
+	+4.16666666666666666666666666666693398646592712189e-0002L,
+	+8.33333333333333333333331748774512601775591115951e-0003L,
+	+1.38888888888888888888888845356011511394764753997e-0003L,
+	+1.98412698412698413237140350092993252684198882102e-0004L,
+	+2.48015873015873016080222025357442659895814371694e-0005L,
+	+2.75573192239028921114572986441972140933432317798e-0006L,
+	+2.75573192239448470555548102895526369739856219317e-0007L,
+	+2.50521677867683935940853997995937600214167232477e-0008L,
+	+2.08767928899010367374984448513685566514152147362e-0009L,
+};
+
+/*
+ * long double precision coefficients for computing log(x)-1 in tgamma.
+ *  See "algorithm" for details
+ *
+ *  log(x) - 1 = T1(n) + T2(j) + T3(s), where x = 2**n * y,  1<=y<2,
+ *  j=[64*y], z[j]=1+j/64+1/128, s = (y-z[j])/(y+z[j]), and
+ *       T1(n) = T1[2n,2n+1] = n*log(2)-1,
+ *       T2(j) = T2[2j,2j+1] = log(z[j]),
+ *       T3(s) = 2s + T3[0]s^3 + T3[1]s^5 + T3[2]s^7 + ... + T3[6]s^15
+ *  Note
+ *  (1) the leading entries are truncated to 24 binary point.
+ *  (2) Remez error for T3(s) is bounded by 2**(-136.54)
+ */
+static const long double T1[] = {
+-1.000000000000000000000000000000000000000000e+00L,
+	+0.000000000000000000000000000000000000000000e+00L,
+-3.068528175354003906250000000000000000000000e-01L,
+-1.904654299957767878541823431924500011926579e-09L,
+	+3.862943053245544433593750000000000000000000e-01L,
+	+5.579533617547508924291635313615100141107647e-08L,
+	+1.079441487789154052734375000000000000000000e+00L,
+	+5.389068187551732136437452970422650211661470e-08L,
+	+1.772588670253753662109375000000000000000000e+00L,
+	+5.198602757555955348583270627230200282215294e-08L,
+	+2.465735852718353271484375000000000000000000e+00L,
+	+5.008137327560178560729088284037750352769117e-08L,
+	+3.158883035182952880859375000000000000000000e+00L,
+	+4.817671897564401772874905940845299849351090e-08L,
+	+3.852030217647552490234375000000000000000000e+00L,
+	+4.627206467568624985020723597652849919904913e-08L,
+	+4.545177400112152099609375000000000000000000e+00L,
+	+4.436741037572848197166541254460399990458737e-08L,
+	+5.238324582576751708984375000000000000000000e+00L,
+	+4.246275607577071409312358911267950061012560e-08L,
+	+5.931471765041351318359375000000000000000000e+00L,
+	+4.055810177581294621458176568075500131566384e-08L,
+};
+
+/*
+ * T2[2i,2i+1] = log(1+i/64+1/128)
+ */
+static const long double T2[] = {
+	+7.7821016311645507812500000000000000000000e-03L,
+	+3.8810890398166212900061136763678127453570e-08L,
+	+2.3167014122009277343750000000000000000000e-02L,
+	+4.5159525100885049160962289916579411752759e-08L,
+	+3.8318812847137451171875000000000000000000e-02L,
+	+5.1454999148021880325123797290345960518164e-08L,
+	+5.3244471549987792968750000000000000000000e-02L,
+	+4.2968824489897120193786528776939573415076e-08L,
+	+6.7950606346130371093750000000000000000000e-02L,
+	+5.5562377378300815277772629414034632394030e-08L,
+	+8.2443654537200927734375000000000000000000e-02L,
+	+1.4673873663533785068668307805914095366600e-08L,
+	+9.6729576587677001953125000000000000000000e-02L,
+	+4.9870874110342446056487463437015041543346e-08L,
+	+1.1081433296203613281250000000000000000000e-01L,
+	+3.3378253981382306169323211928098474801099e-08L,
+	+1.2470346689224243164062500000000000000000e-01L,
+	+1.1608714804222781515380863268491613205318e-08L,
+	+1.3840228319168090820312500000000000000000e-01L,
+	+3.9667438227482200873601649187393160823607e-08L,
+	+1.5191602706909179687500000000000000000000e-01L,
+	+1.4956750178196803424896884511327584958252e-08L,
+	+1.6524952650070190429687500000000000000000e-01L,
+	+4.6394605258578736449277240313729237989366e-08L,
+	+1.7840760946273803710937500000000000000000e-01L,
+	+4.8010080260010025241510941968354682199540e-08L,
+	+1.9139480590820312500000000000000000000000e-01L,
+	+4.7091426329609298807561308873447039132856e-08L,
+	+2.0421552658081054687500000000000000000000e-01L,
+	+1.4847880344628820386196239272213742113867e-08L,
+	+2.1687388420104980468750000000000000000000e-01L,
+	+5.4099564554931589525744347498478964801484e-08L,
+	+2.2937405109405517578125000000000000000000e-01L,
+	+4.9970790654210230725046139871550961365282e-08L,
+	+2.4171990156173706054687500000000000000000e-01L,
+	+3.5325408107597432515913513900103385655073e-08L,
+	+2.5391519069671630859375000000000000000000e-01L,
+	+1.9284247135543573297906606667466299224747e-08L,
+	+2.6596349477767944335937500000000000000000e-01L,
+	+5.3719458497979750926537543389268821141517e-08L,
+	+2.7786844968795776367187500000000000000000e-01L,
+	+1.3154985425144750329234012330820349974537e-09L,
+	+2.8963327407836914062500000000000000000000e-01L,
+	+1.8504673536253893055525668970003860369760e-08L,
+	+3.0126130580902099609375000000000000000000e-01L,
+	+2.4769140784919125538233755492657352680723e-08L,
+	+3.1275570392608642578125000000000000000000e-01L,
+	+6.0778104626049965596883190321597861455475e-09L,
+	+3.2411944866180419921875000000000000000000e-01L,
+	+1.9992407776871920760434987352182336158873e-08L,
+	+3.3535552024841308593750000000000000000000e-01L,
+	+2.1672724744319679579814166199074433006807e-08L,
+	+3.4646672010421752929687500000000000000000e-01L,
+	+4.7241991051621587188425772950711830538414e-08L,
+	+3.5745584964752197265625000000000000000000e-01L,
+	+3.9274281801569759490140904474434669956562e-08L,
+	+3.6832553148269653320312500000000000000000e-01L,
+	+2.9676011119845105154050398826897178765758e-08L,
+	+3.7907832860946655273437500000000000000000e-01L,
+	+2.4325502905656478345631019858881408009210e-08L,
+	+3.8971674442291259765625000000000000000000e-01L,
+	+6.7171126157142136040035208670510556529487e-09L,
+	+4.0024316310882568359375000000000000000000e-01L,
+	+1.0181870233355751019951311700799406124957e-09L,
+	+4.1065990924835205078125000000000000000000e-01L,
+	+1.5736916335153056203175822787661567534220e-08L,
+	+4.2096924781799316406250000000000000000000e-01L,
+	+4.6826136472066367161506795972449857268707e-08L,
+	+4.3117344379425048828125000000000000000000e-01L,
+	+2.1024120852577922478955594998480144051225e-08L,
+	+4.4127452373504638671875000000000000000000e-01L,
+	+3.7069828842770746441661301225362605528786e-08L,
+	+4.5127463340759277343750000000000000000000e-01L,
+	+1.0731865811707192383079012478685922879010e-08L,
+	+4.6117568016052246093750000000000000000000e-01L,
+	+3.4961647705430499925597855358603099030515e-08L,
+	+4.7097969055175781250000000000000000000000e-01L,
+	+2.4667033200046897856056359251373510964634e-08L,
+	+4.8068851232528686523437500000000000000000e-01L,
+	+1.7020465042442243455448011551208861216878e-08L,
+	+4.9030393362045288085937500000000000000000e-01L,
+	+5.4424740957290971159645746860530583309571e-08L,
+	+4.9982786178588867187500000000000000000000e-01L,
+	+7.7705606579463314152470441415126573566105e-09L,
+	+5.0926184654235839843750000000000000000000e-01L,
+	+5.5247449548366574919228323824878565745713e-08L,
+	+5.1860773563385009765625000000000000000000e-01L,
+	+2.8574195534496726996364798698556235730848e-08L,
+	+5.2786707878112792968750000000000000000000e-01L,
+	+1.0839714455426392217778300963558522088193e-08L,
+	+5.3704142570495605468750000000000000000000e-01L,
+	+4.0191927599879229244153832299023744345999e-08L,
+	+5.4613238573074340820312500000000000000000e-01L,
+	+5.1867392242179272209231209163864971792889e-08L,
+	+5.5514144897460937500000000000000000000000e-01L,
+	+5.8565892217715480359515904050170125743178e-08L,
+	+5.6407010555267333984375000000000000000000e-01L,
+	+3.2732129626227634290090190711817681692354e-08L,
+	+5.7291972637176513671875000000000000000000e-01L,
+	+2.7190020372374006726626261068626400393936e-08L,
+	+5.8169168233871459960937500000000000000000e-01L,
+	+5.7295907882911235753725372340709967597394e-08L,
+	+5.9038740396499633789062500000000000000000e-01L,
+	+4.2637180036751291708123598757577783615014e-08L,
+	+5.9900814294815063476562500000000000000000e-01L,
+	+4.6697932764615975024461651502060474048774e-08L,
+	+6.0755521059036254882812500000000000000000e-01L,
+	+3.9634179246672960152791125371893149820625e-08L,
+	+6.1602985858917236328125000000000000000000e-01L,
+	+1.8626341656366315928196700650292529688219e-08L,
+	+6.2443327903747558593750000000000000000000e-01L,
+	+8.9744179151050387440546731199093039879228e-09L,
+	+6.3276666402816772460937500000000000000000e-01L,
+	+5.5428701049364114685035797584887586099726e-09L,
+	+6.4103114604949951171875000000000000000000e-01L,
+	+3.3371431779336851334405392546708949047361e-08L,
+	+6.4922791719436645507812500000000000000000e-01L,
+	+2.9430743363812714969905311122271269100885e-08L,
+	+6.5735805034637451171875000000000000000000e-01L,
+	+2.2361985518423140023245936165514147093250e-08L,
+	+6.6542261838912963867187500000000000000000e-01L,
+	+1.4155960810278217610006660181148303091649e-08L,
+	+6.7342263460159301757812500000000000000000e-01L,
+	+4.0610573702719835388801017264750843477878e-08L,
+	+6.8135917186737060546875000000000000000000e-01L,
+	+5.2940532463479321559568089441735584156689e-08L,
+	+6.8923324346542358398437500000000000000000e-01L,
+	+3.7773385396340539337814603903232796216537e-08L,
+};
+
+/*
+ * S[j],S_trail[j] = 2**(j/32.) for the final computation of exp(t+w)
+ */
+static const long double S[] = {
+#if defined(__i386)
+	+1.0000000000000000000000000e+00L,
+	+1.0218971486541166782081522e+00L,
+	+1.0442737824274138402382006e+00L,
+	+1.0671404006768236181297224e+00L,
+	+1.0905077326652576591003302e+00L,
+	+1.1143867425958925362894369e+00L,
+	+1.1387886347566916536971221e+00L,
+	+1.1637248587775775137938619e+00L,
+	+1.1892071150027210666875674e+00L,
+	+1.2152473599804688780476325e+00L,
+	+1.2418578120734840485256747e+00L,
+	+1.2690509571917332224885722e+00L,
+	+1.2968395546510096659215822e+00L,
+	+1.3252366431597412945939118e+00L,
+	+1.3542555469368927282668852e+00L,
+	+1.3839098819638319548151403e+00L,
+	+1.4142135623730950487637881e+00L,
+	+1.4451808069770466200253470e+00L,
+	+1.4768261459394993113155431e+00L,
+	+1.5091644275934227397133885e+00L,
+	+1.5422108254079408235859630e+00L,
+	+1.5759808451078864864006862e+00L,
+	+1.6104903319492543080837174e+00L,
+	+1.6457554781539648445110730e+00L,
+	+1.6817928305074290860378350e+00L,
+	+1.7186192981224779156032914e+00L,
+	+1.7562521603732994831094730e+00L,
+	+1.7947090750031071864148413e+00L,
+	+1.8340080864093424633989166e+00L,
+	+1.8741676341102999013002103e+00L,
+	+1.9152065613971472938202589e+00L,
+	+1.9571441241754002689657438e+00L,
+#else
+	+1.00000000000000000000000000000000000e+00L,
+	+1.02189714865411667823448013478329942e+00L,
+	+1.04427378242741384032196647873992910e+00L,
+	+1.06714040067682361816952112099280918e+00L,
+	+1.09050773266525765920701065576070789e+00L,
+	+1.11438674259589253630881295691960313e+00L,
+	+1.13878863475669165370383028384151134e+00L,
+	+1.16372485877757751381357359909218536e+00L,
+	+1.18920711500272106671749997056047593e+00L,
+	+1.21524735998046887811652025133879836e+00L,
+	+1.24185781207348404859367746872659561e+00L,
+	+1.26905095719173322255441908103233805e+00L,
+	+1.29683955465100966593375411779245118e+00L,
+	+1.32523664315974129462953709549872168e+00L,
+	+1.35425554693689272829801474014070273e+00L,
+	+1.38390988196383195487265952726519287e+00L,
+	+1.41421356237309504880168872420969798e+00L,
+	+1.44518080697704662003700624147167095e+00L,
+	+1.47682614593949931138690748037404985e+00L,
+	+1.50916442759342273976601955103319352e+00L,
+	+1.54221082540794082361229186209073479e+00L,
+	+1.57598084510788648645527016018190504e+00L,
+	+1.61049033194925430817952066735740067e+00L,
+	+1.64575547815396484451875672472582254e+00L,
+	+1.68179283050742908606225095246642969e+00L,
+	+1.71861929812247791562934437645631244e+00L,
+	+1.75625216037329948311216061937531314e+00L,
+	+1.79470907500310718642770324212778174e+00L,
+	+1.83400808640934246348708318958828892e+00L,
+	+1.87416763411029990132999894995444645e+00L,
+	+1.91520656139714729387261127029583086e+00L,
+	+1.95714412417540026901832225162687149e+00L,
+#endif
+};
+static const long double S_trail[] = {
+#if defined(__i386)
+	+0.0000000000000000000000000e+00L,
+	+2.6327965667180882569382524e-20L,
+	+8.3765863521895191129661899e-20L,
+	+3.9798705777454504249209575e-20L,
+	+1.0668046596651558640993042e-19L,
+	+1.9376009847285360448117114e-20L,
+	+6.7081819456112953751277576e-21L,
+	+1.9711680502629186462729727e-20L,
+	+2.9932584438449523689104569e-20L,
+	+6.8887754153039109411061914e-20L,
+	+6.8002718741225378942847820e-20L,
+	+6.5846917376975403439742349e-20L,
+	+1.2171958727511372194876001e-20L,
+	+3.5625253228704087115438260e-20L,
+	+3.1129551559077560956309179e-20L,
+	+5.7519192396164779846216492e-20L,
+	+3.7900651177865141593101239e-20L,
+	+1.1659262405698741798080115e-20L,
+	+7.1364385105284695967172478e-20L,
+	+5.2631003710812203588788949e-20L,
+	+2.6328853788732632868460580e-20L,
+	+5.4583950085438242788190141e-20L,
+	+9.5803254376938269960718656e-20L,
+	+7.6837733983874245823512279e-21L,
+	+2.4415965910835093824202087e-20L,
+	+2.6052966871016580981769728e-20L,
+	+2.6876456344632553875309579e-21L,
+	+1.2861930155613700201703279e-20L,
+	+8.8166633394037485606572294e-20L,
+	+2.9788615389580190940837037e-20L,
+	+5.2352341619805098677422139e-20L,
+	+5.2578463064010463732242363e-20L,
+#else
+	+0.00000000000000000000000000000000000e+00L,
+	+1.80506787420330954745573333054573786e-35L,
+-9.37452029228042742195756741973083214e-35L,
+-1.59696844729275877071290963023149997e-35L,
+	+9.11249341012502297851168610167248666e-35L,
+-6.50422820697854828723037477525938871e-35L,
+-8.14846884452585113732569176748815532e-35L,
+-5.06621457672180031337233074514290335e-35L,
+-1.35983097468881697374987563824591912e-35L,
+	+9.49742763556319647030771056643324660e-35L,
+-3.28317052317699860161506596533391526e-36L,
+-5.01723570938719041029018653045842895e-35L,
+-2.39147479768910917162283430160264014e-35L,
+-8.35057135763390881529889073794408385e-36L,
+	+7.03675688907326504242173719067187644e-35L,
+-5.18248485306464645753689301856695619e-35L,
+	+9.42224254862183206569211673639406488e-35L,
+-3.96750082539886230916730613021641828e-35L,
+	+7.14352899156330061452327361509276724e-35L,
+	+1.15987125286798512424651783410044433e-35L,
+	+4.69693347835811549530973921320187447e-35L,
+-3.38651317599500471079924198499981917e-35L,
+-8.58731877429824706886865593510387445e-35L,
+-9.60595154874935050318549936224606909e-35L,
+	+9.60973393212801278450755869714178581e-35L,
+	+6.37839792144002843924476144978084855e-35L,
+	+7.79243078569586424945646112516927770e-35L,
+	+7.36133776758845652413193083663393220e-35L,
+-6.47299514791334723003521457561217053e-35L,
+	+8.58747441795369869427879806229522962e-35L,
+	+2.37181542282517483569165122830269098e-35L,
+-3.02689168209611877300459737342190031e-37L,
+#endif
+};
+/* INDENT ON */
+
+/* INDENT OFF */
+/*
+ * return tgamma(x) scaled by 2**-m for 8<=x<1776 using Stirling's formula
+ *     log(G(x)) ~= (x-.5)*(log(x)-1) + .5(log(2*pi)-1) + (1/x)*P(1/(x*x))
+ *                = L1 + L2 + L3,
+ */
+/* INDENT ON */
+static struct LDouble
+large_gam(long double x, int *m) {
+	long double z, t1, t2, t3, z2, t5, w, y, u, r, v;
+	long double t24 = 16777216.0L, p24 = 1.0L / 16777216.0L; /* 2**+-24 */
+	int n2, j2, k, ix, j, i;
+	struct LDouble zz;
+	long double u2, ss_h, ss_l, r_h, w_h, w_l, t4;
+	/* Returns zz with zz.h + zz.l ~ gamma(x) * 2**-(*m); *m is set here. */
+/* INDENT OFF */
+/*
+ * compute ss = ss.h+ss.l = log(x)-1 (see tgamma_log.h for details)
+ *
+ *  log(x) - 1 = T1(n) + T2(j) + T3(s), where x = 2**n * y,  1<=y<2,
+ *  j=[64*y], z[j]=1+j/64+1/128, s = (y-z[j])/(y+z[j]), and
+ *       T1(n) = T1[2n,2n+1] = n*log(2)-1,
+ *       T2(j) = T2[2j,2j+1] = log(z[j]),
+ *       T3(s) = 2s + T3[0]s^3 + T3[1]s^5 + ... + T3[6]s^15
+ *  Note
+ *  (1) the leading entries are truncated to 24 binary point.
+ *  (2) Remez error for T3(s) is bounded by 2**(-72.4)
+ *                                   2**(-24)
+ *                           _________V___________________
+ *               T1(n):     |_________|___________________|
+ *                             _______ ______________________
+ *               T2(j):       |_______|______________________|
+ *                                ____ _______________________
+ *               2s:             |____|_______________________|
+ *                                    __________________________
+ *          +    T3(s)-2s:           |__________________________|
+ *                       -------------------------------------------
+ *                          [leading] + [Trailing]
+ */
+	/* INDENT ON */
+	ix = H0_WORD(x);
+	n2 = (ix >> 16) - 0x3fff;	/* exponent of x, range:3-10 */
+	y = scalbnl(x, -n2);	/* y = scale x to [1,2] */
+	n2 += n2;		/* 2n */
+	j = (ix >> 10) & 0x3f;	/* j: selects the T2 pair used below */
+	z = 1.0078125L + (long double) j * 0.015625L;	/* z[j]=1+j/64+1/128 */
+	j2 = j + j;
+	t1 = y + z;
+	t2 = y - z;
+	r = one / t1;
+	u = r * t2;		/* u = (y-z)/(y+z) */
+	t1 = CHOPPED(t1);
+	t4 = T2[j2 + 1] + T1[n2 + 1];	/* trailing table entries */
+	z2 = u * u;
+	k = H0_WORD(u) & 0x7fffffff;
+	t3 = T2[j2] + T1[n2];	/* leading table entries */
+	for (t5 = T3[6], i = 5; i >= 0; i--)	/* Horner in z2 = u*u */
+		t5 = z2 * t5 + T3[i];
+	if ((k >> 16) < 0x3fec) {	/* |u|<2**-19 */
+		t2 = t4 + u * (two + z2 * t5);
+	} else {
+		t5 = t4 + (u * z2) * t5;
+		u2 = u + u;
+		v = (long double) ((int) (u2 * t24)) * p24; /* 2u cut at 2**-24 */
+		t2 = t5 + r * ((two * t2 - v * t1) - v * (y - (t1 - z)));
+		t3 += v;	/* v joins the leading sum; t2 holds 2u-v */
+	}
+	ss_h = CHOPPED((t2 + t3));
+	ss_l = t2 - (ss_h - t3);	/* (t2 + t3) - ss_h */
+/* INDENT OFF */
+/*
+ * compute ww = (x-.5)*(log(x)-1) + .5*(log(2pi)-1) + 1/x*(P(1/x^2)))
+ * where ss = log(x) - 1 is already in extra precision
+ */
+	/* INDENT ON */
+	z = one / x;
+	r = x - half;
+	r_h = CHOPPED((r));
+	w_h = r_h * ss_h + hln2pim1_h;
+	z2 = z * z;
+	w = (r - r_h) * ss_h + r * ss_l;
+	t1 = GP[19];
+	for (i = 18; i > 0; i--)	/* Horner in z2 over GP[19..1] */
+		t1 = z2 * t1 + GP[i];
+	w += hln2pim1_l;
+	w_l = z * (GP[0] + z2 * t1) + w;
+	k = (int) ((w_h + w_l) * invln2_32 + half);
+
+	/* compute the exponential of w_h+w_l */
+
+	j = k & 0x1f;		/* low 5 bits: index into S[] and S_trail[] */
+	*m = k >> 5;		/* remaining bits: scale left to the caller */
+	t3 = (long double) k;
+
+	/* perform w - k*ln2_32 (represent as w_h - w_l) */
+	t1 = w_h - t3 * ln2_32hi;
+	t2 = t3 * ln2_32lo;
+	w = t2 - w_l;
+	w_h = t1 - w;
+	w_l = w - (t1 - w_h);
+
+	/* compute exp(w_h-w_l) */
+	z = w_h - w_l;
+	for (t1 = Et[10], i = 9; i >= 0; i--)
+		t1 = z * t1 + Et[i];
+	t3 = w_h - (w_l - (z * z) * t1);	/* t3 = expm1(z) */
+	zz.l = S_trail[j] * (one + t3) + S[j] * t3;
+	zz.h = S[j];		/* head is the table value for 2**(j/32) */
+	return (zz);
+}
+
+/* INDENT OFF */
+/*
+ * kpsin(x)= sin(pi*x)/pi
+ *	           3        5        7        9        11                27
+ *	= x+ks[0]*x +ks[1]*x +ks[2]*x +ks[3]*x +ks[4]*x  + ... + ks[12]*x
+ */
+static const long double ks[] = {
+	-1.64493406684822643647241516664602518705158902870e+0000L,
+	+8.11742425283353643637002772405874238094995726160e-0001L,
+	-1.90751824122084213696472111835337366232282723933e-0001L,
+	+2.61478478176548005046532613563241288115395517084e-0002L,
+	-2.34608103545582363750893072647117829448016479971e-0003L,
+	+1.48428793031071003684606647212534027556262040158e-0004L,
+	-6.97587366165638046518462722252768122615952898698e-0006L,
+	+2.53121740413702536928659271747187500934840057929e-0007L,
+	-7.30471182221385990397683641695766121301933621956e-0009L,
+	+1.71653847451163495739958249695549313987973589884e-0010L,
+	-3.34813314714560776122245796929054813458341420565e-0012L,
+	+5.50724992262622033449487808306969135431411753047e-0014L,
+	-7.67678132753577998601234393215802221104236979928e-0016L,
+};
+/* INDENT ON */
+
+/*
+ * assume x is not tiny and positive
+ */
+static struct LDouble
+kpsin(long double x) {
+	struct LDouble res;
+	long double xsq = x * x, cube, tail = ks[12];
+	int k = 12;
+
+	/* Horner: tail = ks[1] + ks[2]*x^2 + ... + ks[12]*x^22 */
+	while (--k > 0)
+		tail = xsq * tail + ks[k];
+	cube = xsq * x;
+	tail *= xsq * cube;		/* tail now starts at the x^5 term */
+	res.h = x;			/* leading term of the series */
+	res.l = cube * ks[0] + tail;	/* ks[0]*x^3 + higher-order terms */
+	return (res);
+}
+
+/* INDENT OFF */
+/*
+ * kpcos(x)= cos(pi*x)/pi
+ *                     2        4        6        8        10        12
+ *	= 1/pi +kc[0]*x +kc[1]*x +kc[2]*x +kc[3]*x +kc[4]*x  +kc[5]*x
+ *
+ *                     2        4        6        8        10            22
+ *	= 1/pi - pi/2*x +kc[0]*x +kc[1]*x +kc[2]*x +kc[3]*x  +...+kc[9]*x
+ *
+ * -pi/2*x*x = (npi_2_h + npi_2_l) * (x_f+x_l)*(x_f+x_l)
+ *	   =  npi_2_h*(x_f+x_l)*(x_f+x_l) + npi_2_l*x*x
+ *	   =  npi_2_h*x_f*x_f + npi_2_h*(x*x-x_f*x_f) + npi_2_l*x*x
+ *	   =  npi_2_h*x_f*x_f + npi_2_h*(x+x_f)*(x-x_f) + npi_2_l*x*x
+ * Here x_f = (long double) (float)x
+ * Note that pi/2(in hex) =
+ *  1.921FB54442D18469898CC51701B839A252049C1114CF98E804177D4C76273644A29
+ * npi_2_h = -pi/2 chopped to 25 bits = -1.921FB50000000000000000000000000 =
+ *  -1.570796310901641845703125000000000 and
+ * npi_2_l =
+ *  -0.0000004442D18469898CC51701B839A252049C1114CF98E804177D4C76273644A29 =
+ *  -.0000000158932547735281966916397514420985846996875529104874722961539 =
+ *  -1.5893254773528196691639751442098584699687552910487472296153e-8
+ * 1/pi(in hex) =
+ *  .517CC1B727220A94FE13ABE8FA9A6EE06DB14ACC9E21C820FF28B1D5EF5DE2B
+ * will be split into:
+ *  one_pi_h = 1/pi chopped to 48 bits = .517CC1B727220000000000...  and
+ *  one_pi_l = .0000000000000A94FE13ABE8FA9A6EE06DB14ACC9E21C820FF28B1D5EF5DE2B
+ */
+
+static const long double
+#if defined(__i386)
+one_pi_h = 0.3183098861481994390487670898437500L,	/* 31 bits */
+one_pi_l = 3.559123248900043690127872406891929148e-11L,
+#else
+one_pi_h = 0.31830988618379052468299050815403461456298828125L,
+one_pi_l = 1.46854777018590994109505931010230912897495334688117e-16L,
+#endif
+npi_2_h = -1.570796310901641845703125000000000L,
+npi_2_l = -1.5893254773528196691639751442098584699687552910e-8L;
+
+static const long double kc[] = {
+	+1.29192819501249250731151312779548918765320728489e+0000L,
+	-4.25027339979557573976029596929319207009444090366e-0001L,
+	+7.49080661650990096109672954618317623888421628613e-0002L,
+	-8.21458866111282287985539464173976555436050215120e-0003L,
+	+6.14202578809529228503205255165761204750211603402e-0004L,
+	-3.33073432691149607007217330302595267179545908740e-0005L,
+	+1.36970959047832085796809745461530865597993680204e-0006L,
+	-4.41780774262583514450246512727201806217271097336e-0008L,
+	+1.14741409212381858820016567664488123478660705759e-0009L,
+	-2.44261236114707374558437500654381006300502749632e-0011L,
+};
+/* INDENT ON */
+
+/*
+ * assume x is not tiny and positive
+ */
+static struct LDouble
+kpcos(long double x) {
+	struct LDouble res;
+	long double xsq = x * x, xf = (long double) ((float) x);
+	long double corr, poly = kc[9], lead, sum;
+	int k = 9;
+
+	/* Horner: poly = kc[0] + kc[1]*x^2 + ... + kc[9]*x^18 */
+	while (--k >= 0)
+		poly = xsq * poly + kc[k];
+	poly = one_pi_l + (xsq * xsq) * poly;
+	/* -pi/2*x^2 without its npi_2_h*xf*xf part, which is added last */
+	corr = npi_2_l * xsq + npi_2_h * (x + xf) * (x - xf);
+	lead = xf * xf * npi_2_h;
+	sum = corr + poly;
+	res.h = one_pi_h;
+	res.l = sum + lead;
+	return (res);
+}
+
+/* INDENT OFF */
+static const long double
+	/* 0.13486180573279076968979393577465291700642511139552429398233 */
+#if defined(__i386)
+t0z1   =  0.1348618057327907696779385054997035808810L,
+t0z1_l =  1.1855430274949336125392717150257379614654e-20L,
+#else
+t0z1   =  0.1348618057327907696897939357746529168654L,
+t0z1_l =  1.4102088588676879418739164486159514674310e-37L,
+#endif
+	/* 0.46163214496836234126265954232572132846819620400644635129599 */
+#if defined(__i386)
+t0z2   =  0.4616321449683623412538115843295472018326L,
+t0z2_l =  8.84795799617412663558532305039261747030640e-21L,
+#else
+t0z2   =  0.46163214496836234126265954232572132343318L,
+t0z2_l =  5.03501162329616380465302666480916271611101e-36L,
+#endif
+	/* 0.81977310110050060178786870492160699631174407846245179119586 */
+#if defined(__i386)
+t0z3   =  0.81977310110050060178773362329351925836817L,
+t0z3_l =  1.350816280877379435658077052534574556256230e-22L
+#else
+t0z3   =  0.8197731011005006017878687049216069516957449L,
+t0z3_l =  4.461599916947014419045492615933551648857380e-35L
+#endif
+;
+/* INDENT ON */
+
+/*
+ * gamma(x+i) for 0 <= x < 1
+ */
+static struct LDouble
+gam_n(int i, long double x) {
+	struct LDouble rr, yy;
+	long double r1, r2, t2, z, xh, xl, yh, yl, zh, z1, z2, zl, x5, wh, wl;
+	/* Returns rr.h + rr.l ~ gamma(x+i), i in 0..7 (no default: else unset) */
+	/* compute yy = gamma(x+1) */
+	if (x > 0.2845L) {
+		if (x > 0.6374L) {
+			r1 = x - t0z3;
+			r2 = CHOPPED((r1 - t0z3_l));
+			t2 = r1 - r2;
+			yy = GT3(r2, t2 - t0z3_l);	/* args: head, tail */
+		} else {
+			r1 = x - t0z2;
+			r2 = CHOPPED((r1 - t0z2_l));
+			t2 = r1 - r2;
+			yy = GT2(r2, t2 - t0z2_l);
+		}
+	} else {
+		r1 = x - t0z1;
+		r2 = CHOPPED((r1 - t0z1_l));
+		t2 = r1 - r2;
+		yy = GT1(r2, t2 - t0z1_l);
+	}
+	/* compute gamma(x+i) = (x+i-1)*...*(x+1)*yy, 0<i<8 */
+	switch (i) {
+	case 0:		/* yy/x */
+		r1 = one / x;
+		xh = CHOPPED((x));	/* x is not tiny */
+		rr.h = CHOPPED(((yy.h + yy.l) * r1));
+		rr.l = r1 * (yy.h - rr.h * xh) - ((r1 * rr.h) * (x - xh) -
+			r1 * yy.l);
+		break;
+	case 1:		/* yy */
+		rr.h = yy.h;
+		rr.l = yy.l;
+		break;
+	case 2:		/* (x+1)*yy */
+		z = x + one;	/* may not be exact */
+		zh = CHOPPED((z));
+		rr.h = zh * yy.h;
+		rr.l = z * yy.l + (x - (zh - one)) * yy.h;
+		break;
+	case 3:		/* (x+2)*(x+1)*yy */
+		z1 = x + one;
+		z2 = x + 2.0L;
+		z = z1 * z2;
+		xh = CHOPPED((z));
+		zh = CHOPPED((z1));
+		xl = (x - (zh - one)) * (z2 + zh) - (xh - zh * (zh + one));
+						/* xh+xl=(x+1)*(x+2) */
+		rr.h = xh * yy.h;
+		rr.l = z * yy.l + xl * yy.h;
+		break;
+
+	case 4:		/* (x+1)*(x+3)*(x+2)*yy */
+		z1 = x + 2.0L;
+		z2 = (x + one) * (x + 3.0L);
+		zh = CHOPPED(z1);
+		zl = x - (zh - 2.0L);
+		xh = CHOPPED(z2);
+		xl = zl * (zh + z1) - (xh - (zh * zh - one));
+						/* xh+xl=(x+1)*(x+3) */
+		/* wh+wl=(x+2)*yy */
+		wh = CHOPPED((z1 * (yy.h + yy.l)));
+		wl = (zl * yy.h + z1 * yy.l) - (wh - zh * yy.h);
+
+		rr.h = xh * wh;
+		rr.l = z2 * wl + xl * wh;
+
+		break;
+	case 5:		/* ((x+1)*(x+4)*(x+2)*(x+3))*yy */
+		z1 = x + 2.0L;
+		z2 = x + 3.0L;
+		z = z1 * z2;
+		zh = CHOPPED((z1));
+		yh = CHOPPED((z));	/* yh+yl = (x+2)(x+3) */
+		yl = (x - (zh - 2.0L)) * (z2 + zh) - (yh - zh * (zh + one));
+		z2 = z - 2.0L;	/* z2 = (x+1)*(x+4) */
+		z *= z2;
+		xh = CHOPPED((z));
+		xl = yl * (z2 + yh) - (xh - yh * (yh - 2.0L));
+		rr.h = xh * yy.h;
+		rr.l = z * yy.l + xl * yy.h;
+		break;
+	case 6:		/* ((x+1)*(x+2)*(x+3)*(x+4)*(x+5))*yy */
+		z1 = x + 2.0L;
+		z2 = x + 3.0L;
+		z = z1 * z2;
+		zh = CHOPPED((z1));
+		yh = CHOPPED((z));	/* yh+yl = (x+2)(x+3) */
+		z1 = x - (zh - 2.0L);	/* z1 reused: (x+2) - zh */
+		yl = z1 * (z2 + zh) - (yh - zh * (zh + one));
+		z2 = z - 2.0L;	/* z2 = (x+1)*(x+4) */
+		x5 = x + 5.0L;
+		z *= z2;
+		xh = CHOPPED(z);
+		zh += 3.0;	/* zh + z1 now equals x+5 */
+		xl = yl * (z2 + yh) - (xh - yh * (yh - 2.0L));
+						/* xh+xl=(x+1)*...*(x+4) */
+		/* wh+wl=(x+5)*yy */
+		wh = CHOPPED((x5 * (yy.h + yy.l)));
+		wl = (z1 * yy.h + x5 * yy.l) - (wh - zh * yy.h);
+		rr.h = wh * xh;
+		rr.l = z * wl + xl * wh;
+		break;
+	case 7:		/* ((x+1)*(x+2)*(x+3)*(x+4)*(x+5)*(x+6))*yy */
+		z1 = x + 3.0L;
+		z2 = x + 4.0L;
+		z = z2 * z1;
+		zh = CHOPPED((z1));
+		yh = CHOPPED((z));	/* yh+yl = (x+3)(x+4) */
+		yl = (x - (zh - 3.0L)) * (z2 + zh) - (yh - (zh * (zh + one)));
+		z1 = x + 6.0L;	/* NOTE(review): z1 is not read after this */
+		z2 = z - 2.0L;	/* z2 = (x+2)*(x+5) */
+		z *= z2;
+		xh = CHOPPED((z));
+		xl = yl * (z2 + yh) - (xh - yh * (yh - 2.0L));
+						/* xh+xl=(x+2)*...*(x+5) */
+		/* wh+wl=(x+1)(x+6)*yy */
+		z2 -= 4.0L;	/* z2 = (x+1)(x+6) */
+		wh = CHOPPED((z2 * (yy.h + yy.l)));
+		wl = (z2 * yy.l + yl * yy.h) - (wh - (yh - 6.0L) * yy.h);
+		rr.h = wh * xh;
+		rr.l = z * wl + xl * wh;
+	}
+	return (rr);
+}
+
+long double
+tgammal(long double x) {
+	struct LDouble ss, ww;
+	long double t, t1, t2, t3, t4, t5, w, y, z, z1, z2, z3, z5;
+	int i, j, m, ix, hx, xk;
+	unsigned lx;
+	/* Gamma function of x; the path taken depends on sign and size of x. */
+	hx = H0_WORD(x);
+	lx = H3_WORD(x);
+	ix = hx & 0x7fffffff;
+	y = x;
+	if (ix < 0x3f8e0000) {	/* |x| < 2**-113 */
+		return (one / x);
+	}
+	if (ix >= 0x7fff0000)
+		return (x * ((hx < 0)? zero : x));	/* Inf or NaN */
+	if (x > overflow)	/* overflow threshold */
+		return (x * 1.0e4932L);
+	if (hx >= 0x40020000) {	/* x >= 8 */
+		ww = large_gam(x, &m);
+		w = ww.h + ww.l;
+		return (scalbnl(w, m));	/* apply the 2**m that large_gam left out */
+	}
+
+	if (hx > 0) {		/* x from 0 to 8 */
+		i = (int) x;
+		ww = gam_n(i, x - (long double) i);	/* integer and fraction parts */
+		return (ww.h + ww.l);
+	}
+	/* INDENT OFF */
+	/* negative x */
+	/*
+	 * compute xk =
+	 *	-2 ... x is an even int (-inf is considered an even #)
+	 *	-1 ... x is an odd int
+	 *	+0 ... x is not an int but chopped to an even int
+	 *	+1 ... x is not an int but chopped to an odd int
+	 */
+	/* INDENT ON */
+	xk = 0;
+#if defined(__i386)
+	if (ix >= 0x403e0000) {	/* x >= 2**63 } */
+		if (ix >= 0x403f0000)
+			xk = -2;
+		else
+			xk = -2 + (lx & 1);
+#else
+	if (ix >= 0x406f0000) {	/* x >= 2**112 */
+		if (ix >= 0x40700000)
+			xk = -2;
+		else
+			xk = -2 + (lx & 1);
+#endif
+	} else if (ix >= 0x3fff0000) {
+		w = -x;
+		t1 = floorl(w);
+		t2 = t1 * half;
+		t3 = floorl(t2);
+		if (t1 == w) {		/* -x is an integer */
+			if (t2 == t3)	/* ... and that integer is even */
+				xk = -2;
+			else
+				xk = -1;
+		} else {
+			if (t2 == t3)
+				xk = 0;
+			else
+				xk = 1;
+		}
+	}
+
+	if (xk < 0) {
+		/* return NaN. Ideally gamma(-n)= (-1)**(n+1) * inf */
+		return (x - x) / (x - x);
+	}
+
+	/*
+	 * negative underflow threshold -(1774+9ulp)
+	 */
+	if (x < -1774.0000000000000000000000000000017749370L) {
+		z = tiny / x;
+		if (xk == 1)
+			z = -z;
+		return (z * tiny);
+	}
+
+	/* INDENT OFF */
+	/*
+	 * now compute gamma(x) by  -1/((sin(pi*y)/pi)*gamma(1+y)), y = -x
+	 */
+	/*
+	 * First compute ss = -sin(pi*y)/pi so that
+	 * gamma(x) = 1/(ss*gamma(1+y))
+	 */
+	/* INDENT ON */
+	y = -x;
+	j = (int) y;
+	z = y - (long double) j;	/* fractional part of y */
+	if (z > 0.3183098861837906715377675L)
+		if (z > 0.6816901138162093284622325L)
+			ss = kpsin(one - z);
+		else
+			ss = kpcos(0.5L - z);
+	else
+		ss = kpsin(z);
+	if (xk == 0) {		/* y chops to an even int: flip the sign */
+		ss.h = -ss.h;
+		ss.l = -ss.l;
+	}
+
+	/* Then compute ww = gamma(1+y), note that result scale to 2**m */
+	m = 0;
+	if (j < 7) {
+		ww = gam_n(j + 1, z);
+	} else {
+		w = y + one;
+		if ((lx & 1) == 0) {	/* y+1 exact (note that y<1775) */
+			ww = large_gam(w, &m);
+		} else {
+			t = w - one;
+			if (t == y) {	/* y+one exact */
+				ww = large_gam(w, &m);
+			} else {	/* use y*gamma(y) */
+				if (j == 7)
+					ww = gam_n(j, z);
+				else
+					ww = large_gam(y, &m);
+				t4 = ww.h + ww.l;
+				t1 = CHOPPED((y));
+				t2 = CHOPPED((t4));
+						/* t4 will not be too large */
+				ww.l = y * (ww.l - (t2 - ww.h)) + (y - t1) * t2;
+				ww.h = t1 * t2;
+			}
+		}
+	}
+
+	/* compute 1/(ss*ww) */
+	t3 = ss.h + ss.l;
+	t4 = ww.h + ww.l;
+	t1 = CHOPPED((t3));
+	t2 = CHOPPED((t4));
+	z1 = ss.l - (t1 - ss.h);	/* (t1,z1) = ss */
+	z2 = ww.l - (t2 - ww.h);	/* (t2,z2) = ww */
+	t3 = t3 * t4;			/* t3 = ss*ww */
+	z3 = one / t3;			/* z3 = 1/(ss*ww) */
+	t5 = t1 * t2;
+	z5 = z1 * t4 + t1 * z2;		/* (t5,z5) = ss*ww */
+	t1 = CHOPPED((t3));		/* (t1,z1) = ss*ww */
+	z1 = z5 - (t1 - t5);
+	t2 = CHOPPED((z3));		/* leading 1/(ss*ww) */
+	z2 = z3 * (t2 * z1 - (one - t2 * t1));
+	z = t2 - z2;
+
+	return (scalbnl(z, -m));	/* ww carried 2**-m, so 1/ww needs 2**-m */
+}
diff --git a/usr/src/libm/src/m9x/trunc.c b/usr/src/libm/src/m9x/trunc.c
new file mode 100644
index 0000000..3797730
--- /dev/null
+++ b/usr/src/libm/src/m9x/trunc.c
@@ -0,0 +1,69 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"@(#)trunc.c	1.4	06/01/31 SMI"
+
+#if defined(ELFOBJ)
+#pragma weak trunc = __trunc
+#endif
+
+#include "libm.h"
+
+/* Return x rounded toward zero to an integral value, keeping its sign. */
+double
+trunc(double x) {
+	union {
+		unsigned i[2];
+		double d;
+	} xx;
+	unsigned hx, sx, i;
+
+	xx.d = x;
+	hx = xx.i[HIWORD] & ~0x80000000;	/* high word of |x| */
+	sx = xx.i[HIWORD] & 0x80000000;		/* sign bit of x */
+	if (hx < 0x43300000) {	/* |x| < 2^52 */
+		if (hx < 0x3ff00000)	/* |x| < 1 */
+			return (sx ? -0.0 : 0.0);
+		/* chop at the integer bit; 1U since 1 << 31 overflows int */
+		if (hx < 0x41300000) {	/* fraction reaches the high word */
+			i = 1U << (0x412 - (hx >> 20));
+			xx.i[HIWORD] &= ~(i | (i - 1));
+			xx.i[LOWORD] = 0;
+		} else {		/* fraction is in the low word only */
+			i = 1U << (0x432 - (hx >> 20));	/* count is 0..31 */
+			xx.i[LOWORD] &= ~(i | (i - 1));
+		}
+		return (xx.d);
+	} else if (hx < 0x7ff00000)	/* finite, |x| >= 2^52: integral */
+		return (x);
+	else				/* inf or nan */
+#if defined(FPADD_TRAPS_INCOMPLETE_ON_NAN)
+		return (hx >= 0x7ff80000 ? x : x + x);
+		/* assumes sparc-like QNaN */
+#else
+		return (x + x);
+#endif
+}
diff --git a/usr/src/libm/src/m9x/truncf.c b/usr/src/libm/src/m9x/truncf.c
new file mode 100644
index 0000000..e2cc454
--- /dev/null
+++ b/usr/src/libm/src/m9x/truncf.c
@@ -0,0 +1,62 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"@(#)truncf.c	1.3	06/01/31 SMI"
+
+#if defined(ELFOBJ)
+#pragma weak truncf = __truncf
+#endif
+
+#include "libm.h"
+
+/* Return x rounded toward zero to an integral value, keeping its sign. */
+float
+truncf(float x) {
+	union {
+		float f;
+		unsigned i;
+	} bits;
+	unsigned mag, neg, frac;
+
+	bits.f = x;
+	neg = bits.i & 0x80000000;	/* sign bit alone */
+	mag = bits.i & 0x7fffffff;	/* bit pattern of |x| */
+	if (mag >= 0x7f800000) {	/* x is inf or nan */
+#if defined(FPADD_TRAPS_INCOMPLETE_ON_NAN)
+		return (mag > 0x7f800000 ? x * x : x + x);
+#else
+		return (x + x);
+#endif
+	}
+	if (mag >= 0x4b000000)		/* |x| >= 2^23: x is integral */
+		return (x);
+	if (mag < 0x3f800000)		/* |x| < 1: zero with the sign of x */
+		return (neg ? -0.0F : 0.0F);
+	/* 1 <= |x| < 2^23: clear the fraction bits below the integer bit */
+	frac = (1U << (0x96 - (mag >> 23))) - 1;
+	bits.i &= ~frac;
+	return (bits.f);
+}
diff --git a/usr/src/libm/src/m9x/truncl.c b/usr/src/libm/src/m9x/truncl.c
new file mode 100644
index 0000000..ba0724a
--- /dev/null
+++ b/usr/src/libm/src/m9x/truncl.c
@@ -0,0 +1,109 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"@(#)truncl.c	1.3	06/01/31 SMI"
+
+#if defined(ELFOBJ)
+#pragma weak truncl = __truncl
+#endif
+
+#include "libm.h"
+
+#if defined(__sparc)
+/*
+ * Return x rounded toward zero to an integral value.  Word 0 of the union
+ * holds the sign and exponent; j counts the fraction bits to clear, and no
+ * mask reaches the sign bit.  Masks use 1U since a signed 1 << 31 (when
+ * j - 64, j - 32 or j is 31) is undefined behavior in C.
+ */
+long double
+truncl(long double x) {
+	union {
+		unsigned i[4];
+		long double q;
+	} xx;
+	unsigned hx, sx;
+	int j;
+
+	xx.q = x;
+	sx = xx.i[0] & 0x80000000;
+	hx = xx.i[0] & ~0x80000000;
+	/* |x| >= 2^112 is already integral; inf and nan go through x + x */
+	if (hx >= 0x406f0000)
+		return (hx >= 0x7fff0000 ? x + x : x);
+
+	/* |x| < 1 truncates to a zero with the sign of x */
+	if (hx < 0x3fff0000)
+		return (sx ? -0.0L : 0.0L);
+
+	j = 0x406f - (hx >> 16);		/* 1 <= j <= 112 */
+	if (j >= 96) {				/* 96 <= j <= 112 */
+		xx.i[0] &= ~((1U << (j - 96)) - 1);
+		xx.i[1] = xx.i[2] = xx.i[3] = 0;
+	} else if (j >= 64) {			/* 64 <= j <= 95 */
+		xx.i[1] &= ~((1U << (j - 64)) - 1);
+		xx.i[2] = xx.i[3] = 0;
+	} else if (j >= 32) {			/* 32 <= j <= 63 */
+		xx.i[2] &= ~((1U << (j - 32)) - 1);
+		xx.i[3] = 0;
+	} else					/* 1 <= j <= 31 */
+		xx.i[3] &= ~((1U << j) - 1);
+	return (xx.q);
+}
+#elif defined(__i386)
+/* Return x rounded toward zero to an integral value, keeping its sign. */
+long double
+truncl(long double x) {
+	union {
+		unsigned i[3];
+		long double e;
+	} xx;
+	int ex, sx;
+	unsigned i;	/* unsigned: an int 1 << 31 would be undefined */
+
+	xx.e = x;
+	ex = xx.i[2] & 0x7fff;		/* biased exponent */
+	sx = xx.i[2] & 0x8000;		/* sign bit */
+	if (ex >= 0x7fff)		/* inf or nan */
+		return (x + x);
+	if (ex >= 0x403e)		/* |x| >= 2^63: x is integral */
+		return (x);
+	if (ex < 0x3fff)		/* |x| < 1: zero with the sign of x */
+		return (sx ? -0.0L : 0.0L);
+
+	if (ex < 0x401e) {		/* fraction reaches word 1 */
+		i = 1U << (0x401d - ex);
+		xx.i[1] &= ~(i | (i - 1));
+		xx.i[0] = 0;
+	} else {			/* fraction lies in word 0 only */
+		i = 1U << (0x403d - ex);	/* shift count is 0..31 */
+		xx.i[0] &= ~(i | (i - 1));
+	}
+	return (xx.e);
+}
+#else
+#error Unknown architecture
+#endif	/* defined(__sparc) || defined(__i386) */