summaryrefslogtreecommitdiff
path: root/usr/src/libm/src/i386/common/exp.s
diff options
context:
space:
mode:
authorIgor Pashev <pashev.igor@gmail.com>2012-09-11 19:12:10 +0400
committerIgor Pashev <pashev.igor@gmail.com>2012-09-11 19:12:10 +0400
commit19700b860d9ec70d01e885d92c3d4f62fd052873 (patch)
treeb6b265cd81fb97b4675f4a285f397479609fdf18 /usr/src/libm/src/i386/common/exp.s
downloadlibm-19700b860d9ec70d01e885d92c3d4f62fd052873.tar.gz
Imported Upstream version 20060131HEADupstream/20060131upstreammaster
Diffstat (limited to 'usr/src/libm/src/i386/common/exp.s')
-rw-r--r--usr/src/libm/src/i386/common/exp.s155
1 files changed, 155 insertions, 0 deletions
diff --git a/usr/src/libm/src/i386/common/exp.s b/usr/src/libm/src/i386/common/exp.s
new file mode 100644
index 0000000..7f5bab6
--- /dev/null
+++ b/usr/src/libm/src/i386/common/exp.s
@@ -0,0 +1,155 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+ .ident "@(#)exp.s 1.10 06/01/23 SMI"
+
+ .file "exp.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(exp,function)
+#include "libm_synonyms.h"
+#include "libm_protos.h"
+
+ ENTRY(exp) / double exp(double x): x at 4(%esp), e**x returned in st
+ movl 8(%esp),%ecx / ecx <-- hi_32(x)
+ andl $0x7fffffff,%ecx / ecx <-- hi_32(|x|)
+ cmpl $0x3fe62e42,%ecx / Is hi_32(|x|) < hi_32(ln(2))?
+ jb .shortcut / If so, |x| < ln(2): take a shortcut.
+ je .check_tail / high words equal: lo_32(x) decides
+ cmpl $0x7ff00000,%ecx / hi_32(|x|) >= hi_32(INF)?
+ jae .not_finite / if so, x is not finite
+.finite_non_special: / Here, ln(2) < |x| < INF
+ fldl 4(%esp) / push x
+ subl $8,%esp / scratch: (%esp) working CW, 4(%esp) saved CW
+ /// overhead of RP save/restore; 63/15
+ fstcw (%esp) /// ; 15/3
+ movw (%esp),%ax /// ; 4/1
+ movw %ax,4(%esp) /// save old RP; 2/1
+ orw $0x0300,%ax /// force 64-bit RP; 2/1
+ movw %ax,(%esp) /// ; 2/1
+ fldcw (%esp) /// ; 19/4
+ fldl2e / push log2e }not for xtndd_dbl
+ fmulp %st,%st(1) / z = x*log2e }not for xtndd_dbl
+ fld %st(0) / duplicate stack top
+ frndint / [z],z
+ fucom / This and the next 3 instructions
+ fstsw %ax / add 10 clocks to runtime of the
+ sahf / main branch, but save about 265
+ je .z_integral / upon detection of integral z.
+ / [z] != z, compute exp(x)
+ fxch / z,[z]
+ fsub %st(1),%st / z-[z],[z]
+ f2xm1 / 2**(z-[z])-1,[z]
+ fld1 / 1,2**(z-[z])-1,[z]
+ faddp %st,%st(1) / 2**(z-[z]) ,[z]
+.merge: / here st = 2**(z-[z]) (or 1), st(1) = integral scale
+ fscale / exp(x) ,[z]
+ fstp %st(1) / drop the scale operand; st = exp(x)
+ fstcw (%esp) / fetch current CW; only its RP bits are replaced
+ movw (%esp),%dx / dx <-- current CW
+ andw $0xfcff,%dx / clear the RP bits (0x0300)
+ movw 4(%esp),%cx / cx <-- CW saved before forcing 64-bit RP
+ andw $0x0300,%cx / keep only the old RP bits
+ orw %dx,%cx / cx <-- current CW with old RP
+ movw %cx,(%esp)
+ fldcw (%esp) /// restore old RP; 19/4
+ fstpl (%esp) / round to double
+ fldl (%esp) / exp(x) rounded to double
+ fxam / determine class of exp(x)
+ add $8,%esp / release the scratch area
+ fstsw %ax / store status in ax
+ andw $0x4500,%ax / keep fxam condition bits C3, C2, C0
+ cmpw $0x0500,%ax / C2 and C0 only: result is infinite
+ je .overflow
+ cmpw $0x4000,%ax / C3 only: result is zero
+ je .underflow
+ ret / otherwise return exp(x) in st
+
+.overflow: / exp(x) rounded to infinity
+ fstp %st(0) / stack empty
+ push %ebp
+ mov %esp,%ebp / frame: 8(%ebp) = lo_32(x), 12(%ebp) = hi_32(x)
+ PIC_SETUP(1)
+ pushl $6 / code passed to _SVID_libm_err on overflow
+ jmp .error
+
+.underflow: / exp(x) rounded to zero
+ fstp %st(0) / stack empty
+ push %ebp
+ mov %esp,%ebp / frame: 8(%ebp) = lo_32(x), 12(%ebp) = hi_32(x)
+ PIC_SETUP(2)
+ pushl $7 / code passed to _SVID_libm_err on underflow
+
+.error: / common tail: push x twice, then call the handler
+ pushl 12(%ebp) / high x
+ pushl 8(%ebp) / low x
+ pushl 12(%ebp) / high x
+ pushl 8(%ebp) / low x
+ call PIC_F(_SVID_libm_err)
+ addl $20,%esp / discard the five words pushed above
+ PIC_WRAPUP
+ leave
+ ret / nothing is loaded here: st is as the call left it
+
+.z_integral: / here, z is integral
+ fstp %st(0) / ,z
+ fld1 / 1,z
+ jmp .merge
+
+.check_tail: / here, hi_32(|x|) = hi_32(ln(2))
+ movl 4(%esp),%edx / edx <-- lo_32(x)
+ cmpl $0xfefa39ef,%edx / Is |x| slightly < ln(2)?
+ ja .finite_non_special / branch if |x| slightly > ln(2)
+.shortcut: / also entered by fall-through when lo_32(x) <= 0xfefa39ef
+ / Here, |x| < ln(2), so |z| = |x*log2(e)| < 1,
+ / whence z is in f2xm1's domain.
+ fldl 4(%esp) / push x
+ fldl2e / push log2e }not for xtndd_dbl
+ fmulp %st,%st(1) / z = x*log2e }not for xtndd_dbl
+ f2xm1 / 2**(x*log2(e))-1 = e**x - 1
+ fld1 / 1,2**(z)-1
+ faddp %st,%st(1) / 2**(z) = e**x
+ ret / CW untouched here; result is not rounded to double
+
+.not_finite: / here, hi_32(|x|) >= 0x7ff00000: x is INF or NaN
+ / Here, flags still have settings from execution of
+ / cmpl $0x7ff00000,%ecx / hi_32(|x|) > hi_32(INF)?
+ ja .NaN_or_pinf / if so, x is NaN; if not, x may be +/- INF
+ movl 4(%esp),%edx / edx <-- lo_32(x)
+ cmpl $0,%edx / lo_32(x) = 0?
+ jne .NaN_or_pinf / if not, x is NaN
+ movl 8(%esp),%eax / eax <-- hi_32(x)
+ andl $0x80000000,%eax / here, x is infinite, but +/-?
+ jz .NaN_or_pinf / branch if x = +INF
+ fldz / Here, x = -inf, so return 0
+ ret
+
+.NaN_or_pinf:
+ / Here, x = NaN or +inf, so load x and return immediately.
+ fldl 4(%esp)
+ fwait
+ ret
+ .align 4
+ SET_SIZE(exp)