summaryrefslogtreecommitdiff
path: root/usr/src/libm/src/m9x/fmaf.c
diff options
context:
space:
mode:
Diffstat (limited to 'usr/src/libm/src/m9x/fmaf.c')
-rw-r--r--usr/src/libm/src/m9x/fmaf.c241
1 files changed, 241 insertions, 0 deletions
diff --git a/usr/src/libm/src/m9x/fmaf.c b/usr/src/libm/src/m9x/fmaf.c
new file mode 100644
index 0000000..f0799b7
--- /dev/null
+++ b/usr/src/libm/src/m9x/fmaf.c
@@ -0,0 +1,241 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "@(#)fmaf.c 1.5 06/01/31 SMI"
+
+#if defined(ELFOBJ)
+#pragma weak fmaf = __fmaf
+#endif
+
+#include "libm.h"
+#include "fma.h"
+
+#if defined(__sparc)
+
+/*
+ * fmaf for SPARC: 32-bit single precision, big-endian
+ */
+float
+__fmaf(float x, float y, float z) {
+ union {
+ unsigned i[2];
+ double d;
+ } xy, zz;
+ unsigned u, s;
+ int exy, ez;
+
+ /*
+ * the following operations can only raise the invalid exception,
+ * and then only if either x*y is of the form Inf*0 or one of x,
+ * y, or z is a signaling NaN
+ */
+ xy.d = (double) x * y;
+ zz.d = (double) z;
+
+ /*
+ * if the sum xy + z will be exact, just compute it and cast the
+ * result to float
+ */
+ exy = (xy.i[0] >> 20) & 0x7ff;
+ ez = (zz.i[0] >> 20) & 0x7ff;
+ if ((ez - exy <= 4 && exy - ez <= 28) || exy == 0x7ff || exy == 0 ||
+ ez == 0x7ff || ez == 0) {
+ return ((float) (xy.d + zz.d));
+ }
+
+ /*
+ * collapse the tail of the smaller summand into a "sticky bit"
+ * so that the sum can be computed without error
+ */
+ if (ez > exy) {
+ if (ez - exy < 31) {
+ u = xy.i[1];
+ s = 2 << (ez - exy);
+ if (u & (s - 1))
+ u |= s;
+ xy.i[1] = u & ~(s - 1);
+ } else if (ez - exy < 51) {
+ u = xy.i[0];
+ s = 1 << (ez - exy - 31);
+ if ((u & (s - 1)) | xy.i[1])
+ u |= s;
+ xy.i[0] = u & ~(s - 1);
+ xy.i[1] = 0;
+ } else {
+ /* collapse all of xy into a single bit */
+ xy.i[0] = (xy.i[0] & 0x80000000) | ((ez - 51) << 20);
+ xy.i[1] = 0;
+ }
+ } else {
+ if (exy - ez < 31) {
+ u = zz.i[1];
+ s = 2 << (exy - ez);
+ if (u & (s - 1))
+ u |= s;
+ zz.i[1] = u & ~(s - 1);
+ } else if (exy - ez < 51) {
+ u = zz.i[0];
+ s = 1 << (exy - ez - 31);
+ if ((u & (s - 1)) | zz.i[1])
+ u |= s;
+ zz.i[0] = u & ~(s - 1);
+ zz.i[1] = 0;
+ } else {
+ /* collapse all of zz into a single bit */
+ zz.i[0] = (zz.i[0] & 0x80000000) | ((exy - 51) << 20);
+ zz.i[1] = 0;
+ }
+ }
+
+ return ((float) (xy.d + zz.d));
+}
+
+#elif defined(__i386)
+
+#if defined(__amd64)
+#define NI 4
+#else
+#define NI 3
+#endif
+
+/*
+ * fmaf for x86: 32-bit single precision, little-endian
+ */
+float
+__fmaf(float x, float y, float z) {
+ union {
+ unsigned i[NI];
+ long double e;
+ } xy, zz;
+ unsigned u, s, cwsw, oldcwsw;
+ int exy, ez;
+
+ /* set rounding precision to 64 bits */
+ __fenv_getcwsw(&oldcwsw);
+ cwsw = (oldcwsw & 0xfcffffff) | 0x03000000;
+ __fenv_setcwsw(&cwsw);
+
+ /*
+ * the following operations can only raise the invalid exception,
+ * and then only if either x*y is of the form Inf*0 or one of x,
+ * y, or z is a signaling NaN
+ */
+ xy.e = (long double) x * y;
+ zz.e = (long double) z;
+
+ /*
+ * if the sum xy + z will be exact, just compute it and cast the
+ * result to float
+ */
+ exy = xy.i[2] & 0x7fff;
+ ez = zz.i[2] & 0x7fff;
+ if ((ez - exy <= 15 && exy - ez <= 39) || exy == 0x7fff || exy == 0 ||
+ ez == 0x7fff || ez == 0) {
+ goto cont;
+ }
+
+ /*
+ * collapse the tail of the smaller summand into a "sticky bit"
+ * so that the sum can be computed without error
+ */
+ if (ez > exy) {
+ if (ez - exy < 31) {
+ u = xy.i[0];
+ s = 2 << (ez - exy);
+ if (u & (s - 1))
+ u |= s;
+ xy.i[0] = u & ~(s - 1);
+ } else if (ez - exy < 62) {
+ u = xy.i[1];
+ s = 1 << (ez - exy - 31);
+ if ((u & (s - 1)) | xy.i[0])
+ u |= s;
+ xy.i[1] = u & ~(s - 1);
+ xy.i[0] = 0;
+ } else {
+ /* collapse all of xy into a single bit */
+ xy.i[0] = 0;
+ xy.i[1] = 0x80000000;
+ xy.i[2] = (xy.i[2] & 0x8000) | (ez - 62);
+ }
+ } else {
+ if (exy - ez < 62) {
+ u = zz.i[1];
+ s = 1 << (exy - ez - 31);
+ if ((u & (s - 1)) | zz.i[0])
+ u |= s;
+ zz.i[1] = u & ~(s - 1);
+ zz.i[0] = 0;
+ } else {
+ /* collapse all of zz into a single bit */
+ zz.i[0] = 0;
+ zz.i[1] = 0x80000000;
+ zz.i[2] = (zz.i[2] & 0x8000) | (exy - 62);
+ }
+ }
+
+cont:
+ xy.e += zz.e;
+
+ /* restore the rounding precision */
+ __fenv_getcwsw(&cwsw);
+ cwsw = (cwsw & 0xfcffffff) | (oldcwsw & 0x03000000);
+ __fenv_setcwsw(&cwsw);
+
+ return ((float) xy.e);
+}
+
+#if 0
+/*
+ * another fmaf for x86: assumes return value will be left in
+ * long double (80-bit double extended) precision
+ */
+long double
+__fmaf(float x, float y, float z) {
+ /*
+ * Note: This implementation assumes the rounding precision mode
+ * is set to the default, rounding to 64 bit precision. If this
+ * routine must work in non-default rounding precision modes, do
+ * the following instead:
+ *
+ * long double t;
+ *
+ * <set rp mode to round to 64 bit precision>
+ * t = x * y;
+ * <restore rp mode>
+ * return t + z;
+ *
+ * Note that the code to change rounding precision must not alter
+ * the exception masks or flags, since the product x * y may raise
+ * an invalid operation exception.
+ */
+ return ((long double) x * y + z);
+}
+#endif
+
+#else
+#error Unknown architecture
+#endif