1 files changed, 241 insertions, 0 deletions
diff --git a/usr/src/libm/src/m9x/fmaf.c b/usr/src/libm/src/m9x/fmaf.c
new file mode 100644
index 0000000..f0799b7
--- /dev/null
+++ b/usr/src/libm/src/m9x/fmaf.c
@@ -0,0 +1,241 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"@(#)fmaf.c	1.5	06/01/31 SMI"
+
+#if defined(ELFOBJ)
+#pragma weak fmaf = __fmaf
+#endif
+
+#include "libm.h"
+#include "fma.h"
+
+#if defined(__sparc)
+
+/*
+ * fmaf for SPARC: 32-bit single precision, big-endian
+ */
+float
+__fmaf(float x, float y, float z) {
+	union {
+		unsigned i[2];
+		double d;
+	} xy, zz;
+	unsigned u, s;
+	int exy, ez;
+
+	/*
+	 * the following operations can only raise the invalid exception,
+	 * and then only if either x*y is of the form Inf*0 or one of x,
+	 * y, or z is a signaling NaN
+	 */
+	xy.d = (double) x * y;
+	zz.d = (double) z;
+
+	/*
+	 * if the sum xy + z will be exact, just compute it and cast the
+	 * result to float
+	 */
+	exy = (xy.i[0] >> 20) & 0x7ff;
+	ez = (zz.i[0] >> 20) & 0x7ff;
+	if ((ez - exy <= 4 && exy - ez <= 28) || exy == 0x7ff || exy == 0 ||
+		ez == 0x7ff || ez == 0) {
+		return ((float) (xy.d + zz.d));
+	}
+
+	/*
+	 * collapse the tail of the smaller summand into a "sticky bit"
+	 * so that the sum can be computed without error
+	 */
+	if (ez > exy) {
+		if (ez - exy < 31) {
+			u = xy.i[1];
+			s = 2 << (ez - exy);
+			if (u & (s - 1))
+				u |= s;
+			xy.i[1] = u & ~(s - 1);
+		} else if (ez - exy < 51) {
+			u = xy.i[0];
+			s = 1 << (ez - exy - 31);
+			if ((u & (s - 1)) | xy.i[1])
+				u |= s;
+			xy.i[0] = u & ~(s - 1);
+			xy.i[1] = 0;
+		} else {
+			/* collapse all of xy into a single bit */
+			xy.i[0] = (xy.i[0] & 0x80000000) | ((ez - 51) << 20);
+			xy.i[1] = 0;
+		}
+	} else {
+		if (exy - ez < 31) {
+			u = zz.i[1];
+			s = 2 << (exy - ez);
+			if (u & (s - 1))
+				u |= s;
+			zz.i[1] = u & ~(s - 1);
+		} else if (exy - ez < 51) {
+			u = zz.i[0];
+			s = 1 << (exy - ez - 31);
+			if ((u & (s - 1)) | zz.i[1])
+				u |= s;
+			zz.i[0] = u & ~(s - 1);
+			zz.i[1] = 0;
+		} else {
+			/* collapse all of zz into a single bit */
+			zz.i[0] = (zz.i[0] & 0x80000000) | ((exy - 51) << 20);
+			zz.i[1] = 0;
+		}
+	}
+
+	return ((float) (xy.d + zz.d));
+}
+
+#elif defined(__i386)
+
+#if defined(__amd64)
+#define	NI	4
+#else
+#define	NI	3
+#endif
+
+/*
+ * fmaf for x86: 32-bit single precision, little-endian
+ */
+float
+__fmaf(float x, float y, float z) {
+	union {
+		unsigned i[NI];
+		long double e;
+	} xy, zz;
+	unsigned u, s, cwsw, oldcwsw;
+	int exy, ez;
+
+	/* set rounding precision to 64 bits */
+	__fenv_getcwsw(&oldcwsw);
+	cwsw = (oldcwsw & 0xfcffffff) | 0x03000000;
+	__fenv_setcwsw(&cwsw);
+
+	/*
+	 * the following operations can only raise the invalid exception,
+	 * and then only if either x*y is of the form Inf*0 or one of x,
+	 * y, or z is a signaling NaN
+	 */
+	xy.e = (long double) x * y;
+	zz.e = (long double) z;
+
+	/*
+	 * if the sum xy + z will be exact, just compute it and cast the
+	 * result to float
+	 */
+	exy = xy.i[2] & 0x7fff;
+	ez = zz.i[2] & 0x7fff;
+	if ((ez - exy <= 15 && exy - ez <= 39) || exy == 0x7fff || exy == 0 ||
+		ez == 0x7fff || ez == 0) {
+		goto cont;
+	}
+
+	/*
+	 * collapse the tail of the smaller summand into a "sticky bit"
+	 * so that the sum can be computed without error
+	 */
+	if (ez > exy) {
+		if (ez - exy < 31) {
+			u = xy.i[0];
+			s = 2 << (ez - exy);
+			if (u & (s - 1))
+				u |= s;
+			xy.i[0] = u & ~(s - 1);
+		} else if (ez - exy < 62) {
+			u = xy.i[1];
+			s = 1 << (ez - exy - 31);
+			if ((u & (s - 1)) | xy.i[0])
+				u |= s;
+			xy.i[1] = u & ~(s - 1);
+			xy.i[0] = 0;
+		} else {
+			/* collapse all of xy into a single bit */
+			xy.i[0] = 0;
+			xy.i[1] = 0x80000000;
+			xy.i[2] = (xy.i[2] & 0x8000) | (ez - 62);
+		}
+	} else {
+		if (exy - ez < 62) {
+			u = zz.i[1];
+			s = 1 << (exy - ez - 31);
+			if ((u & (s - 1)) | zz.i[0])
+				u |= s;
+			zz.i[1] = u & ~(s - 1);
+			zz.i[0] = 0;
+		} else {
+			/* collapse all of zz into a single bit */
+			zz.i[0] = 0;
+			zz.i[1] = 0x80000000;
+			zz.i[2] = (zz.i[2] & 0x8000) | (exy - 62);
+		}
+	}
+
+cont:
+	xy.e += zz.e;
+
+	/* restore the rounding precision */
+	__fenv_getcwsw(&cwsw);
+	cwsw = (cwsw & 0xfcffffff) | (oldcwsw & 0x03000000);
+	__fenv_setcwsw(&cwsw);
+
+	return ((float) xy.e);
+}
+
+#if 0
+/*
+ * another fmaf for x86: assumes return value will be left in
+ * long double (80-bit double extended) precision
+ */
+long double
+__fmaf(float x, float y, float z) {
+	/*
+	 * Note: This implementation assumes the rounding precision mode
+	 * is set to the default, rounding to 64 bit precision.  If this
+	 * routine must work in non-default rounding precision modes, do
+	 * the following instead:
+	 *
+	 *   long double t;
+	 *
+	 *   <set rp mode to round to 64 bit precision>
+	 *   t = x * y;
+	 *   <restore rp mode>
+	 *   return t + z;
+	 *
+	 * Note that the code to change rounding precision must not alter
+	 * the exception masks or flags, since the product x * y may raise
+	 * an invalid operation exception.
+	 */
+	return ((long double) x * y + z);
+}
+#endif
+
+#else
+#error Unknown architecture
+#endif