summaryrefslogtreecommitdiff
path: root/usr/src/lib/libmvec/common/__vhypot.c
diff options
context:
space:
mode:
Diffstat (limited to 'usr/src/lib/libmvec/common/__vhypot.c')
-rw-r--r--usr/src/lib/libmvec/common/__vhypot.c397
1 files changed, 397 insertions, 0 deletions
diff --git a/usr/src/lib/libmvec/common/__vhypot.c b/usr/src/lib/libmvec/common/__vhypot.c
new file mode 100644
index 0000000000..6a31134eaf
--- /dev/null
+++ b/usr/src/lib/libmvec/common/__vhypot.c
@@ -0,0 +1,397 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/isa_defs.h>
+#include "libm_synonyms.h"
+#include "libm_inlines.h"
+
+#ifdef _LITTLE_ENDIAN
+#define HI(x) *(1+(int*)x)
+#define LO(x) *(unsigned*)x
+#else
+#define HI(x) *(int*)x
+#define LO(x) *(1+(unsigned*)x)
+#endif
+
+#ifdef __RESTRICT
+#define restrict _Restrict
+#else
+#define restrict
+#endif
+
+/* double hypot(double x, double y)
+ *
+ * Method :
+ * 1. Special cases:
+ * x or y is +Inf or -Inf => +Inf
+ * x or y is NaN => QNaN
+ * 2. Computes hypot(x,y):
+ * hypot(x,y) = m * sqrt(xnm * xnm + ynm * ynm)
+ * Where:
+ * m = max(|x|,|y|)
+ * xnm = x * (1/m)
+ * ynm = y * (1/m)
+ *
+ * Compute xnm * xnm + ynm * ynm by simulating
+ * muti-precision arithmetic.
+ *
+ * Accuracy:
+ * Maximum error observed: less than 0.872 ulp after 16.777.216.000
+ * results.
+ */
+
+#define sqrt __sqrt
+
+extern double sqrt(double);
+extern double fabs(double);
+
+static const unsigned long long LCONST[] = {
+0x41b0000000000000ULL, /* D2ON28 = 2 ** 28 */
+0x0010000000000000ULL, /* D2ONM1022 = 2 ** -1022 */
+0x7fd0000000000000ULL /* D2ONP1022 = 2 ** 1022 */
+};
+
+static void
+__vhypot_n(int n, double * restrict px, int stridex, double * restrict py,
+ int stridey, double * restrict pz, int stridez);
+
+#pragma no_inline(__vhypot_n)
+
+#define RETURN(ret) \
+{ \
+ *pz = (ret); \
+ py += stridey; \
+ pz += stridez; \
+ if (n_n == 0) \
+ { \
+ hx0 = HI(px); \
+ hy0 = HI(py); \
+ spx = px; spy = py; spz = pz; \
+ continue; \
+ } \
+ n--; \
+ break; \
+}
+
+void
+__vhypot(int n, double * restrict px, int stridex, double * restrict py,
+ int stridey, double * restrict pz, int stridez)
+{
+ int hx0, hx1, hy0, j0, diff;
+ double x_hi, x_lo, y_hi, y_lo;
+ double scl = 0;
+ double x, y, res;
+ double *spx, *spy, *spz;
+ int n_n;
+ double D2ON28 = ((double*)LCONST)[0]; /* 2 ** 28 */
+ double D2ONM1022 = ((double*)LCONST)[1]; /* 2 **-1022 */
+ double D2ONP1022 = ((double*)LCONST)[2]; /* 2 ** 1022 */
+
+ while (n > 1)
+ {
+ n_n = 0;
+ spx = px;
+ spy = py;
+ spz = pz;
+ hx0 = HI(px);
+ hy0 = HI(py);
+ for (; n > 1 ; n--)
+ {
+ px += stridex;
+ hx0 &= 0x7fffffff;
+ hy0 &= 0x7fffffff;
+
+ if (hx0 >= 0x7fe00000) /* |X| >= 2**1023 or Inf or NaN */
+ {
+ diff = hy0 - hx0;
+ j0 = diff >> 31;
+ j0 = hy0 - (diff & j0);
+ j0 &= 0x7ff00000;
+ x = *(px - stridex);
+ y = *py;
+ x = fabs(x);
+ y = fabs(y);
+ if (j0 >= 0x7ff00000) /* |X| or |Y| = Inf or NaN */
+ {
+ int lx = LO((px - stridex));
+ int ly = LO(py);
+ if (hx0 == 0x7ff00000 && lx == 0) res = x == y ? y : x;
+ else if (hy0 == 0x7ff00000 && ly == 0) res = x == y ? x : y;
+ else res = x + y;
+ RETURN (res)
+ }
+ else
+ {
+ j0 = diff >> 31;
+ if (((diff ^ j0) - j0) < 0x03600000) /* max(|X|,|Y|)/min(|X|,|Y|) < 2**54 */
+ {
+ x *= D2ONM1022;
+ y *= D2ONM1022;
+
+ x_hi = (x + D2ON28) - D2ON28;
+ x_lo = x - x_hi;
+ y_hi = (y + D2ON28) - D2ON28;
+ y_lo = y - y_hi;
+ res = (x_hi * x_hi + y_hi * y_hi);
+ res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo);
+
+ res = sqrt (res);
+
+ res = D2ONP1022 * res;
+ RETURN (res)
+ }
+ else RETURN (x + y)
+ }
+ }
+ if (hy0 >= 0x7fe00000) /* |Y| >= 2**1023 or Inf or NaN */
+ {
+ diff = hy0 - hx0;
+ j0 = diff >> 31;
+ j0 = hy0 - (diff & j0);
+ j0 &= 0x7ff00000;
+ x = *(px - stridex);
+ y = *py;
+ x = fabs(x);
+ y = fabs(y);
+ if (j0 >= 0x7ff00000) /* |X| or |Y| = Inf or NaN */
+ {
+ int lx = LO((px - stridex));
+ int ly = LO(py);
+ if (hx0 == 0x7ff00000 && lx == 0) res = x == y ? y : x;
+ else if (hy0 == 0x7ff00000 && ly == 0) res = x == y ? x : y;
+ else res = x + y;
+ RETURN (res)
+ }
+ else
+ {
+ j0 = diff >> 31;
+ if (((diff ^ j0) - j0) < 0x03600000) /* max(|X|,|Y|)/min(|X|,|Y|) < 2**54 */
+ {
+ x *= D2ONM1022;
+ y *= D2ONM1022;
+
+ x_hi = (x + D2ON28) - D2ON28;
+ x_lo = x - x_hi;
+ y_hi = (y + D2ON28) - D2ON28;
+ y_lo = y - y_hi;
+ res = (x_hi * x_hi + y_hi * y_hi);
+ res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo);
+
+ res = sqrt (res);
+
+ res = D2ONP1022 * res;
+ RETURN (res)
+ }
+ else RETURN (x + y)
+ }
+ }
+
+ hx1 = HI(px);
+
+ if (hx0 < 0x00100000 && hy0 < 0x00100000) /* X and Y are subnormal */
+ {
+ x = *(px - stridex);
+ y = *py;
+
+ x *= D2ONP1022;
+ y *= D2ONP1022;
+
+ x_hi = (x + D2ON28) - D2ON28;
+ x_lo = x - x_hi;
+ y_hi = (y + D2ON28) - D2ON28;
+ y_lo = y - y_hi;
+ res = (x_hi * x_hi + y_hi * y_hi);
+ res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo);
+
+ res = sqrt(res);
+
+ res = D2ONM1022 * res;
+ RETURN (res)
+ }
+
+ hx0 = hx1;
+ py += stridey;
+ pz += stridez;
+ n_n++;
+ hy0 = HI(py);
+ }
+ if (n_n > 0)
+ __vhypot_n (n_n, spx, stridex, spy, stridey, spz, stridez);
+ }
+
+ if (n > 0)
+ {
+ x = *px;
+ y = *py;
+ hx0 = HI(px);
+ hy0 = HI(py);
+
+ hx0 &= 0x7fffffff;
+ hy0 &= 0x7fffffff;
+
+ diff = hy0 - hx0;
+ j0 = diff >> 31;
+ j0 = hy0 - (diff & j0);
+ j0 &= 0x7ff00000;
+
+ if (j0 >= 0x7fe00000) /* max(|X|,|Y|) >= 2**1023 or X or Y = Inf or NaN */
+ {
+ x = fabs(x);
+ y = fabs(y);
+ if (j0 >= 0x7ff00000) /* |X| or |Y| = Inf or NaN */
+ {
+ int lx = LO(px);
+ int ly = LO(py);
+ if (hx0 == 0x7ff00000 && lx == 0) res = x == y ? y : x;
+ else if (hy0 == 0x7ff00000 && ly == 0) res = x == y ? x : y;
+ else res = x + y;
+ *pz = res;
+ return;
+ }
+ else
+ {
+ j0 = diff >> 31;
+ if (((diff ^ j0) - j0) < 0x03600000) /* max(|X|,|Y|)/min(|X|,|Y|) < 2**54 */
+ {
+ x *= D2ONM1022;
+ y *= D2ONM1022;
+
+ x_hi = (x + D2ON28) - D2ON28;
+ x_lo = x - x_hi;
+ y_hi = (y + D2ON28) - D2ON28;
+ y_lo = y - y_hi;
+ res = (x_hi * x_hi + y_hi * y_hi);
+ res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo);
+
+ res = sqrt (res);
+
+ res = D2ONP1022 * res;
+ *pz = res;
+ return;
+ }
+ else
+ {
+ *pz = x + y;
+ return;
+ }
+ }
+ }
+
+ if (j0 < 0x00100000) /* X and Y are subnormal */
+ {
+ x *= D2ONP1022;
+ y *= D2ONP1022;
+
+ x_hi = (x + D2ON28) - D2ON28;
+ x_lo = x - x_hi;
+ y_hi = (y + D2ON28) - D2ON28;
+ y_lo = y - y_hi;
+ res = (x_hi * x_hi + y_hi * y_hi);
+ res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo);
+
+ res = sqrt(res);
+
+ res = D2ONM1022 * res;
+ *pz = res;
+ return;
+ }
+
+ HI(&scl) = (0x7fe00000 - j0);
+
+ x *= scl;
+ y *= scl;
+
+ x_hi = (x + D2ON28) - D2ON28;
+ y_hi = (y + D2ON28) - D2ON28;
+ x_lo = x - x_hi;
+ y_lo = y - y_hi;
+
+ res = (x_hi * x_hi + y_hi * y_hi);
+ res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo);
+
+ res = sqrt(res);
+
+ HI(&scl) = j0;
+
+ res = scl * res;
+ *pz = res;
+ }
+}
+
+static void
+__vhypot_n(int n, double * restrict px, int stridex, double * restrict py,
+ int stridey, double * restrict pz, int stridez)
+{
+ int hx0, hy0, j0, diff0;
+ double x_hi0, x_lo0, y_hi0, y_lo0, scl0 = 0;
+ double x0, y0, res0;
+ double D2ON28 = ((double*)LCONST)[0]; /* 2 ** 28 */
+
+ for(; n > 0 ; n--)
+ {
+ x0 = *px;
+ y0 = *py;
+ hx0 = HI(px);
+ hy0 = HI(py);
+
+ hx0 &= 0x7fffffff;
+ hy0 &= 0x7fffffff;
+
+ diff0 = hy0 - hx0;
+ j0 = diff0 >> 31;
+ j0 = hy0 - (diff0 & j0);
+ j0 &= 0x7ff00000;
+
+ px += stridex;
+ py += stridey;
+
+ HI(&scl0) = (0x7fe00000 - j0);
+
+ x0 *= scl0;
+ y0 *= scl0;
+
+ x_hi0 = (x0 + D2ON28) - D2ON28;
+ y_hi0 = (y0 + D2ON28) - D2ON28;
+ x_lo0 = x0 - x_hi0;
+ y_lo0 = y0 - y_hi0;
+
+ res0 = (x_hi0 * x_hi0 + y_hi0 * y_hi0);
+ res0 += ((x0 + x_hi0) * x_lo0 + (y0 + y_hi0) * y_lo0);
+
+ res0 = sqrt(res0);
+
+ HI(&scl0) = j0;
+
+ res0 = scl0 * res0;
+ *pz = res0;
+
+ pz += stridez;
+ }
+}
+