diff options
author | Piotr Jasiukajtis <estibi@me.com> | 2014-02-04 20:31:57 +0100 |
---|---|---|
committer | Dan McDonald <danmcd@omniti.com> | 2014-10-17 18:00:52 -0400 |
commit | 25c28e83beb90e7c80452a7c818c5e6f73a07dc8 (patch) | |
tree | 95cb102e7fb37f52d4b3ec3e44508f352a335ee5 | |
parent | 4e6070e87069f63bef94d8e79c2fc3cab2c1ab6b (diff) | |
download | illumos-gate-25c28e83beb90e7c80452a7c818c5e6f73a07dc8.tar.gz |
693 Opensource replacement of sunwlibm
Reviewed by: Igor Kozhukhov <ikozhukhov@gmail.com>
Reviewed by: Keith M Wesolowski <keith.wesolowski@joyent.com>
Reviewed by: Richard Lowe <richlowe@richlowe.net>
Approved by: Dan McDonald <danmcd@omniti.com>
817 files changed, 177493 insertions, 8 deletions
diff --git a/exception_lists/packaging b/exception_lists/packaging index 5a93d23042..b634364a5f 100644 --- a/exception_lists/packaging +++ b/exception_lists/packaging @@ -44,7 +44,6 @@ usr/include/rpcsvc/daemon_utils.h usr/include/rpcsvc/svc_dg_priv.h usr/include/security/pam_impl.h usr/include/sys/clock_impl.h -usr/include/sys/ieeefp.h usr/include/sys/winlockio.h usr/include/scsi/plugins/ses/vendor/sun_impl.h # diff --git a/usr/src/Makefile.lint b/usr/src/Makefile.lint index b158a923af..284b069442 100644 --- a/usr/src/Makefile.lint +++ b/usr/src/Makefile.lint @@ -19,6 +19,9 @@ # CDDL HEADER END # +# +# Copyright 2011 Nexenta Systems, Inc. All rights reserved. +# # Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. # Copyright (c) 2012 by Delphix. All rights reserved. @@ -396,6 +399,9 @@ COMMON_SUBDIRS = \ lib/libkstat \ lib/liblgrp \ lib/liblm \ + lib/libm \ + lib/libm1 \ + lib/libmvec \ lib/libmalloc \ lib/libmapmalloc \ lib/libmapid \ diff --git a/usr/src/Targetdirs b/usr/src/Targetdirs index 1ccb5c1378..b846dedba8 100644 --- a/usr/src/Targetdirs +++ b/usr/src/Targetdirs @@ -22,6 +22,7 @@ # Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved. # Copyright 2011, Richard Lowe # Copyright (c) 2012 by Delphix. All rights reserved. +# Copyright (c) 2012, Igor Kozhukhov <ikozhukhov@gmail.com> # Copyright 2012 OmniTI Computer Consulting, Inc. All rights reserved. # Copyright (c) 2013 RackTop Systems. # Copyright 2013 Nexenta Systems, Inc. All rights reserved. 
@@ -50,6 +51,7 @@ i386_DIRS= \ /boot/grub \ /boot/grub/bin \ /platform/i86pc \ + /lib/libmvec \ /usr/lib/xen \ /usr/lib/xen/bin @@ -360,10 +362,12 @@ DIRS= \ /usr/share/man/man3ldap \ /usr/share/man/man3lgrp \ /usr/share/man/man3lib \ + /usr/share/man/man3m \ /usr/share/man/man3mail \ /usr/share/man/man3malloc \ /usr/share/man/man3mp \ /usr/share/man/man3mpapi \ + /usr/share/man/man3mvec \ /usr/share/man/man3nsl \ /usr/share/man/man3nvpair \ /usr/share/man/man3pam \ @@ -955,6 +959,9 @@ $(ROOT)/usr/lib/libkmfberder.so:= REALPATH=../../lib/libkmfberder.so.1 $(ROOT)/usr/lib/libkstat.so.1:= REALPATH=../../lib/libkstat.so.1 $(ROOT)/usr/lib/libkstat.so:= REALPATH=../../lib/libkstat.so.1 $(ROOT)/usr/lib/liblddbg.so.4:= REALPATH=../../lib/liblddbg.so.4 +$(ROOT)/usr/lib/libm.so.1:= REALPATH=../../lib/libm.so.1 +$(ROOT)/usr/lib/libm.so.2:= REALPATH=../../lib/libm.so.2 +$(ROOT)/usr/lib/libm.so:= REALPATH=../../lib/libm.so.2 $(ROOT)/usr/lib/libmd.so.1:= REALPATH=../../lib/libmd.so.1 $(ROOT)/usr/lib/libmd.so:= REALPATH=../../lib/libmd.so.1 $(ROOT)/usr/lib/libmd5.so.1:= REALPATH=../../lib/libmd5.so.1 @@ -964,6 +971,8 @@ $(ROOT)/usr/lib/libmeta.so:= REALPATH=../../lib/libmeta.so.1 $(ROOT)/usr/lib/libmp.so.1:= REALPATH=../../lib/libmp.so.1 $(ROOT)/usr/lib/libmp.so.2:= REALPATH=../../lib/libmp.so.2 $(ROOT)/usr/lib/libmp.so:= REALPATH=../../lib/libmp.so.2 +$(ROOT)/usr/lib/libmvec.so.1:= REALPATH=../../lib/libmvec.so.1 +$(ROOT)/usr/lib/libmvec.so:= REALPATH=../../lib/libmvec.so.1 $(ROOT)/usr/lib/libnsl.so.1:= REALPATH=../../lib/libnsl.so.1 $(ROOT)/usr/lib/libnsl.so:= REALPATH=../../lib/libnsl.so.1 $(ROOT)/usr/lib/libnvpair.so.1:= REALPATH=../../lib/libnvpair.so.1 @@ -1074,6 +1083,8 @@ $(ROOT)/usr/lib/llib-lintl.ln:= REALPATH=../../lib/llib-lintl.ln $(ROOT)/usr/lib/llib-lintl:= REALPATH=../../lib/llib-lintl $(ROOT)/usr/lib/llib-lkstat.ln:= REALPATH=../../lib/llib-lkstat.ln $(ROOT)/usr/lib/llib-lkstat:= REALPATH=../../lib/llib-lkstat +$(ROOT)/usr/lib/llib-lm:= 
REALPATH=../../lib/llib-lm +$(ROOT)/usr/lib/llib-lm.ln:= REALPATH=../../lib/llib-lm.ln $(ROOT)/usr/lib/llib-lmd5.ln:= REALPATH=../../lib/llib-lmd5.ln $(ROOT)/usr/lib/llib-lmd5:= REALPATH=../../lib/llib-lmd5 $(ROOT)/usr/lib/llib-lmeta.ln:= REALPATH=../../lib/llib-lmeta.ln @@ -1246,6 +1257,12 @@ $(ROOT)/usr/lib/$(MACH64)/libkstat.so:= \ REALPATH=../../../lib/$(MACH64)/libkstat.so.1 $(ROOT)/usr/lib/$(MACH64)/liblddbg.so.4:= \ REALPATH=../../../lib/$(MACH64)/liblddbg.so.4 +$(ROOT)/usr/lib/$(MACH64)/libm.so.1:= \ + REALPATH=../../../lib/$(MACH64)/libm.so.1 +$(ROOT)/usr/lib/$(MACH64)/libm.so.2:= \ + REALPATH=../../../lib/$(MACH64)/libm.so.2 +$(ROOT)/usr/lib/$(MACH64)/libm.so:= \ + REALPATH=../../../lib/$(MACH64)/libm.so.2 $(ROOT)/usr/lib/$(MACH64)/libmd.so.1:= \ REALPATH=../../../lib/$(MACH64)/libmd.so.1 $(ROOT)/usr/lib/$(MACH64)/libmd.so:= \ @@ -1258,6 +1275,10 @@ $(ROOT)/usr/lib/$(MACH64)/libmp.so.2:= \ REALPATH=../../../lib/$(MACH64)/libmp.so.2 $(ROOT)/usr/lib/$(MACH64)/libmp.so:= \ REALPATH=../../../lib/$(MACH64)/libmp.so.2 +$(ROOT)/usr/lib/$(MACH64)/libmvec.so.1:= \ + REALPATH=../../../lib/$(MACH64)/libmvec.so.1 +$(ROOT)/usr/lib/$(MACH64)/libmvec.so:= \ + REALPATH=../../../lib/$(MACH64)/libmvec.so.1 $(ROOT)/usr/lib/$(MACH64)/libnsl.so.1:= \ REALPATH=../../../lib/$(MACH64)/libnsl.so.1 $(ROOT)/usr/lib/$(MACH64)/libnsl.so:= \ @@ -1430,6 +1451,8 @@ $(ROOT)/usr/lib/$(MACH64)/llib-lintl.ln:= \ REALPATH=../../../lib/$(MACH64)/llib-lintl.ln $(ROOT)/usr/lib/$(MACH64)/llib-lkstat.ln:= \ REALPATH=../../../lib/$(MACH64)/llib-lkstat.ln +$(ROOT)/usr/lib/$(MACH64)/llib-lm.ln:= \ + REALPATH=../../../lib/$(MACH64)/llib-lm.ln $(ROOT)/usr/lib/$(MACH64)/llib-lmd5.ln:= \ REALPATH=../../../lib/$(MACH64)/llib-lmd5.ln $(ROOT)/usr/lib/$(MACH64)/llib-lnsl.ln:= \ @@ -1567,6 +1590,9 @@ SYM.USRLIB= \ /usr/lib/libkstat.so \ /usr/lib/libkstat.so.1 \ /usr/lib/liblddbg.so.4 \ + /usr/lib/libm.so.1 \ + /usr/lib/libm.so.2 \ + /usr/lib/libm.so \ /usr/lib/libmd.so \ /usr/lib/libmd.so.1 \ 
/usr/lib/libmd5.so \ @@ -1576,6 +1602,8 @@ SYM.USRLIB= \ /usr/lib/libmp.so \ /usr/lib/libmp.so.1 \ /usr/lib/libmp.so.2 \ + /usr/lib/libmvec.so.1 \ + /usr/lib/libmvec.so \ /usr/lib/libnsl.so \ /usr/lib/libnsl.so.1 \ /usr/lib/libnvpair.so \ @@ -1684,6 +1712,8 @@ SYM.USRLIB= \ /usr/lib/llib-lintl.ln \ /usr/lib/llib-lkstat \ /usr/lib/llib-lkstat.ln \ + /usr/lib/llib-lm \ + /usr/lib/llib-lm.ln \ /usr/lib/llib-lmd5 \ /usr/lib/llib-lmd5.ln \ /usr/lib/llib-lmeta \ @@ -1813,12 +1843,17 @@ SYM.USRLIB64= \ /usr/lib/$(MACH64)/libkstat.so \ /usr/lib/$(MACH64)/libkstat.so.1 \ /usr/lib/$(MACH64)/liblddbg.so.4 \ + /usr/lib/$(MACH64)/libm.so.1 \ + /usr/lib/$(MACH64)/libm.so.2 \ + /usr/lib/$(MACH64)/libm.so \ /usr/lib/$(MACH64)/libmd.so \ /usr/lib/$(MACH64)/libmd.so.1 \ /usr/lib/$(MACH64)/libmd5.so \ /usr/lib/$(MACH64)/libmd5.so.1 \ /usr/lib/$(MACH64)/libmp.so \ /usr/lib/$(MACH64)/libmp.so.2 \ + /usr/lib/$(MACH64)/libmvec.so.1 \ + /usr/lib/$(MACH64)/libmvec.so \ /usr/lib/$(MACH64)/libnsl.so \ /usr/lib/$(MACH64)/libnsl.so.1 \ /usr/lib/$(MACH64)/libnvpair.so \ @@ -1903,6 +1938,7 @@ SYM.USRLIB64= \ /usr/lib/$(MACH64)/llib-linetutil.ln \ /usr/lib/$(MACH64)/llib-lintl.ln \ /usr/lib/$(MACH64)/llib-lkstat.ln \ + /usr/lib/$(MACH64)/llib-lm.ln \ /usr/lib/$(MACH64)/llib-lmd5.ln \ /usr/lib/$(MACH64)/llib-lnsl.ln \ /usr/lib/$(MACH64)/llib-lnvpair.ln \ diff --git a/usr/src/head/Makefile b/usr/src/head/Makefile index 72e2224afc..d13946c1b9 100644 --- a/usr/src/head/Makefile +++ b/usr/src/head/Makefile @@ -19,6 +19,8 @@ # CDDL HEADER END # # +# Copyright 2011 Nexenta Systems, Inc. All rights reserved. +# # Copyright 2010 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. 
# @@ -49,6 +51,7 @@ HDRS= $($(MACH)_HDRS) $(ATTRDB_HDRS) \ assert.h \ atomic.h \ attr.h \ + complex.h \ config_admin.h \ cpio.h \ crypt.h \ @@ -70,7 +73,9 @@ HDRS= $($(MACH)_HDRS) $(ATTRDB_HDRS) \ execinfo.h \ fatal.h \ fcntl.h \ + fenv.h \ float.h \ + floatingpoint.h \ fmtmsg.h \ fnmatch.h \ ftw.h \ @@ -101,6 +106,7 @@ HDRS= $($(MACH)_HDRS) $(ATTRDB_HDRS) \ locale.h \ macros.h \ malloc.h \ + math.h \ mdmn_changelog.h \ memory.h \ meta.h \ @@ -182,6 +188,7 @@ HDRS= $($(MACH)_HDRS) $(ATTRDB_HDRS) \ tar.h \ termio.h \ termios.h \ + tgmath.h \ thread.h \ thread_db.h \ time.h \ @@ -214,6 +221,8 @@ ISOHDRS = \ ctype_iso.h \ limits_iso.h \ locale_iso.h \ + math_c99.h \ + math_iso.h \ setjmp_iso.h \ signal_iso.h \ stdarg_c99.h \ diff --git a/usr/src/head/complex.h b/usr/src/head/complex.h new file mode 100644 index 0000000000..eaa2da8401 --- /dev/null +++ b/usr/src/head/complex.h @@ -0,0 +1,134 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _COMPLEX_H +#define _COMPLEX_H + +#ifdef __cplusplus +extern "C" { +#endif + +/* #if !defined(__cplusplus) */ + +/* + * Compilation environments for Solaris must provide the _Imaginary datatype + * and the compiler intrinsics _Complex_I and _Imaginary_I + */ +#if defined(__SUNPRO_C) +#define _Complex_I _Complex_I +#define _Imaginary_I _Imaginary_I +#else +#define _Complex_I 1.0fi +#define _Imaginary_I 1.0fi +#endif +#define complex _Complex +#define imaginary _Imaginary +#undef I +#define I _Imaginary_I + +extern float cabsf(float complex); +extern float cargf(float complex); +extern float cimagf(float complex); +extern float crealf(float complex); +extern float complex cacosf(float complex); +extern float complex cacoshf(float complex); +extern float complex casinf(float complex); +extern float complex casinhf(float complex); +extern float complex catanf(float complex); +extern float complex catanhf(float complex); +extern float complex ccosf(float complex); +extern float complex ccoshf(float complex); +extern float complex cexpf(float complex); +extern float complex clogf(float complex); +extern float complex conjf(float complex); +extern float complex cpowf(float complex, float complex); +extern float complex cprojf(float complex); +extern float complex csinf(float complex); +extern float complex csinhf(float complex); +extern float complex csqrtf(float complex); +extern float complex ctanf(float complex); +extern float complex ctanhf(float complex); + +extern double cabs(double complex); +extern double carg(double complex); +extern double cimag(double complex); +extern double creal(double complex); +extern double complex cacos(double complex); +extern double complex cacosh(double complex); +extern double complex casin(double complex); +extern double complex casinh(double complex); +extern double complex catan(double complex); +extern double complex catanh(double complex); +extern double complex ccos(double complex); +extern double complex 
ccosh(double complex); +extern double complex cexp(double complex); +#if defined(__PRAGMA_REDEFINE_EXTNAME) +#pragma redefine_extname clog __clog +#else +#undef clog +#define clog __clog +#endif +extern double complex clog(double complex); +extern double complex conj(double complex); +extern double complex cpow(double complex, double complex); +extern double complex cproj(double complex); +extern double complex csin(double complex); +extern double complex csinh(double complex); +extern double complex csqrt(double complex); +extern double complex ctan(double complex); +extern double complex ctanh(double complex); + +extern long double cabsl(long double complex); +extern long double cargl(long double complex); +extern long double cimagl(long double complex); +extern long double creall(long double complex); +extern long double complex cacoshl(long double complex); +extern long double complex cacosl(long double complex); +extern long double complex casinhl(long double complex); +extern long double complex casinl(long double complex); +extern long double complex catanhl(long double complex); +extern long double complex catanl(long double complex); +extern long double complex ccoshl(long double complex); +extern long double complex ccosl(long double complex); +extern long double complex cexpl(long double complex); +extern long double complex clogl(long double complex); +extern long double complex conjl(long double complex); +extern long double complex cpowl(long double complex, long double complex); +extern long double complex cprojl(long double complex); +extern long double complex csinhl(long double complex); +extern long double complex csinl(long double complex); +extern long double complex csqrtl(long double complex); +extern long double complex ctanhl(long double complex); +extern long double complex ctanl(long double complex); + +/* #endif */ /* !defined(__cplusplus) */ +#ifdef __cplusplus +} +#endif + +#endif /* _COMPLEX_H */ diff --git a/usr/src/head/fenv.h 
b/usr/src/head/fenv.h new file mode 100644 index 0000000000..66a2163b6d --- /dev/null +++ b/usr/src/head/fenv.h @@ -0,0 +1,247 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _FENV_H +#define _FENV_H + +#include <sys/feature_tests.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef __P +#ifdef __STDC__ +#define __P(p) p +#else +#define __P(p) () +#endif +#endif /* !defined(__P) */ + +/* + * Rounding modes + */ +#if defined(__sparc) + +#define FE_TONEAREST 0 +#define FE_TOWARDZERO 1 +#define FE_UPWARD 2 +#define FE_DOWNWARD 3 + +#elif defined(__i386) || defined(__amd64) + +#define FE_TONEAREST 0 +#define FE_DOWNWARD 1 +#define FE_UPWARD 2 +#define FE_TOWARDZERO 3 + +#endif + +extern int fegetround __P((void)); +extern int fesetround __P((int)); + +#if (defined(__i386) || defined(__amd64)) && \ + (!defined(_STRICT_STDC) || defined(__EXTENSIONS__)) + +#define FE_FLTPREC 0 +#define FE_DBLPREC 2 +#define FE_LDBLPREC 3 + +extern int fegetprec __P((void)); +extern int fesetprec __P((int)); + +#endif + +/* + * Exception flags + */ +#if defined(__sparc) + +#define FE_INEXACT 0x01 +#define FE_DIVBYZERO 0x02 +#define FE_UNDERFLOW 0x04 +#define FE_OVERFLOW 0x08 +#define FE_INVALID 0x10 +#define FE_ALL_EXCEPT 0x1f + +#elif defined(__i386) || defined(__amd64) + +#define FE_INVALID 0x01 +#define FE_DIVBYZERO 0x04 +#define FE_OVERFLOW 0x08 +#define FE_UNDERFLOW 0x10 +#define FE_INEXACT 0x20 +#define FE_ALL_EXCEPT 0x3d + +#endif + +typedef int fexcept_t; + +extern int feclearexcept __P((int)); +extern int feraiseexcept __P((int)); +extern int fetestexcept __P((int)); +extern int fegetexceptflag __P((fexcept_t *, int)); +extern int fesetexceptflag __P((const fexcept_t *, int)); + +#if !defined(_STRICT_STDC) || defined(__EXTENSIONS__) + +/* + * Exception handling extensions + */ +#define FEX_NOHANDLER -1 +#define FEX_NONSTOP 0 +#define FEX_ABORT 1 +#define FEX_SIGNAL 2 +#define FEX_CUSTOM 3 + +#define FEX_INEXACT 0x001 +#define FEX_DIVBYZERO 0x002 +#define FEX_UNDERFLOW 0x004 +#define FEX_OVERFLOW 0x008 +#define FEX_INV_ZDZ 0x010 +#define FEX_INV_IDI 0x020 +#define FEX_INV_ISI 0x040 +#define FEX_INV_ZMI 0x080 +#define FEX_INV_SQRT 
0x100 +#define FEX_INV_SNAN 0x200 +#define FEX_INV_INT 0x400 +#define FEX_INV_CMP 0x800 +#define FEX_INVALID 0xff0 +#define FEX_COMMON (FEX_INVALID | FEX_DIVBYZERO | FEX_OVERFLOW) +#define FEX_ALL (FEX_COMMON | FEX_UNDERFLOW | FEX_INEXACT) +#define FEX_NONE 0 + +#define FEX_NUM_EXC 12 + +/* structure to hold a numeric value in any format used by the FPU */ +typedef struct { + enum fex_nt { + fex_nodata = 0, + fex_int = 1, + fex_llong = 2, + fex_float = 3, + fex_double = 4, + fex_ldouble = 5 + } type; + union { + int i; +#if !defined(_STRICT_STDC) && !defined(_NO_LONGLONG) || defined(_STDC_C99) || \ + defined(__C99FEATURES__) + long long l; +#else + struct { + int l[2]; + } l; +#endif + float f; + double d; + long double q; + } val; +} fex_numeric_t; + +/* structure to supply information about an exception to a custom handler */ +typedef struct { + enum fex_op { + fex_add = 0, + fex_sub = 1, + fex_mul = 2, + fex_div = 3, + fex_sqrt = 4, + fex_cnvt = 5, + fex_cmp = 6, + fex_other = 7 + } op; /* operation that caused the exception */ + int flags; /* flags to be set */ + fex_numeric_t op1, op2, res; /* operands and result */ +} fex_info_t; + +typedef struct fex_handler_data { + int __mode; + void (*__handler)(); +} fex_handler_t[FEX_NUM_EXC]; + +extern int fex_get_handling __P((int)); +extern int fex_set_handling __P((int, int, void (*)())); + +extern void fex_getexcepthandler __P((fex_handler_t *, int)); +extern void fex_setexcepthandler __P((const fex_handler_t *, int)); + +#ifdef __STDC__ +#include <stdio_tag.h> +#ifndef _FILEDEFED +#define _FILEDEFED +typedef __FILE FILE; +#endif +#endif +extern FILE *fex_get_log __P((void)); +extern int fex_set_log __P((FILE *)); +extern int fex_get_log_depth __P((void)); +extern int fex_set_log_depth __P((int)); +extern void fex_log_entry __P((const char *)); + +#define __fex_handler_t fex_handler_t + +#else + +typedef struct { + int __mode; + void (*__handler)(); +} __fex_handler_t[12]; + +#endif /* !defined(_STRICT_STDC) || 
defined(__EXTENSIONS__) */ + +/* + * Environment as a whole + */ +typedef struct { + __fex_handler_t __handlers; + unsigned long __fsr; +} fenv_t; + +#ifdef __STDC__ +extern const fenv_t __fenv_dfl_env; +#else +extern fenv_t __fenv_dfl_env; +#endif + +#define FE_DFL_ENV (&__fenv_dfl_env) + +extern int fegetenv __P((fenv_t *)); +extern int fesetenv __P((const fenv_t *)); +extern int feholdexcept __P((fenv_t *)); +extern int feupdateenv __P((const fenv_t *)); + +#if !defined(_STRICT_STDC) || defined(__EXTENSIONS__) +extern void fex_merge_flags __P((const fenv_t *)); +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* _FENV_H */ diff --git a/usr/src/head/floatingpoint.h b/usr/src/head/floatingpoint.h new file mode 100644 index 0000000000..c774303e65 --- /dev/null +++ b/usr/src/head/floatingpoint.h @@ -0,0 +1,212 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* Copyright (C) 1989 AT&T */ +/* All Rights Reserved */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _FLOATINGPOINT_H +#define _FLOATINGPOINT_H + +#ifdef __STDC__ +#include <stdio_tag.h> +#endif +#include <sys/ieeefp.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * <floatingpoint.h> contains definitions for constants, types, variables, + * and functions for: + * IEEE floating-point arithmetic base conversion; + * IEEE floating-point arithmetic modes; + * IEEE floating-point arithmetic exception handling. + */ + +#ifndef __P +#ifdef __STDC__ +#define __P(p) p +#else +#define __P(p) () +#endif +#endif /* !defined(__P) */ + +#if defined(__STDC__) && !defined(_FILEDEFED) +#define _FILEDEFED +typedef __FILE FILE; +#endif + +#define N_IEEE_EXCEPTION 5 /* Number of floating-point exceptions. */ + +typedef int sigfpe_code_type; /* Type of SIGFPE code. */ + +typedef void (*sigfpe_handler_type)(); /* Pointer to exception handler */ + +#define SIGFPE_DEFAULT (void (*)())0 /* default exception handling */ +#define SIGFPE_IGNORE (void (*)())1 /* ignore this exception or code */ +#define SIGFPE_ABORT (void (*)())2 /* force abort on exception */ + +extern sigfpe_handler_type sigfpe __P((sigfpe_code_type, sigfpe_handler_type)); + +/* + * Types for IEEE floating point. + */ +typedef float single; + +#ifndef _EXTENDED +#define _EXTENDED +typedef unsigned extended[3]; +#endif + +typedef long double quadruple; /* Quadruple-precision type. */ + +typedef unsigned fp_exception_field_type; + /* + * A field containing fp_exceptions OR'ed + * together. + */ +/* + * Definitions for base conversion. + */ +#define DECIMAL_STRING_LENGTH 512 /* Size of buffer in decimal_record. */ + +typedef char decimal_string[DECIMAL_STRING_LENGTH]; + /* Decimal significand. */ + +typedef struct { + enum fp_class_type fpclass; + int sign; + int exponent; + decimal_string ds; /* Significand - each char contains an ascii */ + /* digit, except the string-terminating */ + /* ascii null. 
*/ + int more; /* On conversion from decimal to binary, != 0 */ + /* indicates more non-zero digits following */ + /* ds. */ + int ndigits; /* On fixed_form conversion from binary to */ + /* decimal, contains number of digits */ + /* required for ds. */ +} decimal_record; + +enum decimal_form { + fixed_form, /* Fortran F format: ndigits specifies number */ + /* of digits after point; if negative, */ + /* specifies rounding to occur to left of */ + /* point. */ + floating_form /* Fortran E format: ndigits specifies number */ + /* of significant digits. */ +}; + +typedef struct { + enum fp_direction_type rd; + /* Rounding direction. */ + enum decimal_form df; /* Format for conversion from binary to */ + /* decimal. */ + int ndigits; /* Number of digits for conversion. */ +} decimal_mode; + +enum decimal_string_form { /* Valid decimal number string formats. */ + invalid_form, /* Not a valid decimal string format. */ + whitespace_form, /* All white space - valid in Fortran! */ + fixed_int_form, /* <digs> */ + fixed_intdot_form, /* <digs>. 
*/ + fixed_dotfrac_form, /* .<digs> */ + fixed_intdotfrac_form, /* <digs>.<frac> */ + floating_int_form, /* <digs><exp> */ + floating_intdot_form, /* <digs>.<exp> */ + floating_dotfrac_form, /* .<digs><exp> */ + floating_intdotfrac_form, /* <digs>.<digs><exp> */ + inf_form, /* inf */ + infinity_form, /* infinity */ + nan_form, /* nan */ + nanstring_form /* nan(string) */ +}; + +extern void single_to_decimal __P((single *, decimal_mode *, decimal_record *, + fp_exception_field_type *)); +extern void double_to_decimal __P((double *, decimal_mode *, decimal_record *, + fp_exception_field_type *)); +extern void extended_to_decimal __P((extended *, decimal_mode *, + decimal_record *, fp_exception_field_type *)); +extern void quadruple_to_decimal __P((quadruple *, decimal_mode *, + decimal_record *, fp_exception_field_type *)); + +extern void decimal_to_single __P((single *, decimal_mode *, decimal_record *, + fp_exception_field_type *)); +extern void decimal_to_double __P((double *, decimal_mode *, decimal_record *, + fp_exception_field_type *)); +extern void decimal_to_extended __P((extended *, decimal_mode *, + decimal_record *, fp_exception_field_type *)); +extern void decimal_to_quadruple __P((quadruple *, decimal_mode *, + decimal_record *, fp_exception_field_type *)); + +extern void string_to_decimal __P((char **, int, int, decimal_record *, + enum decimal_string_form *, char **)); +extern void func_to_decimal __P((char **, int, int, decimal_record *, + enum decimal_string_form *, char **, + int (*)(void), int *, int (*)(int))); +extern void file_to_decimal __P((char **, int, int, decimal_record *, + enum decimal_string_form *, char **, + FILE *, int *)); + +extern char *seconvert __P((single *, int, int *, int *, char *)); +extern char *sfconvert __P((single *, int, int *, int *, char *)); +extern char *sgconvert __P((single *, int, int, char *)); +extern char *econvert __P((double, int, int *, int *, char *)); +extern char *fconvert __P((double, int, int *, int 
*, char *)); +extern char *gconvert __P((double, int, int, char *)); +extern char *qeconvert __P((quadruple *, int, int *, int *, char *)); +extern char *qfconvert __P((quadruple *, int, int *, int *, char *)); +extern char *qgconvert __P((quadruple *, int, int, char *)); + +extern char *ecvt __P((double, int, int *, int *)); +extern char *fcvt __P((double, int, int *, int *)); +extern char *gcvt __P((double, int, char *)); + +#if __cplusplus >= 199711L +namespace std { +#endif +/* + * ANSI C Standard says the following entry points should be + * prototyped in <stdlib.h>. They are now, but weren't before. + */ +extern double atof __P((const char *)); +extern double strtod __P((const char *, char **)); +#if __cplusplus >= 199711L +} + +using std::atof; +using std::strtod; +#endif /* end of namespace std */ + +#ifdef __cplusplus +} +#endif + +#endif /* _FLOATINGPOINT_H */ diff --git a/usr/src/head/iso/math_c99.h b/usr/src/head/iso/math_c99.h new file mode 100644 index 0000000000..72c4b30fc0 --- /dev/null +++ b/usr/src/head/iso/math_c99.h @@ -0,0 +1,530 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 
+ */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _ISO_MATH_C99_H +#define _ISO_MATH_C99_H + +#include <sys/isa_defs.h> +#include <sys/feature_tests.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef __P +#ifdef __STDC__ +#define __P(p) p +#else +#define __P(p) () +#endif +#endif /* !defined(__P) */ + +#if defined(_STDC_C99) || _XOPEN_SOURCE - 0 >= 600 || defined(__C99FEATURES__) +#if defined(__GNUC__) +#undef HUGE_VAL +#define HUGE_VAL (__builtin_huge_val()) +#undef HUGE_VALF +#define HUGE_VALF (__builtin_huge_valf()) +#undef HUGE_VALL +#define HUGE_VALL (__builtin_huge_vall()) +#undef INFINITY +#define INFINITY (__builtin_inff()) +#undef NAN +#define NAN (__builtin_nanf("")) + +/* + * C99 7.12.3 classification macros + */ +#undef isnan +#undef isinf +#if __GNUC__ >= 4 +#define isnan(x) __builtin_isnan(x) +#define isinf(x) __builtin_isinf(x) +#else +#define isnan(x) __extension__( \ + { __typeof(x) __x_n = (x); \ + __builtin_isunordered(__x_n, __x_n); }) +#define isinf(x) __extension__( \ + { __typeof(x) __x_i = (x); \ + __x_i == (__typeof(__x_i)) INFINITY || \ + __x_i == (__typeof(__x_i)) (-INFINITY); }) +#endif +#undef isfinite +#define isfinite(x) __extension__( \ + { __typeof(x) __x_f = (x); \ + !isnan(__x_f) && !isinf(__x_f); }) +#undef isnormal +#define isnormal(x) __extension__( \ + { __typeof(x) __x_r = (x); isfinite(__x_r) && \ + (sizeof (__x_r) == sizeof (float) ? \ + __builtin_fabsf(__x_r) >= __FLT_MIN__ : \ + sizeof (__x_r) == sizeof (double) ? \ + __builtin_fabs(__x_r) >= __DBL_MIN__ : \ + __builtin_fabsl(__x_r) >= __LDBL_MIN__); }) +#undef fpclassify +#define fpclassify(x) __extension__( \ + { __typeof(x) __x_c = (x); \ + isnan(__x_c) ? FP_NAN : \ + isinf(__x_c) ? FP_INFINITE : \ + isnormal(__x_c) ? FP_NORMAL : \ + __x_c == (__typeof(__x_c)) 0 ? 
FP_ZERO : \ + FP_SUBNORMAL; }) +#undef signbit +#if defined(_BIG_ENDIAN) +#define signbit(x) __extension__( \ + { __typeof(x) __x_s = (x); \ + (int) (*(unsigned *) &__x_s >> 31); }) +#elif defined(_LITTLE_ENDIAN) +#define signbit(x) __extension__( \ + { __typeof(x) __x_s = (x); \ + (sizeof (__x_s) == sizeof (float) ? \ + (int) (*(unsigned *) &__x_s >> 31) : \ + sizeof (__x_s) == sizeof (double) ? \ + (int) (((unsigned *) &__x_s)[1] >> 31) : \ + (int) (((unsigned short *) &__x_s)[4] >> 15)); }) +#endif + +/* + * C99 7.12.14 comparison macros + */ +#undef isgreater +#define isgreater(x, y) __builtin_isgreater(x, y) +#undef isgreaterequal +#define isgreaterequal(x, y) __builtin_isgreaterequal(x, y) +#undef isless +#define isless(x, y) __builtin_isless(x, y) +#undef islessequal +#define islessequal(x, y) __builtin_islessequal(x, y) +#undef islessgreater +#define islessgreater(x, y) __builtin_islessgreater(x, y) +#undef isunordered +#define isunordered(x, y) __builtin_isunordered(x, y) +#else /* defined(__GNUC__) */ +#undef HUGE_VAL +#define HUGE_VAL __builtin_huge_val +#undef HUGE_VALF +#define HUGE_VALF __builtin_huge_valf +#undef HUGE_VALL +#define HUGE_VALL __builtin_huge_vall +#undef INFINITY +#define INFINITY __builtin_infinity +#undef NAN +#define NAN __builtin_nan + +/* + * C99 7.12.3 classification macros + */ +#undef fpclassify +#define fpclassify(x) __builtin_fpclassify(x) +#undef isfinite +#define isfinite(x) __builtin_isfinite(x) +#undef isinf +#define isinf(x) __builtin_isinf(x) +#undef isnan +#define isnan(x) __builtin_isnan(x) +#undef isnormal +#define isnormal(x) __builtin_isnormal(x) +#undef signbit +#define signbit(x) __builtin_signbit(x) + +/* + * C99 7.12.14 comparison macros + */ +#undef isgreater +#define isgreater(x, y) ((x) __builtin_isgreater(y)) +#undef isgreaterequal +#define isgreaterequal(x, y) ((x) __builtin_isgreaterequal(y)) +#undef isless +#define isless(x, y) ((x) __builtin_isless(y)) +#undef islessequal +#define islessequal(x, y) ((x) 
__builtin_islessequal(y)) +#undef islessgreater +#define islessgreater(x, y) ((x) __builtin_islessgreater(y)) +#undef isunordered +#define isunordered(x, y) ((x) __builtin_isunordered(y)) +#endif /* defined(__GNUC__) */ +#endif /* defined(_STDC_C99) || _XOPEN_SOURCE - 0 >= 600 || ... */ + +#if defined(__EXTENSIONS__) || defined(_STDC_C99) || \ + (!defined(_STRICT_STDC) && !defined(__XOPEN_OR_POSIX)) || \ + defined(__C99FEATURES__) +#if defined(__FLT_EVAL_METHOD__) && __FLT_EVAL_METHOD__ - 0 == 0 +typedef float float_t; +typedef double double_t; +#elif __FLT_EVAL_METHOD__ - 0 == 1 +typedef double float_t; +typedef double double_t; +#elif __FLT_EVAL_METHOD__ - 0 == 2 +typedef long double float_t; +typedef long double double_t; +#elif defined(__sparc) || defined(__amd64) +typedef float float_t; +typedef double double_t; +#elif defined(__i386) +typedef long double float_t; +typedef long double double_t; +#endif + +#undef FP_ZERO +#define FP_ZERO 0 +#undef FP_SUBNORMAL +#define FP_SUBNORMAL 1 +#undef FP_NORMAL +#define FP_NORMAL 2 +#undef FP_INFINITE +#define FP_INFINITE 3 +#undef FP_NAN +#define FP_NAN 4 + +#undef FP_ILOGB0 +#define FP_ILOGB0 (-2147483647) +#undef FP_ILOGBNAN +#define FP_ILOGBNAN 2147483647 + +#undef MATH_ERRNO +#define MATH_ERRNO 1 +#undef MATH_ERREXCEPT +#define MATH_ERREXCEPT 2 +#undef math_errhandling +#define math_errhandling MATH_ERREXCEPT + +extern double acosh __P((double)); +extern double asinh __P((double)); +extern double atanh __P((double)); + +extern double exp2 __P((double)); +extern double expm1 __P((double)); +extern int ilogb __P((double)); +extern double log1p __P((double)); +extern double log2 __P((double)); +extern double logb __P((double)); +extern double scalbn __P((double, int)); +extern double scalbln __P((double, long int)); + +extern double cbrt __P((double)); +extern double hypot __P((double, double)); + +extern double erf __P((double)); +extern double erfc __P((double)); +extern double lgamma __P((double)); +extern double 
tgamma __P((double)); + +extern double nearbyint __P((double)); +extern double rint __P((double)); +extern long int lrint __P((double)); +extern double round __P((double)); +extern long int lround __P((double)); +extern double trunc __P((double)); + +extern double remainder __P((double, double)); +extern double remquo __P((double, double, int *)); + +extern double copysign __P((double, double)); +extern double nan __P((const char *)); +extern double nextafter __P((double, double)); +extern double nexttoward __P((double, long double)); + +extern double fdim __P((double, double)); +extern double fmax __P((double, double)); +extern double fmin __P((double, double)); + +extern double fma __P((double, double, double)); + +extern float acosf __P((float)); +extern float asinf __P((float)); +extern float atanf __P((float)); +extern float atan2f __P((float, float)); +extern float cosf __P((float)); +extern float sinf __P((float)); +extern float tanf __P((float)); + +extern float acoshf __P((float)); +extern float asinhf __P((float)); +extern float atanhf __P((float)); +extern float coshf __P((float)); +extern float sinhf __P((float)); +extern float tanhf __P((float)); + +extern float expf __P((float)); +extern float exp2f __P((float)); +extern float expm1f __P((float)); +extern float frexpf __P((float, int *)); +extern int ilogbf __P((float)); +extern float ldexpf __P((float, int)); +extern float logf __P((float)); +extern float log10f __P((float)); +extern float log1pf __P((float)); +extern float log2f __P((float)); +extern float logbf __P((float)); +extern float modff __P((float, float *)); +extern float scalbnf __P((float, int)); +extern float scalblnf __P((float, long int)); + +extern float cbrtf __P((float)); +extern float fabsf __P((float)); +extern float hypotf __P((float, float)); +extern float powf __P((float, float)); +extern float sqrtf __P((float)); + +extern float erff __P((float)); +extern float erfcf __P((float)); +extern float lgammaf __P((float)); +extern 
float tgammaf __P((float)); + +extern float ceilf __P((float)); +extern float floorf __P((float)); +extern float nearbyintf __P((float)); +extern float rintf __P((float)); +extern long int lrintf __P((float)); +extern float roundf __P((float)); +extern long int lroundf __P((float)); +extern float truncf __P((float)); + +extern float fmodf __P((float, float)); +extern float remainderf __P((float, float)); +extern float remquof __P((float, float, int *)); + +extern float copysignf __P((float, float)); +extern float nanf __P((const char *)); +extern float nextafterf __P((float, float)); +extern float nexttowardf __P((float, long double)); + +extern float fdimf __P((float, float)); +extern float fmaxf __P((float, float)); +extern float fminf __P((float, float)); + +extern float fmaf __P((float, float, float)); +/* long double versions; every name carries the l suffix */ +extern long double acosl __P((long double)); +extern long double asinl __P((long double)); +extern long double atanl __P((long double)); +extern long double atan2l __P((long double, long double)); +extern long double cosl __P((long double)); +extern long double sinl __P((long double)); +extern long double tanl __P((long double)); + +extern long double acoshl __P((long double)); +extern long double asinhl __P((long double)); +extern long double atanhl __P((long double)); +extern long double coshl __P((long double)); +extern long double sinhl __P((long double)); +extern long double tanhl __P((long double)); + +extern long double expl __P((long double)); +extern long double exp2l __P((long double)); +extern long double expm1l __P((long double)); +extern long double frexpl __P((long double, int *)); +extern int ilogbl __P((long double)); +extern long double ldexpl __P((long double, int)); +extern long double logl __P((long double)); +extern long double log10l __P((long double)); +extern long double log1pl __P((long double)); +extern long double log2l __P((long double)); +extern long double logbl __P((long double)); +extern long double modfl __P((long double, long
double *)); +extern long double scalbnl __P((long double, int)); +extern long double scalblnl __P((long double, long int)); + +extern long double cbrtl __P((long double)); +extern long double fabsl __P((long double)); +extern long double hypotl __P((long double, long double)); +extern long double powl __P((long double, long double)); +extern long double sqrtl __P((long double)); + +extern long double erfl __P((long double)); +extern long double erfcl __P((long double)); +extern long double lgammal __P((long double)); +extern long double tgammal __P((long double)); + +extern long double ceill __P((long double)); +extern long double floorl __P((long double)); +extern long double nearbyintl __P((long double)); +extern long double rintl __P((long double)); +extern long int lrintl __P((long double)); +extern long double roundl __P((long double)); +extern long int lroundl __P((long double)); +extern long double truncl __P((long double)); + +extern long double fmodl __P((long double, long double)); +extern long double remainderl __P((long double, long double)); +extern long double remquol __P((long double, long double, int *)); + +extern long double copysignl __P((long double, long double)); +extern long double nanl __P((const char *)); +extern long double nextafterl __P((long double, long double)); +extern long double nexttowardl __P((long double, long double)); + +extern long double fdiml __P((long double, long double)); +extern long double fmaxl __P((long double, long double)); +extern long double fminl __P((long double, long double)); + +extern long double fmal __P((long double, long double, long double)); +/* Functions returning long long: declared in C99 mode, or when neither _STRICT_STDC nor _NO_LONGLONG is defined. In the test below the && pair groups before the || alternatives. */ +#if !defined(_STRICT_STDC) && !defined(_NO_LONGLONG) || defined(_STDC_C99) || \ + defined(__C99FEATURES__) +extern long long int llrint __P((double)); +extern long long int llround __P((double)); + +extern long long int llrintf __P((float)); +extern long long int llroundf __P((float)); + +extern long long int llrintl __P((long double)); +extern long long int
llroundl __P((long double)); +#endif +/* Compiler-specific optimizer hints: the listed functions neither read nor write global data. Not emitted for C++. Functions whose hints depend on __MATHERR_ERRNO_DONTCARE are listed separately further down. */ +#if !defined(__cplusplus) +#pragma does_not_read_global_data(asinh, exp2, expm1) +#pragma does_not_read_global_data(ilogb, log2) +#pragma does_not_read_global_data(scalbn, scalbln, cbrt) +#pragma does_not_read_global_data(erf, erfc, tgamma) +#pragma does_not_read_global_data(nearbyint, rint, lrint, round, lround, trunc) +#pragma does_not_read_global_data(remquo) +#pragma does_not_read_global_data(copysign, nan, nexttoward) +#pragma does_not_read_global_data(fdim, fmax, fmin, fma) +#pragma does_not_write_global_data(asinh, exp2, expm1) +#pragma does_not_write_global_data(ilogb, log2) +#pragma does_not_write_global_data(scalbn, scalbln, cbrt) +#pragma does_not_write_global_data(erf, erfc, tgamma) +#pragma does_not_write_global_data(nearbyint, rint, lrint, round, lround, trunc) +#pragma does_not_write_global_data(copysign, nan, nexttoward) +#pragma does_not_write_global_data(fdim, fmax, fmin, fma) +/* float versions; functions that take a result pointer (frexpf, modff, remquof) and lgammaf appear in the read lists only */ +#pragma does_not_read_global_data(acosf, asinf, atanf, atan2f) +#pragma does_not_read_global_data(cosf, sinf, tanf) +#pragma does_not_read_global_data(acoshf, asinhf, atanhf, coshf, sinhf, tanhf) +#pragma does_not_read_global_data(expf, exp2f, expm1f, frexpf, ilogbf, ldexpf) +#pragma does_not_read_global_data(logf, log10f, log1pf, log2f, logbf) +#pragma does_not_read_global_data(modff, scalbnf, scalblnf) +#pragma does_not_read_global_data(cbrtf, fabsf, hypotf, powf, sqrtf) +#pragma does_not_read_global_data(erff, erfcf, lgammaf, tgammaf) +#pragma does_not_read_global_data(ceilf, floorf, nearbyintf) +#pragma does_not_read_global_data(rintf, lrintf, roundf, lroundf, truncf) +#pragma does_not_read_global_data(fmodf, remainderf, remquof) +#pragma does_not_read_global_data(copysignf, nanf, nextafterf, nexttowardf) +#pragma does_not_read_global_data(fdimf, fmaxf, fminf, fmaf) +#pragma does_not_write_global_data(acosf, asinf, atanf, atan2f) +#pragma does_not_write_global_data(cosf, sinf, tanf) +#pragma does_not_write_global_data(acoshf,
asinhf, atanhf, coshf, sinhf, tanhf) +#pragma does_not_write_global_data(expf, exp2f, expm1f, ilogbf, ldexpf) +#pragma does_not_write_global_data(logf, log10f, log1pf, log2f, logbf) +#pragma does_not_write_global_data(cbrtf, fabsf, hypotf, powf, sqrtf) +#pragma does_not_write_global_data(erff, erfcf, tgammaf) +#pragma does_not_write_global_data(ceilf, floorf, nearbyintf) +#pragma does_not_write_global_data(rintf, lrintf, roundf, lroundf, truncf) +#pragma does_not_write_global_data(fmodf, remainderf) +#pragma does_not_write_global_data(copysignf, nanf, nextafterf, nexttowardf) +#pragma does_not_write_global_data(fdimf, fmaxf, fminf, fmaf) +/* long double versions. NOTE(review): scalbnl and scalblnl (like scalbnf and scalblnf) are named in a does_not_read list but in no does_not_write list, although scalbn and scalbln are listed for double; confirm whether that omission is intended. */ +#pragma does_not_read_global_data(acosl, asinl, atanl, atan2l) +#pragma does_not_read_global_data(cosl, sinl, tanl) +#pragma does_not_read_global_data(acoshl, asinhl, atanhl, coshl, sinhl, tanhl) +#pragma does_not_read_global_data(expl, exp2l, expm1l, frexpl, ilogbl, ldexpl) +#pragma does_not_read_global_data(logl, log10l, log1pl, log2l, logbl) +#pragma does_not_read_global_data(modfl, scalbnl, scalblnl) +#pragma does_not_read_global_data(cbrtl, fabsl, hypotl, powl, sqrtl) +#pragma does_not_read_global_data(erfl, erfcl, lgammal, tgammal) +#pragma does_not_read_global_data(ceill, floorl, nearbyintl) +#pragma does_not_read_global_data(rintl, lrintl, roundl, lroundl, truncl) +#pragma does_not_read_global_data(fmodl, remainderl, remquol) +#pragma does_not_read_global_data(copysignl, nanl, nextafterl, nexttowardl) +#pragma does_not_read_global_data(fdiml, fmaxl, fminl, fmal) +#pragma does_not_write_global_data(acosl, asinl, atanl, atan2l) +#pragma does_not_write_global_data(cosl, sinl, tanl) +#pragma does_not_write_global_data(acoshl, asinhl, atanhl, coshl, sinhl, tanhl) +#pragma does_not_write_global_data(expl, exp2l, expm1l, ilogbl, ldexpl) +#pragma does_not_write_global_data(logl, log10l, log1pl, log2l, logbl) +#pragma does_not_write_global_data(cbrtl, fabsl, hypotl, powl, sqrtl) +#pragma does_not_write_global_data(erfl, erfcl,
tgammal) +#pragma does_not_write_global_data(ceill, floorl, nearbyintl) +#pragma does_not_write_global_data(rintl, lrintl, roundl, lroundl, truncl) +#pragma does_not_write_global_data(fmodl, remainderl) +#pragma does_not_write_global_data(copysignl, nanl, nextafterl, nexttowardl) +#pragma does_not_write_global_data(fdiml, fmaxl, fminl, fmal) + +#if !defined(_STRICT_STDC) && !defined(_NO_LONGLONG) || defined(_STDC_C99) || \ + defined(__C99FEATURES__) +#pragma does_not_read_global_data(llrint, llround) +#pragma does_not_read_global_data(llrintf, llroundf, llrintl, llroundl) +#pragma does_not_write_global_data(llrint, llround) +#pragma does_not_write_global_data(llrintf, llroundf, llrintl, llroundl) +#endif +#endif /* !defined(__cplusplus) */ +/* Further hints, emitted only when __MATHERR_ERRNO_DONTCARE is defined. lgamma gets the does_not_read hint only. */ +#if defined(__MATHERR_ERRNO_DONTCARE) +#pragma does_not_read_global_data(acosh, atanh, hypot, lgamma, log1p, logb) +#pragma does_not_read_global_data(nextafter, remainder) +#pragma does_not_write_global_data(acosh, atanh, hypot, log1p, logb) +#pragma does_not_write_global_data(nextafter, remainder) +/* no_side_effect hints, double versions */ +#pragma no_side_effect(acosh, asinh, atanh, exp2, expm1) +#pragma no_side_effect(ilogb, log1p, log2, logb) +#pragma no_side_effect(scalbn, scalbln, cbrt, hypot) +#pragma no_side_effect(erf, erfc, tgamma) +#pragma no_side_effect(nearbyint, rint, lrint, round, lround, trunc) +#pragma no_side_effect(remainder) +#pragma no_side_effect(copysign, nan, nextafter, nexttoward) +#pragma no_side_effect(fdim, fmax, fmin, fma) +/* float versions; each function is named exactly once, matching the grouping of the global-data lists */ +#pragma no_side_effect(acosf, asinf, atanf, atan2f) +#pragma no_side_effect(cosf, sinf, tanf) +#pragma no_side_effect(acoshf, asinhf, atanhf, coshf, sinhf, tanhf) +#pragma no_side_effect(expf, exp2f, expm1f, ilogbf, ldexpf) +#pragma no_side_effect(logf, log10f, log1pf, log2f, logbf) +#pragma no_side_effect(cbrtf, fabsf, hypotf, powf, sqrtf) +#pragma no_side_effect(erff, erfcf, tgammaf) +#pragma no_side_effect(ceilf, floorf, nearbyintf) +#pragma no_side_effect(rintf, lrintf, roundf, lroundf,
truncf) +#pragma no_side_effect(fmodf, remainderf) +#pragma no_side_effect(copysignf, nanf, nextafterf, nexttowardf) +#pragma no_side_effect(fdimf, fmaxf, fminf, fmaf) +/* long double versions; each function is named exactly once, matching the grouping of the global-data lists */ +#pragma no_side_effect(acosl, asinl, atanl, atan2l) +#pragma no_side_effect(cosl, sinl, tanl) +#pragma no_side_effect(acoshl, asinhl, atanhl, coshl, sinhl, tanhl) +#pragma no_side_effect(expl, exp2l, expm1l, ilogbl, ldexpl) +#pragma no_side_effect(logl, log10l, log1pl, log2l, logbl) +#pragma no_side_effect(cbrtl, fabsl, hypotl, powl, sqrtl) +#pragma no_side_effect(erfl, erfcl, tgammal) +#pragma no_side_effect(ceill, floorl, nearbyintl) +#pragma no_side_effect(rintl, lrintl, roundl, lroundl, truncl) +#pragma no_side_effect(fmodl, remainderl) +#pragma no_side_effect(copysignl, nanl, nextafterl, nexttowardl) +#pragma no_side_effect(fdiml, fmaxl, fminl, fmal) + +#if !defined(_STRICT_STDC) && !defined(_NO_LONGLONG) || defined(_STDC_C99) || \ + defined(__C99FEATURES__) +#pragma no_side_effect(llrint, llround, llrintf, llroundf, llrintl, llroundl) +#endif +#endif /* defined(__MATHERR_ERRNO_DONTCARE) */ +#endif /* defined(__EXTENSIONS__) || defined(_STDC_C99) || ... */ + +#ifdef __cplusplus +} +#endif + +#endif /* _ISO_MATH_C99_H */ diff --git a/usr/src/head/iso/math_iso.h b/usr/src/head/iso/math_iso.h new file mode 100644 index 0000000000..4ecb29fbf8 --- /dev/null +++ b/usr/src/head/iso/math_iso.h @@ -0,0 +1,232 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License.
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _ISO_MATH_ISO_H +#define _ISO_MATH_ISO_H + +#include <sys/feature_tests.h> + +#ifdef __cplusplus +extern "C" { +#endif +/* __P wraps prototype parameter lists: with __STDC__ defined the list is kept unchanged, otherwise it is replaced by an empty list. */ +#ifndef __P +#ifdef __STDC__ +#define __P(p) p +#else +#define __P(p) () +#endif +#endif /* !defined(__P) */ +/* Pre-C99 environments only: HUGE_VAL is read from the external object __huge_val through the double member of a union. */ +#if !defined(_STDC_C99) && _XOPEN_SOURCE - 0 < 600 && !defined(__C99FEATURES__) +typedef union _h_val { + unsigned long _i[sizeof (double) / sizeof (unsigned long)]; + double _d; +} _h_val; + +#ifdef __STDC__ +extern const _h_val __huge_val; +#else +extern _h_val __huge_val; +#endif +#undef HUGE_VAL +#define HUGE_VAL __huge_val._d +#endif /* !defined(_STDC_C99) && _XOPEN_SOURCE - 0 < 600 && ...
*/ +/* With a standard C++ compiler (__cplusplus at least 199711L) the ISO C functions are declared inside namespace std. */ +#if __cplusplus >= 199711L +namespace std { +#endif + +extern double acos __P((double)); +extern double asin __P((double)); +extern double atan __P((double)); +extern double atan2 __P((double, double)); +extern double cos __P((double)); +extern double sin __P((double)); +extern double tan __P((double)); + +extern double cosh __P((double)); +extern double sinh __P((double)); +extern double tanh __P((double)); + +extern double exp __P((double)); +extern double frexp __P((double, int *)); +extern double ldexp __P((double, int)); +extern double log __P((double)); +extern double log10 __P((double)); +extern double modf __P((double, double *)); + +extern double pow __P((double, double)); +extern double sqrt __P((double)); + +extern double ceil __P((double)); +extern double fabs __P((double)); +extern double floor __P((double)); +extern double fmod __P((double, double)); +/* Optimizer hints, emitted only when __MATHERR_ERRNO_DONTCARE is defined. frexp and modf, which take a pointer argument, are named in the does_not_read lists only. */ +#if defined(__MATHERR_ERRNO_DONTCARE) +#pragma does_not_read_global_data(acos, asin, atan, atan2) +#pragma does_not_read_global_data(cos, sin, tan, cosh, sinh, tanh) +#pragma does_not_read_global_data(exp, log, log10, pow, sqrt) +#pragma does_not_read_global_data(frexp, ldexp, modf) +#pragma does_not_read_global_data(ceil, fabs, floor, fmod) +#pragma does_not_write_global_data(acos, asin, atan, atan2) +#pragma does_not_write_global_data(cos, sin, tan, cosh, sinh, tanh) +#pragma does_not_write_global_data(exp, log, log10, pow, sqrt) +#pragma does_not_write_global_data(ldexp) +#pragma does_not_write_global_data(ceil, fabs, floor, fmod) +#pragma no_side_effect(acos, asin, atan, atan2) +#pragma no_side_effect(cos, sin, tan, cosh, sinh, tanh) +#pragma no_side_effect(exp, log, log10, pow, sqrt) +#pragma no_side_effect(ldexp) +#pragma no_side_effect(ceil, fabs, floor, fmod) +#endif +/* Double-underscore float and long double entry points; the C++ overloads defined later in this header forward to them. */ +#if __cplusplus >= 199711L +extern float __acosf(float); +extern float __asinf(float); +extern float __atanf(float); +extern float __atan2f(float, float); +extern float __ceilf(float); +extern float __cosf(float); +extern
float __coshf(float); +extern float __expf(float); +extern float __fabsf(float); +extern float __floorf(float); +extern float __fmodf(float, float); +extern float __frexpf(float, int *); +extern float __ldexpf(float, int); +extern float __logf(float); +extern float __log10f(float); +extern float __modff(float, float *); +extern float __powf(float, float); +extern float __sinf(float); +extern float __sinhf(float); +extern float __sqrtf(float); +extern float __tanf(float); +extern float __tanhf(float); +/* long double entry points */ +extern long double __acosl(long double); +extern long double __asinl(long double); +extern long double __atanl(long double); +extern long double __atan2l(long double, long double); +extern long double __ceill(long double); +extern long double __cosl(long double); +extern long double __coshl(long double); +extern long double __expl(long double); +extern long double __fabsl(long double); +extern long double __floorl(long double); +extern long double __fmodl(long double, long double); +extern long double __frexpl(long double, int *); +extern long double __ldexpl(long double, int); +extern long double __logl(long double); +extern long double __log10l(long double); +extern long double __modfl(long double, long double *); +extern long double __powl(long double, long double); +extern long double __sinl(long double); +extern long double __sinhl(long double); +extern long double __sqrtl(long double); +extern long double __tanl(long double); +extern long double __tanhl(long double); +/* C++ overloads with C++ linkage. __X and __Y are undefined first; NOTE(review): presumably so that a stray macro cannot rewrite the parameter names. */ +extern "C++" { +#undef __X +#undef __Y + inline double abs(double __X) { return fabs(__X); } + inline double pow(double __X, int __Y) { return + pow(__X, (double) (__Y)); } +/* float overloads forward to the __*f entry points */ + inline float abs(float __X) { return __fabsf(__X); } + inline float acos(float __X) { return __acosf(__X); } + inline float asin(float __X) { return __asinf(__X); } + inline float atan(float __X) { return __atanf(__X); } + inline float atan2(float __X, float __Y) { return __atan2f(__X, __Y); } + inline float ceil(float
__X) { return __ceilf(__X); } + inline float cos(float __X) { return __cosf(__X); } + inline float cosh(float __X) { return __coshf(__X); } + inline float exp(float __X) { return __expf(__X); } + inline float fabs(float __X) { return __fabsf(__X); } + inline float floor(float __X) { return __floorf(__X); } + inline float fmod(float __X, float __Y) { return __fmodf(__X, __Y); } + inline float frexp(float __X, int *__Y) { return __frexpf(__X, __Y); } + inline float ldexp(float __X, int __Y) { return __ldexpf(__X, __Y); } + inline float log(float __X) { return __logf(__X); } + inline float log10(float __X) { return __log10f(__X); } + inline float modf(float __X, float *__Y) { return __modff(__X, __Y); } + inline float pow(float __X, float __Y) { return __powf(__X, __Y); } + inline float pow(float __X, int __Y) { return + pow((double) (__X), (double) (__Y)); } + inline float sin(float __X) { return __sinf(__X); } + inline float sinh(float __X) { return __sinhf(__X); } + inline float sqrt(float __X) { return __sqrtf(__X); } + inline float tan(float __X) { return __tanf(__X); } + inline float tanh(float __X) { return __tanhf(__X); } +/* long double overloads forward to the __*l entry points. Unlike them, pow(float, int) above is evaluated through the double pow and the result is converted back to float. */ + inline long double abs(long double __X) { return __fabsl(__X); } + inline long double acos(long double __X) { return __acosl(__X); } + inline long double asin(long double __X) { return __asinl(__X); } + inline long double atan(long double __X) { return __atanl(__X); } + inline long double atan2(long double __X, long double __Y) { return + __atan2l(__X, __Y); } + inline long double ceil(long double __X) { return __ceill(__X); } + inline long double cos(long double __X) { return __cosl(__X); } + inline long double cosh(long double __X) { return __coshl(__X); } + inline long double exp(long double __X) { return __expl(__X); } + inline long double fabs(long double __X) { return __fabsl(__X); } + inline long double floor(long double __X) { return __floorl(__X); } + inline long double fmod(long double __X, long double __Y) { return +
__fmodl(__X, __Y); } + inline long double frexp(long double __X, int *__Y) { return + __frexpl(__X, __Y); } + inline long double ldexp(long double __X, int __Y) { return + __ldexpl(__X, __Y); } + inline long double log(long double __X) { return __logl(__X); } + inline long double log10(long double __X) { return __log10l(__X); } + inline long double modf(long double __X, long double *__Y) { return + __modfl(__X, __Y); } + inline long double pow(long double __X, long double __Y) { return + __powl(__X, __Y); } + inline long double pow(long double __X, int __Y) { return + __powl(__X, (long double) (__Y)); } + inline long double sin(long double __X) { return __sinl(__X); } + inline long double sinh(long double __X) { return __sinhl(__X); } + inline long double sqrt(long double __X) { return __sqrtl(__X); } + inline long double tan(long double __X) { return __tanl(__X); } + inline long double tanh(long double __X) { return __tanhl(__X); } +} /* end of extern "C++" */ +#endif /* __cplusplus >= 199711L */ + +#if __cplusplus >= 199711L +} /* end of namespace std */ +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* _ISO_MATH_ISO_H */ diff --git a/usr/src/head/math.h b/usr/src/head/math.h new file mode 100644 index 0000000000..b28f42a907 --- /dev/null +++ b/usr/src/head/math.h @@ -0,0 +1,351 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _MATH_H +#define _MATH_H + +#include <iso/math_iso.h> +#include <iso/math_c99.h> +/* Standard C++: <iso/math_iso.h> declares these functions in namespace std; make them visible in the global namespace as well. */ +#if __cplusplus >= 199711L +using std::abs; +using std::acos; +using std::asin; +using std::atan2; +using std::atan; +using std::ceil; +using std::cos; +using std::cosh; +using std::exp; +using std::fabs; +using std::floor; +using std::fmod; +using std::frexp; +using std::ldexp; +using std::log10; +using std::log; +using std::modf; +using std::pow; +using std::sin; +using std::sinh; +using std::sqrt; +using std::tan; +using std::tanh; +#endif + +#ifdef __cplusplus +extern "C" { +#endif +/* C++ only: the SVID struct exception declared later is given the tag __math_exception. NOTE(review): presumably to keep it apart from the C++ library class of the same name. */ +#if defined(__cplusplus) +#define exception __math_exception +#endif + +#ifndef __P +#ifdef __STDC__ +#define __P(p) p +#else +#define __P(p) () +#endif +#endif /* !defined(__P) */ +/* In the test below && binds tighter than ||, so the last two conditions form one alternative. */ +#if defined(__EXTENSIONS__) || defined(_XOPEN_SOURCE) || \ + !defined(_STRICT_STDC) && !defined(_POSIX_C_SOURCE) +/* + * SVID & X/Open + */ +#define M_E 2.7182818284590452354 +#define M_LOG2E 1.4426950408889634074 +#define M_LOG10E 0.43429448190325182765 +#define M_LN2 0.69314718055994530942 +#define M_LN10 2.30258509299404568402 +#define M_PI 3.14159265358979323846 +#define M_PI_2 1.57079632679489661923 +#define M_PI_4 0.78539816339744830962 +#define M_1_PI 0.31830988618379067154 +#define M_2_PI 0.63661977236758134308 +#define M_2_SQRTPI 1.12837916709551257390 +#define M_SQRT2 1.41421356237309504880 +#define M_SQRT1_2 0.70710678118654752440 +/* POSIX: lgamma stores the sign of the gamma function value here. */ +extern int signgam; + +#define MAXFLOAT ((float)3.40282346638528860e+38) + +#if defined(__EXTENSIONS__) || !defined(_XOPEN_SOURCE) +/* + * SVID + */
+enum version {libm_ieee = -1, c_issue_4, ansi_1, strict_ansi}; + +#ifdef __STDC__ +extern const enum version _lib_version; +#else +extern enum version _lib_version; +#endif +/* Argument block handed to matherr, which is declared further down as taking a struct exception pointer. */ +struct exception { + int type; + char *name; + double arg1; + double arg2; + double retval; +}; + +#define HUGE MAXFLOAT +/* Helper macros. _ABS evaluates its argument twice. _REDUCE expands to a braced block, requires X to be a modifiable lvalue, and evaluates X and XN more than once. */ +#define _ABS(x) ((x) < 0 ? -(x) : (x)) + +#define _REDUCE(TYPE, X, XN, C1, C2) { \ + double x1 = (double)(TYPE)X, x2 = X - x1; \ + X = x1 - (XN) * (C1); X += x2; X -= (XN) * (C2); } +/* Error categories for matherr. NOTE(review): presumably the values carried in the type member of struct exception; confirm against the libm sources. */ +#define DOMAIN 1 +#define SING 2 +#define OVERFLOW 3 +#define UNDERFLOW 4 +#define TLOSS 5 +#define PLOSS 6 +/* Horner-form polynomial evaluation: _POLYn(x, c) uses c[0] through c[n], with c[0] the coefficient of the highest power of x. Both arguments are evaluated repeatedly. */ +#define _POLY1(x, c) ((c)[0] * (x) + (c)[1]) +#define _POLY2(x, c) (_POLY1((x), (c)) * (x) + (c)[2]) +#define _POLY3(x, c) (_POLY2((x), (c)) * (x) + (c)[3]) +#define _POLY4(x, c) (_POLY3((x), (c)) * (x) + (c)[4]) +#define _POLY5(x, c) (_POLY4((x), (c)) * (x) + (c)[5]) +#define _POLY6(x, c) (_POLY5((x), (c)) * (x) + (c)[6]) +#define _POLY7(x, c) (_POLY6((x), (c)) * (x) + (c)[7]) +#define _POLY8(x, c) (_POLY7((x), (c)) * (x) + (c)[8]) +#define _POLY9(x, c) (_POLY8((x), (c)) * (x) + (c)[9]) +#endif /* defined(__EXTENSIONS__) || !defined(_XOPEN_SOURCE) */ + +/* + * SVID & X/Open + */ +/* BEGIN adopted by C99 */ +extern double erf __P((double)); +extern double erfc __P((double)); +extern double hypot __P((double, double)); +extern double lgamma __P((double)); +/* lgamma is declared above but is left out of these hints. */ +#if defined(__MATHERR_ERRNO_DONTCARE) +#pragma does_not_read_global_data(erf, erfc, hypot) +#pragma does_not_write_global_data(erf, erfc, hypot) +#pragma no_side_effect(erf, erfc, hypot) +#endif +/* The isnan function is declared only for pre-C99 environments; its hints do not depend on __MATHERR_ERRNO_DONTCARE. */ +#if !defined(_STDC_C99) && _XOPEN_SOURCE - 0 < 600 && !defined(__C99FEATURES__) +extern int isnan __P((double)); + +#pragma does_not_read_global_data(isnan) +#pragma does_not_write_global_data(isnan) +#pragma no_side_effect(isnan) +#endif +/* END adopted by C99 */ + +#if defined(__EXTENSIONS__) || _XOPEN_SOURCE - 0 < 600 +extern double gamma __P((double)); /* deprecated; use lgamma */ +#endif +extern double j0 __P((double)); +extern
double j1 __P((double)); +extern double jn __P((int, double)); +extern double y0 __P((double)); +extern double y1 __P((double)); +extern double yn __P((int, double)); +/* Optimizer hints for the Bessel functions j0, j1, jn, y0, y1 and yn; emitted only when __MATHERR_ERRNO_DONTCARE is defined. */ +#if defined(__MATHERR_ERRNO_DONTCARE) +#pragma does_not_read_global_data(j0, j1, jn, y0, y1, yn) +#pragma does_not_write_global_data(j0, j1, jn, y0, y1, yn) +#pragma no_side_effect(j0, j1, jn, y0, y1, yn) +#endif +#if defined(__EXTENSIONS__) || !defined(_XOPEN_SOURCE) || \ + _XOPEN_SOURCE - 0 >= 500 || \ + defined(_XOPEN_SOURCE) && _XOPEN_SOURCE_EXTENDED - 0 == 1 +/* + * SVID & XPG 4.2/5 + */ +extern double scalb __P((double, double)); + +#if defined(__MATHERR_ERRNO_DONTCARE) +#pragma does_not_read_global_data(scalb) +#pragma does_not_write_global_data(scalb) +#pragma no_side_effect(scalb) +#endif +/* The declarations that follow repeat prototypes that <iso/math_c99.h> also provides in C99 environments; the prototypes are identical. */ +/* BEGIN adopted by C99 */ +extern double acosh __P((double)); +extern double asinh __P((double)); +extern double atanh __P((double)); +extern double cbrt __P((double)); +extern double logb __P((double)); +extern double nextafter __P((double, double)); +extern double remainder __P((double, double)); + +/* + * XPG 4.2/5 + */ +extern double expm1 __P((double)); +extern int ilogb __P((double)); +extern double log1p __P((double)); +extern double rint __P((double)); + +#if defined(__MATHERR_ERRNO_DONTCARE) +#pragma does_not_read_global_data(acosh, asinh, atanh, cbrt) +#pragma does_not_read_global_data(logb, nextafter, remainder) +#pragma does_not_read_global_data(expm1, ilogb, log1p, rint) +#pragma does_not_write_global_data(acosh, asinh, atanh, cbrt) +#pragma does_not_write_global_data(logb, nextafter, remainder) +#pragma does_not_write_global_data(expm1, ilogb, log1p, rint) +#pragma no_side_effect(acosh, asinh, atanh, cbrt) +#pragma no_side_effect(logb, nextafter, remainder) +#pragma no_side_effect(expm1, ilogb, log1p, rint) +#endif +/* END adopted by C99 */ +#endif /* defined(__EXTENSIONS__) || !defined(_XOPEN_SOURCE) || ...
*/ +/* SVID-only section: skipped when _XOPEN_SOURCE is defined without __EXTENSIONS__. */ +#if defined(__EXTENSIONS__) || !defined(_XOPEN_SOURCE) +/* + * SVID + */ +extern int matherr __P((struct exception *)); + +/* + * IEEE Test Vector + */ +extern double significand __P((double)); + +#if defined(__MATHERR_ERRNO_DONTCARE) +#pragma does_not_read_global_data(significand) +#pragma does_not_write_global_data(significand) +#pragma no_side_effect(significand) +#endif + +extern int signgamf; /* deprecated; use signgam */ +extern int signgaml; /* deprecated; use signgam */ +/* float (f suffix) and long double (l suffix) counterparts of the SVID and XPG functions */ +extern int isnanf __P((float)); +extern int isnanl __P((long double)); +extern float gammaf __P((float)); /* deprecated; use lgammaf */ +extern float gammaf_r __P((float, int *)); /* deprecated; use lgammaf_r */ +extern float j0f __P((float)); +extern float j1f __P((float)); +extern float jnf __P((int, float)); +extern float lgammaf_r __P((float, int *)); +extern float scalbf __P((float, float)); +extern float significandf __P((float)); +extern float y0f __P((float)); +extern float y1f __P((float)); +extern float ynf __P((int, float)); +extern long double gammal __P((long double)); /* deprecated; use lgammal */ +extern long double gammal_r __P((long double, int *)); /* deprecated */ +extern long double j0l __P((long double)); +extern long double j1l __P((long double)); +extern long double jnl __P((int, long double)); +extern long double lgammal_r __P((long double, int *)); +extern long double scalbl __P((long double, long double)); +extern long double significandl __P((long double)); +extern long double y0l __P((long double)); +extern long double y1l __P((long double)); +extern long double ynl __P((int, long double)); +/* The _r functions, which take an int pointer, are named in the does_not_read lists only. */ +#if defined(__MATHERR_ERRNO_DONTCARE) +#pragma does_not_read_global_data(isnanf, isnanl) +#pragma does_not_write_global_data(isnanf, isnanl) +#pragma no_side_effect(isnanf, isnanl) +#pragma does_not_read_global_data(gammaf_r, j0f, j1f, jnf, lgammaf_r, scalbf) +#pragma does_not_read_global_data(significandf, y0f, y1f, ynf) +#pragma does_not_write_global_data(j0f,
j1f, jnf, scalbf) +#pragma does_not_write_global_data(significandf, y0f, y1f, ynf) +#pragma no_side_effect(j0f, j1f, jnf, scalbf) +#pragma no_side_effect(significandf, y0f, y1f, ynf) +#pragma does_not_read_global_data(gammal_r, j0l, j1l, jnl, lgammal_r, scalbl) +#pragma does_not_read_global_data(significandl, y0l, y1l, ynl) +#pragma does_not_write_global_data(j0l, j1l, jnl, scalbl) +#pragma does_not_write_global_data(significandl, y0l, y1l, ynl) +#pragma no_side_effect(j0l, j1l, jnl, scalbl) +#pragma no_side_effect(significandl, y0l, y1l, ynl) +#endif + +/* + * for sin+cos->sincos transformation + */ +extern void sincos __P((double, double *, double *)); +extern void sincosf __P((float, float *, float *)); +extern void sincosl __P((long double, long double *, long double *)); +/* Only the does_not_read hint is given for the sincos family: the functions return void and take two pointer arguments for their results. */ +#if defined(__MATHERR_ERRNO_DONTCARE) +#pragma does_not_read_global_data(sincos, sincosf, sincosl) +#endif + +/* BEGIN adopted by C99 */ +/* + * Functions callable from C, intended to support IEEE arithmetic. + */ +extern double copysign __P((double, double)); +extern double scalbn __P((double, int)); + +#if defined(__MATHERR_ERRNO_DONTCARE) +#pragma does_not_read_global_data(copysign, scalbn) +#pragma does_not_write_global_data(copysign, scalbn) +#pragma no_side_effect(copysign, scalbn) +#endif +/* END adopted by C99 */ + +/* + * Reentrant version of gamma & lgamma; passes signgam back by reference + * as the second argument; user must allocate space for signgam.
+ */ +extern double gamma_r __P((double, int *)); /* deprecated; use lgamma_r */ +extern double lgamma_r __P((double, int *)); + +#if defined(__MATHERR_ERRNO_DONTCARE) +#pragma does_not_read_global_data(gamma_r, lgamma_r) +#endif + +/* BEGIN adopted by C99 */ +extern float modff __P((float, float *)); + +#if defined(__MATHERR_ERRNO_DONTCARE) +#pragma does_not_read_global_data(modff) +#endif +/* END adopted by C99 */ +/* <floatingpoint.h> is included for C, and for C++ only when __EXTENSIONS__ is defined. */ +#if defined(__EXTENSIONS__) || !defined(__cplusplus) +#include <floatingpoint.h> +#endif +#endif /* defined(__EXTENSIONS__) || !defined(_XOPEN_SOURCE) */ +#endif /* defined(__EXTENSIONS__) || defined(_XOPEN_SOURCE) || ... */ +/* Removes the exception to __math_exception rename made near the top of this header. Only GCC gets the #undef; with any other C++ compiler the macro stays defined after <math.h> has been included. */ +#if defined(__cplusplus) && defined(__GNUC__) +#undef exception +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* _MATH_H */ diff --git a/usr/src/head/tgmath.h b/usr/src/head/tgmath.h new file mode 100644 index 0000000000..c5b2c519e5 --- /dev/null +++ b/usr/src/head/tgmath.h @@ -0,0 +1,171 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms.
+ */ + +#ifndef _TGMATH_H +#define _TGMATH_H + +#if !defined(__cplusplus) + +#include <math.h> +#include <complex.h> + +/* + * real-floating and complex + */ +#undef acos +#define acos(x) __tgmath_acos(x) +#undef asin +#define asin(x) __tgmath_asin(x) +#undef atan +#define atan(x) __tgmath_atan(x) +#undef acosh +#define acosh(x) __tgmath_acosh(x) +#undef asinh +#define asinh(x) __tgmath_asinh(x) +#undef atanh +#define atanh(x) __tgmath_atanh(x) +#undef cos +#define cos(x) __tgmath_cos(x) +#undef sin +#define sin(x) __tgmath_sin(x) +#undef tan +#define tan(x) __tgmath_tan(x) +#undef cosh +#define cosh(x) __tgmath_cosh(x) +#undef sinh +#define sinh(x) __tgmath_sinh(x) +#undef tanh +#define tanh(x) __tgmath_tanh(x) +#undef exp +#define exp(x) __tgmath_exp(x) +#undef log +#define log(x) __tgmath_log(x) +#undef pow +#define pow(x, y) __tgmath_pow(x, y) +#undef sqrt +#define sqrt(x) __tgmath_sqrt(x) +#undef fabs +#define fabs(x) __tgmath_fabs(x) + +/* + * real-floating only + */ +#undef atan2 +#define atan2(y, x) __tgmath_atan2(y, x) +#undef cbrt +#define cbrt(x) __tgmath_cbrt(x) +#undef ceil +#define ceil(x) __tgmath_ceil(x) +#undef copysign +#define copysign(x, y) __tgmath_copysign(x, y) +#undef erf +#define erf(x) __tgmath_erf(x) +#undef erfc +#define erfc(x) __tgmath_erfc(x) +#undef exp2 +#define exp2(x) __tgmath_exp2(x) +#undef expm1 +#define expm1(x) __tgmath_expm1(x) +#undef fdim +#define fdim(x, y) __tgmath_fdim(x, y) +#undef floor +#define floor(x) __tgmath_floor(x) +#undef fma +#define fma(x, y, z) __tgmath_fma(x, y, z) +#undef fmax +#define fmax(x, y) __tgmath_fmax(x, y) +#undef fmin +#define fmin(x, y) __tgmath_fmin(x, y) +#undef fmod +#define fmod(x, y) __tgmath_fmod(x, y) +#undef frexp +#define frexp(x, ip) __tgmath_frexp(x, ip) +#undef hypot +#define hypot(x, y) __tgmath_hypot(x, y) +#undef ilogb +#define ilogb(x) __tgmath_ilogb(x) +#undef ldexp +#define ldexp(x, i) __tgmath_ldexp(x, i) +#undef lgamma +#define lgamma(x) __tgmath_lgamma(x) +#undef llrint 
+#define llrint(x) __tgmath_llrint(x) +#undef llround +#define llround(x) __tgmath_llround(x) +#undef log10 +#define log10(x) __tgmath_log10(x) +#undef log1p +#define log1p(x) __tgmath_log1p(x) +#undef log2 +#define log2(x) __tgmath_log2(x) +#undef logb +#define logb(x) __tgmath_logb(x) +#undef lrint +#define lrint(x) __tgmath_lrint(x) +#undef lround +#define lround(x) __tgmath_lround(x) +#undef nearbyint +#define nearbyint(x) __tgmath_nearbyint(x) +#undef nextafter +#define nextafter(x, y) __tgmath_nextafter(x, y) +#undef nexttoward +#define nexttoward(x, y) __tgmath_nexttoward(x, y) +#undef remainder +#define remainder(x, y) __tgmath_remainder(x, y) +#undef remquo +#define remquo(x, y, ip) __tgmath_remquo(x, y, ip) +#undef rint +#define rint(x) __tgmath_rint(x) +#undef round +#define round(x) __tgmath_round(x) +#undef scalbln +#define scalbln(x, l) __tgmath_scalbln(x, l) +#undef scalbn +#define scalbn(x, i) __tgmath_scalbn(x, i) +#undef tgamma +#define tgamma(x) __tgmath_tgamma(x) +#undef trunc +#define trunc(x) __tgmath_trunc(x) + +/* + * complex only + */ +#undef carg +#define carg(x) __tgmath_carg(x) +#undef cimag +#define cimag(x) __tgmath_cimag(x) +#undef conj +#define conj(x) __tgmath_conj(x) +#undef cproj +#define cproj(x) __tgmath_cproj(x) +#undef creal +#define creal(x) __tgmath_creal(x) + +#endif /* !defined(__cplusplus) */ + +#endif /* _TGMATH_H */ diff --git a/usr/src/lib/Makefile b/usr/src/lib/Makefile index 3cea7e1efa..fec45bce8a 100644 --- a/usr/src/lib/Makefile +++ b/usr/src/lib/Makefile @@ -154,7 +154,11 @@ SUBDIRS += \ librdc \ libinstzones \ libpkg \ - libpcidb + libpcidb \ + libm1 \ + libm \ + libmvec + SUBDIRS += \ passwdutil \ @@ -560,7 +564,7 @@ _dc: $(DCSUBDIRS:%=%-nodepend) auditd_plugins: libbsm libnsl libsecdb gss_mechs/mech_krb5: libgss libnsl libsocket libresolv pkcs11 libadt_jni: libbsm -libast: libsocket +libast: libsocket libm libadutils: libldap5 libresolv libsocket libnsl nsswitch: libadutils libidmap libbe: libzfs @@ -589,6 
+593,9 @@ libipadm: libnsl libinetutil libsocket libdlpi libnvpair libdhcpagent \ libdladm libsecdb libiscsit: libc libnvpair libstmf libuuid libnsl libkmf: libcryptoutil pkcs11 +libm: libc +libm1: libc libm +libmvec: libc libm libnsl: libmd5 libmapid: libresolv librdc: libsocket libnsl libnsctl libunistat libdscfg @@ -602,7 +609,7 @@ libsecdb: libnsl libsasl: libgss libsocket pkcs11 libmd sasl_plugins: pkcs11 libgss libsocket libsasl libsctp: libsocket -libshell: libast libcmd libdll libsocket libsecdb +libshell: libast libcmd libdll libsocket libsecdb libm libsip: libmd5 libsmbfs: libcmdutils libsocket libnsl libkrb5 libsocket: libnsl @@ -634,7 +641,7 @@ pkcs11: libcryptoutil print: libldap5 udapl/udapl_tavor: udapl/libdat libzfs: libdevid libgen libnvpair libuutil \ - libadm libavl libefi libidmap libmd libzfs_core + libadm libavl libefi libidmap libmd libzfs_core libm libzfs_core: libnvpair libzfs_jni: libdiskmgt libnvpair libzfs libzpool: libavl libumem libnvpair libcmdutils @@ -663,6 +670,13 @@ libreparse: libnvpair libhotplug: libnvpair cfgadm_plugins: libhotplug libilb: libsocket +libipmi: libm +libprtdiag: libm +libsqlite: libm +libstmf: libm +libvscan: libm + + $(INTEL_BUILD)libdiskmgt:libfdisk # diff --git a/usr/src/lib/libm/Makefile b/usr/src/lib/libm/Makefile new file mode 100644 index 0000000000..99e533aca2 --- /dev/null +++ b/usr/src/lib/libm/Makefile @@ -0,0 +1,42 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2011 Nexenta Systems, Inc. All rights reserved. 
+# + +LIBRARY= libm.a +VERS= .2 + +# include common library definitions +include $(SRC)/lib/Makefile.lib + +SUBDIRS = $(MACH) +$(BUILD64)SUBDIRS += $(MACH64) + +all := TARGET= all +install := TARGET= install +clean := TARGET= clean +clobber := TARGET= clobber +lint := TARGET= lint + +.KEEP_STATE: + +.PARALLEL: $(SUBDIRS) + +all clean clobber install lint: $(SUBDIRS) + +$(SUBDIRS): FRC + @cd $@; pwd; VERSION='$(VERSION)' $(MAKE) $(TARGET) + +FRC: + +include $(SRC)/lib/Makefile.targ diff --git a/usr/src/lib/libm/Makefile.com b/usr/src/lib/libm/Makefile.com new file mode 100644 index 0000000000..9beaa6b618 --- /dev/null +++ b/usr/src/lib/libm/Makefile.com @@ -0,0 +1,1029 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2011 Nexenta Systems, Inc. All rights reserved. 
+# + +LIBRARY = libm.a +VERS = .2 + +LIBMDIR = $(SRC)/lib/libm + +m9xsseOBJS_i386 = \ + __fex_hdlr.o \ + __fex_i386.o \ + __fex_sse.o \ + __fex_sym.o \ + fex_log.o + +m9xsseOBJS = $(m9xsseOBJS_$(TARGET_ARCH)) + +m9xOBJS_amd64 = \ + __fex_sse.o \ + feprec.o + +m9xOBJS_sparc = \ + lrint.o \ + lrintf.o \ + lrintl.o \ + lround.o \ + lroundf.o \ + lroundl.o + +m9xOBJS_i386 = \ + __fex_sse.o \ + feprec.o \ + lrint.o \ + lrintf.o \ + lrintl.o \ + lround.o \ + lroundf.o \ + lroundl.o + +# +# lrint.o, lrintf.o, lrintl.o, lround.o, lroundf.o & lroundl.o are 32-bit only +# +m9xOBJS = \ + $(m9xOBJS_$(TARGET_ARCH)) \ + __fex_$(MACH).o \ + __fex_hdlr.o \ + __fex_sym.o \ + fdim.o \ + fdimf.o \ + fdiml.o \ + feexcept.o \ + fenv.o \ + feround.o \ + fex_handler.o \ + fex_log.o \ + fma.o \ + fmaf.o \ + fmal.o \ + fmax.o \ + fmaxf.o \ + fmaxl.o \ + fmin.o \ + fminf.o \ + fminl.o \ + frexp.o \ + frexpf.o \ + frexpl.o \ + ldexp.o \ + ldexpf.o \ + ldexpl.o \ + llrint.o \ + llrintf.o \ + llrintl.o \ + llround.o \ + llroundf.o \ + llroundl.o \ + modf.o \ + modff.o \ + modfl.o \ + nan.o \ + nanf.o \ + nanl.o \ + nearbyint.o \ + nearbyintf.o \ + nearbyintl.o \ + nexttoward.o \ + nexttowardf.o \ + nexttowardl.o \ + remquo.o \ + remquof.o \ + remquol.o \ + round.o \ + roundf.o \ + roundl.o \ + scalbln.o \ + scalblnf.o \ + scalblnl.o \ + tgamma.o \ + tgammaf.o \ + tgammal.o \ + trunc.o \ + truncf.o \ + truncl.o + +OBJS_M9XSSE = $(m9xsseOBJS:%=pics/%) + +COBJS_i386 = \ + __libx_errno.o + +COBJS_sparc = \ + $(COBJS_i386) \ + _TBL_atan.o \ + _TBL_exp2.o \ + _TBL_log.o \ + _TBL_log2.o \ + _TBL_tan.o \ + __tan.o \ + __tanf.o + +# +# atan2pi.o and sincospi.o is for internal use only +# + +COBJS_amd64 = \ + _TBL_atan.o \ + _TBL_exp2.o \ + _TBL_log.o \ + _TBL_log2.o \ + __tan.o \ + __tanf.o \ + _TBL_tan.o \ + copysign.o \ + exp.o \ + fabs.o \ + fmod.o \ + ilogb.o \ + isnan.o \ + nextafter.o \ + remainder.o \ + rint.o \ + scalbn.o + +COBJS_sparcv9 = $(COBJS_amd64) + +COBJS = \ + $(COBJS_$(TARGET_ARCH)) 
\ + __cos.o \ + __lgamma.o \ + __rem_pio2.o \ + __rem_pio2m.o \ + __sin.o \ + __sincos.o \ + __xpg6.o \ + _lib_version.o \ + _SVID_error.o \ + _TBL_ipio2.o \ + _TBL_sin.o \ + acos.o \ + acosh.o \ + asin.o \ + asinh.o \ + atan.o \ + atan2.o \ + atan2pi.o \ + atanh.o \ + cbrt.o \ + ceil.o \ + cos.o \ + cosh.o \ + erf.o \ + exp10.o \ + exp2.o \ + expm1.o \ + floor.o \ + gamma.o \ + gamma_r.o \ + hypot.o \ + j0.o \ + j1.o \ + jn.o \ + lgamma.o \ + lgamma_r.o \ + log.o \ + log10.o \ + log1p.o \ + log2.o \ + logb.o \ + matherr.o \ + pow.o \ + scalb.o \ + signgam.o \ + significand.o \ + sin.o \ + sincos.o \ + sincospi.o \ + sinh.o \ + sqrt.o \ + tan.o \ + tanh.o + +# +# LSARC/2003/658 adds isnanl +# +QOBJS_sparc = \ + _TBL_atanl.o \ + _TBL_expl.o \ + _TBL_expm1l.o \ + _TBL_logl.o \ + finitel.o \ + isnanl.o + +QOBJS_sparcv9 = $(QOBJS_sparc) + +QOBJS_amd64 = \ + finitel.o \ + isnanl.o + +# +# atan2pil.o, ieee_funcl.o, rndintl.o, sinpil.o, sincospil.o +# are for internal use only +# +# LSARC/2003/279 adds the following: +# gammal.o 1 +# gammal_r.o 1 +# j0l.o 2 +# j1l.o 2 +# jnl.o 2 +# lgammal_r.o 1 +# scalbl.o 1 +# significandl.o 1 +# +QOBJS = \ + $(QOBJS_$(TARGET_ARCH)) \ + __cosl.o \ + __lgammal.o \ + __poly_libmq.o \ + __rem_pio2l.o \ + __sincosl.o \ + __sinl.o \ + __tanl.o \ + _TBL_cosl.o \ + _TBL_ipio2l.o \ + _TBL_sinl.o \ + _TBL_tanl.o \ + acoshl.o \ + acosl.o \ + asinhl.o \ + asinl.o \ + atan2l.o \ + atan2pil.o \ + atanhl.o \ + atanl.o \ + cbrtl.o \ + copysignl.o \ + coshl.o \ + cosl.o \ + erfl.o \ + exp10l.o \ + exp2l.o \ + expl.o \ + expm1l.o \ + fabsl.o \ + floorl.o \ + fmodl.o \ + gammal.o \ + gammal_r.o \ + hypotl.o \ + ieee_funcl.o \ + ilogbl.o \ + j0l.o \ + j1l.o \ + jnl.o \ + lgammal.o \ + lgammal_r.o \ + log10l.o \ + log1pl.o \ + log2l.o \ + logbl.o \ + logl.o \ + nextafterl.o \ + powl.o \ + remainderl.o \ + rintl.o \ + rndintl.o \ + scalbl.o \ + scalbnl.o \ + signgaml.o \ + significandl.o \ + sincosl.o \ + sincospil.o \ + sinhl.o \ + sinl.o \ + sinpil.o \ + 
sqrtl.o \ + tanhl.o \ + tanl.o + +# +# LSARC/2003/658 adds isnanf +# +ROBJS_sparc = \ + __cosf.o \ + __sincosf.o \ + __sinf.o \ + isnanf.o + +ROBJS_sparcv9 = $(ROBJS_sparc) + +ROBJS_amd64 = \ + isnanf.o \ + __cosf.o \ + __sincosf.o \ + __sinf.o + +# +# atan2pif.o, sincosf.o, sincospif.o are for internal use only +# +# LSARC/2003/279 adds the following: +# besself.o 6 +# scalbf.o 1 +# gammaf.o 1 +# gammaf_r.o 1 +# lgammaf_r.o 1 +# significandf.o 1 +# +ROBJS = \ + $(ROBJS_$(TARGET_ARCH)) \ + _TBL_r_atan_.o \ + acosf.o \ + acoshf.o \ + asinf.o \ + asinhf.o \ + atan2f.o \ + atan2pif.o \ + atanf.o \ + atanhf.o \ + besself.o \ + cbrtf.o \ + copysignf.o \ + cosf.o \ + coshf.o \ + erff.o \ + exp10f.o \ + exp2f.o \ + expf.o \ + expm1f.o \ + fabsf.o \ + floorf.o \ + fmodf.o \ + gammaf.o \ + gammaf_r.o \ + hypotf.o \ + ilogbf.o \ + lgammaf.o \ + lgammaf_r.o \ + log10f.o \ + log1pf.o \ + log2f.o \ + logbf.o \ + logf.o \ + nextafterf.o \ + powf.o \ + remainderf.o \ + rintf.o \ + scalbf.o \ + scalbnf.o \ + signgamf.o \ + significandf.o \ + sinf.o \ + sinhf.o \ + sincosf.o \ + sincospif.o \ + sqrtf.o \ + tanf.o \ + tanhf.o + +# +# LSARC/2003/658 adds isnanf/isnanl +# + +SOBJS_sparc = \ + copysign.o \ + exp.o \ + fabs.o \ + fmod.o \ + ilogb.o \ + isnan.o \ + nextafter.o \ + remainder.o \ + rint.o \ + scalbn.o + +SOBJS_i386 = \ + __reduction.o \ + finitef.o \ + finitel.o \ + isnanf.o \ + isnanl.o \ + $(SOBJS_sparc) + +SOBJS_amd64 = \ + __swapFLAGS.o +# _xtoll.o \ +# _xtoull.o \ + + +SOBJS = \ + $(SOBJS_$(TARGET_ARCH)) + +complexOBJS = \ + cabs.o \ + cabsf.o \ + cabsl.o \ + cacos.o \ + cacosf.o \ + cacosh.o \ + cacoshf.o \ + cacoshl.o \ + cacosl.o \ + carg.o \ + cargf.o \ + cargl.o \ + casin.o \ + casinf.o \ + casinh.o \ + casinhf.o \ + casinhl.o \ + casinl.o \ + catan.o \ + catanf.o \ + catanh.o \ + catanhf.o \ + catanhl.o \ + catanl.o \ + ccos.o \ + ccosf.o \ + ccosh.o \ + ccoshf.o \ + ccoshl.o \ + ccosl.o \ + cexp.o \ + cexpf.o \ + cexpl.o \ + cimag.o \ + cimagf.o \ + cimagl.o \ 
+ clog.o \ + clogf.o \ + clogl.o \ + conj.o \ + conjf.o \ + conjl.o \ + cpow.o \ + cpowf.o \ + cpowl.o \ + cproj.o \ + cprojf.o \ + cprojl.o \ + creal.o \ + crealf.o \ + creall.o \ + csin.o \ + csinf.o \ + csinh.o \ + csinhf.o \ + csinhl.o \ + csinl.o \ + csqrt.o \ + csqrtf.o \ + csqrtl.o \ + ctan.o \ + ctanf.o \ + ctanh.o \ + ctanhf.o \ + ctanhl.o \ + ctanl.o \ + k_atan2.o \ + k_atan2l.o \ + k_cexp.o \ + k_cexpl.o \ + k_clog_r.o \ + k_clog_rl.o + +OBJECTS = $(COBJS) $(ROBJS) $(QOBJS) $(SOBJS) $(m9xOBJS) $(complexOBJS) + +include $(SRC)/lib/Makefile.lib +include $(LIBMDIR)/Makefile.libm.com +include $(SRC)/lib/Makefile.rootfs + +SRCDIR = ../common/ +LIBS = $(DYNLIB) $(LINTLIB) + +LINTERROFF = -erroff=E_FUNC_SET_NOT_USED +LINTERROFF += -erroff=E_FUNC_RET_ALWAYS_IGNOR2 +LINTERROFF += -erroff=E_FUNC_RET_MAYBE_IGNORED2 +LINTERROFF += -erroff=E_IMPL_CONV_RETURN +LINTERROFF += -erroff=E_NAME_MULTIPLY_DEF2 +LINTFLAGS += $(LINTERROFF) +LINTFLAGS64 += $(LINTERROFF) +LINTFLAGS64 += -errchk=longptr64 + +CPPFLAGS += -DLIBM_BUILD + +CFLAGS += $(C_BIGPICFLAGS) +CFLAGS64 += $(C_BIGPICFLAGS) + +m9x_IL = $(LIBMDIR)/common/m9x/__fenv_$(TARGET_ARCH).il + +SRCS_LD_i386_amd64 = \ + ../common/LD/finitel.c \ + ../common/LD/isnanl.c \ + ../common/LD/nextafterl.c + +SRCS_LD = \ + $(SRCS_LD_i386_$(TARGET_ARCH)) \ + ../common/LD/__cosl.c \ + ../common/LD/__lgammal.c \ + ../common/LD/__poly_libmq.c \ + ../common/LD/__rem_pio2l.c \ + ../common/LD/__sincosl.c \ + ../common/LD/__sinl.c \ + ../common/LD/__tanl.c \ + ../common/LD/_TBL_cosl.c \ + ../common/LD/_TBL_ipio2l.c \ + ../common/LD/_TBL_sinl.c \ + ../common/LD/_TBL_tanl.c \ + ../common/LD/acoshl.c \ + ../common/LD/asinhl.c \ + ../common/LD/atan2pil.c \ + ../common/LD/atanhl.c \ + ../common/LD/cbrtl.c \ + ../common/LD/coshl.c \ + ../common/LD/cosl.c \ + ../common/LD/erfl.c \ + ../common/LD/gammal.c \ + ../common/LD/gammal_r.c \ + ../common/LD/hypotl.c \ + ../common/LD/j0l.c \ + ../common/LD/j1l.c \ + ../common/LD/jnl.c \ + 
../common/LD/lgammal.c \ + ../common/LD/lgammal_r.c \ + ../common/LD/log1pl.c \ + ../common/LD/logbl.c \ + ../common/LD/scalbl.c \ + ../common/LD/signgaml.c \ + ../common/LD/significandl.c \ + ../common/LD/sincosl.c \ + ../common/LD/sincospil.c \ + ../common/LD/sinhl.c \ + ../common/LD/sinl.c \ + ../common/LD/sinpil.c \ + ../common/LD/tanhl.c \ + ../common/LD/tanl.c + +SRCS_LD_i386 = \ + $(SRCS_LD) + +SRCS_R_amd64 = \ + ../common/R/__tanf.c \ + ../common/R/isnanf.c \ + ../common/R/__cosf.c \ + ../common/R/__sincosf.c \ + ../common/R/__sinf.c \ + ../common/R/acosf.c \ + ../common/R/asinf.c \ + ../common/R/atan2f.c \ + ../common/R/copysignf.c \ + ../common/R/exp10f.c \ + ../common/R/exp2f.c \ + ../common/R/expm1f.c \ + ../common/R/fabsf.c \ + ../common/R/hypotf.c \ + ../common/R/ilogbf.c \ + ../common/R/log10f.c \ + ../common/R/log2f.c \ + ../common/R/nextafterf.c \ + ../common/R/powf.c \ + ../common/R/rintf.c \ + ../common/R/scalbnf.c + +# sparc + sparcv9 +SRCS_R_sparc = \ + ../common/R/__tanf.c \ + ../common/R/__cosf.c \ + ../common/R/__sincosf.c \ + ../common/R/__sinf.c \ + ../common/R/isnanf.c \ + ../common/R/acosf.c \ + ../common/R/asinf.c \ + ../common/R/atan2f.c \ + ../common/R/copysignf.c \ + ../common/R/exp10f.c \ + ../common/R/exp2f.c \ + ../common/R/expm1f.c \ + ../common/R/fabsf.c \ + ../common/R/fmodf.c \ + ../common/R/hypotf.c \ + ../common/R/ilogbf.c \ + ../common/R/log10f.c \ + ../common/R/log2f.c \ + ../common/R/nextafterf.c \ + ../common/R/powf.c \ + ../common/R/remainderf.c \ + ../common/R/rintf.c \ + ../common/R/scalbnf.c + +SRCS_R = \ + $(SRCS_R_$(MACH)) \ + $(SRCS_R_$(TARGET_ARCH)) \ + ../common/R/_TBL_r_atan_.c \ + ../common/R/acoshf.c \ + ../common/R/asinhf.c \ + ../common/R/atan2pif.c \ + ../common/R/atanf.c \ + ../common/R/atanhf.c \ + ../common/R/besself.c \ + ../common/R/cbrtf.c \ + ../common/R/cosf.c \ + ../common/R/coshf.c \ + ../common/R/erff.c \ + ../common/R/expf.c \ + ../common/R/floorf.c \ + ../common/R/gammaf.c \ + 
../common/R/gammaf_r.c \ + ../common/R/lgammaf.c \ + ../common/R/lgammaf_r.c \ + ../common/R/log1pf.c \ + ../common/R/logbf.c \ + ../common/R/logf.c \ + ../common/R/scalbf.c \ + ../common/R/signgamf.c \ + ../common/R/significandf.c \ + ../common/R/sinf.c \ + ../common/R/sinhf.c \ + ../common/R/sincosf.c \ + ../common/R/sincospif.c \ + ../common/R/sqrtf.c \ + ../common/R/tanf.c \ + ../common/R/tanhf.c + +SRCS_Q = \ + ../common/Q/_TBL_atanl.c \ + ../common/Q/_TBL_expl.c \ + ../common/Q/_TBL_expm1l.c \ + ../common/Q/_TBL_logl.c \ + ../common/Q/finitel.c \ + ../common/Q/isnanl.c \ + ../common/Q/__cosl.c \ + ../common/Q/__lgammal.c \ + ../common/Q/__poly_libmq.c \ + ../common/Q/__rem_pio2l.c \ + ../common/Q/__sincosl.c \ + ../common/Q/__sinl.c \ + ../common/Q/__tanl.c \ + ../common/Q/_TBL_cosl.c \ + ../common/Q/_TBL_ipio2l.c \ + ../common/Q/_TBL_sinl.c \ + ../common/Q/_TBL_tanl.c \ + ../common/Q/acoshl.c \ + ../common/Q/acosl.c \ + ../common/Q/asinhl.c \ + ../common/Q/asinl.c \ + ../common/Q/atan2l.c \ + ../common/Q/atan2pil.c \ + ../common/Q/atanhl.c \ + ../common/Q/atanl.c \ + ../common/Q/cbrtl.c \ + ../common/Q/copysignl.c \ + ../common/Q/coshl.c \ + ../common/Q/cosl.c \ + ../common/Q/erfl.c \ + ../common/Q/exp10l.c \ + ../common/Q/exp2l.c \ + ../common/Q/expl.c \ + ../common/Q/expm1l.c \ + ../common/Q/fabsl.c \ + ../common/Q/floorl.c \ + ../common/Q/fmodl.c \ + ../common/Q/gammal.c \ + ../common/Q/gammal_r.c \ + ../common/Q/hypotl.c \ + ../common/Q/ieee_funcl.c \ + ../common/Q/ilogbl.c \ + ../common/Q/j0l.c \ + ../common/Q/j1l.c \ + ../common/Q/jnl.c \ + ../common/Q/lgammal.c \ + ../common/Q/lgammal_r.c \ + ../common/Q/log10l.c \ + ../common/Q/log1pl.c \ + ../common/Q/log2l.c \ + ../common/Q/logbl.c \ + ../common/Q/logl.c \ + ../common/Q/nextafterl.c \ + ../common/Q/powl.c \ + ../common/Q/remainderl.c \ + ../common/Q/rintl.c \ + ../common/Q/rndintl.c \ + ../common/Q/scalbl.c \ + ../common/Q/scalbnl.c \ + ../common/Q/signgaml.c \ + ../common/Q/significandl.c \ + 
../common/Q/sincosl.c \ + ../common/Q/sincospil.c \ + ../common/Q/sinhl.c \ + ../common/Q/sinl.c \ + ../common/Q/sinpil.c \ + ../common/Q/sqrtl.c \ + ../common/Q/tanhl.c \ + ../common/Q/tanl.c + +SRCS_Q_sparc = \ + $(SRCS_Q) + +SRCS_complex = \ + ../common/complex/cabs.c \ + ../common/complex/cabsf.c \ + ../common/complex/cabsl.c \ + ../common/complex/cacos.c \ + ../common/complex/cacosf.c \ + ../common/complex/cacosh.c \ + ../common/complex/cacoshf.c \ + ../common/complex/cacoshl.c \ + ../common/complex/cacosl.c \ + ../common/complex/carg.c \ + ../common/complex/cargf.c \ + ../common/complex/cargl.c \ + ../common/complex/casin.c \ + ../common/complex/casinf.c \ + ../common/complex/casinh.c \ + ../common/complex/casinhf.c \ + ../common/complex/casinhl.c \ + ../common/complex/casinl.c \ + ../common/complex/catan.c \ + ../common/complex/catanf.c \ + ../common/complex/catanh.c \ + ../common/complex/catanhf.c \ + ../common/complex/catanhl.c \ + ../common/complex/catanl.c \ + ../common/complex/ccos.c \ + ../common/complex/ccosf.c \ + ../common/complex/ccosh.c \ + ../common/complex/ccoshf.c \ + ../common/complex/ccoshl.c \ + ../common/complex/ccosl.c \ + ../common/complex/cexp.c \ + ../common/complex/cexpf.c \ + ../common/complex/cexpl.c \ + ../common/complex/cimag.c \ + ../common/complex/cimagf.c \ + ../common/complex/cimagl.c \ + ../common/complex/clog.c \ + ../common/complex/clogf.c \ + ../common/complex/clogl.c \ + ../common/complex/conj.c \ + ../common/complex/conjf.c \ + ../common/complex/conjl.c \ + ../common/complex/cpow.c \ + ../common/complex/cpowf.c \ + ../common/complex/cpowl.c \ + ../common/complex/cproj.c \ + ../common/complex/cprojf.c \ + ../common/complex/cprojl.c \ + ../common/complex/creal.c \ + ../common/complex/crealf.c \ + ../common/complex/creall.c \ + ../common/complex/csin.c \ + ../common/complex/csinf.c \ + ../common/complex/csinh.c \ + ../common/complex/csinhf.c \ + ../common/complex/csinhl.c \ + ../common/complex/csinl.c \ + 
../common/complex/csqrt.c \ + ../common/complex/csqrtf.c \ + ../common/complex/csqrtl.c \ + ../common/complex/ctan.c \ + ../common/complex/ctanf.c \ + ../common/complex/ctanh.c \ + ../common/complex/ctanhf.c \ + ../common/complex/ctanhl.c \ + ../common/complex/ctanl.c \ + ../common/complex/k_atan2.c \ + ../common/complex/k_atan2l.c \ + ../common/complex/k_cexp.c \ + ../common/complex/k_cexpl.c \ + ../common/complex/k_clog_r.c \ + ../common/complex/k_clog_rl.c + +SRCS_m9x_i386 = \ + ../common/m9x/__fex_sse.c \ + ../common/m9x/feprec.c \ + ../common/m9x/__fex_i386.c + +SRCS_m9x_i386_i386 = \ + ../common/m9x/lroundf.c + +SRCS_m9x_i386_amd64 = \ + ../common/m9x/llrint.c \ + ../common/m9x/llrintf.c \ + ../common/m9x/llrintl.c \ + ../common/m9x/nexttowardl.c \ + ../common/m9x/remquo.c \ + ../common/m9x/remquof.c \ + ../common/m9x/round.c \ + ../common/m9x/roundl.c \ + ../common/m9x/scalbln.c \ + ../common/m9x/scalblnf.c \ + ../common/m9x/scalblnl.c \ + ../common/m9x/trunc.c \ + ../common/m9x/truncl.c + +# sparc +SRCS_m9x_sparc_sparc = \ + ../common/m9x/lrint.c \ + ../common/m9x/lrintf.c \ + ../common/m9x/lrintl.c \ + ../common/m9x/lround.c \ + ../common/m9x/lroundf.c \ + ../common/m9x/lroundl.c + +SRCS_m9x_sparc = \ + ../common/m9x/__fex_sparc.c \ + ../common/m9x/llrint.c \ + ../common/m9x/llrintf.c \ + ../common/m9x/llrintl.c \ + ../common/m9x/nexttowardl.c \ + ../common/m9x/remquo.c \ + ../common/m9x/remquof.c \ + ../common/m9x/remquol.c \ + ../common/m9x/round.c \ + ../common/m9x/roundl.c \ + ../common/m9x/scalbln.c \ + ../common/m9x/scalblnf.c \ + ../common/m9x/scalblnl.c \ + ../common/m9x/trunc.c \ + ../common/m9x/truncl.c + +SRCS_m9x = \ + $(SRCS_m9x_$(MACH)) \ + $(SRCS_m9x_sparc_$(TARGET_ARCH)) \ + $(SRCS_m9x_i386_$(TARGET_ARCH)) \ + ../common/m9x/__fex_hdlr.c \ + ../common/m9x/__fex_sym.c \ + ../common/m9x/fdim.c \ + ../common/m9x/fdimf.c \ + ../common/m9x/fdiml.c \ + ../common/m9x/feexcept.c \ + ../common/m9x/fenv.c \ + ../common/m9x/feround.c \ + 
../common/m9x/fex_handler.c \ + ../common/m9x/fex_log.c \ + ../common/m9x/fma.c \ + ../common/m9x/fmaf.c \ + ../common/m9x/fmal.c \ + ../common/m9x/fmax.c \ + ../common/m9x/fmaxf.c \ + ../common/m9x/fmaxl.c \ + ../common/m9x/fmin.c \ + ../common/m9x/fminf.c \ + ../common/m9x/fminl.c \ + ../common/m9x/frexp.c \ + ../common/m9x/frexpf.c \ + ../common/m9x/frexpl.c \ + ../common/m9x/ldexp.c \ + ../common/m9x/ldexpf.c \ + ../common/m9x/ldexpl.c \ + ../common/m9x/llround.c \ + ../common/m9x/llroundf.c \ + ../common/m9x/llroundl.c \ + ../common/m9x/modf.c \ + ../common/m9x/modff.c \ + ../common/m9x/modfl.c \ + ../common/m9x/nan.c \ + ../common/m9x/nanf.c \ + ../common/m9x/nanl.c \ + ../common/m9x/nearbyint.c \ + ../common/m9x/nearbyintf.c \ + ../common/m9x/nearbyintl.c \ + ../common/m9x/nexttoward.c \ + ../common/m9x/nexttowardf.c \ + ../common/m9x/roundf.c \ + ../common/m9x/tgamma.c \ + ../common/m9x/tgammaf.c \ + ../common/m9x/tgammal.c \ + ../common/m9x/truncf.c + +SRCS_C_sparc = \ + ../common/C/__tan.c \ + ../common/C/_TBL_atan.c \ + ../common/C/_TBL_exp2.c \ + ../common/C/_TBL_log.c \ + ../common/C/_TBL_log2.c \ + ../common/C/_TBL_tan.c \ + ../common/C/acos.c \ + ../common/C/asin.c \ + ../common/C/atan.c \ + ../common/C/atan2.c \ + ../common/C/ceil.c \ + ../common/C/cos.c \ + ../common/C/exp.c \ + ../common/C/exp10.c \ + ../common/C/exp2.c \ + ../common/C/expm1.c \ + ../common/C/floor.c \ + ../common/C/fmod.c \ + ../common/C/hypot.c \ + ../common/C/ilogb.c \ + ../common/C/isnan.c \ + ../common/C/log.c \ + ../common/C/log10.c \ + ../common/C/log2.c \ + ../common/C/pow.c \ + ../common/C/remainder.c \ + ../common/C/rint.c \ + ../common/C/scalbn.c \ + ../common/C/sin.c \ + ../common/C/sincos.c \ + ../common/C/tan.c + +SRCS_i386_i386 = \ + ../common/C/__libx_errno.c + +SRCS_sparc_sparc = \ + $(SRCS_i386_i386) + +SRCS_sparc_sparcv9 = \ + ../common/C/copysign.c \ + ../common/C/fabs.c \ + ../common/C/nextafter.c + +SRCS_i386_amd64 = \ + ../common/C/_TBL_atan.c \ + 
../common/C/_TBL_exp2.c \ + ../common/C/_TBL_log.c \ + ../common/C/_TBL_log2.c \ + ../common/C/__tan.c \ + ../common/C/_TBL_tan.c \ + ../common/C/copysign.c \ + ../common/C/exp.c \ + ../common/C/fabs.c \ + ../common/C/ilogb.c \ + ../common/C/isnan.c \ + ../common/C/nextafter.c \ + ../common/C/rint.c \ + ../common/C/scalbn.c \ + ../common/C/acos.c \ + ../common/C/asin.c \ + ../common/C/atan.c \ + ../common/C/atan2.c \ + ../common/C/ceil.c \ + ../common/C/cos.c \ + ../common/C/exp10.c \ + ../common/C/exp2.c \ + ../common/C/expm1.c \ + ../common/C/floor.c \ + ../common/C/hypot.c \ + ../common/C/log.c \ + ../common/C/log10.c \ + ../common/C/log2.c \ + ../common/C/pow.c \ + ../common/C/sin.c \ + ../common/C/sincos.c \ + ../common/C/tan.c + +SRCS_C = \ + $(SRCS_C_$(MACH)) \ + $(SRCS_C_i386_$(TARGET_ARCH)) \ + ../common/C/__cos.c \ + ../common/C/__lgamma.c \ + ../common/C/__rem_pio2.c \ + ../common/C/__rem_pio2m.c \ + ../common/C/__sin.c \ + ../common/C/__sincos.c \ + ../common/C/__xpg6.c \ + ../common/C/_lib_version.c \ + ../common/C/_SVID_error.c \ + ../common/C/_TBL_ipio2.c \ + ../common/C/_TBL_sin.c \ + ../common/C/acosh.c \ + ../common/C/asinh.c \ + ../common/C/atan2pi.c \ + ../common/C/atanh.c \ + ../common/C/cbrt.c \ + ../common/C/cosh.c \ + ../common/C/erf.c \ + ../common/C/gamma.c \ + ../common/C/gamma_r.c \ + ../common/C/j0.c \ + ../common/C/j1.c \ + ../common/C/jn.c \ + ../common/C/lgamma.c \ + ../common/C/lgamma_r.c \ + ../common/C/log1p.c \ + ../common/C/logb.c \ + ../common/C/matherr.c \ + ../common/C/scalb.c \ + ../common/C/signgam.c \ + ../common/C/significand.c \ + ../common/C/sincospi.c \ + ../common/C/sinh.c \ + ../common/C/sqrt.c \ + ../common/C/tanh.c + +SRCS = \ + $(SRCS_Q_$(MACH)) \ + $(SRCS_LD_$(MACH)) \ + $(SRCS_R) \ + $(SRCS_complex) \ + $(SRCS_C) + +.KEEP_STATE: + +all: $(LIBS) + +lint: lintcheck + diff --git a/usr/src/lib/libm/Makefile.libm.com b/usr/src/lib/libm/Makefile.libm.com new file mode 100644 index 0000000000..f6f9725596 --- /dev/null 
+++ b/usr/src/lib/libm/Makefile.libm.com @@ -0,0 +1,93 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2011 Nexenta Systems, Inc. All rights reserved. +# + +LIBMDIR = $(SRC)/lib/libm + +LIBMSRC = $(LIBMDIR)/common + +CPP_CMD = $(CC) -E -Xs + +ASSUFFIX_sparc = S +ASSUFFIX_i386 = s +ASSUFFIX = $(ASSUFFIX_$(MACH)) + +# C99MODE of neither enabled nor disabled is "no_lib", whereby we expect +# C99-the-language, but don't modify the behaviour of library routines. This +# is VERY IMPORTANT, as -xc99=%all, for instance, would link us with +# values-xpg6, which would introduce an __xpg6 to our object with the C99 +# flags set, causing us to default C99 libm behaviour on, breaking +# compatibility. +C99MODE = + +M4FLAGS = -D__STDC__ -DELFOBJ -DPIC + +LDBLDIR_sparc = Q +LDBLDIR_i386 = LD +LDBLDIR = $(LDBLDIR_$(MACH)) + +LM_IL = $(LIBMDIR)/$(TARGET_ARCH)/src/locallibm.il + +CFLAGS += $(C_PICFLAGS) -D__INLINE $(XSTRCONST) $(LM_IL) +CFLAGS64 += $(C_PICFLAGS) -D__INLINE $(XSTRCONST) $(LM_IL) +sparc_CFLAGS += -Wa,-xarch=v8plus + +CPPFLAGS += -DELFOBJ \ + -DLIBM_MT_FEX_SYNC \ + -I$(LIBMSRC)/C \ + -I$(LIBMSRC)/$(LDBLDIR) -I$(LIBMDIR)/$(TARGET_ARCH)/src + +# GCC needs __C99FEATURES__ such that the implementations of isunordered, +# isgreaterequal, islessequal, etc, exist. This is basically equivalent to +# providing no -xc99 to Studio, in that it gets us the C99 language features, +# but not values-xpg6, the reason for which is outline with C99MODE. 
+CFLAGS += -_gcc=-D__C99FEATURES__ +CFLAGS64 += -_gcc=-D__C99FEATURES__ + +# libm depends on integer overflow characteristics +CFLAGS += -_gcc=-fno-strict-overflow +CFLAGS64 += -_gcc=-fno-strict-overflow + +$(DYNLIB) := LDLIBS += -lc + +$(LINTLIB) := SRCS = $(LIBMSRC)/$(LINTSRC) + +CLEANFILES += pics/*.s pics/*.S + +FPDEF_amd64 = -DARCH_amd64 +FPDEF_sparc = -DCG89 -DARCH_v8plus -DFPADD_TRAPS_INCOMPLETE_ON_NAN +FPDEF_sparcv9 = -DARCH_v9 -DFPADD_TRAPS_INCOMPLETE_ON_NAN +FPDEF = $(FPDEF_$(TARGET_ARCH)) + +ASFLAGS = -P -D_ASM $(FPDEF) + +XARCH_sparc = v8plus +XARCH_sparcv9 = v9 +XARCH_i386 = f80387 +XARCH_amd64 = amd64 +XARCH = $(XARCH_$(TARGET_ARCH)) + +ASOPT_sparc = -xarch=$(XARCH) $(AS_PICFLAGS) +ASOPT_sparcv9 = -xarch=$(XARCH) $(AS_PICFLAGS) +ASOPT_i386 = +ASOPT_amd64 = -xarch=$(XARCH) $(AS_PICFLAGS) +ASOPT = $(ASOPT_$(TARGET_ARCH)) + +ASFLAGS += $(ASOPT) + +CPPFLAGS_sparc = -DFPADD_TRAPS_INCOMPLETE_ON_NAN \ + -DFDTOS_TRAPS_INCOMPLETE_IN_FNS_MODE + +CPPFLAGS += $(CPPFLAGS_$(MACH)) +ASFLAGS += $(CPPFLAGS) diff --git a/usr/src/lib/libm/Makefile.targ b/usr/src/lib/libm/Makefile.targ new file mode 100644 index 0000000000..8d8588ad1b --- /dev/null +++ b/usr/src/lib/libm/Makefile.targ @@ -0,0 +1,46 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2011 Nexenta Systems, Inc. All rights reserved. 
+# + +pics/%.o: $(LIBMDIR)/$(TARGETMACH)/src/%.$(ASSUFFIX) + $(COMPILE.s) -o $@ $< + $(POST_PROCESS_O) + +pics/%.o: $(SRCDIR)/C/%.c $(LM_IL) + $(COMPILE.c) -o $@ $< + $(POST_PROCESS_O) + +pics/%.o: $(SRCDIR)/$(LDBLDIR)/%.c $(LM_IL) + $(COMPILE.c) -o $@ $< + $(POST_PROCESS_O) + +pics/%.o: $(SRCDIR)/R/%.c $(LM_IL) + $(COMPILE.c) -o $@ $< + $(POST_PROCESS_O) + +pics/%.o: $(SRCDIR)/complex/%.c $(LM_IL) + $(COMPILE.c) -o $@ $< + $(POST_PROCESS_O) + +pics/%.o: $(SRCDIR)/m9x/%.c $(LM_IL) $(m9x_IL) + $(COMPILE.c) $(m9x_IL) -o $@ $< + $(POST_PROCESS_O) + +$(ROOTLIBDIR): $(ROOTFS_LIBDIR) + $(INS.dir) + +$(ROOTLIBDIR64): $(ROOTFS_LIBDIR64) + $(INS.dir) + +include $(SRC)/lib/Makefile.targ diff --git a/usr/src/lib/libm/amd64/Makefile b/usr/src/lib/libm/amd64/Makefile new file mode 100644 index 0000000000..c67cf5c91d --- /dev/null +++ b/usr/src/lib/libm/amd64/Makefile @@ -0,0 +1,22 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2011 Nexenta Systems, Inc. All rights reserved. +# + +TARGET_ARCH= amd64 +include ../Makefile.com +include $(SRC)/lib/Makefile.lib.64 + +install: all $(ROOTLIBS64) $(ROOTLINKS64) + +include ../Makefile.targ diff --git a/usr/src/lib/libm/amd64/src/__swapFLAGS.s b/usr/src/lib/libm/amd64/src/__swapFLAGS.s new file mode 100644 index 0000000000..c0ca8e8d7b --- /dev/null +++ b/usr/src/lib/libm/amd64/src/__swapFLAGS.s @@ -0,0 +1,162 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .file "__swapFLAGS.s" + +#include "libm.h" +#include "libm_synonyms.h" + +/* + * swap exception masks + * + * Put the complement of bits 5-0 of the argument into FPCW bits 5-0 + * and MXCSR bits 12-7, return the complement of the previous FPCW + * bits 5-0. 
+ */
+	ENTRY(__swapTE)			/ di <-- NOT(desired xcptn_masks)
+	subq	$8,%rsp			/ scratch slot for cw and mxcsr
+	fstcw	(%rsp)			/ push current_cw on '86 stack
+	movq	(%rsp),%rcx		/ cx <-- current_cw
+	movw	%cx,%ax			/ ax <-- current_cw
+	orw	$0x3f,%cx		/ cx <-- current_cw, but masking all xcptns
+	andw	$0x3f,%di		/ make sure bits > B5 are all zero
+	xorw	%di,%cx			/ cx <-- present_cw, with new xcptn_masks
+	movw	%cx,(%rsp)
+	fldcw	(%rsp)			/ load new cw
+	stmxcsr	(%rsp)			/ store current mxcsr
+	movq	(%rsp),%rcx		/ cx <-- low word of current mxcsr
+	orw	$0x1f80,%cx		/ cx <-- current mxcsr, but masking all xcptns
+	shlw	$7,%di			/ move new masks up to mxcsr bits 12-7
+	xorw	%di,%cx			/ cx <-- present mxcsr, with new xcptn_masks
+	movq	%rcx,(%rsp)
+	ldmxcsr	(%rsp)			/ load new mxcsr
+	andq	$0x3f,%rax		/ al[5..0] <-- former xcptn_masks
+	xorq	$0x3f,%rax		/ al[5..0] <-- NOT(former xcptn_masks)
+	addq	$8,%rsp
+	ret
+	.align	16
+	SET_SIZE(__swapTE)
+
+/*
+ * swap exception flags
+ *
+ * Put bits 5-0 of the argument into FPSW bits 5-0 and MXCSR bits 5-0,
+ * return the "or" of the previous FPSW bits 5-0 and MXCSR bits 5-0.
+ */
+	ENTRY(__swapEX)			/ di <-- desired xcptn flags
+	fstsw	%ax			/ ax = sw
+	andq	$0x3f,%rdi		/ keep only flag bits 5-0 of the argument
+	jnz	.L1			/ nonzero flags take the fldenv path
+				/ input ex=0, clear all exception
+	fnclex
+	subq	$8,%rsp
+	stmxcsr	(%rsp)
+	movq	(%rsp),%rcx		/ cx <-- low word of current mxcsr
+	orw	%cx,%ax			/ ax <-- sw | mxcsr
+	andw	$0xffc0,%cx		/ clear mxcsr flag bits 5-0
+	movq	%rcx,(%rsp)
+	ldmxcsr	(%rsp)
+	andq	$0x3f,%rax		/ return former flags, bits 5-0 only
+	addq	$8,%rsp
+	ret
+.L1:
+				/ input ex !=0, use fnstenv and fldenv
+	subq	$32,%rsp		/ only needed 28
+	fnstenv	(%rsp)
+	movw	%ax,%dx			/ dx <-- old sw
+	andw	$0xffc0,%dx		/ drop old flag bits 5-0
+	orw	%di,%dx			/ insert new flags; cx is not set on this path
+	movw	%dx,4(%rsp)		/ replace old sw by new one
+	fldenv	(%rsp)
+	stmxcsr	(%rsp)
+	movq	(%rsp),%rdx		/ dx <-- low word of current mxcsr
+	orw	%dx,%ax			/ ax <-- old sw | old mxcsr
+	andw	$0xffc0,%dx		/ drop old mxcsr flag bits 5-0
+	orw	%di,%dx			/ insert new flags from the argument
+	movq	%rdx,(%rsp)
+	ldmxcsr	(%rsp)
+	andq	$0x3f,%rax		/ return former flags, bits 5-0 only
+	addq	$32,%rsp
+	ret
+	.align	16
+	SET_SIZE(__swapEX)
+
+/*
+ * swap rounding precision
+ *
+ * Put bits 1-0 of the argument into FPCW bits 9-8, return the
+ * previous FPCW bits 9-8.
+ */
+	ENTRY(__swapRP)			/ di <-- desired rounding precision
+	subq	$8,%rsp			/ scratch slot for the control word
+	fstcw	(%rsp)
+	movw	(%rsp),%ax		/ ax <-- current cw
+	movw	%ax,%cx
+	andw	$0xfcff,%cx		/ clear cw bits 9-8
+	andq	$0x3,%rdi		/ keep argument bits 1-0
+	shlw	$8,%di			/ move them up to bits 9-8
+	orw	%di,%cx			/ cx <-- cw with new precision bits
+	movq	%rcx,(%rsp)
+	fldcw	(%rsp)			/ load new cw
+	shrw	$8,%ax
+	andq	$0x3,%rax		/ return former cw bits 9-8
+	addq	$8,%rsp
+	ret
+	.align	16
+	SET_SIZE(__swapRP)
+
+/*
+ * swap rounding direction
+ *
+ * Put bits 1-0 of the argument into FPCW bits 11-10 and MXCSR
+ * bits 14-13, return the previous FPCW bits 11-10.
+ */
+	ENTRY(__swapRD)			/ di <-- desired rounding direction
+	subq	$8,%rsp			/ scratch slot for cw and mxcsr
+	fstcw	(%rsp)
+	movw	(%rsp),%ax		/ ax <-- current cw
+	movw	%ax,%cx
+	andw	$0xf3ff,%cx		/ clear cw bits 11-10
+	andq	$0x3,%rdi		/ keep argument bits 1-0
+	shlw	$10,%di			/ move them up to bits 11-10
+	orw	%di,%cx			/ cx <-- cw with new direction bits
+	movq	%rcx,(%rsp)
+	fldcw	(%rsp)			/ load new cw
+	stmxcsr	(%rsp)
+	movq	(%rsp),%rcx		/ cx <-- low word of current mxcsr
+	andw	$0x9fff,%cx		/ clear mxcsr bits 14-13
+	shlw	$3,%di			/ bits 11-10 move on up to bits 14-13
+	orw	%di,%cx
+	movq	%rcx,(%rsp)
+	ldmxcsr	(%rsp)			/ load new mxcsr
+	shrw	$10,%ax
+	andq	$0x3,%rax		/ return former cw bits 11-10
+	addq	$8,%rsp
+	ret
+	.align	16
+	SET_SIZE(__swapRD)
diff --git a/usr/src/lib/libm/amd64/src/acosl.s b/usr/src/lib/libm/amd64/src/acosl.s
new file mode 100644
index 0000000000..003539eea1
--- /dev/null
+++ b/usr/src/lib/libm/amd64/src/acosl.s
@@ -0,0 +1,71 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+	.file	"acosl.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(acosl,function)
+#include "libm_synonyms.h"
+
+#undef fabs
+
+	ENTRY(acosl)			/ 2*atan(sqrt((1-x)/(1+x))) for |x| <= 1
+	fldt	8(%rsp)			/ push x
+	fld1				/ push 1
+	fld	%st(1)			/ x , 1 , x
+	fabs				/ |x| , 1 , x
+	fucomip	%st(1),%st		/ compare |x| with 1 and pop |x|
+	ja	9f			/ |x| > 1
+	fadd	%st(1),%st		/ 1+x,x
+	fldz
+	fucomip	%st(1),%st		/ compare 0 with 1+x and pop the 0
+	jp	.L1			/ unordered compare
+	jne	.L1			/ 1+x != 0
+	/ x is -1
+	fstp	%st(0)			/ -1
+	fstp	%st(0)			/ empty NPX stack
+	fldpi				/ return pi
+	ret
+.L1:
+	fxch	%st(1)			/ x,1+x
+	fld1				/ 1,x,1+x
+	fsubp	%st,%st(1)		/ 1-x,1+x
+	fdivp	%st,%st(1)		/ (1-x)/(1+x)
+	fsqrt
+	fld1				/ 1,sqrt((1-x)/(1+x))
+	fpatan				/ atan(sqrt((1-x)/(1+x)))
+	fadd	%st(0),%st		/ double it
+	ret
+9:
+	/ |x| > 1
+	fstp	%st(0)			/ x
+	fsub	%st,%st(0)		/ +/-0 or NaN+invalid
+	fdiv	%st,%st(0)		/ NaN+invalid or NaN
+	ret
+	.align	16
+	SET_SIZE(acosl)
diff --git a/usr/src/lib/libm/amd64/src/asinl.s b/usr/src/lib/libm/amd64/src/asinl.s
new file mode 100644
index 0000000000..8282050156
--- /dev/null
+++ b/usr/src/lib/libm/amd64/src/asinl.s
@@ -0,0 +1,58 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+	.file	"asinl.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(asinl,function)
+#include "libm_synonyms.h"
+
+#undef fabs
+
+	ENTRY(asinl)			/ atan(x/sqrt((1-x)*(1+x))) for |x| <= 1
+	fldt	8(%rsp)			/ push x
+	fld1				/ push 1
+	fld	%st(1)			/ x , 1 , x
+	fabs				/ |x| , 1 , x
+	fucomip	%st(1),%st		/ compare |x| with 1 and pop |x|
+	ja	9f			/ |x| > 1
+	fadd	%st(1),%st		/ 1+x,x
+	fld1				/ 1,1+x,x
+	fsub	%st(2),%st		/ 1-x,1+x,x
+	fmulp	%st,%st(1)		/ (1-x)*(1+x),x
+	fsqrt				/ sqrt((1-x)*(1+x)),x
+	fpatan				/ atan(x/sqrt((1-x)*(1+x)))
+	ret
+9:
+	/ |x| > 1
+	fstp	%st(0)			/ x
+	fsub	%st,%st(0)		/ +/-0 or NaN+invalid
+	fdiv	%st,%st(0)		/ NaN+invalid or NaN
+	ret
+	.align	16
+	SET_SIZE(asinl)
diff --git a/usr/src/lib/libm/amd64/src/atan2l.s b/usr/src/lib/libm/amd64/src/atan2l.s
new file mode 100644
index 0000000000..7f892226ba
--- /dev/null
+++ b/usr/src/lib/libm/amd64/src/atan2l.s
@@ -0,0 +1,41 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+	.file	"atan2l.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(atan2l,function)
+#include "libm_synonyms.h"
+
+	ENTRY(atan2l)			/ both arguments are read from the stack
+	fldt	8(%rsp)			/ push y
+	fldt	24(%rsp)		/ push x
+	fpatan				/ return atan2(y,x)
+	ret
+	.align	16
+	SET_SIZE(atan2l)
diff --git a/usr/src/lib/libm/amd64/src/atanl.s b/usr/src/lib/libm/amd64/src/atanl.s
new file mode 100644
index 0000000000..263490dd7d
--- /dev/null
+++ b/usr/src/lib/libm/amd64/src/atanl.s
@@ -0,0 +1,41 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+	.file	"atanl.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(atanl,function)
+#include "libm_synonyms.h"
+
+	ENTRY(atanl)			/ the argument is read from the stack
+	fldt	8(%rsp)			/ push arg
+	fld1				/ push 1.0
+	fpatan				/ atan(arg/1.0)
+	ret
+	.align	16
+	SET_SIZE(atanl)
diff --git a/usr/src/lib/libm/amd64/src/copysignl.s b/usr/src/lib/libm/amd64/src/copysignl.s
new file mode 100644
index 0000000000..66be11d1d8
--- /dev/null
+++ b/usr/src/lib/libm/amd64/src/copysignl.s
@@ -0,0 +1,45 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+	.file	"copysignl.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(copysignl,function)
+#include "libm_synonyms.h"
+
+	ENTRY(copysignl)		/ first argument with the sign of the second
+	movl	16(%rsp),%eax		/ eax <-- sign&bexp word of first argument
+	movl	32(%rsp),%ecx		/ ecx <-- sign&bexp word of second argument
+	andl	$0x7fff,%eax		/ keep the exponent of the first
+	andl	$0x8000,%ecx		/ keep the sign of the second
+	orl	%ecx,%eax
+	movl	%eax,16(%rsp)		/ patch the first argument in place
+	fldt	8(%rsp)			/ return the patched value
+	ret
+	.align	16
+	SET_SIZE(copysignl)
diff --git a/usr/src/lib/libm/amd64/src/exp10l.s b/usr/src/lib/libm/amd64/src/exp10l.s
new file mode 100644
index 0000000000..f6345e6e17
--- /dev/null
+++ b/usr/src/lib/libm/amd64/src/exp10l.s
@@ -0,0 +1,116 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+	.file	"exp10l.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(exp10l,function)
+#include "libm_synonyms.h"
+
+	.data
+	.align	16
+lt2_hi:	.4byte	0xfbd00000, 0x9a209a84, 0x3ffd, 0x0
+lt2_lo:	.4byte	0x653f4837, 0x8677076a, 0xbfc9, 0x0
+
+	ENTRY(exp10l)
+	movl	16(%rsp),%ecx		/ cx <--sign&bexp(x)
+	andl	$0x7fff,%ecx		/ ecx <-- zero_xtnd(bexp(x))
+	cmpl	$0x3ffd,%ecx		/ Is |x| < log10(2)?
+	jb	.shortcut		/ If so, take a shortcut.
+	je	.check_tail		/ maybe |x| only slightly < log10(2)
+.general_case:				/ Here, |x| > log10(2) or x is NaN
+	cmpl	$0x7fff,%ecx		/ bexp(|x|) = bexp(INF)?
+	je	.not_finite		/ if so, x is not finite
+	cmpl	$0x400e,%ecx		/ |x| < 32768 = 2^15?
+	jb	.finite_non_special	/ if so, proceed with argument reduction
+	fldt	8(%rsp)			/ x
+	fld1				/ 1, x
+	jmp	1f			/ |x| >= 2^15: just scale 1 by x
+.finite_non_special:			/ Here, log10(2) < |x| < 2^15
+	fldt	8(%rsp)			/ x
+	fld	%st(0)			/ x, x
+	fldl2t				/ log2(10), x, x
+	fmulp				/ z := x*log2(10), x
+	frndint				/ [z], x
+	fst	%st(2)			/ [z], x, [z]
+	PIC_SETUP(1)
+	fldt	PIC_L(lt2_hi)		/ lt2_hi, [z], x, [z]
+	fmulp				/ [z]*lt2_hi, x, [z]
+	fsubrp	%st,%st(1)		/ x-[z]*lt2_hi, [z]
+	fldt	PIC_L(lt2_lo)		/ lt2_lo, x-[z]*lt2_hi, [z]
+	PIC_WRAPUP
+	fmul	%st(2),%st		/ [z]*lt2_lo, x-[z]*lt2_hi, [z]
+	fsubrp	%st,%st(1)		/ r := x-[z]*log10(2), [z]
+	fldl2t				/ log2(10), r, [z]
+	fmulp				/ f := r*log2(10), [z]
+	f2xm1				/ 2^f-1,[z]
+	fld1				/ 1, 2^f-1, [z]
+	faddp	%st,%st(1)		/ 2^f, [z]
+1:
+	fscale				/ 10^x, [z]
+	fstp	%st(1)			/ drop the scale factor, keep the result
+	ret
+
+.check_tail:
+	movl	12(%rsp),%ecx		/ ecx <-- hi_32(sgnfcnd(x))
+	cmpl	$0x9a209a84,%ecx	/ Is |x| < log10(2)?
+	ja	.finite_non_special	/ high word says |x| > log10(2)
+	jb	.shortcut		/ high word says |x| < log10(2)
+	movl	8(%rsp),%edx		/ edx <-- lo_32(sgnfcnd(x))
+	cmpl	$0xfbcff798,%edx	/ Is |x| slightly > log10(2)?
+	ja	.finite_non_special	/ branch if |x| slightly > log10(2)
+.shortcut:
+	/ Here, |x| < log10(2), so |z| = |x/log10(2)| < 1
+	/ whence z is in f2xm1's domain.
+	fldt	8(%rsp)			/ x
+	fldl2t				/ log2(10), x
+	fmulp				/ z := x*log2(10)
+	f2xm1				/ 2^z-1
+	fld1				/ 1, 2^z-1
+	faddp	%st,%st(1)		/ 10^x
+	ret
+
+.not_finite:
+	movl	12(%rsp),%ecx		/ ecx <-- hi_32(sgnfcnd(x))
+	cmpl	$0x80000000,%ecx	/ hi_32(sgnfcnd(x)) = hi_32(sgnfcnd(INF))?
+	jne	.NaN_or_pinf		/ if not, x is NaN or unsupp.
+	movl	8(%rsp),%edx		/ edx <-- lo_32(sgnfcnd(x))
+	cmpl	$0,%edx			/ lo_32(sgnfcnd(x)) = 0?
+	jne	.NaN_or_pinf		/ if not, x is NaN
+	movl	16(%rsp),%eax		/ ax <-- sign&bexp((x))
+	andl	$0x8000,%eax		/ here, x is infinite, but +/-?
+	jz	.NaN_or_pinf		/ branch if x = +INF
+	fldz				/ Here, x = -inf, so return 0
+	ret
+
+.NaN_or_pinf:
+	/ Here, x = NaN or +inf, so load x and return immediately.
+	fldt	8(%rsp)
+	ret
+	.align	16
+	SET_SIZE(exp10l)
diff --git a/usr/src/lib/libm/amd64/src/exp2l.s b/usr/src/lib/libm/amd64/src/exp2l.s
new file mode 100644
index 0000000000..b978d3b85c
--- /dev/null
+++ b/usr/src/lib/libm/amd64/src/exp2l.s
@@ -0,0 +1,100 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+	.file	"exp2l.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(exp2l,function)
+#include "libm_synonyms.h"
+
+	ENTRY(exp2l)
+	movl	16(%rsp),%ecx		/ cx <--sign&bexp(x)
+	andl	$0x7fff,%ecx		/ ecx <-- zero_xtnd(bexp(x))
+	cmpl	$0x3fff,%ecx		/ Is |x| <= 1?
+	jb	.shortcut		/ If so, take a shortcut.
+	je	.check_tail		/ |x| may be slightly > 1
+.general_case:				/ Here, |x| > 1 or x is NaN
+	cmpl	$0x7fff,%ecx		/ bexp(|x|) = bexp(INF)?
+	je	.not_finite		/ if so, x is not finite
+.finite_non_special:			/ Here, 1 < |x| < INF
+	fldt	8(%rsp)			/ push arg
+	fld	%st(0)			/ duplicate stack top
+	frndint				/ [x],x
+	fucomi	%st(1),%st		/ x integral?
+	je	.x_integral		/ branch if x integral
+	fxch				/ x, [x]
+	fsub	%st(1),%st		/ x-[x], [x]
+	f2xm1				/ 2**(x-[x])-1, [x]
+	fld1				/ 1,2**(x-[x])-1, [x]
+	faddp	%st,%st(1)		/ 2**(x-[x]), [x]
+	fscale				/ 2**x = 2**(arg), [x]
+	fstp	%st(1)			/ drop [x], keep the result
+	ret
+
+.x_integral:
+	fstp	%st(0)			/ ,x
+	fld1				/ 1 = 2**0, x
+	fscale				/ 2**(0 + x) = 2**x, x
+	fstp	%st(1)			/ 2**x
+	ret
+
+.check_tail:
+	movl	12(%rsp),%ecx		/ ecx <-- hi_32(sgnfcnd(x))
+	cmpl	$0x80000000,%ecx	/ Is |x| <= 1?
+	ja	.finite_non_special	/ high word says |x| > 1
+	movl	8(%rsp),%edx		/ edx <-- lo_32(sgnfcnd(x))
+	cmpl	$0x0,%edx		/ Is |x| slightly > 1?
+	ja	.finite_non_special	/ branch if |x| slightly > 1
+.shortcut:
+	/ Here, |x| < 1,
+	/ whence x is in f2xm1's domain.
+	fldt	8(%rsp)			/ push x
+	f2xm1				/ 2**x - 1
+	fld1				/ 1,2**x - 1
+	faddp	%st,%st(1)		/ 2**x
+	ret
+
+.not_finite:
+	movl	12(%rsp),%ecx		/ ecx <-- hi_32(sgnfcnd(x))
+	cmpl	$0x80000000,%ecx	/ hi_32(|x|) = hi_32(INF)?
+	jne	.NaN_or_pinf		/ if not, x is NaN
+	movl	8(%rsp),%edx		/ edx <-- lo_32(x)
+	cmpl	$0,%edx			/ lo_32(x) = 0?
+	jne	.NaN_or_pinf		/ if not, x is NaN
+	movl	16(%rsp),%eax		/ ax <-- sign&bexp((x))
+	andl	$0x8000,%eax		/ here, x is infinite, but +/-?
+	jz	.NaN_or_pinf		/ branch if x = +INF
+	fldz				/ Here, x = -inf, so return 0
+	ret
+
+.NaN_or_pinf:
+	/ Here, x = NaN or +inf, so load x and return immediately.
+	fldt	8(%rsp)
+	ret
+	.align	16
+	SET_SIZE(exp2l)
diff --git a/usr/src/lib/libm/amd64/src/expl.s b/usr/src/lib/libm/amd64/src/expl.s
new file mode 100644
index 0000000000..ac48f8d32b
--- /dev/null
+++ b/usr/src/lib/libm/amd64/src/expl.s
@@ -0,0 +1,125 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+	.file	"expl.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(expl,function)
+#include "libm_synonyms.h"
+
+	.data
+	.align	16
+ln2_hi:	.4byte	0xd1d00000, 0xb17217f7, 0x3ffe, 0x0
+ln2_lo:	.4byte	0x4c67fc0d, 0x8654361c, 0xbfce, 0x0
+
+	ENTRY(expl)
+	movl	16(%rsp),%ecx		/ cx <--sign&bexp(x)
+	andl	$0x7fff,%ecx		/ ecx <-- zero_xtnd(bexp(x))
+	cmpl	$0x3ffe,%ecx		/ Is |x| < 0.5?
+	jb	2f			/ If so, see which shortcut to take
+	je	.check_tail		/ More checking if 0.5 <= |x| < 1
+.general_case:				/ Here, |x| >= 1 or x is NaN
+	cmpl	$0x7fff,%ecx		/ bexp(|x|) = bexp(INF)?
+	je	.not_finite		/ if so, x is not finite
+	cmpl	$0x400e,%ecx		/ |x| < 32768 = 2^15?
+	jb	.finite_non_special	/ if so, proceed with argument reduction
+	fldt	8(%rsp)			/ x
+	fld1				/ 1, x
+	jmp	1f			/ |x| >= 2^15: just scale 1 by x
+.finite_non_special:			/ Here, ln(2) < |x| < 2^15
+	fldt	8(%rsp)			/ x
+	fld	%st(0)			/ x, x
+	fldl2e				/ log2(e), x, x
+	fmulp				/ z := x*log2(e), x
+	frndint				/ [z], x
+	fst	%st(2)			/ [z], x, [z]
+	PIC_SETUP(1)
+	fldt	PIC_L(ln2_hi)		/ ln2_hi, [z], x, [z]
+	fmulp				/ [z]*ln2_hi, x, [z]
+	fsubrp	%st,%st(1)		/ x-[z]*ln2_hi, [z]
+	fldt	PIC_L(ln2_lo)		/ ln2_lo, x-[z]*ln2_hi, [z]
+	PIC_WRAPUP
+	fmul	%st(2),%st		/ [z]*ln2_lo, x-[z]*ln2_hi, [z]
+	fsubrp	%st,%st(1)		/ r := x-[z]*ln(2), [z]
+	fldl2e				/ log2(e), r, [z]
+	fmulp				/ f := r*log2(e), [z]
+	f2xm1				/ 2^f-1,[z]
+	fld1				/ 1, 2^f-1, [z]
+	faddp	%st,%st(1)		/ 2^f, [z]
+1:
+	fscale				/ e^x, [z]
+	fstp	%st(1)			/ drop the scale factor, keep the result
+	ret
+
+2:					/ Here, |x| < 0.5
+	cmpl	$0x3fbe,%ecx		/ Is |x| >= 2^-65?
+	jae	.shortcut		/ If so, take a shortcut
+	fldt	8(%rsp)			/ x
+	fld1				/ 1, x
+	faddp	%st,%st(1)		/ 1+x (for inexact & directed rounding)
+	ret
+
+.check_tail:
+	movl	12(%rsp),%ecx		/ ecx <-- hi_32(sgnfcnd(x))
+	cmpl	$0xb17217f7,%ecx	/ Is |x| < ln(2)?
+	ja	.finite_non_special	/ high word says |x| > ln(2)
+	jb	.shortcut		/ high word says |x| < ln(2)
+	movl	8(%rsp),%edx		/ edx <-- lo_32(x)
+	cmpl	$0xd1cf79ab,%edx	/ Is |x| slightly < ln(2)?
+	ja	.finite_non_special	/ branch if |x| slightly > ln(2)
+.shortcut:
+	/ Here, |x| < ln(2), so |z| = |x/ln(2)| < 1,
+	/ whence z is in f2xm1's domain.
+	fldt	8(%rsp)			/ x
+	fldl2e				/ log2(e), x
+	fmulp				/ x*log2(e)
+	f2xm1				/ 2^(x*log2(e))-1 = e^x-1
+	fld1				/ 1, e^x-1
+	faddp	%st,%st(1)		/ e^x
+	ret
+
+.not_finite:
+	movl	12(%rsp),%ecx		/ ecx <-- hi_32(sgnfcnd(x))
+	cmpl	$0x80000000,%ecx	/ hi_32(|x|) = hi_32(INF)?
+	jne	.NaN_or_pinf		/ if not, x is NaN
+	movl	8(%rsp),%edx		/ edx <-- lo_32(x)
+	cmpl	$0,%edx			/ lo_32(x) = 0?
+	jne	.NaN_or_pinf		/ if not, x is NaN
+	movl	16(%rsp),%eax		/ ax <-- sign&bexp((x))
+	andl	$0x8000,%eax		/ here, x is infinite, but +/-?
+	jz	.NaN_or_pinf		/ branch if x = +INF
+	fldz				/ Here, x = -inf, so return 0
+	ret
+
+.NaN_or_pinf:
+	/ Here, x = NaN or +inf, so load x and return immediately.
+	fldt	8(%rsp)
+	fadd	%st(0),%st		/ quiet SNaN
+	ret
+	.align	16
+	SET_SIZE(expl)
diff --git a/usr/src/lib/libm/amd64/src/expm1l.s b/usr/src/lib/libm/amd64/src/expm1l.s
new file mode 100644
index 0000000000..10b7594773
--- /dev/null
+++ b/usr/src/lib/libm/amd64/src/expm1l.s
@@ -0,0 +1,124 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+	.file	"expm1l.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(expm1l,function)
+#include "libm_synonyms.h"
+
+	.data
+	.align	16
+ln2_hi:	.4byte	0xd1d00000, 0xb17217f7, 0x3ffe, 0x0
+ln2_lo:	.4byte	0x4c67fc0d, 0x8654361c, 0xbfce, 0x0
+
+	ENTRY(expm1l)
+	movl	16(%rsp),%ecx		/ cx <--sign&bexp(x)
+	movl	%ecx,%eax		/ ax <--sign&bexp(x)
+	andl	$0x7fff,%ecx		/ ecx <-- zero_xtnd(bexp(x))
+	cmpl	$0x3ffe,%ecx		/ Is |x| < ln(2)?
+	jb	.shortcut		/ If so, take a shortcut.
+	je	.check_tail		/ |x| may be only slightly < ln(2)
+.general_case:				/ Here, |x| > ln(2) or x is NaN
+	cmpl	$0x7fff,%ecx		/ bexp(|x|) = bexp(INF)?
+	je	.not_finite		/ if so, x is not finite
+	andl	$0xffff,%eax		/ eax <-- sign&bexp(x)
+	cmpl	$0xc006,%eax		/ x <= -128?
+	jae	1f			/ if so, simply return -1
+	cmpl	$0x400d,%ecx		/ |x| < 16384 = 2^14?
+	jb	.finite_non_special	/ if so, proceed with argument reduction
+	fldt	8(%rsp)			/ x >= 16384; x
+	fld1				/ 1, x
+	fscale				/ +Inf, x
+	fstp	%st(1)			/ +Inf
+	ret
+
+.finite_non_special:			/ -128 < x < -ln(2) || ln(2) < x < 2^14
+	fldt	8(%rsp)			/ x
+	fld	%st(0)			/ x, x
+	fldl2e				/ log2(e), x, x
+	fmulp				/ z := x*log2(e), x
+	frndint				/ [z], x
+	fst	%st(2)			/ [z], x, [z]
+	PIC_SETUP(1)
+	fldt	PIC_L(ln2_hi)		/ ln2_hi, [z], x, [z]
+	fmulp				/ [z]*ln2_hi, x, [z]
+	fsubrp	%st,%st(1)		/ x-[z]*ln2_hi, [z]
+	fldt	PIC_L(ln2_lo)		/ ln2_lo, x-[z]*ln2_hi, [z]
+	PIC_WRAPUP
+	fmul	%st(2),%st		/ [z]*ln2_lo, x-[z]*ln2_hi, [z]
+	fsubrp	%st,%st(1)		/ r := x-[z]*ln(2), [z]
+	fldl2e				/ log2(e), r, [z]
+	fmulp				/ f := r*log2(e), [z]
+	f2xm1				/ 2^f-1,[z]
+	fld1				/ 1, 2^f-1, [z]
+	faddp	%st,%st(1)		/ 2^f, [z]
+	fscale				/ e^x, [z]
+	fstp	%st(1)			/ e^x
+	fld1				/ 1, e^x
+	fsubrp	%st,%st(1)		/ e^x-1
+	ret
+
+.check_tail:
+	movl	12(%rsp),%ecx		/ ecx <-- hi_32(sgnfcnd(x))
+	cmpl	$0xb17217f7,%ecx	/ Is |x| < ln(2)?
+	ja	.finite_non_special	/ high word says |x| > ln(2)
+	jb	.shortcut		/ high word says |x| < ln(2)
+	movl	8(%rsp),%edx		/ edx <-- lo_32(x)
+	cmpl	$0xd1cf79ab,%edx	/ Is |x| slightly < ln(2)?
+	ja	.finite_non_special	/ branch if |x| slightly > ln(2)
+.shortcut:
+	/ Here, |x| < ln(2), so |z| = |x/ln(2)| < 1,
+	/ whence z is in f2xm1's domain.
+	fldt	8(%rsp)			/ x
+	fldl2e				/ log2(e), x
+	fmulp				/ z := x*log2(e)
+	f2xm1				/ 2^(x*log2(e))-1 = e^x-1
+	ret
+
+.not_finite:
+	movl	12(%rsp),%ecx		/ ecx <-- hi_32(sgnfcnd(x))
+	cmpl	$0x80000000,%ecx	/ hi_32(|x|) = hi_32(INF)?
+	jne	.NaN_or_pinf		/ if not, x is NaN
+	movl	8(%rsp),%edx		/ edx <-- lo_32(x)
+	cmpl	$0,%edx			/ lo_32(x) = 0?
+	jne	.NaN_or_pinf		/ if not, x is NaN
+	movl	16(%rsp),%eax		/ ax <-- sign&bexp((x))
+	andl	$0x8000,%eax		/ here, x is infinite, but +/-?
+	jz	.NaN_or_pinf		/ branch if x = +INF
+1:
+	fld1				/ Here, x = -inf, so return -1
+	fchs				/ also reached from above when x <= -128
+	ret
+
+.NaN_or_pinf:
+	/ Here, x = NaN or +inf, so load x and return immediately.
+	fldt	8(%rsp)
+	ret
+	.align	16
+	SET_SIZE(expm1l)
diff --git a/usr/src/lib/libm/amd64/src/fabsl.s b/usr/src/lib/libm/amd64/src/fabsl.s
new file mode 100644
index 0000000000..bdd25103ce
--- /dev/null
+++ b/usr/src/lib/libm/amd64/src/fabsl.s
@@ -0,0 +1,41 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+	.file	"fabsl.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(fabsl,function)
+#include "libm_synonyms.h"
+
+	ENTRY(fabsl)			/ the argument is read from the stack
+	fldt	8(%rsp)			/ push x
+#undef fabs
+	fabs				/ |x|
+	ret
+	.align	16
+	SET_SIZE(fabsl)
diff --git a/usr/src/lib/libm/amd64/src/floorl.s b/usr/src/lib/libm/amd64/src/floorl.s
new file mode 100644
index 0000000000..c4aa4e9bf1
--- /dev/null
+++ b/usr/src/lib/libm/amd64/src/floorl.s
@@ -0,0 +1,81 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+	.file	"floorl.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(ceill,function)
+LIBM_ANSI_PRAGMA_WEAK(floorl,function)
+#include "libm_synonyms.h"
+
+	ENTRY(ceill)			/ frndint with rounding direction forced up
+	subq	$16,%rsp		/ (%rsp) holds saved cw, 4(%rsp) is scratch
+	fstcw	(%rsp)			/ save caller cw
+	fldt	24(%rsp)		/ push x (8(%rsp) before the subq above)
+	movw	(%rsp),%cx
+	orw	$0x0c00,%cx		/ set both cw bits 11-10
+	xorw	$0x0400,%cx		/ then clear bit 10, leaving only bit 11
+	movw	%cx,4(%rsp)
+	fldcw	4(%rsp)			/ set RD = up
+	frndint
+	fstcw	4(%rsp)			/ fetch cw as it stands after frndint
+	movw	4(%rsp),%dx
+	andw	$0xf3ff,%dx		/ dx <-- that cw without bits 11-10
+	movw	(%rsp),%cx
+	andw	$0x0c00,%cx		/ cx <-- bits 11-10 of the saved cw
+	orw	%dx,%cx
+	movw	%cx,(%rsp)
+	fldcw	(%rsp)			/ restore RD
+	addq	$16,%rsp
+	ret
+	.align	16
+	SET_SIZE(ceill)
+
+
+	ENTRY(floorl)			/ frndint with rounding direction forced down
+	subq	$16,%rsp		/ (%rsp) holds saved cw, 4(%rsp) is scratch
+	fstcw	(%rsp)			/ save caller cw
+	fldt	24(%rsp)		/ push x (8(%rsp) before the subq above)
+	movw	(%rsp),%cx
+	orw	$0x0c00,%cx		/ set both cw bits 11-10
+	xorw	$0x0800,%cx		/ then clear bit 11, leaving only bit 10
+	movw	%cx,4(%rsp)
+	fldcw	4(%rsp)			/ set RD = down
+	frndint
+	fstcw	4(%rsp)			/ fetch cw as it stands after frndint
+	movw	4(%rsp),%dx
+	andw	$0xf3ff,%dx		/ dx <-- that cw without bits 11-10
+	movw	(%rsp),%cx
+	andw	$0x0c00,%cx		/ cx <-- bits 11-10 of the saved cw
+	orw	%dx,%cx
+	movw	%cx,(%rsp)
+	fldcw	(%rsp)			/ restore RD
+	addq	$16,%rsp
+	ret
+	.align	16
+	SET_SIZE(floorl)
diff --git a/usr/src/lib/libm/amd64/src/fmod.s b/usr/src/lib/libm/amd64/src/fmod.s
new file mode 100644
index 0000000000..ad7f3145fa
--- /dev/null
+++ b/usr/src/lib/libm/amd64/src/fmod.s
@@ -0,0 +1,70 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+	.file	"fmod.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(fmod,function)
+#include "libm_synonyms.h"
+#include "libm_protos.h"
+
+	ENTRY(fmod)			/ x arrives in xmm0, y in xmm1
+	push	%rbp
+	movq	%rsp,%rbp
+	subq	$16,%rsp		/ room to spill both arguments
+	movlpd	%xmm1,-16(%rbp)		/ spill y
+	movlpd	%xmm0,-8(%rbp)		/ spill x
+
+	movl	-12(%rbp),%eax		/ eax <-- hi_32(y)
+	andl	$0x7fffffff,%eax	/ eax <-- hi_32(|y|)
+	orl	-16(%rbp),%eax		/ eax <-- lo_32(y)|hi_32(|y|)
+	je	.yzero			/ y is +0 or -0
+
+	fldl	-16(%rbp)		/ y
+	fldl	-8(%rbp)		/ x
+.loop:
+	fprem				/ partial remainder
+	fstsw	%ax			/ store status word
+	andw	$0x400,%ax		/ check for incomplete reduction
+	jne	.loop			/ loop while reduction incomplete
+	fstpl	-8(%rbp)		/ pop the remainder to memory
+	movsd	-8(%rbp),%xmm0		/ return it in xmm0
+	fstp	%st(0)			/ pop y, leaving the x87 stack empty
+	leave
+	ret
+
+.yzero:
+	PIC_SETUP(1)
+	movl	$27,%edi		/ NOTE(review): presumably the error type, confirm
+	movl	$2,%eax			/ NOTE(review): looks like a vector-arg count, confirm
+	call	PIC_F(_SVID_libm_err)	/ xmm0 and xmm1 still hold x and y
+	PIC_WRAPUP
+	leave
+	ret
+	.align	4
+	SET_SIZE(fmod)
diff --git a/usr/src/lib/libm/amd64/src/fmodf.s b/usr/src/lib/libm/amd64/src/fmodf.s
new file mode 100644
index 0000000000..1da2c028f3
--- /dev/null
+++ b/usr/src/lib/libm/amd64/src/fmodf.s
@@ -0,0 +1,54 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .file "fmodf.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(fmodf,function) +#include "libm_synonyms.h" + + ENTRY(fmodf) + push %rbp + movq %rsp,%rbp + subq $16,%rsp + movss %xmm1,-8(%rbp) / spill y + movss %xmm0,-4(%rbp) / spill x + flds -8(%rbp) / load arg y + flds -4(%rbp) / load arg x +.loop: + fprem / partial remainder + fstsw %ax / store status word + andw $0x400,%ax / check whether reduction complete + jne .loop / loop while reduction incomplete + fstps -4(%rbp) / store remainder and pop it + movss -4(%rbp),%xmm0 / result into %xmm0 + fstp %st(0) / pop y + leave + ret + .align 4 + SET_SIZE(fmodf) diff --git a/usr/src/lib/libm/amd64/src/fmodl.s b/usr/src/lib/libm/amd64/src/fmodl.s new file mode 100644 index 0000000000..812ca1a450 --- /dev/null +++ b/usr/src/lib/libm/amd64/src/fmodl.s @@ -0,0 +1,46 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .file "fmodl.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(fmodl,function) +#include "libm_synonyms.h" + + ENTRY(fmodl) + fldt 24(%rsp) / load arg y + fldt 8(%rsp) / load arg x +.mod_loop: + fprem / partial fmod + fstsw %ax / store status word + andw $0x400,%ax / check for incomplete reduction + jne .mod_loop / while incomplete, do fprem again + fstp %st(1) / overwrite y with the remainder and pop + ret + .align 16 + SET_SIZE(fmodl) diff --git a/usr/src/lib/libm/amd64/src/ieee_funcl.s b/usr/src/lib/libm/amd64/src/ieee_funcl.s new file mode 100644 index 0000000000..756565de53 --- /dev/null +++ b/usr/src/lib/libm/amd64/src/ieee_funcl.s @@ -0,0 +1,122 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 
+ */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .file "ieee_funcl.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(isinfl,function) +LIBM_ANSI_PRAGMA_WEAK(isnormall,function) +LIBM_ANSI_PRAGMA_WEAK(issubnormall,function) +LIBM_ANSI_PRAGMA_WEAK(iszerol,function) +LIBM_ANSI_PRAGMA_WEAK(signbitl,function) +#include "libm_synonyms.h" + + ENTRY(isinfl) + movl 16(%rsp),%eax / ax <-- sign and bexp of x + notl %eax / ax[0..14] <-- not(bexp(x)) + andq $0x7fff,%rax / rax <-- zero_xtnd(not(bexp(x))) + jz .L6 / jump iff bexp(x) = 0x7fff + movq $0,%rax +.not_inf: + ret + +.L6: / here, (rax) = 0 and bexp(x) = 0x7fff + movl 12(%rsp),%ecx + xorl $0x80000000,%ecx / handle unsupported implicitly + orl 8(%rsp), %ecx + jnz .not_inf / significand is not exactly 0x80000000:00000000 + movq $1,%rax + ret + .align 16 + SET_SIZE(isinfl) + + ENTRY(isnormall) + / TRUE iff (x is finite, but + / neither subnormal nor zero) + / iff (msb(sgnfcnd(x) /= 0 + / & 0 < bexp(x) < 0x7fff) + movl 12(%rsp),%eax / eax <-- hi_32(sgnfcnd(x)) + andl $-0x80000000,%eax / eax[31] <-- msb(sgnfcnd(x)), + / rest_of(eax) <-- 0 + jz .L8 / jump iff msb(sgnfcnd(x)) = 0 + movl 16(%rsp),%eax / ax <-- sign and bexp of x + notl %eax / ax[0..14] <-- not(bexp(x)) + andq $0x7fff,%rax / eax <-- zero_xtnd(not(bexp(x))) + jz .L8 / jump iff bexp(x) = 0x7fff + xorq $0x7fff,%rax / rax <-- bexp(x); treat pseudo-denormal as subnormal + jz .L8 / jump iff bexp(x) = 0 + movq $1,%rax +.L8: + ret + .align 16 + SET_SIZE(isnormall) + + ENTRY(issubnormall) + / TRUE iff (bexp(x) = 0 & + / msb(sgnfcnd(x)) = 0 & frac(x) /= 0) + movl 12(%rsp),%eax / eax <-- hi_32(sgnfcnd(x)) + testl $0x80000000,%eax / eax[31] = msb(sgnfcnd(x)); + / set ZF if it's 0. 
+ jz .may_be_subnorm / jump iff msb(sgnfcnd(x)) = 0 +.not_subnorm: + movq $0,%rax + ret +.may_be_subnorm: + testl $0x7fff,16(%rsp) / set ZF iff bexp(x) = 0 + jnz .not_subnorm / jump iff bexp(x) /= 0 + orl 8(%rsp),%eax / (eax) = 0 iff sgnfcnd(x) = 0 + jz .not_subnorm / x is +/-0, not subnormal + movq $1,%rax + ret + .align 16 + SET_SIZE(issubnormall) + + ENTRY(iszerol) + movl 16(%rsp),%eax / ax <-- sign and bexp of x + andl $0x7fff,%eax / eax <-- zero_xtnd(bexp(x)) + jz .may_be_zero / jump iff bexp(x) = 0 +.not_zero: + movq $0,%rax + ret +.may_be_zero: / here, (eax) = 0 + orl 12(%rsp),%eax / is hi_32(sgnfcnd(x)) = 0? + jnz .not_zero / jump iff hi_32(sgnfcnd(x)) /= 0 + orl 8(%rsp),%eax / is lo_32(sgnfcnd(x)) = 0? + jnz .not_zero / jump iff lo_32(sgnfcnd(x)) /= 0 + movq $1,%rax + ret + .align 16 + SET_SIZE(iszerol) + + ENTRY(signbitl) + movl 16(%rsp),%eax / eax[15] <-- sign_bit(x) + shrl $15,%eax / eax <-- zero_xtnd(sign_bit(x)) + andq $1,%rax / drop the bits above the sign bit + ret + .align 16 + SET_SIZE(signbitl) diff --git a/usr/src/lib/libm/amd64/src/ilogbl.s b/usr/src/lib/libm/amd64/src/ilogbl.s new file mode 100644 index 0000000000..ba21ace4f9 --- /dev/null +++ b/usr/src/lib/libm/amd64/src/ilogbl.s @@ -0,0 +1,87 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .file "ilogbl.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(ilogbl,function) +#include "libm_synonyms.h" +#include "xpg6.h" + + .data + .align 16 +two63: .4byte 0x0,0x43d00000 / 2**63 + + ENTRY(ilogbl) + movq 16(%rsp),%rax / eax <-- sign and bexp of x + andq $0x7fff,%rax / eax <-- bexp(x) + jz .bexp_0 / jump iff x is 0 or subnormal + / here, biased exponent is non-zero + testl $0x80000000,12(%rsp) / test msb of hi_32(sgnfcnd(x)) + jz .ilogbl_not_finite / jump if unsupported format + cmpq $0x7fff,%rax + je .ilogbl_not_finite / bexp(x) = 0x7fff: inf or NaN + subq $16383,%rax / unbias exponent by 16383 = 0x3fff + ret + +.ilogbl_not_finite: + movq $0x7fffffff,%rax / x is NaN/inf/unsup + jmp 0f / share the flag-raising tail below + +.bexp_0: + movq 8(%rsp),%rax / rax <-- sgnfcnd(x) + orq %rax,%rax / set ZF iff sgnfcnd(x) = 0 + jnz .ilogbl_subnorm / jump iff x is subnormal + movq $-2147483647,%rax / x is +/-0, so return 1-2^31 +0: + PIC_SETUP(0) + PIC_G_LOAD(movzwq,__xpg6,rcx) + PIC_WRAPUP + andl $_C99SUSv3_ilogb_0InfNaN_raises_invalid,%ecx + cmpl $0,%ecx + je 1f / flag not set in __xpg6: return quietly + fldz + fdivp %st,%st(0) / raise invalid as per SUSv3 +1: + ret + + +.ilogbl_subnorm: / subnormal or pseudo-denormal input + fldt 8(%rsp) / push x, setting D-flag + PIC_SETUP(1) + fmull PIC_L(two63) / x*2**63 + PIC_WRAPUP + subq $16,%rsp + fstpt (%rsp) / spill the scaled value and pop it + movq $0x7fff,%rax + andq 8(%rsp),%rax / eax <-- sign and bexp of x*2**63 + subq $16445,%rax / unbias it by (16,383 + 63) + addq $16,%rsp + ret + .align 16 + SET_SIZE(ilogbl) diff --git a/usr/src/lib/libm/amd64/src/libm_inlines.h b/usr/src/lib/libm/amd64/src/libm_inlines.h new file mode 100644 index 0000000000..d69c79bfcb --- /dev/null 
+++ b/usr/src/lib/libm/amd64/src/libm_inlines.h @@ -0,0 +1,221 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * Copyright 2011, Richard Lowe. + */ + +/* Functions in this file are duplicated in locallibm.il. 
Keep them in sync */ + +#ifndef _LIBM_INLINES_H +#define _LIBM_INLINES_H + +#ifdef __GNUC__ + +#ifdef __cplusplus +extern "C" { +#endif + +#include <sys/types.h> +#include <sys/ieeefp.h> + +extern __inline__ float +__inline_sqrtf(float a) +{ + float ret; + + __asm__ __volatile__("sqrtss %1, %0\n\t" : "=x" (ret) : "x" (a)); + return (ret); +} + +extern __inline__ double +__inline_sqrt(double a) +{ + double ret; + + __asm__ __volatile__("sqrtsd %1, %0\n\t" : "=x" (ret) : "x" (a)); + return (ret); +} + +extern __inline__ double +__ieee754_sqrt(double a) +{ + return (__inline_sqrt(a)); +} + +/* + * 00 - 24 bits + * 01 - reserved + * 10 - 53 bits + * 11 - 64 bits + */ +extern __inline__ int +__swapRP(int i) +{ + int ret; + uint16_t cw; + + __asm__ __volatile__("fstcw %0\n\t" : "=m" (cw)); + + ret = (cw >> 8) & 0x3; /* previous setting, bits 8-9 */ + cw = (cw & 0xfcff) | ((i & 0x3) << 8); /* install the new setting */ + + __asm__ __volatile__("fldcw %0\n\t" : : "m" (cw)); + + return (ret); +} + +/* + * 00 - Round to nearest, with even preferred + * 01 - Round down + * 10 - Round up + * 11 - Chop + */ +extern __inline__ enum fp_direction_type +__swap87RD(enum fp_direction_type i) +{ + int ret; + uint16_t cw; + + __asm__ __volatile__("fstcw %0\n\t" : "=m" (cw)); + + ret = (cw >> 10) & 0x3; /* previous setting, bits 10-11 */ + cw = (cw & 0xf3ff) | ((i & 0x3) << 10); /* install the new setting */ + + __asm__ __volatile__("fldcw %0\n\t" : : "m" (cw)); + + return (ret); +} + +extern __inline__ int +abs(int i) +{ + int ret; + __asm__ __volatile__( + "movl %1, %0\n\t" + "negl %1\n\t" + "cmovnsl %1, %0\n\t" /* take -i when it is non-negative */ + : "=r" (ret), "+r" (i) + : + : "cc"); + return (ret); +} + +extern __inline__ double +copysign(double d1, double d2) +{ + double tmpd; + + __asm__ __volatile__( + "movd %3, %1\n\t" + "andpd %1, %0\n\t" /* d1 &= mask: clear d1's sign bit */ + "andnpd %2, %1\n\t" /* tmpd = ~mask & d2: d2's sign bit */ + "orpd %1, %0\n\t" + : "+&x" (d1), "=&x" (tmpd) + : "x" (d2), "r" (0x7fffffffffffffff)); + + return (d1); +} + +extern __inline__ double +fabs(double d) +{ + double tmp; + + __asm__ __volatile__( + "movd %2, %1\n\t" + "andpd %1, %0" /* clear the sign bit */ + : "+x" (d), "=&x" (tmp) + : "r" 
(0x7fffffffffffffff)); + + return (d); +} + +extern __inline__ float +fabsf(float d) +{ + __asm__ __volatile__( + "andpd %1, %0" /* NOTE(review): int mask bound to an "x" operand, unlike fabs() above -- confirm */ + : "+x" (d) + : "x" (0x7fffffff)); + + return (d); +} + +extern __inline__ int +finite(double d) +{ + long ret = 0x7fffffffffffffff; + uint64_t tmp; + + __asm__ __volatile__( + "movq %2, %1\n\t" + "andq %1, %0\n\t" /* ret = bits of |d| */ + "movq $0x7ff0000000000000, %1\n\t" + "subq %1, %0\n\t" /* negative iff |d| is below +inf */ + "shrq $63, %0\n\t" /* sign bit becomes the 0/1 result */ + : "+r" (ret), "=r" (tmp) + : "x" (d) + : "cc"); + + return (ret); +} + +extern __inline__ int +signbit(double d) +{ + long ret; + __asm__ __volatile__( + "movmskpd %1, %0\n\t" + "andq $1, %0\n\t" /* keep only the low lane's sign */ + : "=r" (ret) + : "x" (d) + : "cc"); + return (ret); +} + +extern __inline__ double +sqrt(double d) +{ + return (__inline_sqrt(d)); +} + +extern __inline__ float +sqrtf(float f) +{ + return (__inline_sqrtf(f)); +} + +#ifdef __cplusplus +} +#endif + +#endif /* __GNUC__ */ + +#endif /* _LIBM_INLINES_H */ diff --git a/usr/src/lib/libm/amd64/src/locallibm.il b/usr/src/lib/libm/amd64/src/locallibm.il new file mode 100644 index 0000000000..65921d3c97 --- /dev/null +++ b/usr/src/lib/libm/amd64/src/locallibm.il @@ -0,0 +1,178 @@ +/ +/ CDDL HEADER START +/ +/ The contents of this file are subject to the terms of the +/ Common Development and Distribution License (the "License"). +/ You may not use this file except in compliance with the License. +/ +/ You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +/ or http://www.opensolaris.org/os/licensing. +/ See the License for the specific language governing permissions +/ and limitations under the License. +/ +/ When distributing Covered Code, this CDDL HEADER in each +/ file and the License file at usr/src/OPENSOLARIS.LICENSE. +/ If applicable, add the following below this CDDL HEADER, with the +/ fields enclosed by brackets "[]" replaced with your own identifying +/ information: Portions Copyright [yyyy] [name of copyright owner] +/ +/ CDDL HEADER END +/ +/ +/ Copyright 2011 Nexenta Systems, Inc. 
All rights reserved. +/ +/ Copyright 2006 Sun Microsystems, Inc. All rights reserved. +/ Use is subject to license terms. +/ + +/ Portions of this file are duplicated as GCC inline assembly in +/ libm_inlines.h. Keep them in sync. + + .inline __ieee754_sqrt,0 + sqrtsd %xmm0,%xmm0 + .end + + .inline __inline_sqrtf,0 + sqrtss %xmm0,%xmm0 + .end + + .inline __inline_sqrt,0 + sqrtsd %xmm0,%xmm0 + .end + +/ +/ 00 - 24 bits +/ 01 - reserved +/ 10 - 53 bits +/ 11 - 64 bits +/ + .inline __swapRP,0 + subq $16,%rsp + fstcw (%rsp) / fetch the current control word + movw (%rsp),%ax + movw %ax,%cx + andw $0xfcff,%cx / clear bits 8-9 + andl $0x3,%edi + shlw $8,%di / new setting into bits 8-9 + orw %di,%cx + movl %ecx,(%rsp) + fldcw (%rsp) / install the updated control word + shrw $8,%ax + andq $0x3,%rax / return the previous setting + addq $16,%rsp + .end + +/ +/ 00 - Round to nearest, with even preferred +/ 01 - Round down +/ 10 - Round up +/ 11 - Chop +/ + .inline __swap87RD,0 + subq $16,%rsp + fstcw (%rsp) / fetch the current control word + movw (%rsp),%ax + movw %ax,%cx + andw $0xf3ff,%cx / clear bits 10-11 + andl $0x3,%edi + shlw $10,%di / new setting into bits 10-11 + orw %di,%cx + movl %ecx,(%rsp) + fldcw (%rsp) / install the updated control word + shrw $10,%ax + andq $0x3,%rax / return the previous setting + addq $16,%rsp + .end + + .inline abs,0 + cmpl $0,%edi + jge 1f + negl %edi / argument was negative +1: movl %edi,%eax + .end + + .inline __copysign,0 + movq $0x7fffffffffffffff,%rax + movdq %rax,%xmm2 + andpd %xmm2,%xmm0 / clear the sign bit of the first argument + andnpd %xmm1,%xmm2 / isolate the sign bit of the second argument + orpd %xmm2,%xmm0 + .end + + .inline __fabs,0 + movq $0x7fffffffffffffff,%rax + movdq %rax,%xmm1 + andpd %xmm1,%xmm0 / clear the sign bit + .end + + .inline __fabsf,0 + movl $0x7fffffff,%eax + movdl %eax,%xmm1 + andps %xmm1,%xmm0 / clear the sign bit + .end + + .inline _finite,0 + subq $16,%rsp + movlpd %xmm0,(%rsp) + movq (%rsp),%rcx / rcx <-- bits of the argument + movq $0x7fffffffffffffff,%rax + andq %rcx,%rax / rax <-- bits with the sign cleared + movq $0x7ff0000000000000,%rcx + subq %rcx,%rax / negative iff below the +inf pattern + shrq $63,%rax / sign bit becomes the 0/1 result + addq $16,%rsp + .end + + .inline __signbit,0 + movmskpd %xmm0,%eax + andq $1,%rax / keep only the low lane's sign + .end + + .inline __sqrt,0 + sqrtsd %xmm0,%xmm0 + .end + + .inline __sqrtf,0 + sqrtss %xmm0,%xmm0 + .end + + .inline __f95_signf,0 + movl (%rdi),%eax + movl (%rsi),%ecx + andl $0x7fffffff,%eax / magnitude bits of *(%rdi) + andl $0x80000000,%ecx / sign bit of *(%rsi) + orl %ecx,%eax + movdl %eax,%xmm0 
+ .end + + .inline __f95_sign,0 + movq (%rsi),%rax + movq $0x7fffffffffffffff,%rdx + shrq $63,%rax + shlq $63,%rax / rax <-- sign bit of *(%rsi) only + andq (%rdi),%rdx / rdx <-- magnitude bits of *(%rdi) + orq %rdx,%rax + movdq %rax,%xmm0 + .end + + .inline __r_sign,0 + movl $0x7fffffff,%eax + movl $0x80000000,%edx + andl (%rdi),%eax / eax <-- magnitude bits of *(%rdi) + cmpl (%rsi),%edx + cmovel %eax,%edx / *(%rsi) is exactly -0.0: edx loses its sign bit + andl (%rsi),%edx / edx <-- sign to apply (0 in the -0.0 case) + orl %edx,%eax + movdl %eax,%xmm0 + .end + + .inline __d_sign,0 + movq $0x7fffffffffffffff,%rax + movq $0x8000000000000000,%rdx + andq (%rdi),%rax / rax <-- magnitude bits of *(%rdi) + cmpq (%rsi),%rdx + cmoveq %rax,%rdx / *(%rsi) is exactly -0.0: rdx loses its sign bit + andq (%rsi),%rdx / rdx <-- sign to apply (0 in the -0.0 case) + orq %rdx,%rax + movdq %rax,%xmm0 + .end diff --git a/usr/src/lib/libm/amd64/src/log10l.s b/usr/src/lib/libm/amd64/src/log10l.s new file mode 100644 index 0000000000..bad1507378 --- /dev/null +++ b/usr/src/lib/libm/amd64/src/log10l.s @@ -0,0 +1,41 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + + .file "log10l.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(log10l,function) +#include "libm_synonyms.h" + + ENTRY(log10l) + fldlg2 / push log10(2) + fldt 8(%rsp) / st = arg, st(1) = log10(2) + fyl2x / st = log10(arg) = log10(2)*log2(arg) + ret + .align 16 + SET_SIZE(log10l) diff --git a/usr/src/lib/libm/amd64/src/log2l.s b/usr/src/lib/libm/amd64/src/log2l.s new file mode 100644 index 0000000000..c22ee9e288 --- /dev/null +++ b/usr/src/lib/libm/amd64/src/log2l.s @@ -0,0 +1,41 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + + .file "log2l.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(log2l,function) +#include "libm_synonyms.h" + + ENTRY(log2l) + fld1 / push 1.0 + fldt 8(%rsp) / push x + fyl2x / st = 1.0*log2(arg); the multiplier is popped + ret + .align 16 + SET_SIZE(log2l) diff --git a/usr/src/lib/libm/amd64/src/logl.s b/usr/src/lib/libm/amd64/src/logl.s new file mode 100644 index 0000000000..31d0521738 --- /dev/null +++ b/usr/src/lib/libm/amd64/src/logl.s @@ -0,0 +1,41 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + + .file "logl.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(logl,function) +#include "libm_synonyms.h" + + ENTRY(logl) + fldln2 / push loge(2) + fldt 8(%rsp) / st = arg, st(1) = loge(2) + fyl2x / st = ln(arg) = loge(2)*log2(arg) + ret + .align 16 + SET_SIZE(logl) diff --git a/usr/src/lib/libm/amd64/src/powl.s b/usr/src/lib/libm/amd64/src/powl.s new file mode 100644 index 0000000000..8c879c72bc --- /dev/null +++ b/usr/src/lib/libm/amd64/src/powl.s @@ -0,0 +1,420 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + + .file "powl.s" + +/ Special cases: +/ +/ x ** 0 is 1 +/ 1 ** y is 1 (C99) +/ x ** NaN is NaN +/ NaN ** y (except 0) is NaN +/ x ** 1 is x +/ +-(|x| > 1) ** +inf is +inf +/ +-(|x| > 1) ** -inf is +0 +/ +-(|x| < 1) ** +inf is +0 +/ +-(|x| < 1) ** -inf is +inf +/ (-1) ** +-inf is +1 (C99) +/ +0 ** +y (except 0, NaN) is +0 +/ -0 ** +y (except 0, NaN, odd int) is +0 +/ +0 ** -y (except 0, NaN) is +inf (z flag) +/ -0 ** -y (except 0, NaN, odd int) is +inf (z flag) +/ -0 ** y (odd int) is - (+0 ** y) +/ +inf ** +y (except 0, NaN) is +inf +/ +inf ** -y (except 0, NaN) is +0 +/ -inf ** +-y (except 0, NaN) is -0 ** -+y (NO z flag) +/ x ** -1 is 1/x +/ x ** 2 is x*x +/ -x ** y (an integer) is (-1)**(y) * (+x)**(y) +/ x ** y (x negative & y not integer) is NaN (i flag) + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(powl,function) +#include "libm_synonyms.h" +#include "xpg6.h" + +#undef fabs + + .data + .align 16 +negzero: + .float -0.0 +half: + .float 0.5 +one: + .float 1.0 +negone: + .float -1.0 +two: + .float 2.0 +Snan: + .4byte 0x7f800001 +pinfinity: + .4byte 0x7f800000 +ninfinity: + .4byte 0xff800000 + + + ENTRY(powl) + pushq %rbp + movq %rsp,%rbp + PIC_SETUP(1) + + fldt 16(%rbp) / x + fxam / determine class of x + fnstsw %ax / store status in %ax + movb %ah,%dh / %dh <- condition code of x + + fldt 32(%rbp) / y , x + fxam / determine class of y + fnstsw %ax / store status in %ax + movb %ah,%dl / %dl <- condition code of y + + call .pow_main /// LOCAL + PIC_WRAPUP + leave + ret + +.pow_main: + / x ** 0 is 1 + movb %dl,%cl + andb $0x45,%cl + cmpb $0x40,%cl / C3=1 C2=0 C1=? 
C0=0 when +-0 + jne 1f + fstp %st(0) / x + fstp %st(0) / stack empty + fld1 / 1 + ret + +1: / y is not zero + PIC_G_LOAD(movzwq,__xpg6,rax) + andl $_C99SUSv3_pow_treats_Inf_as_an_even_int,%eax + cmpl $0,%eax + je 1f + + / C99: 1 ** anything is 1 + fld1 / 1, y, x + fucomip %st(2),%st / y, x + jp 1f / so that pow(NaN1,NaN2) returns NaN2 + jne 1f + fstp %st(0) / x + ret + +1: + / x ** NaN is NaN + movb %dl,%cl + andb $0x45,%cl + cmpb $0x01,%cl / C3=0 C2=0 C1=? C0=1 when +-NaN + jne 1f + fstp %st(1) / y + ret + +1: / y is not NaN + / NaN ** y (except 0) is NaN + movb %dh,%cl + andb $0x45,%cl + cmpb $0x01,%cl / C3=0 C2=0 C1=? C0=1 when +-NaN + jne 1f + fstp %st(0) / x + ret + +1: / x is not NaN + / x ** 1 is x + fld1 / 1, y, x + fcomip %st(1),%st / y, x + jne 1f + fstp %st(0) / x + ret + +1: / y is not 1 + / +-(|x| > 1) ** +inf is +inf + / +-(|x| > 1) ** -inf is +0 + / +-(|x| < 1) ** +inf is +0 + / +-(|x| < 1) ** -inf is +inf + / +-(|x| = 1) ** +-inf is NaN + movb %dl,%cl + andb $0x47,%cl + cmpb $0x05,%cl / C3=0 C2=1 C1=0 C0=1 when +inf + je .yispinf + cmpb $0x07,%cl / C3=0 C2=1 C1=1 C0=1 when -inf + je .yisninf + + / +0 ** +y (except 0, NaN) is +0 + / -0 ** +y (except 0, NaN, odd int) is +0 + / +0 ** -y (except 0, NaN) is +inf (z flag) + / -0 ** -y (except 0, NaN, odd int) is +inf (z flag) + / -0 ** y (odd int) is - (+0 ** y) + movb %dh,%cl + andb $0x47,%cl + cmpb $0x40,%cl / C3=1 C2=0 C1=0 C0=0 when +0 + je .xispzero + cmpb $0x42,%cl / C3=1 C2=0 C1=1 C0=0 when -0 + je .xisnzero + + / +inf ** +y (except 0, NaN) is +inf + / +inf ** -y (except 0, NaN) is +0 + / -inf ** +-y (except 0, NaN) is -0 ** -+y (NO z flag) + movb %dh,%cl + andb $0x47,%cl + cmpb $0x05,%cl / C3=0 C2=1 C1=0 C0=1 when +inf + je .xispinf + cmpb $0x07,%cl / C3=0 C2=1 C1=1 C0=1 when -inf + je .xisninf + + / x ** -1 is 1/x + flds PIC_L(negone) / -1, y, x + fcomip %st(1),%st / y, x + jne 1f + fld %st(1) / x , y , x + fdivrs PIC_L(one) / 1/x , y , x + jmp .signok / check for over/underflow + +1: / y is not 
-1 + / x ** 2 is x*x + flds PIC_L(two) / 2, y , x + fcomip %st(1),%st / y, x + jne 1f + fld %st(1) / x , y , x + fld %st(0) / x , x , y , x + fmulp / x^2 , y , x + jmp .signok / check for over/underflow + +1: / y is not 2 + / x ** 1/2 is sqrt(x) + flds PIC_L(half) / 1/2, y , x + fcomip %st(1),%st / y, x + jne 1f + fld %st(1) / x , y , x + fsqrt / sqrt(x) , y , x + jmp .signok / check for over/underflow + +1: / y is not 1/2 + / make copies of x & y + fld %st(1) / x , y , x + fld %st(1) / y , x , y , x + + / -x ** y (an integer) is (-1)**(y) * (+x)**(y) + / x ** y (x negative & y not integer) is NaN + movl $0,%ecx / track whether to flip sign of result + fldz / 0 , y , x , y , x + fcomip %st(2),%st / compare 0 with %st(2) + jb .merge / 0 < x + / x < 0 + call .y_is_int + cmpl $0,%ecx + jne 1f + / x < 0 & y != int so x**y = NaN (i flag) + fstp %st(0) / x , y , x + fstp %st(0) / y , x + fstp %st(0) / x + fstp %st(0) / stack empty + fldz + fdiv %st,%st(0) / 0/0 + ret + +1: / x < 0 & y = int + fxch / x , y , y , x + fchs / px = -x , y , y , x + fxch / y , px , y , x +.merge: + / px > 0 + fxch / px , y , y , x + + / x**y = exp(y*ln(x)) + fyl2x / t=y*log2(px) , y , x + fld %st(0) / t , t , y , x + frndint / [t] , t , y , x + fxch / t , [t] , y , x + fucomi %st(1),%st + je 1f / t is integral + fsub %st(1),%st / t-[t] , [t] , y , x + f2xm1 / 2**(t-[t])-1 , [t] , y , x + fadds PIC_L(one) / 2**(t-[t]) , [t] , y , x + fscale / 2**t = px**y , [t] , y , x + jmp 2f +1: + fstp %st(0) / t=[t] , y , x + fld1 / 1 , t , y , x + fscale / 1*2**t = x**y , t , y , x +2: + fstp %st(1) / x**y , y , x + cmpl $1,%ecx + jne .signok + fchs / change sign since x<0 & y = odd int +.signok: + fstp %st(2) / y , x**y + fstp %st(0) / x**y + ret + +/ ------------------------------------------------------------------------ + +.xispinf: + fldz + fcomip %st(1),%st / compare 0 with %st(1) + jb .retpinf / 0 < y + jmp .retpzero / y < 0 + +.xisninf: + / -inf ** +-y is -0 ** -+y + fchs / -y , x + flds PIC_L(negzero) 
/ -0 , -y , x + fstp %st(2) / -y , -0 + jmp .xisnzero + +.yispinf: + fld %st(1) / x , y , x + fabs / |x| , y , x + flds PIC_L(one) / 1 , |x| , y , x + fcomip %st(1),%st / |x| , y , x + fstp %st(0) / y , x + je .retponeorinvalid / |x| == 1 + jb .retpinf / 1 < |x| + jmp .retpzero / |x| < 1 + +.yisninf: + fld %st(1) / x , y , x + fabs / |x| , y , x + flds PIC_L(one) / 1 , |x| , y , x + fcomip %st(1),%st / |x| , y , x + fstp %st(0) / y , x + je .retponeorinvalid / |x| == 1 + jb .retpzero / 1 < |x| + jmp .retpinf / |x| < 1 + +.xispzero: + / y cannot be 0 or NaN ; stack has y , x + fldz / 0 , y , x + fcomip %st(1),%st / compare 0 with %st(1) + jb .retpzero / 0 < y + / x = +0 & y < 0 so x**y = +inf + jmp .retpinfzflag / ret +inf & z flag + +.xisnzero: + / y cannot be 0 or NaN ; stack has y , x + call .y_is_int + cmpl $1,%ecx + jne 1f / y is not an odd integer + / y is an odd integer + fldz + fcomip %st(1),%st / compare 0 with %st(1) + jb .retnzero / 0 < y + / x = -0 & y < 0 (odd int) return -inf (z flag) + / x = -inf & y != 0 or NaN return -inf (NO z flag) + movb %dh,%cl + andb $0x45,%cl + cmpb $0x05,%cl / C3=0 C2=1 C1=? C0=1 when +-inf + je 2f + fdiv %st,%st(1) / y / x, x (raise z flag) +2: + fstp %st(0) / x + fstp %st(0) / stack empty + flds PIC_L(ninfinity) / -inf + ret + +1: / y is not an odd integer + fldz + fcomip %st(1),%st / compare 0 with %st(1) + jb .retpzero / 0 < y + / x = -0 & y < 0 (not odd int) return +inf (z flag) + / x = -inf & y not 0 or NaN return +inf (NO z flag) + movb %dh,%cl + andb $0x45,%cl + cmpb $0x05,%cl / C3=0 C2=1 C1=? 
C0=1 when +-inf + jne .retpinfzflag / ret +inf & divide-by-0 flag + jmp .retpinf / return +inf (NO z flag) + +.retpzero: + fstp %st(0) / x + fstp %st(0) / stack empty + fldz / +0 + ret + +.retnzero: + fstp %st(0) / x + fstp %st(0) / stack empty + flds PIC_L(negzero) / -0 + ret + +.retponeorinvalid: + PIC_G_LOAD(movzwq,__xpg6,rax) + andl $_C99SUSv3_pow_treats_Inf_as_an_even_int,%eax + cmpl $0,%eax + je 1f + fstp %st(0) / x + fstp %st(0) / stack empty + fld1 / 1 + ret + +1: + fstp %st(0) / x + fstp %st(0) / stack empty + flds PIC_L(Snan) / Q NaN (i flag) + fwait + ret + +.retpinf: + fstp %st(0) / x + fstp %st(0) / stack empty + flds PIC_L(pinfinity) / +inf + ret + +.retpinfzflag: + fstp %st(0) / x + fstp %st(0) / stack empty + fldz + fdivrs PIC_L(one) / 1/0 + ret + +/ Set %ecx to 2 if y is an even integer, 1 if y is an odd integer, +/ 0 otherwise. Assume y is not zero. Do not raise inexact or modify +/ %edx. +.y_is_int: + movl 40(%rbp),%eax + andl $0x7fff,%eax / exponent of y + cmpl $0x403f,%eax + jae 1f / |y| >= 2^64, an even int + cmpl $0x3fff,%eax + jb 2f / |y| < 1, can't be an int + movl %eax,%ecx + subl $0x403e,%ecx + negl %ecx / 63 - unbiased exponent of y + movq 32(%rbp),%rax + bsfq %rax,%rax / index of least sig. 1 bit + cmpl %ecx,%eax + jb 2f / a 1 bit lies below the units position + ja 1f / units bit is 0: even + movl $1,%ecx + ret +1: + movl $2,%ecx + ret +2: + xorl %ecx,%ecx + ret + .align 16 + SET_SIZE(powl) diff --git a/usr/src/lib/libm/amd64/src/remainder.s b/usr/src/lib/libm/amd64/src/remainder.s new file mode 100644 index 0000000000..746590cfcc --- /dev/null +++ b/usr/src/lib/libm/amd64/src/remainder.s @@ -0,0 +1,80 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .file "remainder.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(remainder,function) +#include "libm_synonyms.h" +#include "libm_protos.h" + + ENTRY(remainder) + push %rbp + movq %rsp,%rbp + subq $16,%rsp + movlpd %xmm1,-16(%rbp) / spill y + movlpd %xmm0,-8(%rbp) / spill x + + ucomisd %xmm0,%xmm1 / if x or y is NaN, use fprem1 + jp 1f + + movl -12(%rbp),%eax / eax <-- hi_32(y) + andl $0x7fffffff,%eax / eax <-- hi_32(|y|) + orl -16(%rbp),%eax / eax <-- lo_32(y)|hi_32(|y|) + je .yzero_or_xinf / y is +/-0 + + movl -4(%rbp),%eax / eax <-- hi_32(x) + andl $0x7fffffff,%eax / eax <-- hi_32(|x|) + cmpl $0x7ff00000,%eax + jne 1f + cmpl $0,-8(%rbp) / lo_32(x) = 0 as well: x is +/-inf + je .yzero_or_xinf +1: + fldl -16(%rbp) / y + fldl -8(%rbp) / x +.rem_loop: + fprem1 / partial remainder + fstsw %ax / store status word + andw $0x400,%ax / check for incomplete reduction + jne .rem_loop / while incomplete, do fprem1 again + fstpl -8(%rbp) / store remainder and pop it + movsd -8(%rbp),%xmm0 / result into %xmm0 + fstp %st(0) / pop y + leave + ret + +.yzero_or_xinf: + PIC_SETUP(1) + movl $28,%edi / NOTE(review): presumably the remainder error code for _SVID_libm_err -- confirm + movl $2,%eax + call PIC_F(_SVID_libm_err) + PIC_WRAPUP + leave + ret + .align 4 + SET_SIZE(remainder) diff --git a/usr/src/lib/libm/amd64/src/remainderf.s b/usr/src/lib/libm/amd64/src/remainderf.s new file mode 100644 index 0000000000..0c96de8539 --- /dev/null +++ b/usr/src/lib/libm/amd64/src/remainderf.s @@ -0,0 +1,54 @@ +/* + * CDDL HEADER START 
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+	.file "remainderf.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(remainderf,function)
+#include "libm_synonyms.h"
+
+	ENTRY(remainderf)	/* x arrives in %xmm0, y in %xmm1; result leaves in %xmm0 */
+	push	%rbp
+	movq	%rsp,%rbp
+	subq	$16,%rsp	/* scratch: y at -8(%rbp), x at -4(%rbp) */
+	movss	%xmm1,-8(%rbp)	/* spill y so the x87 unit can load it */
+	movss	%xmm0,-4(%rbp)	/* spill x likewise */
+	flds	-8(%rbp)	/ load arg y
+	flds	-4(%rbp)	/ load arg x
+.rem_loop:
+	fprem1	/ partial remainder
+	fstsw	%ax	/ store status word
+	andw	$0x400,%ax	/ check whether reduction complete
+	jne	.rem_loop	/ while reduction incomplete, do fprem1
+	fstps	-4(%rbp)	/* store the remainder as a float and pop it; y is still stacked */
+	movss	-4(%rbp),%xmm0	/* move the result into the return register */
+	fstp	%st(0)	/* pop y so the x87 stack is left empty */
+	leave
+	ret
+	.align	4
+	SET_SIZE(remainderf)
diff --git a/usr/src/lib/libm/amd64/src/remainderl.s b/usr/src/lib/libm/amd64/src/remainderl.s
new file mode 100644
index 0000000000..22cbe76e13
--- /dev/null
+++ b/usr/src/lib/libm/amd64/src/remainderl.s
@@ -0,0 +1,46 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+	.file "remainderl.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(remainderl,function)
+#include "libm_synonyms.h"
+
+	ENTRY(remainderl)	/* both long double arguments are read from the stack; no frame is set up */
+	fldt	24(%rsp)	/ load arg y
+	fldt	8(%rsp)	/ load arg x
+.rem_loop:
+	fprem1	/ partial remainder
+	fstsw	%ax	/ store status word
+	andw	$0x400,%ax	/ check whether reduction complete
+	jne	.rem_loop	/ while reduction incomplete, do fprem1
+	fstp	%st(1)	/* overwrite y with the remainder and pop: the result is alone in %st(0) */
+	ret
+	.align	16
+	SET_SIZE(remainderl)
diff --git a/usr/src/lib/libm/amd64/src/remquol.s b/usr/src/lib/libm/amd64/src/remquol.s
new file mode 100644
index 0000000000..999864fe9f
--- /dev/null
+++ b/usr/src/lib/libm/amd64/src/remquol.s
@@ -0,0 +1,68 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+	.file "remquol.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(remquol,function)
+#include "libm_synonyms.h"
+#include "libm_protos.h"
+	ENTRY(remquol)	/* x and y are read from the stack; the quotient bits are stored through %rdi */
+	fldt	24(%rsp)	/ load arg y
+	fldt	8(%rsp)	/ load arg x
+.Lreml_loop:
+	fprem1	/ partial remainder
+	fstsw	%ax	/ store status word
+	andw	$0x400,%ax	/ check whether reduction complete
+	jne	.Lreml_loop	/ while reduction incomplete, do fprem1
+	fstsw	%ax	/* re-read the status word: the andw above destroyed %ax */
+	fwait
+	fstp	%st(1)	/* overwrite y with the remainder and pop: the result is alone in %st(0) */
+	movw	%ax,%dx
+	andw	$0x4000,%dx	/ get C3
+	sarw	$13,%dx	/* move C3 from bit 14 down to bit 1 */
+	movw	%ax,%cx
+	andw	$0x100,%cx	/ get C0
+	sarw	$6,%cx	/* move C0 from bit 8 down to bit 2 */
+	addw	%cx,%dx
+	andw	$0x200,%ax	/ get C1
+	sarw	$9,%ax	/* move C1 from bit 9 down to bit 0 */
+	addw	%dx,%ax	/* %ax = C0*4 + C3*2 + C1 */
+	cwtl	/* widen %ax to %eax */
+	movl	16(%rsp),%edx	/ sign and bexp of x
+	movl	32(%rsp),%ecx	/ sign and bexp of y
+	andl	$0x8000,%edx	/ edx <- sign(x)
+	andl	$0x8000,%ecx	/ ecx <- sign(y)
+	cmpl	%edx,%ecx
+	je	1f	/* same sign: keep the quotient bits positive */
+	negl	%eax	/ negative n
+1:
+	movl	%eax,(%rdi)	/ last 3 significant bits of quotient
+	ret
+	.align	16
+	SET_SIZE(remquol)
diff --git a/usr/src/lib/libm/amd64/src/rintl.s b/usr/src/lib/libm/amd64/src/rintl.s
new file mode 100644
index 0000000000..dd8bc3ecb3
--- /dev/null
+++ b/usr/src/lib/libm/amd64/src/rintl.s
@@ -0,0 +1,41 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+	.file "rintl.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(rintl,function)
+#include "libm_synonyms.h"
+
+	ENTRY(rintl)	/* the long double argument is read from the stack; no frame is set up */
+	fldt	8(%rsp)	/ load x
+	frndint	/ [x], per rounding mode
+	fwait
+	ret	/* the rounded value is left in %st(0) */
+	.align	16
+	SET_SIZE(rintl)
diff --git a/usr/src/lib/libm/amd64/src/rndintl.s b/usr/src/lib/libm/amd64/src/rndintl.s
new file mode 100644
index 0000000000..2aafbde131
--- /dev/null
+++ b/usr/src/lib/libm/amd64/src/rndintl.s
@@ -0,0 +1,147 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+	.file "rndintl.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(aintl,function)
+LIBM_ANSI_PRAGMA_WEAK(irintl,function)
+LIBM_ANSI_PRAGMA_WEAK(anintl,function)
+LIBM_ANSI_PRAGMA_WEAK(nintl,function)
+#include "libm_synonyms.h"
+
+#undef fabs
+
+	ENTRY(aintl)	/* round the long double argument with both rounding-control bits set */
+	movq	%rsp,%rax	/* %rax keeps the entry %rsp as the base for the argument and scratch slots */
+	subq	$16,%rsp
+	fstcw	-8(%rax)	/* save the caller's control word */
+	fldt	8(%rax)	/* load x */
+	movw	-8(%rax),%cx
+	orw	$0x0c00,%cx	/* set both rounding-control bits */
+	movw	%cx,-4(%rax)
+	fldcw	-4(%rax)	/ set RD = to_zero
+	frndint
+	fstcw	-4(%rax)	/* re-read the control word as it stands now */
+	movw	-4(%rax),%dx
+	andw	$0xf3ff,%dx	/* current word with the rounding bits cleared */
+	movw	-8(%rax),%cx
+	andw	$0x0c00,%cx	/* the caller's rounding bits only */
+	orw	%dx,%cx	/* merge: current word, caller's rounding mode */
+	movw	%cx,-8(%rax)
+	fldcw	-8(%rax)	/ restore RD
+	addq	$16,%rsp
+	ret	/* the rounded value is left in %st(0) */
+	.align	16
+	SET_SIZE(aintl)
+
+	ENTRY(irintl)	/* convert the long double argument to a 32-bit integer, returned in %rax */
+	movq	%rsp,%rcx	/* %rcx keeps the entry %rsp */
+	subq	$16,%rsp
+	fldt	8(%rcx)	/ load x
+	fistpl	-8(%rcx)	/ [x]
+	fwait
+	movslq	-8(%rcx),%rax	/* sign-extend the stored 32-bit value into the return register */
+	addq	$16,%rsp
+	ret
+	.align	16
+	SET_SIZE(irintl)
+
+	.data
+	.align	16
+half:	.float	0.5	/* constant compared against |x-[x]| in anintl */
+
+	ENTRY(anintl)
+.Lanintl:	/* local entry point; nintl below calls it directly */
+	movq	%rsp,%rcx	/* %rcx keeps the entry %rsp */
+	subq	$16,%rsp
+	fstcw	-8(%rcx)	/* save the caller's control word */
+	fldt	8(%rcx)	/* load x */
+	movw	-8(%rcx),%dx
+	andw	$0xf3ff,%dx	/* clear both rounding-control bits */
+	movw	%dx,-4(%rcx)
+	fldcw	-4(%rcx)	/ set RD = to_nearest
+	fld	%st(0)	/* duplicate x so the original survives frndint */
+	frndint	/ [x],x
+	fstcw	-4(%rcx)	/* re-read the control word as it stands now */
+	movw	-4(%rcx),%dx
+	andw	$0xf3ff,%dx	/* current word with the rounding bits cleared */
+	movw	-8(%rcx),%ax
+	andw	$0x0c00,%ax	/* the caller's rounding bits only */
+	orw	%dx,%ax	/* merge: current word, caller's rounding mode */
+	movw	%ax,-8(%rcx)
+	fldcw	-8(%rcx)	/ restore RD
+	fucomi	%st(1),%st	/ check if x is already an integer
+	jp	.L0	/* unordered compare (a NaN is involved): take the common exit */
+	je	.L0
+	fxch	/ x,[x]
+	fsub	%st(1),%st	/ x-[x],[x]
+	fabs	/ |x-[x]|,[x]
+	PIC_SETUP(1)
+	flds	PIC_L(half)
+	fcomip	%st(1),%st	/ compare 0.5 with |x-[x]|
+	PIC_WRAPUP
+	je	.halfway	/ if 0.5 = |x-[x]| goto halfway,
+				/ most cases will not take branch.
+.L0:
+	addq	$16,%rsp
+	fstp	%st(0)	/* pop the top entry; the entry beneath it becomes the return value */
+	ret
+.halfway:
+	/ x = n+0.5, recompute anint(x) as x+sign(x)*0.5
+	fldt	8(%rcx)	/ x, 0.5, [x]
+	movw	16(%rcx),%ax	/ sign+exp part of x
+	andw	$0x8000,%ax	/ look at sign bit
+	jnz	.x_neg
+	faddp	/* x+0.5,[x] */
+	addq	$16,%rsp
+	fstp	%st(1)	/* overwrite [x] with the sum and pop: the result is alone in %st(0) */
+	ret
+.x_neg:
+	/ here, x is negative, so return x-0.5
+	fsubp	%st,%st(1)	/ x-0.5,[x]
+	addq	$16,%rsp
+	fstp	%st(1)	/* overwrite [x] with the difference and pop */
+	ret
+	.align	16
+	SET_SIZE(anintl)
+
+	ENTRY(nintl)	/* run the anintl code on the argument, then convert to a 32-bit integer in %rax */
+	pushq	%rbp
+	movq	%rsp,%rbp
+	subq	$16,%rsp	/* scratch for the integer result at -8(%rbp) */
+	pushq	24(%rbp)	/* copy the upper 8 bytes of the caller's long double argument */
+	pushq	16(%rbp)	/* copy the lower 8 bytes; the copy sits where .Lanintl expects its argument */
+	call	.Lanintl	/// LOCAL
+	fistpl	-8(%rbp)	/* convert the returned value to a 32-bit integer and pop it */
+	fwait
+	movslq	-8(%rbp),%rax	/* sign-extend into the return register */
+	leave
+	ret
+	.align	16
+	SET_SIZE(nintl)
diff --git a/usr/src/lib/libm/amd64/src/scalbnl.s b/usr/src/lib/libm/amd64/src/scalbnl.s
new file mode 100644
index 0000000000..cc7dfb3560
--- /dev/null
+++ b/usr/src/lib/libm/amd64/src/scalbnl.s
@@ -0,0 +1,45 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+	.file "scalbnl.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(scalbnl,function)
+#include "libm_synonyms.h"
+
+	ENTRY(scalbnl)	/* the integer argument arrives in %edi, the long double on the stack */
+	subq	$16,%rsp	/* scratch slot so the x87 unit can load the integer from memory */
+	movl	%edi,(%rsp)
+	fildl	(%rsp)	/* push the integer argument as a floating-point value */
+	fldt	24(%rsp)	/* load x: 8(%rsp) at entry plus the 16 bytes reserved above */
+	addq	$16,%rsp
+	fscale	/* scale %st(0) by the power of two held in %st(1) */
+	fstp	%st(1)	/* overwrite the integer with the scaled value and pop: result alone in %st(0) */
+	ret
+	.align	16
+	SET_SIZE(scalbnl)
diff --git a/usr/src/lib/libm/amd64/src/sqrtl.s b/usr/src/lib/libm/amd64/src/sqrtl.s
new file mode 100644
index 0000000000..478f814968
--- /dev/null
+++ b/usr/src/lib/libm/amd64/src/sqrtl.s
@@ -0,0 +1,40 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+	.file "sqrtl.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(sqrtl,function)
+#include "libm_synonyms.h"
+
+	ENTRY(sqrtl)	/* the long double argument is read from the stack; no frame is set up */
+	fldt	8(%rsp)	/* load x */
+	fsqrt	/* replace %st(0) with its square root */
+	ret	/* the result is left in %st(0) */
+	.align	16
+	SET_SIZE(sqrtl)
diff --git a/usr/src/lib/libm/common/C/_SVID_error.c b/usr/src/lib/libm/common/C/_SVID_error.c
new file mode 100644
index 0000000000..68f55bb81c
--- /dev/null
+++ b/usr/src/lib/libm/common/C/_SVID_error.c
@@ -0,0 +1,976 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include "libm.h"
+#include "xpg6.h"	/* __xpg6 */
+#include <stdio.h>	/* stdout, fflush */
+#include <float.h>	/* DBL_MAX, DBL_MIN */
+#include <unistd.h>	/* write */
+#if defined(__x86)
+#include <ieeefp.h>	/* fpclass, FP_QNAN */
+#undef	fp_class
+#define	fp_class	fpclass	/* on x86, spell the classification call as fpclass() */
+#define	fp_quiet	FP_QNAN	/* and the quiet-NaN class as FP_QNAN */
+#endif
+#include <errno.h>	/* errno, EDOM, ERANGE */
+#undef fflush
+#include <sys/isa_defs.h>	/* presumably the source of _LITTLE_ENDIAN tested below - confirm */
+
+/* INDENT OFF */
+/*
+ * Report libm exception error according to System V Interface Definition
+ * (SVID).
+ * Error mapping: + * 1 -- acos(|x|>1) + * 2 -- asin(|x|>1) + * 3 -- atan2(+-0,+-0) + * 4 -- hypot overflow + * 5 -- cosh overflow + * 6 -- exp overflow + * 7 -- exp underflow + * 8 -- y0(0) + * 9 -- y0(-ve) + * 10-- y1(0) + * 11-- y1(-ve) + * 12-- yn(0) + * 13-- yn(-ve) + * 14-- lgamma(finite) overflow + * 15-- lgamma(-integer) + * 16-- log(0) + * 17-- log(x<0) + * 18-- log10(0) + * 19-- log10(x<0) + * 20-- pow(0.0,0.0) + * 21-- pow(x,y) overflow + * 22-- pow(x,y) underflow + * 23-- pow(0,negative) + * 24-- pow(neg,non-integral) + * 25-- sinh(finite) overflow + * 26-- sqrt(negative) + * 27-- fmod(x,0) + * 28-- remainder(x,0) + * 29-- acosh(x<1) + * 30-- atanh(|x|>1) + * 31-- atanh(|x|=1) + * 32-- scalb overflow + * 33-- scalb underflow + * 34-- j0(|x|>X_TLOSS) + * 35-- y0(x>X_TLOSS) + * 36-- j1(|x|>X_TLOSS) + * 37-- y1(x>X_TLOSS) + * 38-- jn(|x|>X_TLOSS, n) + * 39-- yn(x>X_TLOSS, n) + * 40-- gamma(finite) overflow + * 41-- gamma(-integer) + * 42-- pow(NaN,0.0) return NaN for SVID/XOPEN + * 43-- log1p(-1) + * 44-- log1p(x<-1) + * 45-- logb(0) + * 46-- nextafter overflow + * 47-- scalb(x,inf) + */ +/* INDENT ON */ + +static double setexception(int, double); + +static const union { + unsigned x[2]; + double d; +} C[] = { +#ifdef _LITTLE_ENDIAN + { 0xffffffff, 0x7fffffff }, + { 0x54442d18, 0x400921fb }, +#else + { 0x7fffffff, 0xffffffff }, + { 0x400921fb, 0x54442d18 }, +#endif +}; + +#define NaN C[0].d +#define PI_RZ C[1].d + +#define __HI(x) ((unsigned *)&x)[HIWORD] +#define __LO(x) ((unsigned *)&x)[LOWORD] +#undef Inf +#define Inf HUGE_VAL + +double +_SVID_libm_err(double x, double y, int type) { + struct exception exc; + double t, w, ieee_retval = 0; + enum version lib_version = _lib_version; + int iy; + + /* force libm_ieee behavior in SUSv3 mode */ + if ((__xpg6 & _C99SUSv3_math_errexcept) != 0) + lib_version = libm_ieee; + if (lib_version == c_issue_4) { + (void) fflush(stdout); + } + exc.arg1 = x; + exc.arg2 = y; + switch (type) { + case 1: + /* acos(|x|>1) */ 
+ exc.type = DOMAIN; + exc.name = "acos"; + ieee_retval = setexception(3, 1.0); + exc.retval = 0.0; + if (lib_version == strict_ansi) { + errno = EDOM; + } else if (!matherr(&exc)) { + if (lib_version == c_issue_4) { + (void) write(2, "acos: DOMAIN error\n", 19); + } + errno = EDOM; + } + break; + case 2: + /* asin(|x|>1) */ + exc.type = DOMAIN; + exc.name = "asin"; + exc.retval = 0.0; + ieee_retval = setexception(3, 1.0); + if (lib_version == strict_ansi) { + errno = EDOM; + } else if (!matherr(&exc)) { + if (lib_version == c_issue_4) { + (void) write(2, "asin: DOMAIN error\n", 19); + } + errno = EDOM; + } + break; + case 3: + /* atan2(+-0,+-0) */ + exc.arg1 = y; + exc.arg2 = x; + exc.type = DOMAIN; + exc.name = "atan2"; + ieee_retval = copysign(1.0, x) == 1.0 ? y : + copysign(PI_RZ + DBL_MIN, y); + exc.retval = 0.0; + if (lib_version == strict_ansi) { + errno = EDOM; + } else if (!matherr(&exc)) { + if (lib_version == c_issue_4) { + (void) write(2, "atan2: DOMAIN error\n", 20); + } + errno = EDOM; + } + break; + case 4: + /* hypot(finite,finite) overflow */ + exc.type = OVERFLOW; + exc.name = "hypot"; + ieee_retval = Inf; + if (lib_version == c_issue_4) + exc.retval = HUGE; + else + exc.retval = HUGE_VAL; + if (lib_version == strict_ansi) + errno = ERANGE; + else if (!matherr(&exc)) + errno = ERANGE; + break; + case 5: + /* cosh(finite) overflow */ + exc.type = OVERFLOW; + exc.name = "cosh"; + ieee_retval = setexception(2, 1.0); + if (lib_version == c_issue_4) + exc.retval = HUGE; + else + exc.retval = HUGE_VAL; + if (lib_version == strict_ansi) + errno = ERANGE; + else if (!matherr(&exc)) + errno = ERANGE; + break; + case 6: + /* exp(finite) overflow */ + exc.type = OVERFLOW; + exc.name = "exp"; + ieee_retval = setexception(2, 1.0); + if (lib_version == c_issue_4) + exc.retval = HUGE; + else + exc.retval = HUGE_VAL; + if (lib_version == strict_ansi) + errno = ERANGE; + else if (!matherr(&exc)) + errno = ERANGE; + break; + case 7: + /* exp(finite) underflow */ + 
exc.type = UNDERFLOW; + exc.name = "exp"; + ieee_retval = setexception(1, 1.0); + exc.retval = 0.0; + if (lib_version == strict_ansi) + errno = ERANGE; + else if (!matherr(&exc)) + errno = ERANGE; + break; + case 8: + /* y0(0) = -inf */ + exc.type = DOMAIN; /* should be SING for IEEE */ + exc.name = "y0"; + ieee_retval = setexception(0, -1.0); + if (lib_version == c_issue_4) + exc.retval = -HUGE; + else + exc.retval = -HUGE_VAL; + if (lib_version == strict_ansi) { + errno = EDOM; + } else if (!matherr(&exc)) { + if (lib_version == c_issue_4) { + (void) write(2, "y0: DOMAIN error\n", 17); + } + errno = EDOM; + } + break; + case 9: + /* y0(x<0) = NaN */ + exc.type = DOMAIN; + exc.name = "y0"; + ieee_retval = setexception(3, 1.0); + if (lib_version == c_issue_4) + exc.retval = -HUGE; + else + exc.retval = -HUGE_VAL; + if (lib_version == strict_ansi) { + errno = EDOM; + } else if (!matherr(&exc)) { + if (lib_version == c_issue_4) { + (void) write(2, "y0: DOMAIN error\n", 17); + } + errno = EDOM; + } + break; + case 10: + /* y1(0) = -inf */ + exc.type = DOMAIN; /* should be SING for IEEE */ + exc.name = "y1"; + ieee_retval = setexception(0, -1.0); + if (lib_version == c_issue_4) + exc.retval = -HUGE; + else + exc.retval = -HUGE_VAL; + if (lib_version == strict_ansi) { + errno = EDOM; + } else if (!matherr(&exc)) { + if (lib_version == c_issue_4) { + (void) write(2, "y1: DOMAIN error\n", 17); + } + errno = EDOM; + } + break; + case 11: + /* y1(x<0) = NaN */ + exc.type = DOMAIN; + exc.name = "y1"; + ieee_retval = setexception(3, 1.0); + if (lib_version == c_issue_4) + exc.retval = -HUGE; + else + exc.retval = -HUGE_VAL; + if (lib_version == strict_ansi) { + errno = EDOM; + } else if (!matherr(&exc)) { + if (lib_version == c_issue_4) { + (void) write(2, "y1: DOMAIN error\n", 17); + } + errno = EDOM; + } + break; + case 12: + /* yn(n,0) = -inf */ + exc.type = DOMAIN; /* should be SING for IEEE */ + exc.name = "yn"; + ieee_retval = setexception(0, -1.0); + if (lib_version == 
c_issue_4) + exc.retval = -HUGE; + else + exc.retval = -HUGE_VAL; + if (lib_version == strict_ansi) { + errno = EDOM; + } else if (!matherr(&exc)) { + if (lib_version == c_issue_4) { + (void) write(2, "yn: DOMAIN error\n", 17); + } + errno = EDOM; + } + break; + case 13: + /* yn(x<0) = NaN */ + exc.type = DOMAIN; + exc.name = "yn"; + ieee_retval = setexception(3, 1.0); + if (lib_version == c_issue_4) + exc.retval = -HUGE; + else + exc.retval = -HUGE_VAL; + if (lib_version == strict_ansi) { + errno = EDOM; + } else if (!matherr(&exc)) { + if (lib_version == c_issue_4) { + (void) write(2, "yn: DOMAIN error\n", 17); + } + errno = EDOM; + } + break; + case 14: + /* lgamma(finite) overflow */ + exc.type = OVERFLOW; + exc.name = "lgamma"; + ieee_retval = setexception(2, 1.0); + if (lib_version == c_issue_4) + exc.retval = HUGE; + else + exc.retval = HUGE_VAL; + if (lib_version == strict_ansi) + errno = ERANGE; + else if (!matherr(&exc)) + errno = ERANGE; + break; + case 15: + /* lgamma(-integer) or lgamma(0) */ + exc.type = SING; + exc.name = "lgamma"; + ieee_retval = setexception(0, 1.0); + if (lib_version == c_issue_4) + exc.retval = HUGE; + else + exc.retval = HUGE_VAL; + if (lib_version == strict_ansi) { + errno = EDOM; + } else if (!matherr(&exc)) { + if (lib_version == c_issue_4) { + (void) write(2, "lgamma: SING error\n", 19); + } + errno = EDOM; + } + break; + case 16: + /* log(0) */ + exc.type = SING; + exc.name = "log"; + ieee_retval = setexception(0, -1.0); + if (lib_version == c_issue_4) + exc.retval = -HUGE; + else + exc.retval = -HUGE_VAL; + if (lib_version == strict_ansi) { + errno = ERANGE; + } else if (!matherr(&exc)) { + if (lib_version == c_issue_4) { + (void) write(2, "log: SING error\n", 16); + errno = EDOM; + } else { + errno = ERANGE; + } + } + break; + case 17: + /* log(x<0) */ + exc.type = DOMAIN; + exc.name = "log"; + ieee_retval = setexception(3, 1.0); + if (lib_version == c_issue_4) + exc.retval = -HUGE; + else + exc.retval = -HUGE_VAL; + if 
(lib_version == strict_ansi) { + errno = EDOM; + } else if (!matherr(&exc)) { + if (lib_version == c_issue_4) { + (void) write(2, "log: DOMAIN error\n", 18); + } + errno = EDOM; + } + break; + case 18: + /* log10(0) */ + exc.type = SING; + exc.name = "log10"; + ieee_retval = setexception(0, -1.0); + if (lib_version == c_issue_4) + exc.retval = -HUGE; + else + exc.retval = -HUGE_VAL; + if (lib_version == strict_ansi) { + errno = ERANGE; + } else if (!matherr(&exc)) { + if (lib_version == c_issue_4) { + (void) write(2, "log10: SING error\n", 18); + errno = EDOM; + } else { + errno = ERANGE; + } + } + break; + case 19: + /* log10(x<0) */ + exc.type = DOMAIN; + exc.name = "log10"; + ieee_retval = setexception(3, 1.0); + if (lib_version == c_issue_4) + exc.retval = -HUGE; + else + exc.retval = -HUGE_VAL; + if (lib_version == strict_ansi) { + errno = EDOM; + } else if (!matherr(&exc)) { + if (lib_version == c_issue_4) { + (void) write(2, "log10: DOMAIN error\n", 20); + } + errno = EDOM; + } + break; + case 20: + /* pow(0.0,0.0) */ + /* error only if lib_version == c_issue_4 */ + exc.type = DOMAIN; + exc.name = "pow"; + exc.retval = 0.0; + ieee_retval = 1.0; + if (lib_version != c_issue_4) { + exc.retval = 1.0; + } else if (!matherr(&exc)) { + (void) write(2, "pow(0,0): DOMAIN error\n", 23); + errno = EDOM; + } + break; + case 21: + /* pow(x,y) overflow */ + exc.type = OVERFLOW; + exc.name = "pow"; + exc.retval = (lib_version == c_issue_4)? 
HUGE : HUGE_VAL; + if (signbit(x)) { + t = rint(y); + if (t == y) { + w = rint(0.5 * y); + if (t != w + w) { /* y is odd */ + exc.retval = -exc.retval; + } + } + } + ieee_retval = setexception(2, exc.retval); + if (lib_version == strict_ansi) + errno = ERANGE; + else if (!matherr(&exc)) + errno = ERANGE; + break; + case 22: + /* pow(x,y) underflow */ + exc.type = UNDERFLOW; + exc.name = "pow"; + exc.retval = 0.0; + if (signbit(x)) { + t = rint(y); + if (t == y) { + w = rint(0.5 * y); + if (t != w + w) /* y is odd */ + exc.retval = -exc.retval; + } + } + ieee_retval = setexception(1, exc.retval); + if (lib_version == strict_ansi) + errno = ERANGE; + else if (!matherr(&exc)) + errno = ERANGE; + break; + case 23: + /* (+-0)**neg */ + exc.type = DOMAIN; + exc.name = "pow"; + ieee_retval = setexception(0, 1.0); + { + int ahy, k, j, yisint, ly, hx; + /* INDENT OFF */ + /* + * determine if y is an odd int when x = -0 + * yisint = 0 ... y is not an integer + * yisint = 1 ... y is an odd int + * yisint = 2 ... 
y is an even int + */ + /* INDENT ON */ + hx = __HI(x); + ahy = __HI(y)&0x7fffffff; + ly = __LO(y); + + yisint = 0; + if (ahy >= 0x43400000) { + yisint = 2; /* even integer y */ + } else if (ahy >= 0x3ff00000) { + k = (ahy >> 20) - 0x3ff; /* exponent */ + if (k > 20) { + j = ly >> (52 - k); + if ((j << (52 - k)) == ly) + yisint = 2 - (j & 1); + } else if (ly == 0) { + j = ahy >> (20 - k); + if ((j << (20 - k)) == ahy) + yisint = 2 - (j & 1); + } + } + if (hx < 0 && yisint == 1) + ieee_retval = -ieee_retval; + } + if (lib_version == c_issue_4) + exc.retval = 0.0; + else + exc.retval = -HUGE_VAL; + if (lib_version == strict_ansi) { + errno = EDOM; + } else if (!matherr(&exc)) { + if (lib_version == c_issue_4) { + (void) write(2, "pow(0,neg): DOMAIN error\n", + 25); + } + errno = EDOM; + } + break; + case 24: + /* neg**non-integral */ + exc.type = DOMAIN; + exc.name = "pow"; + ieee_retval = setexception(3, 1.0); + if (lib_version == c_issue_4) + exc.retval = 0.0; + else + exc.retval = ieee_retval; /* X/Open allow NaN */ + if (lib_version == strict_ansi) { + errno = EDOM; + } else if (!matherr(&exc)) { + if (lib_version == c_issue_4) { + (void) write(2, + "neg**non-integral: DOMAIN error\n", 32); + } + errno = EDOM; + } + break; + case 25: + /* sinh(finite) overflow */ + exc.type = OVERFLOW; + exc.name = "sinh"; + ieee_retval = copysign(Inf, x); + if (lib_version == c_issue_4) + exc.retval = x > 0.0 ? HUGE : -HUGE; + else + exc.retval = x > 0.0 ? 
HUGE_VAL : -HUGE_VAL; + if (lib_version == strict_ansi) + errno = ERANGE; + else if (!matherr(&exc)) + errno = ERANGE; + break; + case 26: + /* sqrt(x<0) */ + exc.type = DOMAIN; + exc.name = "sqrt"; + ieee_retval = setexception(3, 1.0); + if (lib_version == c_issue_4) + exc.retval = 0.0; + else + exc.retval = ieee_retval; /* quiet NaN */ + if (lib_version == strict_ansi) { + errno = EDOM; + } else if (!matherr(&exc)) { + if (lib_version == c_issue_4) { + (void) write(2, "sqrt: DOMAIN error\n", 19); + } + errno = EDOM; + } + break; + case 27: + /* fmod(x,0) */ + exc.type = DOMAIN; + exc.name = "fmod"; + if (fp_class(x) == fp_quiet) + ieee_retval = NaN; + else + ieee_retval = setexception(3, 1.0); + if (lib_version == c_issue_4) + exc.retval = x; + else + exc.retval = ieee_retval; + if (lib_version == strict_ansi) { + errno = EDOM; + } else if (!matherr(&exc)) { + if (lib_version == c_issue_4) { + (void) write(2, "fmod: DOMAIN error\n", 20); + } + errno = EDOM; + } + break; + case 28: + /* remainder(x,0) */ + exc.type = DOMAIN; + exc.name = "remainder"; + if (fp_class(x) == fp_quiet) + ieee_retval = NaN; + else + ieee_retval = setexception(3, 1.0); + exc.retval = NaN; + if (lib_version == strict_ansi) { + errno = EDOM; + } else if (!matherr(&exc)) { + if (lib_version == c_issue_4) { + (void) write(2, "remainder: DOMAIN error\n", + 24); + } + errno = EDOM; + } + break; + case 29: + /* acosh(x<1) */ + exc.type = DOMAIN; + exc.name = "acosh"; + ieee_retval = setexception(3, 1.0); + exc.retval = NaN; + if (lib_version == strict_ansi) { + errno = EDOM; + } else if (!matherr(&exc)) { + if (lib_version == c_issue_4) { + (void) write(2, "acosh: DOMAIN error\n", 20); + } + errno = EDOM; + } + break; + case 30: + /* atanh(|x|>1) */ + exc.type = DOMAIN; + exc.name = "atanh"; + ieee_retval = setexception(3, 1.0); + exc.retval = NaN; + if (lib_version == strict_ansi) { + errno = EDOM; + } else if (!matherr(&exc)) { + if (lib_version == c_issue_4) { + (void) write(2, "atanh: 
DOMAIN error\n", 20); + } + errno = EDOM; + } + break; + case 31: + /* atanh(|x|=1) */ + exc.type = SING; + exc.name = "atanh"; + ieee_retval = setexception(0, x); + exc.retval = ieee_retval; + if (lib_version == strict_ansi) { + errno = ERANGE; + } else if (!matherr(&exc)) { + if (lib_version == c_issue_4) { + (void) write(2, "atanh: SING error\n", 18); + errno = EDOM; + } else { + errno = ERANGE; + } + } + break; + case 32: + /* scalb overflow; SVID also returns +-HUGE_VAL */ + exc.type = OVERFLOW; + exc.name = "scalb"; + ieee_retval = setexception(2, x); + exc.retval = x > 0.0 ? HUGE_VAL : -HUGE_VAL; + if (lib_version == strict_ansi) + errno = ERANGE; + else if (!matherr(&exc)) + errno = ERANGE; + break; + case 33: + /* scalb underflow */ + exc.type = UNDERFLOW; + exc.name = "scalb"; + ieee_retval = setexception(1, x); + exc.retval = ieee_retval; /* +-0.0 */ + if (lib_version == strict_ansi) + errno = ERANGE; + else if (!matherr(&exc)) + errno = ERANGE; + break; + case 34: + /* j0(|x|>X_TLOSS) */ + exc.type = TLOSS; + exc.name = "j0"; + exc.retval = 0.0; + ieee_retval = y; + if (lib_version == strict_ansi) { + errno = ERANGE; + } else if (!matherr(&exc)) { + if (lib_version == c_issue_4) { + (void) write(2, exc.name, 2); + (void) write(2, ": TLOSS error\n", 14); + } + errno = ERANGE; + } + break; + case 35: + /* y0(x>X_TLOSS) */ + exc.type = TLOSS; + exc.name = "y0"; + exc.retval = 0.0; + ieee_retval = y; + if (lib_version == strict_ansi) { + errno = ERANGE; + } else if (!matherr(&exc)) { + if (lib_version == c_issue_4) { + (void) write(2, exc.name, 2); + (void) write(2, ": TLOSS error\n", 14); + } + errno = ERANGE; + } + break; + case 36: + /* j1(|x|>X_TLOSS) */ + exc.type = TLOSS; + exc.name = "j1"; + exc.retval = 0.0; + ieee_retval = y; + if (lib_version == strict_ansi) { + errno = ERANGE; + } else if (!matherr(&exc)) { + if (lib_version == c_issue_4) { + (void) write(2, exc.name, 2); + (void) write(2, ": TLOSS error\n", 14); + } + errno = ERANGE; + } + 
break; + case 37: + /* y1(x>X_TLOSS) */ + exc.type = TLOSS; + exc.name = "y1"; + exc.retval = 0.0; + ieee_retval = y; + if (lib_version == strict_ansi) { + errno = ERANGE; + } else if (!matherr(&exc)) { + if (lib_version == c_issue_4) { + (void) write(2, exc.name, 2); + (void) write(2, ": TLOSS error\n", 14); + } + errno = ERANGE; + } + break; + case 38: + /* jn(|x|>X_TLOSS) */ + /* incorrect ieee value: ieee should never be here */ + exc.type = TLOSS; + exc.name = "jn"; + exc.retval = 0.0; + ieee_retval = 0.0; /* shall not be used */ + if (lib_version == strict_ansi) { + errno = ERANGE; + } else if (!matherr(&exc)) { + if (lib_version == c_issue_4) { + (void) write(2, exc.name, 2); + (void) write(2, ": TLOSS error\n", 14); + } + errno = ERANGE; + } + break; + case 39: + /* yn(x>X_TLOSS) */ + /* incorrect ieee value: ieee should never be here */ + exc.type = TLOSS; + exc.name = "yn"; + exc.retval = 0.0; + ieee_retval = 0.0; /* shall not be used */ + if (lib_version == strict_ansi) { + errno = ERANGE; + } else if (!matherr(&exc)) { + if (lib_version == c_issue_4) { + (void) write(2, exc.name, 2); + (void) write(2, ": TLOSS error\n", 14); + } + errno = ERANGE; + } + break; + case 40: + /* gamma(finite) overflow */ + exc.type = OVERFLOW; + exc.name = "gamma"; + ieee_retval = setexception(2, 1.0); + if (lib_version == c_issue_4) + exc.retval = HUGE; + else + exc.retval = HUGE_VAL; + if (lib_version == strict_ansi) + errno = ERANGE; + else if (!matherr(&exc)) + errno = ERANGE; + break; + case 41: + /* gamma(-integer) or gamma(0) */ + exc.type = SING; + exc.name = "gamma"; + ieee_retval = setexception(0, 1.0); + if (lib_version == c_issue_4) + exc.retval = HUGE; + else + exc.retval = HUGE_VAL; + if (lib_version == strict_ansi) { + errno = EDOM; + } else if (!matherr(&exc)) { + if (lib_version == c_issue_4) { + (void) write(2, "gamma: SING error\n", 18); + } + errno = EDOM; + } + break; + case 42: + /* pow(NaN,0.0) */ + /* error if lib_version == c_issue_4 or ansi_1 */ + 
exc.type = DOMAIN; + exc.name = "pow"; + exc.retval = x; + ieee_retval = 1.0; + if (lib_version == strict_ansi) { + exc.retval = 1.0; + } else if (!matherr(&exc)) { + if ((lib_version == c_issue_4) || (lib_version == ansi_1)) + errno = EDOM; + } + break; + case 43: + /* log1p(-1) */ + exc.type = SING; + exc.name = "log1p"; + ieee_retval = setexception(0, -1.0); + if (lib_version == c_issue_4) + exc.retval = -HUGE; + else + exc.retval = -HUGE_VAL; + if (lib_version == strict_ansi) { + errno = ERANGE; + } else if (!matherr(&exc)) { + if (lib_version == c_issue_4) { + (void) write(2, "log1p: SING error\n", 18); + errno = EDOM; + } else { + errno = ERANGE; + } + } + break; + case 44: + /* log1p(x<-1) */ + exc.type = DOMAIN; + exc.name = "log1p"; + ieee_retval = setexception(3, 1.0); + exc.retval = ieee_retval; + if (lib_version == strict_ansi) { + errno = EDOM; + } else if (!matherr(&exc)) { + if (lib_version == c_issue_4) { + (void) write(2, "log1p: DOMAIN error\n", 20); + } + errno = EDOM; + } + break; + case 45: + /* logb(0) */ + exc.type = DOMAIN; + exc.name = "logb"; + ieee_retval = setexception(0, -1.0); + exc.retval = -HUGE_VAL; + if (lib_version == strict_ansi) + errno = EDOM; + else if (!matherr(&exc)) + errno = EDOM; + break; + case 46: + /* nextafter overflow */ + exc.type = OVERFLOW; + exc.name = "nextafter"; + /* + * The value as returned by setexception is +/-DBL_MAX in + * round-to-{zero,-/+Inf} mode respectively, which is not + * usable. + */ + (void) setexception(2, x); + ieee_retval = x > 0 ? Inf : -Inf; + exc.retval = x > 0 ? 
HUGE_VAL : -HUGE_VAL; + if (lib_version == strict_ansi) + errno = ERANGE; + else if (!matherr(&exc)) + errno = ERANGE; + break; + case 47: + /* scalb(x,inf) */ + iy = ((int *)&y)[HIWORD]; + if (lib_version == c_issue_4) + /* SVID3: ERANGE in all cases */ + errno = ERANGE; + else if ((x == 0.0 && iy > 0) || (!finite(x) && iy < 0)) + /* EDOM for scalb(0,+inf) or scalb(inf,-inf) */ + errno = EDOM; + exc.retval = ieee_retval = ((iy < 0)? x / -y : x * y); + break; + } + switch (lib_version) { + case c_issue_4: + case ansi_1: + case strict_ansi: + return (exc.retval); + /* NOTREACHED */ + default: + return (ieee_retval); + } + /* NOTREACHED */ +} + +static double +setexception(int n, double x) { + /* + * Build the return value for exception class n by evaluating an + * expression of that class: one / zero, DBL_MIN times DBL_MIN, + * DBL_MAX times DBL_MAX, or zero times Inf. For n = 0, 1 and 2 + * copysign() gives the result the sign of x; x is not used for + * n = 3. + * + * n = + * 0 division by zero + * 1 underflow + * 2 overflow + * 3 invalid + * + * NOTE(review): the switch has no default case, so retv is returned + * without ever being assigned when n is outside 0..3. + * NOTE(review): one, zero and retv are volatile, presumably so the + * arithmetic is carried out at run time rather than folded by the + * compiler -- confirm. + */ + volatile double one = 1.0, zero = 0.0, retv; + + switch (n) { + case 0: /* division by zero */ + retv = copysign(one / zero, x); + break; + case 1: /* underflow */ + retv = DBL_MIN * copysign(DBL_MIN, x); + break; + case 2: /* overflow */ + retv = DBL_MAX * copysign(DBL_MAX, x); + break; + case 3: /* invalid */ + retv = zero * Inf; /* for Cheetah */ + break; + } + return (retv); +} diff --git a/usr/src/lib/libm/common/C/_TBL_atan.c b/usr/src/lib/libm/common/C/_TBL_atan.c new file mode 100644 index 0000000000..a7b0f6e3ff --- /dev/null +++ b/usr/src/lib/libm/common/C/_TBL_atan.c @@ -0,0 +1,138 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include "libm_protos.h" + +/* + * Let y[j] = _TBL_atan[2j], atan_y[j] = _TBL_atan[2j+1], j = 0, 1, ..., 95. + * {y[j], 0 <= j < 96} is a set of break points in (1/8, 8) chosen so that + * the high part of y[j] is very close to 0x3fc08000 + (j << 16), + * and atan_y[j] = atan(y[j]) rounded has relative error bounded by 2^-60. + * + * -- K.C. Ng, 10/17/2004 + */ + +const double _TBL_atan[] = { +1.28906287871928065814e-01, 1.28199318484201185697e-01, +1.36718905591866640714e-01, 1.35876480966603985223e-01, +1.44531257606217988787e-01, 1.43537301152401930437e-01, +1.52343679482641575218e-01, 1.51181262880709432750e-01, +1.60156177403962790562e-01, 1.58807537535115006477e-01, +1.67968772982362929413e-01, 1.66415323534856884891e-01, +1.75781211596017922227e-01, 1.74003563682464612583e-01, +1.83593807762862160082e-01, 1.81571767039387044207e-01, +1.91406205589629646591e-01, 1.89118806085245338977e-01, +1.99218440148815872925e-01, 1.96643947167121080355e-01, +2.07031180070658488157e-01, 2.04147078126891479144e-01, +2.14843557086546094181e-01, 2.11626624363759674452e-01, +2.22656308649619494311e-01, 2.19082566659412503185e-01, +2.30468759807905931858e-01, 2.26513550670145669130e-01, +2.38281413377399470255e-01, 2.33919360814280885563e-01, +2.46093763828156536499e-01, 2.41298839969374956382e-01, +2.57812599322508773092e-01, 2.52318074018685223336e-01, +2.73437443946477509726e-01, 2.66912935433335718471e-01, +2.89062532292519769328e-01, 2.81392462451501401688e-01, +3.04687577351389293767e-01, 2.95751756530947318424e-01, +3.20312405527377053183e-01, 
3.09986305565206343715e-01, +3.35937715576634265968e-01, 3.24092664204967739749e-01, +3.51562621385942464247e-01, 3.38066230870244233131e-01, +3.67187719833070636000e-01, 3.51904019130060419229e-01, +3.82812538440931826589e-01, 3.65602365234580339859e-01, +3.98437724467857745658e-01, 3.79158862748537828224e-01, +4.14062683287296784407e-01, 3.92570291474021892952e-01, +4.29687654458357937148e-01, 4.05834423459965343284e-01, +4.45312642848883721847e-01, 4.18949086342842669239e-01, +4.60937644536906665493e-01, 4.31912354681638355203e-01, +4.76563149131543906112e-01, 4.44722952952162131623e-01, +4.92187842452541601812e-01, 4.57378374341803173309e-01, +5.15624825518001039804e-01, 4.76069192487019954285e-01, +5.46874516057966109095e-01, 5.00440440618262982753e-01, +5.78125566624434150675e-01, 5.24180053466007933594e-01, +6.09375102172641347487e-01, 5.47284455493244337276e-01, +6.40624936950189516338e-01, 5.69756408779493739303e-01, +6.71875248719545625775e-01, 5.91599881698465779323e-01, +7.03124988865964306584e-01, 6.12820194714659649549e-01, +7.34376295967088421612e-01, 6.33426724884753156175e-01, +7.65624929092156736310e-01, 6.53426296477277901431e-01, +7.96874196003358736817e-01, 6.72832055855442590087e-01, +8.28125565205639735389e-01, 6.91656957129326954714e-01, +8.59375453355927021448e-01, 7.09911879233846576653e-01, +8.90625694745052709500e-01, 7.27611720056701827275e-01, +9.21875110259870345075e-01, 7.44770185320721367361e-01, +9.53125042657123722201e-01, 7.61402792157321428590e-01, +9.84374765277631902372e-01, 7.77524191164056688308e-01, +1.03126494373528343473e+00, 8.00788807142382097481e-01, +1.09374968909110092952e+00, 8.30144253291031475328e-01, +1.15625019152505204012e+00, 8.57735575892430546219e-01, +1.21874985186151341132e+00, 8.83672057048812575886e-01, +1.28124876006842702836e+00, 9.08066349515326720621e-01, +1.34375006271148444981e+00, 9.31026566320014126177e-01, +1.40627222899692072566e+00, 9.52659566341466756967e-01, +1.46874957658300542285e+00, 
9.73037801091363618866e-01, +1.53124999999999555911e+00, 9.92272112377190040888e-01, +1.59375089676214143353e+00, 1.01043670320979472876e+00, +1.65624949800269094524e+00, 1.02760661639661776690e+00, +1.71874946971376685312e+00, 1.04385296549501305208e+00, +1.78125111924655166185e+00, 1.05924046784549474864e+00, +1.84374921332370989013e+00, 1.07382754310190620117e+00, +1.90625055239083862624e+00, 1.08767078118685489585e+00, +1.96874992734227549640e+00, 1.10081967347672460278e+00, +2.06250046973591683042e+00, 1.11934332464931074469e+00, +2.18749905173933534286e+00, 1.14201813543610697366e+00, +2.31249933788800232648e+00, 1.16264711873167669864e+00, +2.43749855191054187742e+00, 1.18147939634549814514e+00, +2.56251104936881235474e+00, 1.19873002825057639598e+00, +2.68750036758144528193e+00, 1.21457671610223272296e+00, +2.81249907059852954916e+00, 1.22918073183895870670e+00, +2.93749583903062294610e+00, 1.24267599964591468620e+00, +3.06250108260464948273e+00, 1.25518076906426045980e+00, +3.18750016629930410517e+00, 1.26679540235591403530e+00, +3.31250071362610132297e+00, 1.27760948984166233799e+00, +3.43749999999999333866e+00, 1.28770054149540058575e+00, +3.56249877589327157423e+00, 1.29713691630583838332e+00, +3.68750696071718842006e+00, 1.30597947372626776996e+00, +3.81250023149192607264e+00, 1.31427972905173717777e+00, +3.93749827850909683846e+00, 1.32208623339324304879e+00, +4.12500187917697846984e+00, 1.33296050364557672196e+00, +4.37499759905160701123e+00, 1.34608503917096200553e+00, +4.62500066729278191957e+00, 1.35785800701782477518e+00, +4.87499852385410648026e+00, 1.36847463881194641999e+00, +5.12499918742110072145e+00, 1.37809553833018583191e+00, +5.37500000000004529710e+00, 1.38685287025772296943e+00, +5.62499999999991828759e+00, 1.39485670134236627860e+00, +5.87499417854096694924e+00, 1.40219922327269230777e+00, +6.12500000000013233858e+00, 1.40895889555647713109e+00, +6.37499999999991828759e+00, 1.41520149881786494461e+00, +6.62499933107761584949e+00, 
1.42098385532083781868e+00, +6.87500431528593747288e+00, 1.42635483782722261026e+00, +7.12499228632883863099e+00, 1.43135612069194451124e+00, +7.37499257154547205317e+00, 1.43602490820671135907e+00, +7.62499911873607416624e+00, 1.44039300400460135165e+00, +7.87500000000018918200e+00, 1.44448820973165936721e+00, +}; diff --git a/usr/src/lib/libm/common/C/_TBL_exp2.c b/usr/src/lib/libm/common/C/_TBL_exp2.c new file mode 100644 index 0000000000..0d141192b6 --- /dev/null +++ b/usr/src/lib/libm/common/C/_TBL_exp2.c @@ -0,0 +1,79 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#include "libm_protos.h" + +const double _TBL_exp2_hi[] = { + 1.00000000000000000e+00, 1.01088928605170048e+00, 1.02189714865411663e+00, + 1.03302487902122841e+00, 1.04427378242741375e+00, 1.05564517836055716e+00, + 1.06714040067682370e+00, 1.07876079775711986e+00, 1.09050773266525769e+00, + 1.10238258330784089e+00, 1.11438674259589243e+00, 1.12652161860824185e+00, + 1.13878863475669156e+00, 1.15118922995298267e+00, 1.16372485877757748e+00, + 1.17639699165028122e+00, 1.18920711500272103e+00, 1.20215673145270308e+00, + 1.21524735998046896e+00, 1.22848053610687002e+00, 1.24185781207348400e+00, + 1.25538075702469110e+00, 1.26905095719173322e+00, 1.28287001607877826e+00, + 1.29683955465100964e+00, 1.31096121152476441e+00, 1.32523664315974132e+00, + 1.33966752405330292e+00, 1.35425554693689265e+00, 1.36900242297459052e+00, + 1.38390988196383202e+00, 1.39897967253831124e+00, 1.41421356237309515e+00, + 1.42961333839197002e+00, 1.44518080697704665e+00, 1.46091779418064704e+00, + 1.47682614593949935e+00, 1.49290772829126484e+00, 1.50916442759342284e+00, + 1.52559815074453820e+00, 1.54221082540794074e+00, 1.55900440023783693e+00, + 1.57598084510788650e+00, 1.59314215134226700e+00, 1.61049033194925428e+00, + 1.62802742185734783e+00, 1.64575547815396495e+00, 1.66367658032673638e+00, + 1.68179283050742900e+00, 1.70010635371852348e+00, 1.71861929812247793e+00, + 1.73733383527370622e+00, 1.75625216037329945e+00, 1.77537649252652119e+00, + 1.79470907500310717e+00, 1.81425217550039886e+00, 1.83400808640934243e+00, + 1.85397912508338547e+00, 1.87416763411029996e+00, 1.89457598158696561e+00, + 1.91520656139714740e+00, 1.93606179349229435e+00, 1.95714412417540018e+00, + 1.97845602638795093e+00, +}; +const double _TBL_exp2_lo[] = { + 0.00000000000000000e+00,-1.52347786033685772e-17, 5.10922502897344389e-17, + 7.60083887402708849e-18, 8.55188970553796366e-17, 1.75932573877209198e-18, +-7.89985396684158212e-17,-6.65666043605659260e-17,-3.04678207981247115e-17, + 
5.26603687157069439e-17, 1.04102784568455710e-16, 5.16585675879545612e-17, + 8.91281267602540778e-17, 3.25071021886382721e-17, 3.82920483692409350e-17, + 5.55420325421807896e-17, 3.98201523146564611e-17, 6.64498149925230124e-17, +-7.71263069268148813e-17,-1.89878163130252995e-17, 4.65802759183693679e-17, +-6.71138982129687842e-18, 2.66793213134218610e-18, 1.71359491824356097e-17, + 2.53825027948883150e-17,-7.18153613551945386e-17,-2.85873121003886076e-17, + 8.92728259483173198e-17, 7.70094837980298946e-17, 9.59379791911884877e-17, +-6.77051165879478629e-17,-9.61421320905132307e-17,-9.66729331345291345e-17, +-1.20316424890536552e-17,-3.02375813499398732e-17,-5.60037718607521580e-17, +-3.48399455689279580e-17, 1.41929201542840358e-17,-1.01645532775429504e-16, + 1.11795187801605699e-16, 7.94983480969762086e-17, 3.78120705335752750e-17, +-1.01369164712783040e-17,-1.00944065423119625e-16, 2.47071925697978879e-17, +-6.71295508470708409e-17,-1.01256799136747726e-16, 5.89099269671309967e-17, + 8.19901002058149652e-17,-8.02371937039770025e-18,-1.85138041826311099e-17, + 3.16438929929295695e-17, 2.96014069544887331e-17, 6.42973179655657203e-17, + 1.82274584279120868e-17,-9.96953153892034882e-17, 3.28310722424562659e-17, + 9.76188749072759354e-17,-6.12276341300414256e-17, 3.40340353521652967e-17, +-1.06199460561959626e-16, 1.03323859606763257e-16, 8.96076779103666777e-17, + 4.03887531092781666e-17, +}; diff --git a/usr/src/lib/libm/common/C/_TBL_ipio2.c b/usr/src/lib/libm/common/C/_TBL_ipio2.c new file mode 100644 index 0000000000..1e974fa4fe --- /dev/null +++ b/usr/src/lib/libm/common/C/_TBL_ipio2.c @@ -0,0 +1,87 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include "libm_protos.h" + +/* + * Table of constants for 2/pi, used in __rem_pio2 (trigl) function. + */ + +/* + * 396 Hex digits (476 decimal) of 2/pi + */ +const int _TBL_ipio2_inf[] = { +0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62, +0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A, +0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129, +0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41, +0x3991D6, 0x398353, 0x39F49C, 0x845F8B, 0xBDF928, 0x3B1FF8, +0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF, +0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5, +0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 0x5FB11F, 0x8D5D08, +0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3, +0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880, +0x4D7327, 0x310606, 0x1556CA, 0x73A8C9, 0x60E27B, 0xC08C6B, +}; + +#if 0 /* remove from SVR4 */ +/* + * 396 Hex digits (476 decimal) of 2/PI, PI = 66 bits of pi + */ +const int _TBL_ipio2_66[] = { +0xA2F983, 0x6E4E44, 0x152A00, 0x062BC4, 0x0DA276, 0xBED4C1, +0xFDF905, 0x5CD5BA, 0x767CEC, 0x1F80D6, 0xC26053, 0x3A0070, +0x107C2A, 0xF68EE9, 0x687B7A, 0xB990AA, 0x38DE4B, 0x96CFF3, +0x92735E, 0x8B34F6, 0x195BFC, 0x27F88E, 0xA93EC5, 0x3958A5, +0x3E5D13, 0x1C55A8, 0x5B4A8B, 0xA42E04, 0x12D105, 0x35580D, +0xF62347, 0x450900, 
0xB98BCA, 0xF7E8A4, 0xA2E5D5, 0x69BC52, +0xF0381D, 0x1A0A88, 0xFE8714, 0x7F6735, 0xBB7D4D, 0xC6F642, +0xB27E80, 0x6191BF, 0xB6B750, 0x52776E, 0xD60FD0, 0x607DCC, +0x68BFAF, 0xED69FC, 0x6EB305, 0xD2557D, 0x25BDFB, 0x3E4AA1, +0x84472D, 0x8B0376, 0xF77740, 0xD290DF, 0x15EC8C, 0x45A5C3, +0x6181EF, 0xC5E7E8, 0xD8909C, 0xF62144, 0x298428, 0x6E5D9D, +}; + +/* + * 396 Hex digits (476 decimal) of 2/PI, PI = 53 bits of pi + */ +const int _TBL_ipio2_53[] = { +0xA2F983, 0x6E4E44, 0x16F3C4, 0xEA69B5, 0xD3E131, 0x60E1D2, +0xD7982A, 0xC031F5, 0xD67BCC, 0xFD1375, 0x60919B, 0x3FA0BB, +0x612ABB, 0x714F9B, 0x03DA8A, 0xC05948, 0xD023F4, 0x5AFA37, +0x51631D, 0xCD7A90, 0xC0474A, 0xF6A6F3, 0x1A52E1, 0x5C3927, +0x3ADA45, 0x4E2DB5, 0x64E8C4, 0x274A5B, 0xB74ADC, 0x1E6591, +0x2822BE, 0x4771F5, 0x12A63F, 0x83BD35, 0x2488CA, 0x1FE1BE, +0x42C21A, 0x682569, 0x2AFB91, 0x68ADE1, 0x4A42E5, 0x9BE357, +0xB79675, 0xCE998A, 0x83AF8B, 0xE645E6, 0xDF0789, 0x9E9747, +0xAA15FF, 0x358C3F, 0xAF3141, 0x72A3F7, 0x2BF1D4, 0xF3AD96, +0x7D759F, 0x257FCE, 0x29FB69, 0xB1B42C, 0xC32DE1, 0x8C0BBD, +0x31EC2F, 0x942026, 0x85DCE7, 0x653FF3, 0x136FA7, 0x0D7A5F, +}; +#endif diff --git a/usr/src/lib/libm/common/C/_TBL_log.c b/usr/src/lib/libm/common/C/_TBL_log.c new file mode 100644 index 0000000000..6e8f2ade2d --- /dev/null +++ b/usr/src/lib/libm/common/C/_TBL_log.c @@ -0,0 +1,299 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include "libm_protos.h" + +/* + * Table of constants for log, log2, and log10 + * By K.C. Ng, November 21, 2004 + * + * Y[j], 1/Y[j], log(Y[j]) for j = 0 to 255 + * where HIWORD(Y[j]) ~ 0x3fb84000 + (j<<15) + * That is, 256 Y[j] space out logarithmically between 0.09375 and 24, and + * each is chosen so that 1/Y[j] and log(Y[j]) are very close to an IEEE + * double. In addition, each log(Y[j]) has 3 trailing zeros. + */ +const double _TBL_log[] = { +9.47265623608246343e-02, 1.05567010464380857e+01, -2.35676082856530300e+00, +9.66796869131412717e-02, 1.03434344062203838e+01, -2.33635196153499791e+00, +9.86328118117651004e-02, 1.01386139321308306e+01, -2.31635129573594156e+00, +1.00585936733578435e-01, 9.94174764856737347e+00, -2.29674282498938709e+00, +1.02539062499949152e-01, 9.75238095238578850e+00, -2.27751145544242561e+00, +1.04492186859904843e-01, 9.57009351656812157e+00, -2.25864297726331742e+00, +1.06445312294918631e-01, 9.39449543094380957e+00, -2.24012392529694537e+00, +1.08398437050250693e-01, 9.22522526350104144e+00, -2.22194160843615762e+00, +1.10351562442130582e-01, 9.06194690740703912e+00, -2.20408398741152212e+00, +1.12304686894746625e-01, 8.90434787407592943e+00, -2.18653968262558962e+00, +1.14257811990227776e-01, 8.75213679118525256e+00, -2.16929787526329321e+00, +1.16210936696872255e-01, 8.60504207627572093e+00, -2.15234831939887172e+00, +1.18164061975360682e-01, 8.46280995492959498e+00, -2.13568126444263484e+00, +1.20117187499996322e-01, 8.32520325203277523e+00, -2.11928745022706622e+00, +1.22070312499895098e-01, 
8.19200000000703987e+00, -2.10315806829801133e+00, +1.24023436774175100e-01, 8.06299217317146599e+00, -2.08728472499318229e+00, +1.26953123746900931e-01, 7.87692315467275872e+00, -2.06393736501443570e+00, +1.30859374098123454e-01, 7.64179109744297769e+00, -2.03363201254049386e+00, +1.34765623780674720e-01, 7.42028992220936967e+00, -2.00421812948999545e+00, +1.38671874242985771e-01, 7.21126764500034501e+00, -1.97564475345722457e+00, +1.42578124148616536e-01, 7.01369867201821506e+00, -1.94786518986246371e+00, +1.46484374166731490e-01, 6.82666670549979404e+00, -1.92083651719164372e+00, +1.50390624434435488e-01, 6.64935067435644189e+00, -1.89451920694646070e+00, +1.54296874339723084e-01, 6.48101268596180624e+00, -1.86887677685174936e+00, +1.58203124999987427e-01, 6.32098765432149001e+00, -1.84387547036714849e+00, +1.62109374999815342e-01, 6.16867469880220742e+00, -1.81948401724404896e+00, +1.66015624243955634e-01, 6.02352943919619310e+00, -1.79567337310324682e+00, +1.69921874302298687e-01, 5.88505749542848644e+00, -1.77241651049093640e+00, +1.73828124315277527e-01, 5.75280901142480605e+00, -1.74968825924644555e+00, +1.77734374286237506e-01, 5.62637364896854919e+00, -1.72746512253855222e+00, +1.81640624146994889e-01, 5.50537636993989743e+00, -1.70572513658236602e+00, +1.85546874316304788e-01, 5.38947370406942916e+00, -1.68444773712372431e+00, +1.89453124405085355e-01, 5.27835053203882509e+00, -1.66361364967629299e+00, +1.93359374570531595e-01, 5.17171718320401652e+00, -1.64320477712600699e+00, +1.97265624263334577e-01, 5.06930694962380368e+00, -1.62320411193263148e+00, +2.01171874086291030e-01, 4.97087380898513764e+00, -1.60359564135180399e+00, +2.05078123979995308e-01, 4.87619050044336610e+00, -1.58436427985572159e+00, +2.08984373896073439e-01, 4.78504675424820736e+00, -1.56549579585994181e+00, +2.12890623963011144e-01, 4.69724772930228163e+00, -1.54697674768135762e+00, +2.16796874723889421e-01, 4.61261261848719517e+00, -1.52879442500076479e+00, 
+2.20703124198150608e-01, 4.53097346778917753e+00, -1.51093680996032553e+00, +2.24609374375627030e-01, 4.45217392541970725e+00, -1.49339249945607477e+00, +2.28515625000094036e-01, 4.37606837606657528e+00, -1.47615069024134016e+00, +2.32421873924349737e-01, 4.30252102831546246e+00, -1.45920113655598627e+00, +2.36328123935216378e-01, 4.23140497774241098e+00, -1.44253408394829741e+00, +2.40234375000066919e-01, 4.16260162601510064e+00, -1.42614026966681173e+00, +2.44140623863132178e-01, 4.09600001907347711e+00, -1.41001089239381727e+00, +2.48046874999894917e-01, 4.03149606299383390e+00, -1.39413753858134015e+00, +2.53906248590769879e-01, 3.93846156032078243e+00, -1.37079018013412401e+00, +2.61718748558906533e-01, 3.82089554342693294e+00, -1.34048483059486401e+00, +2.69531249159214337e-01, 3.71014493910979404e+00, -1.31107094300173976e+00, +2.77343749428383191e-01, 3.60563381024826013e+00, -1.28249756949928795e+00, +2.85156249289339359e-01, 3.50684932380819214e+00, -1.25471800582335113e+00, +2.92968749999700462e-01, 3.41333333333682321e+00, -1.22768933094427446e+00, +3.00781248554318814e-01, 3.32467534065511261e+00, -1.20137202743229921e+00, +3.08593748521894806e-01, 3.24050634463533127e+00, -1.17572959680235023e+00, +3.16406249999639899e-01, 3.16049382716409077e+00, -1.15072828980826181e+00, +3.24218749999785061e-01, 3.08433734939963511e+00, -1.12633683668362750e+00, +3.32031248841858584e-01, 3.01176471638753718e+00, -1.10252619147729547e+00, +3.39843749265406558e-01, 2.94252874199264314e+00, -1.07926932798654107e+00, +3.47656249999834799e-01, 2.87640449438338930e+00, -1.05654107474789782e+00, +3.55468749999899247e-01, 2.81318681318761055e+00, -1.03431793796299587e+00, +3.63281249999864997e-01, 2.75268817204403371e+00, -1.01257795132667816e+00, +3.71093749064121570e-01, 2.69473684890124421e+00, -9.91300555400967731e-01, +3.78906249999751032e-01, 2.63917525773369288e+00, -9.70466465976836723e-01, +3.86718748879039009e-01, 2.58585859335407608e+00, 
-9.50057597243619156e-01, +3.94531249999987899e-01, 2.53465346534661240e+00, -9.30056927638333697e-01, +4.02343749999485523e-01, 2.48543689320706163e+00, -9.10448456251205407e-01, +4.10156249578856991e-01, 2.43809524059864202e+00, -8.91217095348825872e-01, +4.17968749447214571e-01, 2.39252336765021800e+00, -8.72348611340208357e-01, +4.25781248601723117e-01, 2.34862386092395203e+00, -8.53829565534445223e-01, +4.33593749393073047e-01, 2.30630630953458038e+00, -8.35647244566987801e-01, +4.41406248572254134e-01, 2.26548673299152270e+00, -8.17789629001761220e-01, +4.49218749348472501e-01, 2.22608695975035964e+00, -8.00245317566669279e-01, +4.57031249277175089e-01, 2.18803419149470768e+00, -7.83003511263371976e-01, +4.64843748529596368e-01, 2.15126051100659366e+00, -7.66053954531254355e-01, +4.72656248830947701e-01, 2.11570248457175136e+00, -7.49386901356188240e-01, +4.80468748609962581e-01, 2.08130081902951236e+00, -7.32993092000230995e-01, +4.88281249241778237e-01, 2.04800000318021258e+00, -7.16863708730099525e-01, +4.96093748931098810e-01, 2.01574803583926521e+00, -7.00990360175606675e-01, +5.07812497779701388e-01, 1.96923077784079825e+00, -6.77642998396260410e-01, +5.23437498033319737e-01, 1.91044776837204044e+00, -6.47337648285891021e-01, +5.39062498006593560e-01, 1.85507247062801328e+00, -6.17923763020271188e-01, +5.54687498964024250e-01, 1.80281690477552603e+00, -5.89350388745976339e-01, +5.70312499806522322e-01, 1.75342465812909332e+00, -5.61570823110474571e-01, +5.85937497921867001e-01, 1.70666667271966777e+00, -5.34542153929987052e-01, +6.01562498226483444e-01, 1.66233766723853860e+00, -5.08224845014116688e-01, +6.17187498682654212e-01, 1.62025316801528496e+00, -4.82582413587029357e-01, +6.32812500000264566e-01, 1.58024691357958624e+00, -4.57581109246760320e-01, +6.48437499353274216e-01, 1.54216867623689291e+00, -4.33189657120379490e-01, +6.64062498728508976e-01, 1.50588235582451335e+00, -4.09379009344016609e-01, +6.79687498865382267e-01, 
1.47126437027210688e+00, -3.86122146934356092e-01, +6.95312498728747119e-01, 1.43820224982050338e+00, -3.63393896015796081e-01, +7.10937499999943157e-01, 1.40659340659351906e+00, -3.41170757402847080e-01, +7.26562499999845568e-01, 1.37634408602179792e+00, -3.19430770766573779e-01, +7.42187500000120126e-01, 1.34736842105241350e+00, -2.98153372318914478e-01, +7.57812499999581890e-01, 1.31958762886670744e+00, -2.77319285416786077e-01, +7.73437498602746576e-01, 1.29292929526503420e+00, -2.56910415591577124e-01, +7.89062500000142664e-01, 1.26732673267303819e+00, -2.36909747078176913e-01, +8.04687500000259015e-01, 1.24271844660154174e+00, -2.17301275689659512e-01, +8.20312499999677036e-01, 1.21904761904809900e+00, -1.98069913762487504e-01, +8.35937499999997113e-01, 1.19626168224299478e+00, -1.79201429457714445e-01, +8.51562499999758749e-01, 1.17431192660583728e+00, -1.60682381690756770e-01, +8.67187500000204725e-01, 1.15315315315288092e+00, -1.42500062607046951e-01, +8.82812500000407896e-01, 1.13274336283133503e+00, -1.24642445206814556e-01, +8.98437499999816813e-01, 1.11304347826109651e+00, -1.07098135556570995e-01, +9.14062499999708455e-01, 1.09401709401744296e+00, -8.98563291221800009e-02, +9.29687500000063949e-01, 1.07563025210076635e+00, -7.29067708080189947e-02, +9.45312499999844014e-01, 1.05785123966959604e+00, -5.62397183230410880e-02, +9.60937500000120459e-01, 1.04065040650393459e+00, -3.98459085470743157e-02, +9.76562499999976685e-01, 1.02400000000002445e+00, -2.37165266173399170e-02, +9.92187500000169420e-01, 1.00787401574785940e+00, -7.84317746085513856e-03, +1.01562500000004907e+00, 9.84615384615337041e-01, 1.55041865360135717e-02, +1.04687500000009237e+00, 9.55223880596930641e-01, 4.58095360313824362e-02, +1.07812500000002154e+00, 9.27536231884039442e-01, 7.52234212376075018e-02, +1.10937499999982481e+00, 9.01408450704367703e-01, 1.03796793681485644e-01, +1.14062500000007416e+00, 8.76712328767066285e-01, 1.31576357788784293e-01, +1.17187500000009659e+00, 
8.53333333333263000e-01, 1.58605030176721007e-01, +1.20312499999950173e+00, 8.31168831169175393e-01, 1.84922338493597849e-01, +1.23437500000022027e+00, 8.10126582278336449e-01, 2.10564769107528083e-01, +1.26562500000064615e+00, 7.90123456789720069e-01, 2.35566071313277448e-01, +1.29687500000144706e+00, 7.71084337348537208e-01, 2.59957524438041876e-01, +1.32812499999945932e+00, 7.52941176470894757e-01, 2.83768173130237500e-01, +1.35937500055846350e+00, 7.35632183605830825e-01, 3.07025035705735583e-01, +1.39062499999999467e+00, 7.19101123595508374e-01, 3.29753286372464149e-01, +1.42187500000017564e+00, 7.03296703296616421e-01, 3.51976423157301710e-01, +1.45312500161088876e+00, 6.88172042247866766e-01, 3.73716410902152685e-01, +1.48437500134602307e+00, 6.73684209915422660e-01, 3.94993809147663466e-01, +1.51562499999932343e+00, 6.59793814433284220e-01, 4.15827895143264570e-01, +1.54687500000028200e+00, 6.46464646464528614e-01, 4.36236766775100371e-01, +1.57812500000061906e+00, 6.33663366336385092e-01, 4.56237433481979870e-01, +1.60937500243255216e+00, 6.21359222361793417e-01, 4.75845906381452632e-01, +1.64062500000026312e+00, 6.09523809523711768e-01, 4.95077266798011895e-01, +1.67187500000027911e+00, 5.98130841121395473e-01, 5.13945751102401260e-01, +1.70312500224662178e+00, 5.87155962528224662e-01, 5.32464800188589216e-01, +1.73437500283893620e+00, 5.76576575632799071e-01, 5.50647119589526390e-01, +1.76562500399259092e+00, 5.66371680135198341e-01, 5.68504737613959144e-01, +1.79687500443862880e+00, 5.56521737755718449e-01, 5.86049047473771623e-01, +1.82812500114411280e+00, 5.47008546666207462e-01, 6.03290852063923744e-01, +1.85937500250667465e+00, 5.37815125325376786e-01, 6.20240411099985067e-01, +1.89062500504214515e+00, 5.28925618424108568e-01, 6.36907464903988974e-01, +1.92187500371610143e+00, 5.20325202245941476e-01, 6.53301273946326866e-01, +1.95312500494870611e+00, 5.11999998702726389e-01, 6.69430656476366792e-01, +1.98437500351688123e+00, 
5.03937006980894941e-01, 6.85304004871206018e-01, +2.03125000000003997e+00, 4.92307692307682621e-01, 7.08651367095930240e-01, +2.09375000579615866e+00, 4.77611938976327366e-01, 7.38956719359554093e-01, +2.15625000000061062e+00, 4.63768115941897652e-01, 7.68370601797816022e-01, +2.21875000323311955e+00, 4.50704224695355204e-01, 7.96943975698769513e-01, +2.28125000853738547e+00, 4.38356162743050726e-01, 8.24723542091080120e-01, +2.34374999999916556e+00, 4.26666666666818573e-01, 8.51752210736227866e-01, +2.40625000438447856e+00, 4.15584414827170512e-01, 8.78069520876078258e-01, +2.46875000884389584e+00, 4.05063289688167072e-01, 9.03711953249632494e-01, +2.53124999999940403e+00, 3.95061728395154743e-01, 9.28713251872476775e-01, +2.59375000434366632e+00, 3.85542168029044230e-01, 9.53104706671537905e-01, +2.65625000734081196e+00, 3.76470587194880080e-01, 9.76915356454189698e-01, +2.71875000787161980e+00, 3.67816090889081959e-01, 1.00017221875016560e+00, +2.78125001557333462e+00, 3.59550559784484969e-01, 1.02290047253181449e+00, +2.84375001147093220e+00, 3.51648350229895601e-01, 1.04512360775085789e+00, +2.90625000771072894e+00, 3.44086020592463127e-01, 1.06686359300668343e+00, +2.96875001371853831e+00, 3.36842103706616824e-01, 1.08814099342179560e+00, +3.03125000512624965e+00, 3.29896906658595002e-01, 1.10897507739479018e+00, +3.09375001373132807e+00, 3.23232321797685962e-01, 1.12938395177327244e+00, +3.15625001204422961e+00, 3.16831681959289180e-01, 1.14938461785752644e+00, +3.21875000888250318e+00, 3.10679610793130057e-01, 1.16899308818952186e+00, +3.28125000000102052e+00, 3.04761904761809976e-01, 1.18822444735810784e+00, +3.34375001587649123e+00, 2.99065419140752298e-01, 1.20709293641028914e+00, +3.40625000791328070e+00, 2.93577980969346064e-01, 1.22561198175258212e+00, +3.46875000615970519e+00, 2.88288287776354346e-01, 1.24379430028837845e+00, +3.53125000516822674e+00, 2.83185840293502689e-01, 1.26165191737618265e+00, +3.59375001425228779e+00, 
2.78260868461675415e-01, 1.27919622952937750e+00, +3.65625001719730669e+00, 2.73504272217836075e-01, 1.29643803670156643e+00, +3.71875000856489324e+00, 2.68907562405871714e-01, 1.31338759261496740e+00, +3.78125001788371806e+00, 2.64462808666557803e-01, 1.33005464752659286e+00, +3.84375001532508964e+00, 2.60162600588744020e-01, 1.34644845655970613e+00, +3.90625000429340918e+00, 2.55999999718627136e-01, 1.36257783560168733e+00, +3.96875001912740766e+00, 2.51968502722644594e-01, 1.37845118847836900e+00, +4.06250002536431332e+00, 2.46153844616978895e-01, 1.40179855389937913e+00, +4.18750001743208244e+00, 2.38805969155131859e-01, 1.43210390131407017e+00, +4.31250002253733200e+00, 2.31884056759177282e-01, 1.46151778758352613e+00, +4.43750000671406397e+00, 2.25352112335092170e-01, 1.49009115631456268e+00, +4.56250002627485340e+00, 2.19178080929562313e-01, 1.51787072466748185e+00, +4.68750001185115028e+00, 2.13333332793974317e-01, 1.54489939382477459e+00, +4.81250001682742301e+00, 2.07792207065640028e-01, 1.57121670311050998e+00, +4.93750000000042366e+00, 2.02531645569602875e-01, 1.59685913022732606e+00, +5.06249999999927613e+00, 1.97530864197559108e-01, 1.62186043243251454e+00, +5.18750002327641901e+00, 1.92771083472381588e-01, 1.64625189004383721e+00, +5.31250002381002329e+00, 1.88235293273997795e-01, 1.67006253873242194e+00, +5.43750000000577405e+00, 1.83908045976816203e-01, 1.69331939641586438e+00, +5.56250002193114934e+00, 1.79775280190080267e-01, 1.71604765143503712e+00, +5.68749999999938005e+00, 1.75824175824194989e-01, 1.73827078427695980e+00, +5.81250002749782002e+00, 1.72043009938785768e-01, 1.76001077564428243e+00, +5.93749999999874767e+00, 1.68421052631614471e-01, 1.78128816936054868e+00, +6.06250001966917473e+00, 1.64948453073088669e-01, 1.80212225950800153e+00, +6.18750003004243609e+00, 1.61616160831459688e-01, 1.82253113275015188e+00, +6.31250002448351388e+00, 1.58415840969730465e-01, 1.84253179848005466e+00, +6.43750001359968849e+00, 
1.55339805497076044e-01, 1.86214026810242750e+00, +6.56250003345742350e+00, 1.52380951604072529e-01, 1.88137163301601618e+00, +6.68750002403557531e+00, 1.49532709742937614e-01, 1.90024011581622965e+00, +6.81250003423489581e+00, 1.46788990088028509e-01, 1.91875916501466826e+00, +6.93750003062940923e+00, 1.44144143507740546e-01, 1.93694148348760287e+00, +7.06250002747386052e+00, 1.41592919803171097e-01, 1.95479910036266347e+00, +7.18750003617887856e+00, 1.39130434082284093e-01, 1.97234341115705192e+00, +7.31250000000050537e+00, 1.36752136752127301e-01, 1.98958521255804399e+00, +7.43750002212249761e+00, 1.34453781112678528e-01, 2.00653477384620160e+00, +7.56250003604752941e+00, 1.32231404328381430e-01, 2.02320182812357530e+00, +7.68750005007207449e+00, 1.30081299965731312e-01, 2.03959563964607682e+00, +7.81249996125652668e+00, 1.28000000634773070e-01, 2.05572501010335529e+00, +7.93750005224239974e+00, 1.25984251139310915e-01, 2.07159837080052966e+00, +8.12500004244456164e+00, 1.23076922433975874e-01, 2.09494573343974722e+00, +8.37500006149772425e+00, 1.19402984197849338e-01, 2.12525108505414195e+00, +8.62500006593247370e+00, 1.15942028099206410e-01, 2.15466497056176820e+00, +8.87500007743793873e+00, 1.12676055354884341e-01, 2.18323834408688100e+00, +9.12500001754142609e+00, 1.09589040885222130e-01, 2.21101790139090326e+00, +9.37500007707016181e+00, 1.06666665789779500e-01, 2.23804658007729174e+00, +9.62500004426353151e+00, 1.03896103418305616e-01, 2.26436388477265638e+00, +9.87500006518495788e+00, 1.01265822116353585e-01, 2.29000631738819393e+00, +1.01250000000026539e+01, 9.87654320987395445e-02, 2.31500761299286495e+00, +1.03750000409819823e+01, 9.63855417879450060e-02, 2.33939907006683256e+00, +1.06250000362555337e+01, 9.41176467376672460e-02, 2.36320971822276604e+00, +1.08750000879032314e+01, 9.19540222452362582e-02, 2.38646658505780351e+00, +1.11250000697274576e+01, 8.98876398860551373e-02, 2.40919483431994053e+00, +1.13750000462194141e+01, 
8.79120875548795450e-02, 2.43141796890025930e+00, +1.16250000714972366e+01, 8.60215048472860316e-02, 2.45315795762371991e+00, +1.18750000788855150e+01, 8.42105257563797310e-02, 2.47443535656369562e+00, +1.21250000895724916e+01, 8.24742261948517991e-02, 2.49526944421096886e+00, +1.23750000985058719e+01, 8.08080801648427965e-02, 2.51567831641482442e+00, +1.26250000894226950e+01, 7.92079202310506381e-02, 2.53567898224440924e+00, +1.28750000768594433e+01, 7.76699024489580225e-02, 2.55528745251946532e+00, +1.31250000578007420e+01, 7.61904758549435401e-02, 2.57451881288155349e+00, +1.33750000809310077e+01, 7.47663546877819496e-02, 2.59338729883298669e+00, +1.36250000915049636e+01, 7.33944949199294983e-02, 2.61190634726526838e+00, +1.38750000830616607e+01, 7.20720716406179490e-02, 2.63008866561892418e+00, +1.41249999999960103e+01, 7.07964601770111474e-02, 2.64794627703222218e+00, +1.43750000290097564e+01, 6.95652172509168693e-02, 2.66549058870148414e+00, +1.46250000868097665e+01, 6.83760679702078294e-02, 2.68273239905363070e+00, +1.48750000966053975e+01, 6.72268903196987927e-02, 2.69968195792617394e+00, +1.51250001097012756e+01, 6.61157019998031836e-02, 2.71634901116988203e+00, +1.53750000510427132e+01, 6.50406501905787804e-02, 2.73274281701243282e+00, +1.56250001080665442e+01, 6.39999995573594382e-02, 2.74887220253872400e+00, +1.58750000434989929e+01, 6.29921258116476201e-02, 2.76474554751884938e+00, +1.62500000641781739e+01, 6.15384612954199342e-02, 2.78809291272517257e+00, +1.67500001015987401e+01, 5.97014921751882754e-02, 2.81839826433667184e+00, +1.72500001048300184e+01, 5.79710141404578272e-02, 2.84781214955447126e+00, +1.77500001262529885e+01, 5.63380277682904579e-02, 2.87638552303426920e+00, +1.82500001543340602e+01, 5.47945200845665337e-02, 2.90416508848516131e+00, +1.87500001096404212e+01, 5.33333330214672482e-02, 2.93119375826390893e+00, +1.92500001680268191e+01, 5.19480514946147609e-02, 2.95751106946245912e+00, +1.97500000329124035e+01, 
5.06329113080278073e-02, 2.98315349301358168e+00, +2.02500001270002485e+01, 4.93827157396732261e-02, 3.00815479982416534e+00, +2.07500001519906796e+01, 4.81927707313324349e-02, 3.03254625400155930e+00, +2.12500001425219267e+01, 4.70588232137922752e-02, 3.05635690207734001e+00, +2.17500000758314478e+01, 4.59770113339538697e-02, 3.07961376102119644e+00, +2.22500001767207358e+01, 4.49438198677525880e-02, 3.10234201655475417e+00, +2.27500001365873317e+01, 4.39560436921389575e-02, 3.12456515140079816e+00, +2.32500001697599998e+01, 4.30107523741288036e-02, 3.14630513933487066e+00, +2.37500001766865303e+01, 4.21052628446554611e-02, 3.16758253792008304e+00, +}; diff --git a/usr/src/lib/libm/common/C/_TBL_log2.c b/usr/src/lib/libm/common/C/_TBL_log2.c new file mode 100644 index 0000000000..dc35b6dd99 --- /dev/null +++ b/usr/src/lib/libm/common/C/_TBL_log2.c @@ -0,0 +1,121 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#include "libm_protos.h" + +const double _TBL_log2_hi[] = { + 0.00000000000000000e+00, 1.12272500991821289e-02, 2.23678052425384521e-02, + 3.34229767322540283e-02, 4.43941056728363037e-02, 5.52824139595031738e-02, + 6.60891532897949219e-02, 7.68155455589294434e-02, 8.74627828598022461e-02, + 9.80320572853088379e-02, 1.08524441719055176e-01, 1.18941068649291992e-01, + 1.29282951354980469e-01, 1.39551281929016113e-01, 1.49747014045715332e-01, + 1.59871220588684082e-01, 1.69924974441528320e-01, 1.79908990859985352e-01, + 1.89824461936950684e-01, 1.99672341346740723e-01, 2.09453344345092773e-01, + 2.19168424606323242e-01, 2.28818655014038086e-01, 2.38404631614685059e-01, + 2.47927427291870117e-01, 2.57387638092041016e-01, 2.66786336898803711e-01, + 2.76124238967895508e-01, 2.85402059555053711e-01, 2.94620513916015625e-01, + 3.03780555725097656e-01, 3.12882900238037109e-01, 3.21928024291992188e-01, + 3.30916643142700195e-01, 3.39849948883056641e-01, 3.48727941513061523e-01, + 3.57551813125610352e-01, 3.66322040557861328e-01, 3.75039339065551758e-01, + 3.83704185485839844e-01, 3.92317295074462891e-01, 4.00879383087158203e-01, + 4.09390926361083984e-01, 4.17852401733398438e-01, 4.26264524459838867e-01, + 4.34628009796142578e-01, 4.42943334579467773e-01, 4.51210975646972656e-01, + 4.59431409835815430e-01, 4.67605352401733398e-01, 4.75733280181884766e-01, + 4.83815670013427734e-01, 4.91852998733520508e-01, 4.99845743179321289e-01, + 5.07794380187988281e-01, 5.15699386596679688e-01, 5.23561954498291016e-01, + 5.31381130218505859e-01, 5.39158344268798828e-01, 5.46894073486328125e-01, + 5.54588794708251953e-01, 5.62242031097412109e-01, 5.69855213165283203e-01, + 5.77428817749023438e-01, 5.84962368011474609e-01, 5.92456817626953125e-01, + 5.99912643432617188e-01, 6.07329845428466797e-01, 6.14709377288818359e-01, + 6.22051715850830078e-01, 6.29356384277343750e-01, 6.36624336242675781e-01, + 6.43856048583984375e-01, 6.51051521301269531e-01, 6.58211231231689453e-01, + 
6.65335655212402344e-01, 6.72425270080566406e-01, 6.79480075836181641e-01, + 6.86500072479248047e-01, 6.93486690521240234e-01, 7.00439453125000000e-01, + 7.07358837127685547e-01, 7.14245319366455078e-01, 7.21098899841308594e-01, + 7.27920055389404297e-01, 7.34709262847900391e-01, 7.41466522216796875e-01, + 7.48192787170410156e-01, 7.54887104034423828e-01, 7.61550903320312500e-01, + 7.68184185028076172e-01, 7.74786949157714844e-01, 7.81359672546386719e-01, + 7.87902355194091797e-01, 7.94415473937988281e-01, 8.00899505615234375e-01, + 8.07354450225830078e-01, 8.13780784606933594e-01, 8.20178508758544922e-01, + 8.26548099517822266e-01, 8.32889556884765625e-01, 8.39203357696533203e-01, + 8.45489978790283203e-01, 8.51748943328857422e-01, 8.57980728149414062e-01, + 8.64185810089111328e-01, 8.70364665985107422e-01, 8.76516819000244141e-01, + 8.82642745971679688e-01, 8.88742923736572266e-01, 8.94817352294921875e-01, + 9.00866508483886719e-01, 9.06890392303466797e-01, 9.12889003753662109e-01, + 9.18862819671630859e-01, 9.24812316894531250e-01, 9.30737018585205078e-01, + 9.36637878417968750e-01, 9.42514419555664062e-01, 9.48367118835449219e-01, + 9.54195976257324219e-01, 9.60001468658447266e-01, 9.65784072875976562e-01, + 9.71543312072753906e-01, 9.77279663085937500e-01, 9.82993125915527344e-01, + 9.88684654235839844e-01, 9.94353294372558594e-01, +}; +const double _TBL_log2_lo[] = { + 0.00000000000000000e+00, 5.32407199143163062e-09, 7.78591605611869461e-09, + 2.48051962506972834e-08, 1.36856171339421649e-08, 2.15416864274073636e-08, + 3.71679775110542797e-08, 5.14919014488721604e-08, 5.83905371621603131e-08, + 2.56752178779050280e-08, 1.50591138779666358e-08, 4.07421543880223335e-09, + 6.55899859865622946e-08, 7.04697774403433060e-08, 1.05458966729375492e-07, + 1.16189705334564924e-07, 2.70007840425949794e-08, 9.91549491170275978e-08, + 9.69430665462702729e-08, 3.48962367368142750e-09, 2.12838570084203029e-08, + 9.58558383294243244e-08, 3.54818427912568755e-08, 
1.07710393847949145e-07, + 8.61517153766060168e-08, 2.04600610755536536e-07, 2.03796097652703831e-07, + 1.66306342048863931e-07, 1.59307194630913047e-07, 2.34975611381410033e-07, + 1.92452005268177275e-07, 5.50463182513595194e-08, 7.05953701603703195e-08, + 2.34971916784423615e-07, 5.40015680851899589e-08, 2.12718016029126278e-07, + 1.91492473341603465e-07, 1.73687954457398432e-07, 9.22813729985471341e-08, + 1.06988212380721318e-07, 1.27704297398270718e-07, 5.31950261176686284e-08, + 9.77661777174938596e-09, 1.13152499419201003e-07, 2.30242259071696645e-07, + 2.17840582054596399e-07, 1.61269260528736021e-07, 1.36185356146932601e-07, + 2.08801481826511869e-07, 1.97681264041823641e-07, 1.50784512989339287e-07, + 1.07250828689716638e-07, 9.75961542029652924e-08, 1.43903884071471071e-07, + 2.60010707986588806e-07, 4.51687362770425967e-07, 1.55872185666914818e-09, + 3.30297806270353139e-07, 4.66839232562134881e-07, 3.86401308539453419e-07, + 5.69693854190458130e-08, 3.93123660542428204e-07, 3.95165664638538863e-07, + 1.02867252517587785e-08, 1.32709681572078730e-07, 2.19641127294637299e-07, + 1.98754510492326232e-07, 4.68321143892845854e-07, 4.66826389855508924e-07, + 1.03605546188658804e-07, 2.35802265869106829e-07, 2.84300973057307715e-07, + 1.41190740320740639e-07, 1.69877659083133016e-07, 2.51520105284046651e-07, + 2.61972773884411727e-07, 7.18909291834578061e-08, 2.36692644004112907e-08, + 4.54703970334185855e-07, 2.66978085000826612e-07, 2.65016092160396791e-07, + 2.94953197203117899e-07, 1.98299667558641024e-07, 2.88865876540408914e-07, + 3.99173794882405776e-07, 3.57377937852235498e-07, 4.64184350072864601e-07, + 6.24190501305044646e-08, 3.98129044716236242e-07, 3.29124166816248113e-07, + 1.39748850186603795e-07, 1.10443458567567753e-07, 4.09782728853196823e-08, + 2.04197339771775867e-07, 3.92412117682061536e-07, 3.94305070358032831e-07, + 4.71831774029316962e-07, 4.06610103464898125e-07, 4.53656642786443564e-07, + 3.87773092718157073e-07, 
4.57279976050247260e-07, 4.30400410735578705e-07, + 7.21540920170394723e-08, 9.80872001232200742e-08, 2.66978158058219765e-07, + 3.34565168908893463e-07, 5.35982971014292903e-08, 1.27564755579416119e-07, + 3.03390161571307385e-07, 3.25161686840256005e-07, 4.11013021640696012e-07, + 2.99496861839592342e-07, 2.03305051732449063e-07, 3.32476299509608735e-07, + 4.17602963653023739e-07, 1.86711249657268702e-07, 3.18977681198347184e-07, + 6.05846018127542565e-08, 8.57835758121197076e-08, 1.12749228435440334e-07, + 3.34129550990056099e-07, 4.63409633672188390e-07, 2.11786110481110945e-07, + 2.41878018084726962e-07, 2.60413978970349421e-07, 4.48778782784743522e-07, + 3.25363260095300064e-08, 1.42486299343828112e-07, +}; diff --git a/usr/src/lib/libm/common/C/_TBL_sin.c b/usr/src/lib/libm/common/C/_TBL_sin.c new file mode 100644 index 0000000000..b0fee697d9 --- /dev/null +++ b/usr/src/lib/libm/common/C/_TBL_sin.c @@ -0,0 +1,799 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#include "libm_protos.h" + +/* + * Table of constants for x[i],sin(x[i]),cos(x[i]), where + * x[i] ~ (i+10.5)/64 chosen to make the value of sine and + * cosine nearly representable in double (with error less + * than 2**-8 ulp) + * By K.C. Ng, May 5, 1995 + * + * For each i, _TBL_sincosx[i] := x[i], _TBL_sincos[2*i] := + * sin(x[i]), and _TBL_sincos[2*i+1] := cos(x[i]). + */ + +const double _TBL_sincos[] = { + 1.63327491736778435127e-01, 9.86571908399470176576e-01, + 1.78722113534634630128e-01, 9.83899591489758251761e-01, + 1.94073102892906523831e-01, 9.80987069605669836925e-01, + 2.09376712086097482857e-01, 9.77835053797937558961e-01, + 2.24629204957583178404e-01, 9.74444313586017130113e-01, + 2.39826857830661321902e-01, 9.70815676770349522684e-01, + 2.54965960415442560727e-01, 9.66950029230792762469e-01, + 2.70042816718758793559e-01, 9.62848314709330965755e-01, + 2.85053745940880454146e-01, 9.58511534581129587274e-01, + 2.99995083378835347698e-01, 9.53940747608846839611e-01, + 3.14863181320744367486e-01, 9.49137069684131584602e-01, + 3.29654409930721814526e-01, 9.44101673557052656349e-01, + 3.44365158144533722862e-01, 9.38835788546692695533e-01, + 3.58991834544317267586e-01, 9.33340700243220688925e-01, + 3.73530868238515501023e-01, 9.27617750192923362640e-01, + 3.87978709726743087316e-01, 9.21668335573470609567e-01, + 4.02331831777567594521e-01, 9.15493908848391546584e-01, + 4.16586730281922223984e-01, 9.09095977415485534401e-01, + 4.30739925110786514573e-01, 9.02476103237949467406e-01, + 4.44787960958008266044e-01, 8.95635902466408118094e-01, + 4.58727408216676513231e-01, 8.88577045028066558885e-01, + 4.72554863751536879946e-01, 8.81301254251215970825e-01, + 4.86266951795427115890e-01, 8.73810306411857196096e-01, + 4.99860324731856597857e-01, 8.66106030321324382726e-01, + 5.13331663943585647658e-01, 8.58190306862591900661e-01, + 5.26677680590333596733e-01, 8.50065068549453184410e-01, + 5.39895116435048061376e-01, 8.41732299041438647436e-01, + 
5.52980744632255882820e-01, 8.33194032663434169805e-01, + 5.65931370507619768695e-01, 8.24452353914625679643e-01, + 5.78743832357296650315e-01, 8.15509396946711651033e-01, + 5.91415002201596706755e-01, 8.06367345054898265744e-01, + 6.03941786558566895415e-01, 7.97028430138126520177e-01, + 6.16321127179607297641e-01, 7.87494932169127248578e-01, + 6.28550001844884853597e-01, 7.77769178600434929471e-01, + 6.40625425044079821468e-01, 7.67853543839638774671e-01, + 6.52544448725672743272e-01, 7.57750448655299613243e-01, + 6.64304163044103668234e-01, 7.47462359562187539375e-01, + 6.75901697026429104653e-01, 7.36991788256011193248e-01, + 6.87334219302880855551e-01, 7.26341290975047959577e-01, + 6.98598938789923074033e-01, 7.15513467882745946014e-01, + 7.09693105361432152733e-01, 7.04510962443060329008e-01, + 7.20614010544995853280e-01, 6.93336460750663685637e-01, + 7.31358988151144640000e-01, 6.81992690906972898190e-01, + 7.41925414945620254059e-01, 6.70482422333180339002e-01, + 7.52310711296420575600e-01, 6.58808465085774175307e-01, + 7.62512341773335489137e-01, 6.46973669204044199432e-01, + 7.72527815799416095466e-01, 6.34980923978180178402e-01, + 7.82354688238184881044e-01, 6.22833157267443926486e-01, + 7.91990560000511156780e-01, 6.10533334773848967991e-01, + 8.01433078627164507957e-01, 5.98084459321745920413e-01, + 8.10679938859144910701e-01, 5.85489570130274028514e-01, + 8.19728883213368231253e-01, 5.72751742053888568407e-01, + 8.28577702516849257108e-01, 5.59874084854710574177e-01, + 8.37224236455711978699e-01, 5.46859742430497508536e-01, + 8.45666374107491569667e-01, 5.33711892039036461810e-01, + 8.53902054441761149128e-01, 5.20433743544881588505e-01, + 8.61929266833302509809e-01, 5.07028538621057900393e-01, + 8.69746051561515076678e-01, 4.93499549942200410602e-01, + 8.77350500260862697921e-01, 4.79850080433476600117e-01, + 8.84740756420631879742e-01, 4.66083462405874393575e-01, + 8.91915015812867362222e-01, 4.52203056787028545571e-01, + 8.98871526946913745881e-01, 
4.38212252275223035358e-01, + 9.05608591487805036913e-01, 4.24114464529888268718e-01, + 9.12124564678846838639e-01, 4.09913135321892496687e-01, + 9.18417855741508804002e-01, 3.95611731695571844369e-01, + 9.24486928255549345046e-01, 3.81213745141251114656e-01, + 9.30330300545781363475e-01, 3.66722690716563270996e-01, + 9.35946546034209125864e-01, 3.52142106210879102246e-01, + 9.41334293596668869597e-01, 3.37475551260917883134e-01, + 9.46492227896101323559e-01, 3.22726606483374089951e-01, + 9.51419089686698082886e-01, 3.07898872650964328113e-01, + 9.56113676155394554002e-01, 2.92995969713948978264e-01, + 9.60574841181938254842e-01, 2.78021536015277237475e-01, + 9.64801495637480077683e-01, 2.62979227346346711158e-01, + 9.68792607644664016675e-01, 2.47872716072285947941e-01, + 9.72547202831614887586e-01, 2.32705690227810568782e-01, + 9.76064364566613607010e-01, 2.17481852629530458820e-01, + 9.79343234187565414572e-01, 2.02204919947659544910e-01, + 9.82383011202836109454e-01, 1.86878621837941599759e-01, + 9.85182953494231017366e-01, 1.71506699998524386741e-01, + 9.87742377497998091940e-01, 1.56092907252707135957e-01, + 9.90060658366647028394e-01, 1.40641006660935985462e-01, + 9.92137230124395808062e-01, 1.25154770588626285122e-01, + 9.93971585806359803072e-01, 1.09637979777038541140e-01, + 9.95563277581850036846e-01, 9.40944224196323536491e-02, + 9.96911916861350277941e-01, 7.85278932598362233719e-02, + 9.98017174394052908326e-01, 6.29421926414276133865e-02, + 9.98878780347215333713e-01, 4.73411255892753485286e-02, + 9.99496524372108563483e-01, 3.17285008797294557081e-02, + 9.99870255655346151791e-01, 1.61081301122361006395e-02, + 9.99999882955821872699e-01, 4.83826769160181427432e-04, + 9.99885374626887313276e-01, -1.51405946795101862407e-02, + 9.99526758624139421983e-01, -3.07613197753498594789e-02, + 9.98924122498464628350e-01, -4.63745349375326090802e-02, + 9.98077613374894423437e-01, -6.19764284214149308028e-02, + 9.96987437916807328619e-01, 
-7.75631912448182664344e-02, + 9.95653862273598311283e-01, -9.31310181393209396417e-02, + 9.94077212020575529117e-01, -1.08676108420387079745e-01, + 9.92257872072439317535e-01, -1.24194666996109981394e-01, + 9.90196286596708996619e-01, -1.39682905217811598186e-01, + 9.87892958898728967831e-01, -1.55137041864005509328e-01, + 9.85348451302295424981e-01, -1.70553304031812708041e-01, + 9.82563385014030843401e-01, -1.85927928052160545969e-01, + 9.79538439968065888230e-01, -2.01257160431443482551e-01, + 9.76274354660002341433e-01, -2.16537258764389006771e-01, + 9.72771925969731610095e-01, -2.31764492632368090952e-01, + 9.69032008956924317822e-01, -2.46935144556029828600e-01, + 9.65055516693764658953e-01, -2.62045510739892129060e-01, + 9.60843419958733790942e-01, -2.77091902303196746526e-01, + 9.56396747083171572257e-01, -2.92070645852553878452e-01, + 9.51716583658057113659e-01, -3.06978084543891138747e-01, + 9.46804072278775166183e-01, -3.21810578937871294425e-01, + 9.41660412264228252610e-01, -3.36564507894643705210e-01, + 9.36286859366077139910e-01, -3.51236269451786098372e-01, + 9.30684725460523609719e-01, -3.65822281708577445869e-01, + 9.24855378224429758305e-01, -3.80318983708869018390e-01, + 9.18800240811794344253e-01, -3.94722836284130795814e-01, + 9.12520791499566663596e-01, -4.09030322935848345001e-01, + 9.06018563323250702979e-01, -4.23237950701107090712e-01, + 8.99295143708603639254e-01, -4.37342250991282488481e-01, + 8.92352174084417359978e-01, -4.51339780439098559039e-01, + 8.85191349474114597129e-01, -4.65227121754735961634e-01, + 8.77814418087698666859e-01, -4.79000884547570504601e-01, + 8.70223180902864101860e-01, -4.92657706140177065190e-01, + 8.62419491209962973954e-01, -5.06194252418129098103e-01, + 8.54405254167239447405e-01, -5.19607218629047684644e-01, + 8.46182426332270809510e-01, -5.32893330195106762481e-01, + 8.37753015193838712626e-01, -5.46049343497116423940e-01, + 8.29119078677651999421e-01, -5.59072046674417011403e-01, + 
8.20282724626069215113e-01, -5.71958260435175724901e-01, + 8.11246110312714763246e-01, -5.84704838788333125521e-01, + 8.02011441899084687179e-01, -5.97308669837422590021e-01, + 7.92580973890125495274e-01, -6.09766676547169317324e-01, + 7.82957008603788473522e-01, -6.22075817467780289860e-01, + 7.73141895594474215514e-01, -6.34233087497477643346e-01, + 7.63138031079152456826e-01, -6.46235518615801973752e-01, + 7.52947857359473227135e-01, -6.58080180599429964694e-01, + 7.42573862219235825144e-01, -6.69764181745192588302e-01, + 7.32018578314804879703e-01, -6.81284669577975954269e-01, + 7.21284582577006005977e-01, -6.92638831525286491342e-01, + 7.10374495555637031075e-01, -7.03823895633044149811e-01, + 6.99290980797484418297e-01, -7.14837131223114541356e-01, + 6.88036744157449198234e-01, -7.25675849597612554476e-01, + 6.76614533221899572268e-01, -7.36337404613476742554e-01, + 6.65027136549188546688e-01, -7.46819193415104276568e-01, + 6.53277383052505156158e-01, -7.57118657009633100330e-01, + 6.41368141233487065733e-01, -7.67233280958732777322e-01, + 6.29302318589868403542e-01, -7.77160595898567008177e-01, + 6.17082860810903133242e-01, -7.86898178224750721732e-01, + 6.04712751105658807838e-01, -7.96443650643424705393e-01, + 5.92195009450509846083e-01, -8.05794682770934245220e-01, + 5.79532691867931770702e-01, -8.14948991689853463605e-01, + 5.66728889706594629594e-01, -8.23904342488817387213e-01, + 5.53786728799491090314e-01, -8.32658548869558479133e-01, + 5.40709368819720759269e-01, -8.41209473597735457595e-01, + 5.27500002380493770993e-01, -8.49555029111463189118e-01, + 5.14161854409658891640e-01, -8.57693177931374672873e-01, + 5.00698181184736190730e-01, -8.65621933270118271153e-01, + 4.87112269682015319727e-01, -8.73339359427499739574e-01, + 4.73407436683839610847e-01, -8.80843572317148937323e-01, + 4.59587028080454429446e-01, -8.88132739865035936155e-01, + 4.45654417892204612883e-01, -8.95205082544307417791e-01, + 4.31613007576607476956e-01, 
-9.02058873738668665077e-01, + 4.17466225094956511210e-01, -9.08692440215591923369e-01, + 4.03217524247773739798e-01, -9.15104162453376668296e-01, + 3.88870383625079307777e-01, -9.21292475134407928827e-01, + 3.74428305866518040812e-01, -9.27255867474522488259e-01, + 3.59894816812003803808e-01, -9.32992883591217014860e-01, + 3.45273464602750546071e-01, -9.38502122875176647554e-01, + 3.30567818825136694461e-01, -9.43782240327286303661e-01, + 3.15781469657649860316e-01, -9.48831946880402399280e-01, + 3.00918026974915431282e-01, -9.53650009721346392233e-01, + 2.85981119468962208252e-01, -9.58235252590552089025e-01, + 2.70974393771316324209e-01, -9.62586556066657106356e-01, + 2.55901513568614069616e-01, -9.66702857838587559236e-01, + 2.40766158683884484715e-01, -9.70583152971761897732e-01, + 2.25572024178931879179e-01, -9.74226494152062860721e-01, + 2.10322819513115238932e-01, -9.77631991902911057224e-01, + 1.95022267545207572681e-01, -9.80798814824694553671e-01, + 1.79674103687683967001e-01, -9.83726189782516358129e-01, + 1.64282074965636487596e-01, -9.86413402101261493904e-01, + 1.48849939140241666058e-01, -9.88859795733422641817e-01, + 1.33381463740289751829e-01, -9.91064773428305123559e-01, + 1.17880425165185737102e-01, -9.93027796873216961338e-01, + 1.02350607771443738447e-01, -9.94748386823932517764e-01, + 8.67958029390951818494e-02, -9.96226123223115322958e-01, + 7.12198081674702832000e-02, -9.97460645301151083153e-01, + 5.56264261071372570489e-02, -9.98451651667994988237e-01, + 4.00194636390110436430e-02, -9.99198900384726140800e-01, + 2.44027309972172715136e-02, -9.99702209020204901613e-01, + 8.78004077991816241078e-03, -9.99961454699081375708e-01, + -6.84479296391702837776e-03, -9.99976574130254869388e-01, + -2.24679556394218951643e-02, -9.99747563622630175395e-01, + -3.80856331006515710924e-02, -9.99274479085362488107e-01, + -5.36940124898220294547e-02, -9.98557436015947375019e-01, + -6.92892832575160572128e-02, -9.97596609469809547655e-01, + 
-8.48676380386628043118e-02, -9.96392234019183087312e-01, + -1.00425273601341916163e-01, -9.94944603695148255262e-01, + -1.15958391781735684067e-01, -9.93254071914831615508e-01, + -1.31463200384306394541e-01, -9.91321051397939245753e-01, + -1.46935914119801724897e-01, -9.89146014065556578032e-01, + -1.62372755568482129984e-01, -9.86729490918913376696e-01, + -1.77769956039573850948e-01, -9.84072071918356994225e-01, + -1.93123756521520834051e-01, -9.81174405835688490107e-01, + -2.08430408606563005725e-01, -9.78037200094199477007e-01, + -2.23686175400125447643e-01, -9.74661220596605204491e-01, + -2.38887332428331961021e-01, -9.71047291539024692852e-01, + -2.54030168529570332669e-01, -9.67196295214595047618e-01, + -2.69110986809851404633e-01, -9.63109171785954898404e-01, + -2.84126105504238113397e-01, -9.58786919065437892584e-01, + -2.99071858881536201125e-01, -9.54230592270622235418e-01, + -3.13944598143160502612e-01, -9.49441303765919730751e-01, + -3.28740692363219233485e-01, -9.44420222774031481450e-01, + -3.43456529243486463621e-01, -9.39168575134420757777e-01, + -3.58088516132365641820e-01, -9.33687642958886176991e-01, + -3.72633080853157161449e-01, -9.27978764333475703019e-01, + -3.87086672547184373894e-01, -9.22043333003578990947e-01, + -4.01445762590873223008e-01, -9.15882798013933796533e-01, + -4.15706845395529489551e-01, -9.09498663380709615467e-01, + -4.29866439353555507275e-01, -9.02892487684716527063e-01, + -4.43921087571260808424e-01, -8.96065883743795366101e-01, + -4.57867358817895864220e-01, -8.89020518171051099543e-01, + -4.71701848327647499381e-01, -8.81758110982984399939e-01, + -4.85421178579811707365e-01, -8.74280435207254624785e-01, + -4.99022000232008211551e-01, -8.66589316391822128693e-01, + -5.12500992809901023683e-01, -8.58686632229048840692e-01, + -5.25854865641323332426e-01, -8.50574312027670975667e-01, + -5.39080358520030999969e-01, -8.42254336324791408330e-01, + -5.52174242663304060130e-01, -8.33728736304085060738e-01, + 
-5.65133321393192722404e-01, -8.24999593364201810886e-01, + -5.77954430931352902689e-01, -8.16069038603239760299e-01, + -5.90634441175508673183e-01, -8.06939252296785092256e-01, + -6.03170256463835929850e-01, -7.97612463366358381833e-01, + -6.15558816459891189332e-01, -7.88090948735295393490e-01, + -6.27797096543907584554e-01, -7.78377033044423516372e-01, + -6.39882108993420795073e-01, -7.68473087746169514212e-01, + -6.51810903392718188343e-01, -7.58381530773507561705e-01, + -6.63580567511655061708e-01, -7.48104825823834529430e-01, + -6.75188227925781481176e-01, -7.37645481834222960238e-01, + -6.86631050850229573967e-01, -7.27006052250123602221e-01, + -6.97906242654146802273e-01, -7.16189134561793783185e-01, + -7.09011050643817641870e-01, -7.05197369581700761465e-01, + -7.19942763756367454242e-01, -6.94033440775618015728e-01, + -7.30698713155769064009e-01, -6.82700073672548479742e-01, + -7.41276272975477157345e-01, -6.71200035103981407225e-01, + -7.51672860805046583188e-01, -6.59536132694151233657e-01, + -7.61885938516202787518e-01, -6.47711213961349341339e-01, + -7.71913012640803364306e-01, -6.35728165897814334606e-01, + -7.81751635309322678857e-01, -6.23589913878664137137e-01, + -7.91399404523052685256e-01, -6.11299421331770620469e-01, + -8.00853964899717496451e-01, -5.98859688829029512824e-01, + -8.10113008319712335492e-01, -5.86273753251146056975e-01, + -8.19174274236826760465e-01, -5.73544687385881157837e-01, + -8.28035550507897455397e-01, -5.60675598804766250893e-01, + -8.36694673776658404130e-01, -5.47669629314764150330e-01, + -8.45149530028187490061e-01, -5.34529954158916909002e-01, + -8.53398055161871504914e-01, -5.21259781151332757254e-01, + -8.61438235389631601358e-01, -5.07862350060326539491e-01, + -8.69268107829002323328e-01, -4.94340931656873816546e-01, + -8.76885760925650292741e-01, -4.80698827006935058836e-01, + -8.84289334936661730602e-01, -4.66939366639049280305e-01, + -8.91477022398163843064e-01, -4.53065909704210345588e-01, + 
-8.98447068525225711610e-01, -4.39081843234753188554e-01, + -9.05197771673453388530e-01, -4.24990581257296273776e-01, + -9.11727483791179293959e-01, -4.10795563875518132679e-01, + -9.18034610707084031134e-01, -3.96500256675695827990e-01, + -9.24117612643078456536e-01, -3.82108149615860981374e-01, + -9.29975004511545022545e-01, -3.67622756346437040698e-01, + -9.35605356329172521690e-01, -3.53047613231079804308e-01, + -9.41007293511755382731e-01, -3.38386278619096869669e-01, + -9.46179497257704227309e-01, -3.23642331855951870256e-01, + -9.51120704853153031699e-01, -3.08819372448752571536e-01, + -9.55829709968717189383e-01, -2.93921019223052470970e-01, + -9.60305362967905695726e-01, -2.78950909399985458315e-01, + -9.64546571183209522360e-01, -2.63912697721639999404e-01, + -9.68552299193694232748e-01, -2.48810055517474232323e-01, + -9.72321569045517364316e-01, -2.33646669928897571245e-01, + -9.75853460530087812863e-01, -2.18426242863471758993e-01, + -9.79147111396304836717e-01, -2.03152490125698942380e-01, + -9.82201717531947959827e-01, -1.87829140649930864670e-01, + -9.85016533205280153673e-01, -1.72459935382833828843e-01, + -9.87590871221861066331e-01, -1.57048626479970948600e-01, + -9.89924103089018792012e-01, -1.41598976420741456961e-01, + -9.92015659185421450061e-01, -1.26114756991058563074e-01, + -9.93865028889118318212e-01, -1.10599748423005059261e-01, + -9.95471760691319929037e-01, -9.50577385914659067634e-02, + -9.96835462344218936614e-01, -7.94925217425341834598e-02, + -9.97955800916290658442e-01, -6.39078979276023889655e-02, + -9.98832502892746831868e-01, -4.83076719063431220258e-02, + -9.99465354238023406808e-01, -3.26956522776712110723e-02, + -9.99854200451614993916e-01, -1.70756504784357332483e-02, + -9.99998946602528415717e-01, -1.45147987706449187358e-03, + -9.99899557352339485305e-01, 1.41730450713867285606e-02, + -9.99556056965451689145e-01, 2.97941098823021957576e-02, + -9.98968529303411734155e-01, 4.54079008695470534573e-02, + 
-9.98137117802025963798e-01, 6.10106061751933687054e-02, + -9.97062025438244736719e-01, 7.65984166219185330649e-02, + -9.95743514682696617690e-01, 9.21675266422526118237e-02, + -9.94181907425219280050e-01, 1.07714135322866152999e-01, + -9.92377584917212285376e-01, 1.23234447107459038628e-01, + -9.90330987653228356216e-01, 1.38724672981345553691e-01, + -9.88042615281836456020e-01, 1.54181031216647779214e-01, + -9.85513026476385278762e-01, 1.69599748364658631239e-01, + -9.82742838804682716791e-01, 1.84977060140206039929e-01, + -9.79732728555263054915e-01, 2.00309212463279484595e-01, + -9.76483430616286174342e-01, 2.15592462140605983789e-01, + -9.72995738247511954278e-01, 2.30823078032026951512e-01, + -9.69270502929450938900e-01, 2.45997341755737758406e-01, + -9.65308634114379171542e-01, 2.61111548776057855736e-01, + -9.61111099038787108917e-01, 2.76162009162112587202e-01, + -9.56678922485658334018e-01, 2.91145048509638459944e-01, + -9.52013186489632401432e-01, 3.06057008986653333871e-01, + -9.47115030121562395671e-01, 3.20894250054175877995e-01, + -9.41985649202698782645e-01, 3.35653149391108962529e-01, + -9.36626296000886870985e-01, 3.50330103815899684960e-01, + -9.31038278925287121623e-01, 3.64921530162087393023e-01, + -9.25222962204842236389e-01, 3.79423866156172462372e-01, + -9.19181765559584973424e-01, 3.93833571274420646269e-01, + -9.12916163872961705650e-01, 4.08147127564896294860e-01, + -9.06427686803489396361e-01, 4.22361040575566504263e-01, + -8.99717918410242289973e-01, 4.36471840204543548580e-01, + -8.92788496793018526709e-01, 4.50476081489419755144e-01, + -8.85641113671704172106e-01, 4.64370345494136360642e-01, + -8.78277513965914136129e-01, 4.78151240155093082418e-01, + -8.70699495405757306621e-01, 4.91815401040023969514e-01, + -8.62908908048144129843e-01, 5.05359492253939501794e-01, + -8.54907653871092576559e-01, 5.18780207171229856833e-01, + -8.46697686222891654495e-01, 5.32074269388026044325e-01, + -8.38281009508205721126e-01, 
5.45238433254574883513e-01, + -8.29659678498936070667e-01, 5.58269484991829045839e-01, + -8.20835797971514846694e-01, 5.71164243250981584765e-01, + -8.11811522157973475267e-01, 5.83919559949445554636e-01, + -8.02589054191142126093e-01, 5.96532321079560556853e-01, + -7.93170645644559635379e-01, 6.08999447362468804279e-01, + -7.83558595847759331576e-01, 6.21317895181756174594e-01, + -7.73755251444074421130e-01, 6.33484657164415598807e-01, + -7.63763005819449558587e-01, 6.45496762921116018497e-01, + -7.53584298396099971917e-01, 6.57351279918779618505e-01, + -7.43221614171602262822e-01, 6.69045314031985416392e-01, + -7.32677483058112311021e-01, 6.80576010317458623966e-01, + -7.21954479231692536345e-01, 6.91940553745258979390e-01, + -7.11055220593523329420e-01, 7.03136169789818077369e-01, + -6.99982367997418419847e-01, 7.14160125246941168697e-01, + -6.88738624756222161949e-01, 7.25009728740868553132e-01, + -6.77326735865867002317e-01, 7.35682331500009611958e-01, + -6.65749487360529190738e-01, 7.46175327975397872926e-01, + -6.54009705667427665432e-01, 7.56486156444917789976e-01, + -6.42110256878597240870e-01, 7.66612299673897656938e-01, + -6.30054046069779882799e-01, 7.76551285512489308793e-01, + -6.17844016641709514737e-01, 7.86300687459981162419e-01, + -6.05483149427811451204e-01, 7.95858125396090021475e-01, + -5.92974462184078454641e-01, 8.05221265986873269149e-01, + -5.80321008740226185196e-01, 8.14387823346301220617e-01, + -5.67525878248187232167e-01, 8.23355559596596009442e-01, + -5.54592194460652221366e-01, 8.32122285390385463266e-01, + -5.41523114921985349035e-01, 8.40685860476545809838e-01, + -5.28321830279222970361e-01, 8.49044194168013799384e-01, + -5.14991563445484024086e-01, 8.57195245892075630145e-01, + -5.01535568812419785267e-01, 8.65137025687840122146e-01, + -4.87957131464199334037e-01, 8.72867594686175807261e-01, + -4.74259566375507701785e-01, 8.80385065582847903265e-01, + -4.60446217616484521074e-01, 8.87687603091691812551e-01, + 
-4.46520457522199765155e-01, 8.94773424400929218159e-01, + -4.32485685857187662773e-01, 9.01640799614035870491e-01, + -4.18345329015600841949e-01, 9.08288052156819181171e-01, + -4.04102839158860249746e-01, 9.14713559199681003342e-01, + -3.89761693387688290535e-01, 9.20915752046603697245e-01, + -3.75325392887144893006e-01, 9.26893116521052995438e-01, + -3.60797462091837162212e-01, 9.32644193327814230443e-01, + -3.46181447754430826613e-01, 9.38167578437160809557e-01, + -3.31480918189186846146e-01, 9.43461923384538936332e-01, + -3.16699462305234713533e-01, 9.48525935636751693636e-01, + -3.01840688808588275549e-01, 9.53358378879399670502e-01, + -2.86908225223433177575e-01, 9.57958073351407035645e-01, + -2.71905717092672694069e-01, 9.62323896103759568454e-01, + -2.56836827106365517270e-01, 9.66454781271185447977e-01, + -2.41705234084330145006e-01, 9.70349720366960877271e-01, + -2.26514632188532627488e-01, 9.74007762497041795768e-01, + -2.11268729991721526673e-01, 9.77428014601425809715e-01, + -1.95971249573089145724e-01, 9.80609641672343546048e-01, + -1.80625925602857506647e-01, 9.83551866959801457391e-01, + -1.65236504388101917984e-01, 9.86253972168224413153e-01, + -1.49806743037279310737e-01, 9.88715297616337362996e-01, + -1.34340408538235145386e-01, 9.90935242401732363504e-01, + -1.18841276732320783038e-01, 9.92913264562737096774e-01, + -1.03313131549733094872e-01, 9.94648881188425981748e-01, + -8.77597639605576101962e-02, 9.96141668554020087711e-01, + -7.21849710624100776579e-02, 9.97391262219956997725e-01, + -5.65925552406322598942e-02, 9.98397357113557260000e-01, + -4.09863231473179684405e-02, 9.99159707611782965664e-01, + -2.53700848500082870585e-02, 9.99678127596429599855e-01, + -9.74765281333290004029e-03, 9.99952490503739244154e-01, + 5.87715899658624793545e-03, 9.99982729351926780126e-01, + 2.15005359577336609134e-02, 9.99768836758543000265e-01, + 3.71186638844091532086e-02, 9.99310864942154153390e-01, + 5.27277298119544560184e-02, 9.98608925710599448777e-01, 
+ 6.83239230305028866219e-02, 9.97663190431380964007e-01, + 8.39034359259593492952e-02, 9.96473889994022088423e-01, + 9.94624650198910192911e-02, 9.95041314746361149624e-01, + 1.14997211772979862632e-01, 9.93365814433152527485e-01, + 1.30503883601530007441e-01, 9.91447798103822663940e-01, + 1.45978694798140268274e-01, 9.89287734011208397256e-01, + 1.61417867390196478894e-01, 9.86886149506213783411e-01, + 1.76817632086965909055e-01, 9.84243630908099076393e-01, + 1.92174229316510819521e-01, 9.81360823340021615202e-01, + 2.07483909972419000578e-01, 9.78238430599900898876e-01, + 2.22742936384479617296e-01, 9.74877214981876405453e-01, + 2.37947583337762752498e-01, 9.71277997065576714775e-01, + 2.53094138761417730699e-01, 9.67441655566172231673e-01, + 2.68178904913313143066e-01, 9.63369127053330553956e-01, + 2.83198199008898365836e-01, 9.59061405791160170864e-01, + 2.98148354355895761625e-01, 9.54519543432648220893e-01, + 3.13025720984674848957e-01, 9.49744648840952665481e-01, + 3.27826666953088208256e-01, 9.44737887688658850571e-01, + 3.42547578723123691269e-01, 9.39500482336717790410e-01, + 3.57184862422095295020e-01, 9.34033711413302714099e-01, + 3.71734944552921997563e-01, 9.28338909557407276907e-01, + 3.86194272922183889918e-01, 9.22417467073399333088e-01, + 4.00559317492650446280e-01, 9.16270829596698477282e-01, + 4.14826571255144882500e-01, 9.09900497736263580428e-01, + 4.28992551069135752417e-01, 9.03308026714694345394e-01, + 4.43053798493044215245e-01, 8.96495025998965022751e-01, + 4.57006880688855809947e-01, 8.89463158879018389591e-01, + 4.70848391227359996947e-01, 8.82214142075838037016e-01, + 4.84574950851524355322e-01, 8.74749745359918784438e-01, + 4.98183208470846405902e-01, 8.67071791028685923131e-01, + 5.11669841801385194557e-01, 8.59182153557058847504e-01, + 5.25031558273095666500e-01, 8.51082759088283347104e-01, + 5.38265095838926344030e-01, 8.42775584958125989488e-01, + 5.51367223674840811753e-01, 8.34262659272904327779e-01, + 
5.64334743129053073574e-01, 8.25546060312485341370e-01, + 5.77164488339731551747e-01, 8.16627916127985353789e-01, + 5.89853327114563730227e-01, 8.07510403952716560028e-01, + 6.02398161667909270989e-01, 7.98195749687458211419e-01, + 6.14795929310800737255e-01, 7.88686227407876638829e-01, + 6.27043603440996633047e-01, 7.78984158621810585110e-01, + 6.39138193783907904155e-01, 7.69091912092854990135e-01, + 6.51076747732772576072e-01, 7.59011902779999636515e-01, + 6.62856350634406732425e-01, 7.48746591594002808279e-01, + 6.74474126652610750376e-01, 7.38298484676894073431e-01, + 6.85927239488512419108e-01, 7.27670132771483846312e-01, + 6.97212893028884672653e-01, 7.16864130637244967303e-01, + 7.08328332056515685977e-01, 7.05883116391116449684e-01, + 7.19270842858181325141e-01, 6.94729770928295131682e-01, + 7.30037753982098469585e-01, 6.83406817174640912604e-01, + 7.40626436901593243611e-01, 6.71917019402284765306e-01, + 7.51034306483622016160e-01, 6.60263182742052423535e-01, + 7.61258821807797358971e-01, 6.48448152298858992992e-01, + 7.71297486713702129535e-01, 6.36474812533164180373e-01, + 7.81147850424134593261e-01, 6.24346086539952493943e-01, + 7.90807508031525441261e-01, 6.12064935477412253029e-01, + 8.00274101326324149852e-01, 5.99634357543281759639e-01, + 8.09545319236430693799e-01, 5.87057387401253572001e-01, + 8.18618898249645288168e-01, 5.74337095640301553701e-01, + 8.27492623168671781464e-01, 5.61476587758947043305e-01, + 8.36164327654276950952e-01, 5.48479003388890884452e-01, + 8.44631894603476873762e-01, 5.35347515748920921297e-01, + 8.52893256793554432882e-01, 5.22085330684634363330e-01, + 8.60946397328884449607e-01, 5.08695685971892852528e-01, + 8.68789350153792105935e-01, 4.95181850494696151888e-01, + 8.76420200509378299891e-01, 4.81547123487516159912e-01, + 8.83837085454141080376e-01, 4.67794833635354845303e-01, + 8.91038194240763248288e-01, 4.53928338401734798868e-01, + 8.98021768869999070795e-01, 4.39951022996421692302e-01, + 9.04786104293555437650e-01, 
4.25866300001880193626e-01, + 9.11329549200603827863e-01, 4.11677607787725330368e-01, + 9.17650506064666360295e-01, 3.97388410398770597354e-01, + 9.23747431723077494503e-01, 3.83002196318791732210e-01, + 9.29618837697029576361e-01, 3.68522477738907783262e-01, + 9.35263290560562010612e-01, 3.53952789690701208336e-01, + 9.40679412304837647696e-01, 3.39296689146571628370e-01, + 9.45865880661811875285e-01, 3.24557754182295044032e-01, + 9.50821429431150111355e-01, 3.09739583092804637854e-01, + 9.55544848784945832776e-01, 2.94845793526980815003e-01, + 9.60034985637467253028e-01, 2.79880021352128749434e-01, + 9.64290743580318188144e-01, 2.64845920952762714506e-01, + 9.68311083831862262628e-01, 2.49747162002622591359e-01, + 9.72095024823529496594e-01, 2.34587430851146777622e-01, + 9.75641642748477533331e-01, 2.19370428579268944569e-01, + 9.78950071770562035844e-01, 2.04099870113656461923e-01, + 9.82019504170923540620e-01, 1.88779483598969205493e-01, + 9.84849190595408208182e-01, 1.73413009268535894813e-01, + 9.87438440210647860873e-01, 1.58004198660550820854e-01, + 9.89786620894844482166e-01, 1.42556813578185059832e-01, + 9.91893159367450705233e-01, 1.27074625319365142051e-01, + 9.93757541353338824663e-01, 1.11561413595234956708e-01, + 9.95379311685924861308e-01, 9.60209657713066710993e-02, + 9.96758074438687136087e-01, 8.04570757688883031467e-02, + 9.97893492999770703733e-01, 6.48735433648924275651e-02, + 9.98785290176304019205e-01, 4.92741730264058125366e-02, + 9.99433248251151762354e-01, 3.36627730609296640929e-02, + 9.99837209032161888800e-01, 1.80431547900308138221e-02, + 9.99997073896832011641e-01, 2.41913161566219324719e-03, + 9.99912803818512774257e-01, -1.32054821873487954892e-02, + 9.99584419370001642235e-01, -2.88268720595330242562e-02, + 9.99012000721555049054e-01, -4.44412242666163900817e-02, + 9.98195687620527460915e-01, -6.00447267941353959864e-02, + 9.97135679355775073063e-01, -7.56335702958476074897e-02, + 9.95832234722008102779e-01, 
-9.12039488650100010902e-02, + 9.94285671925894676271e-01, -1.06752061351864019345e-01, + 9.92496368545773943737e-01, -1.22274111828511194977e-01, + 9.90464761404806215417e-01, -1.37766310886661608182e-01 +}; + +const double _TBL_sincosx[] = { + 1.64062500000167837966e-01, 1.79687499999472477530e-01, + 1.95312499999996669331e-01, 2.10937500000106192832e-01, + 2.26562499999874683576e-01, 2.42187499999999750200e-01, + 2.57812499999549193941e-01, 2.73437500000180466753e-01, + 2.89062500000347444296e-01, 3.04687500000159650071e-01, + 3.20312500001052657961e-01, 3.35937499999853450561e-01, + 3.51562499998759436792e-01, 3.67187499998127386824e-01, + 3.82812499999808708573e-01, 3.98437499999694078046e-01, + 4.14062499999775512904e-01, 4.29687499999869215728e-01, + 4.45312499999981514787e-01, 4.60937499992721433362e-01, + 4.76562499999932387418e-01, 4.92187500000263733479e-01, + 5.07812500002462252624e-01, 5.23437499998664290679e-01, + 5.39062500000133337785e-01, 5.54687499999937494444e-01, + 5.70312499999814259688e-01, 5.85937500002074562744e-01, + 6.01562499999652833260e-01, 6.17187499999419131314e-01, + 6.32812500000347721851e-01, 6.48437500005533351555e-01, + 6.64062499997531863194e-01, 6.79687499999813815599e-01, + 6.95312500005013212068e-01, 7.10937499999876987289e-01, + 7.26562500001548428052e-01, 7.42187500000339617223e-01, + 7.57812499998633315457e-01, 7.73437500000337285755e-01, + 7.89062499996497468402e-01, 8.04687500000179967152e-01, + 8.20312500001350475287e-01, 8.35937499996779354028e-01, + 8.51562500000668243239e-01, 8.67187499999485522650e-01, + 8.82812500000538014078e-01, 8.98437500000525690602e-01, + 9.14062500000757727214e-01, 9.29687500002357114504e-01, + 9.45312499999430455588e-01, 9.60937500000796696042e-01, + 9.76562500001389000026e-01, 9.92187499998313238159e-01, + 1.00781250000027000624e+00, 1.02343750000073119288e+00, + 1.03906249999567279474e+00, 1.05468750000121480603e+00, + 1.07031249999813948826e+00, 1.08593749999936250994e+00, + 
1.10156249999885291757e+00, 1.11718750000074029671e+00, + 1.13281249999926680871e+00, 1.14843749999650057703e+00, + 1.16406249999956079577e+00, 1.17968749999995736744e+00, + 1.19531250000235189646e+00, 1.21093750000001554312e+00, + 1.22656249999714606069e+00, 1.24218750000679789558e+00, + 1.25781249999789324079e+00, 1.27343750000030864200e+00, + 1.28906250000041366910e+00, 1.30468750000013344881e+00, + 1.32031249999823008245e+00, 1.33593749999817146268e+00, + 1.35156249999504352033e+00, 1.36718750000051336713e+00, + 1.38281250000255573340e+00, 1.39843749999889488400e+00, + 1.41406250000066702199e+00, 1.42968750000377853304e+00, + 1.44531250000268074452e+00, 1.46093749999857935862e+00, + 1.47656250000000177636e+00, 1.49218750000007549517e+00, + 1.50781249999986965982e+00, 1.52343749999979238829e+00, + 1.53906250000026356695e+00, 1.55468750000024247271e+00, + 1.57031250000686006807e+00, 1.58593749999970379250e+00, + 1.60156249999876076906e+00, 1.61718749999920530236e+00, + 1.63281249999894950697e+00, 1.64843749999433342168e+00, + 1.66406250000158717484e+00, 1.67968749999775224246e+00, + 1.69531250000185917948e+00, 1.71093749999863442568e+00, + 1.72656249999789279670e+00, 1.74218750000263478128e+00, + 1.75781250000296740410e+00, 1.77343749999920641258e+00, + 1.78906249999844191301e+00, 1.80468749999888578017e+00, + 1.82031250003296385387e+00, 1.83593749999912847493e+00, + 1.85156249999896371783e+00, 1.86718749999873900869e+00, + 1.88281249999986122212e+00, 1.89843750000025601743e+00, + 1.91406250000089750429e+00, 1.92968749999936717288e+00, + 1.94531249999502553472e+00, 1.96093749999814637164e+00, + 1.97656250000163713487e+00, 1.99218750000058819616e+00, + 2.00781250000015099033e+00, 2.02343750000025890401e+00, + 2.03906249999571986820e+00, 2.05468749999347455315e+00, + 2.07031249999880184731e+00, 2.08593749999950617280e+00, + 2.10156249999859534583e+00, 2.11718749999749178414e+00, + 2.13281250000269562150e+00, 2.14843750000770983277e+00, + 2.16406250000204325445e+00, 
2.17968750000288169488e+00, + 2.19531250000207567297e+00, 2.21093749999685940111e+00, + 2.22656249999882449586e+00, 2.24218750000040500936e+00, + 2.25781249999956967756e+00, 2.27343749999970867748e+00, + 2.28906249999833111275e+00, 2.30468749999696020936e+00, + 2.32031250000405675493e+00, 2.33593750000527755617e+00, + 2.35156250000277511347e+00, 2.36718749998901101250e+00, + 2.38281250000068833828e+00, 2.39843750000151390012e+00, + 2.41406250000618571860e+00, 2.42968749999278221807e+00, + 2.44531250000394617672e+00, 2.46093750000379341003e+00, + 2.47656250000329514194e+00, 2.49218749999781508109e+00, + 2.50781249999807354101e+00, 2.52343750000954214485e+00, + 2.53906250000098099306e+00, 2.55468750001107025582e+00, + 2.57031250000341415785e+00, 2.58593750002171240965e+00, + 2.60156250000635891340e+00, 2.61718750000451771953e+00, + 2.63281250000028421709e+00, 2.64843750001994493459e+00, + 2.66406250000455235849e+00, 2.67968749999316235844e+00, + 2.69531249997396704643e+00, 2.71093749999957500663e+00, + 2.72656249999638511383e+00, 2.74218749999314947985e+00, + 2.75781249999954258811e+00, 2.77343750000063726802e+00, + 2.78906249999834177089e+00, 2.80468750000019895197e+00, + 2.82031249999983835153e+00, 2.83593749999777511306e+00, + 2.85156249999855315735e+00, 2.86718750000235678144e+00, + 2.88281249999902611236e+00, 2.89843749999328359479e+00, + 2.91406250000365130148e+00, 2.92968749999994892974e+00, + 2.94531249999847322130e+00, 2.96093749999701350006e+00, + 2.97656250000468292072e+00, 2.99218750000308997272e+00, + 3.00781249999819877416e+00, 3.02343749999709299203e+00, + 3.03906249999948618878e+00, 3.05468750000752597984e+00, + 3.07031250000433075797e+00, 3.08593749999511279825e+00, + 3.10156249999957589480e+00, 3.11718749999961186603e+00, + 3.13281249999836441944e+00, 3.14843750000262057043e+00, + 3.16406249999657873673e+00, 3.17968750000540190115e+00, + 3.19531250000325739435e+00, 3.21093750000270583556e+00, + 3.22656250000035882408e+00, 3.24218749999618305324e+00, 
+ 3.25781250000001199041e+00, 3.27343750000431255032e+00, + 3.28906249999634914261e+00, 3.30468749999773381276e+00, + 3.32031250000108801856e+00, 3.33593750000042854609e+00, + 3.35156249999819699781e+00, 3.36718749999951061369e+00, + 3.38281250000727817806e+00, 3.39843750000385558252e+00, + 3.41406250000184297022e+00, 3.42968750000183808524e+00, + 3.44531249999830135877e+00, 3.46093749998354383024e+00, + 3.47656249999984101606e+00, 3.49218750000081934459e+00, + 3.50781249999577759979e+00, 3.52343749999866640010e+00, + 3.53906249999683852892e+00, 3.55468750000498978636e+00, + 3.57031249999826005848e+00, 3.58593750001092637092e+00, + 3.60156250000782085507e+00, 3.61718749999987299049e+00, + 3.63281250000544186918e+00, 3.64843749999226352188e+00, + 3.66406250000062438943e+00, 3.67968749999757616109e+00, + 3.69531250001872235700e+00, 3.71093750000574784664e+00, + 3.72656249999563016218e+00, 3.74218749999581179466e+00, + 3.75781250000033528735e+00, 3.77343749999415045693e+00, + 3.78906249995283994636e+00, 3.80468750000592104143e+00, + 3.82031249998920063859e+00, 3.83593750000164934733e+00, + 3.85156250000057731597e+00, 3.86718750000405053768e+00, + 3.88281249997192157153e+00, 3.89843749998371702503e+00, + 3.91406249999986277643e+00, 3.92968749999597033451e+00, + 3.94531249999519229021e+00, 3.96093749997563104870e+00, + 3.97656250000223510099e+00, 3.99218750000022870594e+00, + 4.00781250004454392410e+00, 4.02343749999355093649e+00, + 4.03906249999698196973e+00, 4.05468749999250022142e+00, + 4.07031249994990851349e+00, 4.08593749999590372113e+00, + 4.10156249999066258027e+00, 4.11718749999303490483e+00, + 4.13281249999853184107e+00, 4.14843749998088373587e+00, + 4.16406249999834177089e+00, 4.17968749999662758654e+00, + 4.19531249999891109326e+00, 4.21093749999872102308e+00, + 4.22656249999120881000e+00, 4.24218750000338129524e+00, + 4.25781250000494537744e+00, 4.27343749997698019172e+00, + 4.28906250000330668826e+00, 4.30468749999959232611e+00, + 
4.32031250000562039304e+00, 4.33593749999550670537e+00, + 4.35156250000948219281e+00, 4.36718750000763922259e+00, + 4.38281249999987476684e+00, 4.39843750000314237525e+00, + 4.41406250000408473255e+00, 4.42968750000079314333e+00, + 4.44531249998868371875e+00, 4.46093750000322319949e+00, + 4.47656249999480770896e+00, 4.49218749997964028609e+00, + 4.50781250000320810045e+00, 4.52343749999724753508e+00, + 4.53906249999181721222e+00, 4.55468750000258193467e+00, + 4.57031249999976196818e+00, 4.58593750000821920310e+00, + 4.60156250004601385939e+00, 4.61718750000444977388e+00, + 4.63281249999695177166e+00, 4.64843749999638600201e+00, + 4.66406250000544897460e+00, 4.67968749999663469197e+00, + 4.69531249998381028377e+00, 4.71093749999796340688e+00, + 4.72656250000119992905e+00, 4.74218750001258992910e+00, + 4.75781250000492050845e+00, 4.77343750000340971695e+00, + 4.78906250000747402140e+00, 4.80468749998990762862e+00, + 4.82031250001256594828e+00, 4.83593750000031530334e+00, + 4.85156250000026023628e+00, 4.86718750000094679820e+00, + 4.88281250000185362836e+00, 4.89843749997600141910e+00, + 4.91406249999471889112e+00, 4.92968749998860822359e+00, + 4.94531250000475353090e+00, 4.96093749999659205940e+00, + 4.97656250000856825721e+00, 4.99218750002637179364e+00, + 5.00781249999760014191e+00, 5.02343749998691091463e+00, + 5.03906249999699618058e+00, 5.05468750000537525580e+00, + 5.07031250000353406193e+00, 5.08593749999286881547e+00, + 5.10156249998831601289e+00, 5.11718750000479172257e+00, + 5.13281250001085087575e+00, 5.14843750000346744855e+00, + 5.16406250000581845683e+00, 5.17968750000350119933e+00, + 5.19531249999482636071e+00, 5.21093750000432454073e+00, + 5.22656250000434585701e+00, 5.24218750001077093970e+00, + 5.25781249998869881779e+00, 5.27343750002139977084e+00, + 5.28906249999702104958e+00, 5.30468749998945909851e+00, + 5.32031249999385913441e+00, 5.33593749999546851370e+00, + 5.35156250001908162517e+00, 5.36718749999724487054e+00, + 5.38281249999679634044e+00, 
5.39843750001770139590e+00, + 5.41406249999678212959e+00, 5.42968749999906563630e+00, + 5.44531250000517097476e+00, 5.46093749999811794993e+00, + 5.47656250001082511858e+00, 5.49218749999457500621e+00, + 5.50781250001214406353e+00, 5.52343750001415045858e+00, + 5.53906250000498356911e+00, 5.55468750000498889818e+00, + 5.57031250000316013882e+00, 5.58593750000908428888e+00, + 5.60156250002763478335e+00, 5.61718749999503863535e+00, + 5.63281250000129496414e+00, 5.64843750001081890133e+00, + 5.66406250000738609174e+00, 5.67968750000023270275e+00, + 5.69531249998335997731e+00, 5.71093749999160404940e+00, + 5.72656250000217958984e+00, 5.74218750000474997819e+00, + 5.75781250000163868918e+00, 5.77343749999750688318e+00, + 5.78906249999925304195e+00, 5.80468749999988631316e+00, + 5.82031249999487254598e+00, 5.83593749999551025809e+00, + 5.85156249999513455862e+00, 5.86718749999803179662e+00, + 5.88281250000295141689e+00, 5.89843750000985433957e+00, + 5.91406249999845634591e+00, 5.92968750000455990801e+00, + 5.94531250000243982612e+00, 5.96093750000733901828e+00, + 5.97656249999234212567e+00, 5.99218749999141753193e+00, + 6.00781250000843591863e+00, 6.02343749999880984092e+00, + 6.03906249999745359247e+00, 6.05468750000370548037e+00, + 6.07031250001220445967e+00, 6.08593750001188915633e+00, + 6.10156249999290700714e+00, 6.11718749998957456171e+00, + 6.13281249999975663911e+00, 6.14843749999015098950e+00, + 6.16406250000358646446e+00, 6.17968750000026467717e+00, + 6.19531249998414246249e+00, 6.21093749998937294521e+00, + 6.22656249999281197205e+00, 6.24218750000707967018e+00, + 6.25781250000234834374e+00, 6.27343749999462829692e+00, + 6.28906250001052136156e+00, 6.30468750000171862524e+00, + 6.32031250000594013727e+00, 6.33593750000045385917e+00, + 6.35156250000499689179e+00, 6.36718749999230215764e+00, + 6.38281249999868105505e+00, 6.39843749999853628196e+00, + 6.41406249999377564563e+00, 6.42968750000876010375e+00, + 6.44531250002396838283e+00, 6.46093750000062527761e+00, 
+ 6.47656249999929212180e+00, 6.49218750000642064180e+00, + 6.50781249999003996720e+00, 6.52343750000912248055e+00, + 6.53906249998720845440e+00, 6.55468749999868371958e+00, + 6.57031249998638067211e+00, 6.58593750000546407364e+00, + 6.60156249994729282804e+00, 6.61718749997319211076e+00, + 6.63281249997879296387e+00, 6.64843749999244426618e+00, + 6.66406249999900524017e+00, 6.67968749999884092716e+00, + 6.69531249999227373593e+00, 6.71093749999063504674e+00, + 6.72656249999940136775e+00, 6.74218749999563193853e+00, + 6.75781249999463895506e+00, 6.77343750001427569174e+00, + 6.78906249999704858311e+00, 6.80468750000215738538e+00, + 6.82031250000341859874e+00, 6.83593749999844302323e+00, + 6.85156250001598987609e+00, 6.86718750000203925765e+00, + 6.88281250000989430760e+00, 6.89843750000604671868e+00, + 6.91406249999750777135e+00, 6.92968749999960120789e+00, + 6.94531249995244071016e+00, 6.96093750002739852789e+00, + 6.97656249999430233544e+00, 6.99218749999911892701e+00, + 7.00781250000804245559e+00, 7.02343750000080380147e+00, + 7.03906249999665778461e+00, 7.05468749999575539533e+00, + 7.07031250001700328767e+00, 7.08593750000647215614e+00, + 7.10156249997034372257e+00, 7.11718749999698641062e+00, + 7.13281250000188915550e+00, 7.14843750000192734717e+00, + 7.16406250000996358551e+00, 7.17968750005667022407e+00, + 7.19531250000950883816e+00, 7.21093749995827248966e+00, + 7.22656250000638511466e+00, 7.24218750002578115499e+00, + 7.25781249999116351290e+00, 7.27343749999614619384e+00, + 7.28906249998626343256e+00, 7.30468749998397015588e+00, + 7.32031249998488320330e+00, 7.33593749999550048813e+00, + 7.35156249998663557932e+00, 7.36718750000183675297e+00, + 7.38281249999652722238e+00, 7.39843750007829115134e+00, + 7.41406250000048494542e+00, 7.42968750000089794838e+00, + 7.44531249999165467557e+00, 7.46093749999333422096e+00, + 7.47656250000219557705e+00, 7.49218750000104360964e+00, + 7.50781249999853983468e+00, 7.52343749999573585541e+00, + 
7.53906249999752819946e+00, 7.55468749999868549594e+00, + 7.57031250024692781153e+00, 7.58593750000690736357e+00, + 7.60156249999365662973e+00, 7.61718749999451283372e+00, + 7.63281249995806998498e+00, 7.64843749997276400876e+00, + 7.66406249998022381931e+00, 7.67968750000013145041e+00, + 7.69531249999808597551e+00, 7.71093750001158539931e+00, + 7.72656249999979038989e+00, 7.74218750000620037355e+00, + 7.75781249999318234245e+00, 7.77343750001715427800e+00, + 7.78906250000142730272e+00, 7.80468749997501465288e+00, + 7.82031249999300381859e+00, 7.83593750003314948316e+00, + 7.85156249999927524641e+00, 7.86718749999776001403e+00, + 7.88281249999449951105e+00, 7.89843749999351540936e+00, + 7.91406250000050803806e+00, 7.92968750004068656523e+00, + 7.94531249999952127183e+00, 7.96093750001230571200e+00, + 7.97656249999947331020e+00, 7.99218750003934363946e+00 +}; diff --git a/usr/src/lib/libm/common/C/_TBL_tan.c b/usr/src/lib/libm/common/C/_TBL_tan.c new file mode 100644 index 0000000000..3d5203a766 --- /dev/null +++ b/usr/src/lib/libm/common/C/_TBL_tan.c @@ -0,0 +1,85 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 
+ */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include "libm_protos.h" + +const double _TBL_tan_hi[] = { + 1.57534107325271622e-01, 1.61539784049521462e-01, 1.65550519273933966e-01, + 1.69566445219766521e-01, 1.73587694767981526e-01, 1.77614401477446726e-01, + 1.81646699603321415e-01, 1.85684724115634414e-01, 1.89728610718059132e-01, + 1.93778495866891859e-01, 1.97834516790238668e-01, 2.01896811507417145e-01, + 2.05965518848578860e-01, 2.10040778474558987e-01, 2.14122730896958657e-01, + 2.18211517498467428e-01, 2.22307280553431325e-01, 2.26410163248673829e-01, + 2.30520309704576154e-01, 2.34637864996423667e-01, 2.38762975176025932e-01, + 2.42895787293616550e-01, 2.47036449420041271e-01, 2.51185110669240763e-01, + 2.55341921221036272e-01, 2.63680596419996804e-01, 2.72053698658770882e-01, + 2.80462470145251386e-01, 2.88908172440514699e-01, 2.97392087269024608e-01, + 3.05915517353059274e-01, 3.14479787272571532e-01, 3.23086244351745544e-01, + 3.31736259573572778e-01, 3.40431228523830398e-01, 3.49172572365910372e-01, + 3.57961738848017019e-01, 3.66800203344323394e-01, 3.75689469931754838e-01, + 3.84631072504149241e-01, 3.93626575925632771e-01, 4.02677577225140193e-01, + 4.11785706834108478e-01, 4.20952629869475847e-01, 4.30180047464230053e-01, + 4.39469698147866239e-01, 4.48823359279239720e-01, 4.58242848534432368e-01, + 4.67730025452391784e-01, 4.77286793041252266e-01, 4.86915099448406330e-01, + 4.96616939697565651e-01, 5.06394357496229852e-01, 5.16249447117175131e-01, + 5.26184355357779188e-01, 5.36201283581215993e-01, 5.46302489843790484e-01, + 5.66767065580586427e-01, 5.87597367591443209e-01, 6.08813740324380737e-01, + 6.30437673835884782e-01, 6.52491897928808018e-01, 6.75000485144242934e-01, + 6.97988963623599301e-01, 7.21484440990904474e-01, 7.45515740559391960e-01, + 7.70113551344208669e-01, 7.95310593568674173e-01, 8.21141801589894138e-01, + 8.47644526446552637e-01, 8.74858760554482306e-01, 
9.02827387452673547e-01, + 9.31596459944072475e-01, 9.61215510494370373e-01, 9.91737898363268644e-01, +}; +const double _TBL_tan_lo[] = { +-1.10615392752930551e-17, 1.42255435911932711e-17, 1.02781342487141920e-17, +-1.04735896510580927e-17,-5.46679990560150911e-18, 1.50201543247778489e-18, + 1.22522327805930836e-17,-2.52772423968968903e-18, 9.78955701743985001e-19, + 4.61515122717816178e-18, 7.14813042382104539e-19,-1.25529909642919992e-17, + 1.19416304006222131e-17,-5.91325462642753544e-18, 7.53213214053688138e-18, + 4.77223821731568090e-18, 6.32882137760769522e-18, 8.33823681661647871e-18, +-1.25419320906151988e-17, 1.16585041935775587e-17,-1.19653634178542542e-17, +-7.22806346068389604e-18,-6.16674472236513534e-18, 4.26199277415660669e-18, +-5.58935834356478328e-18,-4.56998635843850688e-18, 1.78004627511465564e-18, + 1.74249040881549088e-17, 2.70817328270223006e-17,-1.80870634839170844e-17, +-1.00676145758650168e-17,-1.53577462986005684e-17,-2.38939880909534397e-17, +-1.08193046058071237e-17,-1.06856311222117164e-17,-1.96951245902998606e-17, +-2.08660034657941102e-17, 2.82596474303348100e-17, 2.34797942068937341e-18, +-1.76131026613802985e-17,-1.29729310968305823e-17, 1.87495311063417555e-17, +-2.29163073231136327e-18,-2.51936954463539765e-17,-4.11327516430776285e-18, + 1.50393242431203736e-18,-1.09029595007501330e-17,-6.87284752683418342e-19, + 1.55195027932634982e-17,-4.62284921534513474e-18,-5.45294879014110259e-18, +-2.56576334605328725e-17,-4.00960685506800741e-17, 1.35860113023765056e-17, +-4.34857062258506890e-17, 3.85791583096984630e-17, 2.90965762168371759e-17, + 1.90815918857458480e-17, 1.21159907937263400e-17,-1.52112721227855650e-17, +-1.51838757657007437e-17,-2.51352280752587451e-17,-2.66690480643161193e-17, +-4.59728584599455591e-17,-5.42439848134543255e-17, 3.56284233494755594e-17, + 3.61475127591663133e-17, 1.22197541073075113e-17,-1.61356193051149559e-17, + 1.66243632690603545e-17, 4.30578558405427098e-17,-4.43234026650131250e-17, 
+-1.35473813965930355e-17, 4.30118334112910435e-17, 3.62593428168003066e-17, +}; diff --git a/usr/src/lib/libm/common/C/__cos.c b/usr/src/lib/libm/common/C/__cos.c new file mode 100644 index 0000000000..03f637edca --- /dev/null +++ b/usr/src/lib/libm/common/C/__cos.c @@ -0,0 +1,127 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* INDENT OFF */ +/* + * __k_cos(double x; double y) + * kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164 + * Input x is assumed to be bounded by ~pi/4 in magnitude. + * Input y is the tail of x. + * + * Accurate Table look-up algorithm by K.C. Ng, May, 1995. 
+ * + * Algorithm: see __sincos.c + * + * The tail y is only read on the table look-up path (|x| above about + * 10.5/64); both polynomial paths below use x alone. + */ + +#include "libm.h" + +static const double sc[] = { +/* ONE = */ 1.0, +/* NONE = */ -1.0, +/* + * |sin(x) - (x+pp1*x^3+pp2*x^5)| <= 2^-58.79 for |x| < 0.008 + */ +/* PP1 = */ -0.166666666666316558867252052378889521480627858683055567, +/* PP2 = */ .008333315652997472323564894248466758248475374977974017927, +/* + * |(sin(x) - (x+p1*x^3+...+p4*x^9)| + * |------------------------------ | <= 2^-57.63 for |x| < 0.1953125 + * | x | + */ +/* P1 = */ -1.666666666666629669805215138920301589656e-0001, +/* P2 = */ 8.333333332390951295683993455280336376663e-0003, +/* P3 = */ -1.984126237997976692791551778230098403960e-0004, +/* P4 = */ 2.753403624854277237649987622848330351110e-0006, +/* + * |cos(x) - (1+qq1*x^2+qq2*x^4)| <= 2^-55.99 for |x| <= 0.008 (0x3f80624d) + */ +/* QQ1 = */ -0.4999999999975492381842911981948418542742729, +/* QQ2 = */ 0.041666542904352059294545209158357640398771740, +/* + * |cos(x) - (1+q1*x^2+...+q4*x^8)| <= 2^-55.86 for |x| <= 0.1640625 (10.5/64) + */ +/* Q1 = */ -0.5, +/* Q2 = */ 4.166666666500350703680945520860748617445e-0002, +/* Q3 = */ -1.388888596436972210694266290577848696006e-0003, +/* Q4 = */ 2.478563078858589473679519517892953492192e-0005, +}; +/* INDENT ON */ + +#define ONE sc[0] +#define NONE sc[1] +#define PP1 sc[2] +#define PP2 sc[3] +#define P1 sc[4] +#define P2 sc[5] +#define P3 sc[6] +#define P4 sc[7] +#define QQ1 sc[8] +#define QQ2 sc[9] +#define Q1 sc[10] +#define Q2 sc[11] +#define Q3 sc[12] +#define Q4 sc[13] + +extern const double _TBL_sincos[], _TBL_sincosx[]; + +double +__k_cos(double x, double y) { + double z, w, s, v, p, q; + int i, j, n, hx, ix; + + hx = ((int *)&x)[HIWORD]; /* high-order 32 bits of x */ + ix = hx & ~0x80000000; /* sign bit cleared: high word of |x| */ + + if (ix <= 0x3fc50000) { /* |x| < 10.5/64 = 0.164062500 */ + if (ix < 0x3e400000) /* |x| < 2**-27 */ + if ((int)x == 0) /* always true for such x; presumably kept to raise inexact -- TODO confirm */ + return (ONE); + z = x * x; + if (ix < 0x3f800000) /* |x| < 0.008 */ + q = z * (QQ1 + z * QQ2); + else + q = z * ((Q1 + z * Q2) + (z * z) * (Q3 + z * Q4)); + return (ONE
+ q); + } else { /* 0.164062500 < |x| < ~pi/4 */ + n = ix >> 20; /* biased exponent of |x| */ + i = (((ix >> 12) & 0xff) | 0x100) >> (0x401 - n); /* i = 64*|x| chopped */ + j = i - 10; /* breakpoint index, as in step 2 of __sincos.c */ + if (hx < 0) + v = -y - (_TBL_sincosx[j] + x); + else + v = y - (_TBL_sincosx[j] - x); + s = v * v; /* v = |x + y| - _TBL_sincosx[j]; s = v^2 */ + j <<= 1; /* _TBL_sincos is read as consecutive pairs */ + w = _TBL_sincos[j]; /* presumably sin at the breakpoint -- inferred from the __sincos.c formula */ + z = _TBL_sincos[j+1]; /* presumably cos at the breakpoint -- same caveat */ + p = s * (PP1 + s * PP2); + q = s * (QQ1 + s * QQ2); /* q ~ cos(v) - 1 */ + p = v + v * p; /* p ~ sin(v) */ + return (z - (w * p - z * q)); + } +} diff --git a/usr/src/lib/libm/common/C/__lgamma.c b/usr/src/lib/libm/common/C/__lgamma.c new file mode 100644 index 0000000000..32c00a6bf8 --- /dev/null +++ b/usr/src/lib/libm/common/C/__lgamma.c @@ -0,0 +1,269 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * double __k_lgamma(double x, int *signgamp); + * + * K.C. Ng, March, 1989. + * + * Part of the algorithm is based on W. Cody's lgamma function.
+ */ + +#include "libm.h" + +static const double +one = 1.0, +zero = 0.0, +hln2pi = 0.9189385332046727417803297, /* log(2*pi)/2 */ +pi = 3.1415926535897932384626434, +two52 = 4503599627370496.0, /* 43300000,00000000 (used by sin_pi) */ +/* + * Numerator and denominator coefficients for rational minimax Approximation + * P/Q over (0.5,1.5). + * __k_lgamma evaluates it at y = x for x <= 0.5 and at y = x - 1 for + * 0.6796875 <= x <= 1.5. + */ +D1 = -5.772156649015328605195174e-1, +p7 = 4.945235359296727046734888e0, +p6 = 2.018112620856775083915565e2, +p5 = 2.290838373831346393026739e3, +p4 = 1.131967205903380828685045e4, +p3 = 2.855724635671635335736389e4, +p2 = 3.848496228443793359990269e4, +p1 = 2.637748787624195437963534e4, +p0 = 7.225813979700288197698961e3, +q7 = 6.748212550303777196073036e1, +q6 = 1.113332393857199323513008e3, +q5 = 7.738757056935398733233834e3, +q4 = 2.763987074403340708898585e4, +q3 = 5.499310206226157329794414e4, +q2 = 6.161122180066002127833352e4, +q1 = 3.635127591501940507276287e4, +q0 = 8.785536302431013170870835e3, +/* + * Numerator and denominator coefficients for rational minimax Approximation + * G/H over (1.5,4.0). + * __k_lgamma evaluates it at y = x - 2 on that range and also at + * y = x - 1 for 0.5 < x < 0.6796875. + */ +D2 = 4.227843350984671393993777e-1, +g7 = 4.974607845568932035012064e0, +g6 = 5.424138599891070494101986e2, +g5 = 1.550693864978364947665077e4, +g4 = 1.847932904445632425417223e5, +g3 = 1.088204769468828767498470e6, +g2 = 3.338152967987029735917223e6, +g1 = 5.106661678927352456275255e6, +g0 = 3.074109054850539556250927e6, +h7 = 1.830328399370592604055942e2, +h6 = 7.765049321445005871323047e3, +h5 = 1.331903827966074194402448e5, +h4 = 1.136705821321969608938755e6, +h3 = 5.267964117437946917577538e6, +h2 = 1.346701454311101692290052e7, +h1 = 1.782736530353274213975932e7, +h0 = 9.533095591844353613395747e6, +/* + * Numerator and denominator coefficients for rational minimax Approximation + * U/V over (4.0,12.0). + * __k_lgamma evaluates it at y = x - 4.
+ */ +D4 = 1.791759469228055000094023e0, +u7 = 1.474502166059939948905062e4, +u6 = 2.426813369486704502836312e6, +u5 = 1.214755574045093227939592e8, +u4 = 2.663432449630976949898078e9, +u3 = 2.940378956634553899906876e10, +u2 = 1.702665737765398868392998e11, +u1 = 4.926125793377430887588120e11, +u0 = 5.606251856223951465078242e11, +v7 = 2.690530175870899333379843e3, +v6 = 6.393885654300092398984238e5, +v5 = 4.135599930241388052042842e7, +v4 = 1.120872109616147941376570e9, +v3 = 1.488613728678813811542398e10, +v2 = 1.016803586272438228077304e11, +v1 = 3.417476345507377132798597e11, +v0 = 4.463158187419713286462081e11, +/* + * Coefficients for minimax approximation over (12, INF). + */ +c5 = -1.910444077728e-03, +c4 = 8.4171387781295e-04, +c3 = -5.952379913043012e-04, +c2 = 7.93650793500350248e-04, +c1 = -2.777777777777681622553e-03, +c0 = 8.333333333333333331554247e-02, +c6 = 5.7083835261e-03; + +/* + * Return sin(pi*x). We assume x is finite and negative, and if it + * is an integer, then the sign of the zero returned doesn't matter. + */ +static double +sin_pi(double x) { + double y, z; + int n; + + y = -x; /* y = |x| under the stated assumption x < 0 */ + if (y <= 0.25) + return (__k_sin(pi * x, 0.0)); /* small enough: no reduction needed */ + if (y >= two52) + return (zero); /* every double this large is an integer */ + z = floor(y); + if (y == z) + return (zero); /* integer argument */ + + /* argument reduction: set y = |x| mod 2 */ + y *= 0.5; + y = 2.0 * (y - floor(y)); + + /* now floor(y * 4) tells which octant y is in */ + n = (int)(y * 4.0); + switch (n) { + case 0: + y = __k_sin(pi * y, 0.0); + break; + case 1: + case 2: + y = __k_cos(pi * (0.5 - y), 0.0); + break; + case 3: + case 4: + y = __k_sin(pi * (1.0 - y), 0.0); + break; + case 5: + case 6: + y = -__k_cos(pi * (y - 1.5), 0.0); + break; + default: + y = __k_sin(pi * (y - 2.0), 0.0); + break; + } + return (-y); /* y holds sin(pi*|x|); negate for the negative argument */ +} + +static double +neg(double z, int *signgamp) { + double t, p; + + /* + * written by K.C. Ng, Feb 2, 1989.
+ * + * Since + * -z*G(-z)*G(z) = pi/sin(pi*z), + * we have + * G(-z) = -pi/(sin(pi*z)*G(z)*z) + * = pi/(sin(pi*(-z))*G(z)*z) + * Algorithm + * z = |z| + * t = sin_pi(z); ...note that when z>2**52, z is an int + * and hence t=0. + * + * if (t == 0.0) return 1.0/0.0; + * if (t< 0.0) *signgamp = -1; else t= -t; + * if (z+1.0 == 1.0) ...tiny z + * return -log(z); + * else + * return log(pi/(t*z))-__k_lgamma(z, signgamp); + */ + + t = sin_pi(z); /* t := sin(pi*z) */ + if (t == zero) /* return 1.0/0.0 = +INF */ + return (one / fabs(t)); + z = -z; /* z is |z| from here on (the only caller passes a sign-bit-set value) */ + p = z + one; + if (p == one) + p = -log(z); + else + p = log(pi / (fabs(t) * z)) - __k_lgamma(z, signgamp); + if (t < zero) + *signgamp = -1; /* applied after the recursive call, which stores 1 */ + return (p); +} + +double +__k_lgamma(double x, int *signgamp) { + double t, p, q, cr, y; + + /* purge off +-inf, NaN and negative arguments */ + if (!finite(x)) + return (x * x); /* *signgamp is not written on this path */ + *signgamp = 1; + if (signbit(x)) + return (neg(x, signgamp)); + + /* lgamma(x) ~ log(1/x) for really tiny x */ + t = one + x; + if (t == one) { + if (x == zero) + return (one / x); /* x is +0 here (sign bit clear), so this is +INF */ + return (-log(x)); + } + + /* for tiny < x < inf */ + if (x <= 1.5) { + if (x < 0.6796875) { + cr = -log(x); + y = x; + } else { + cr = zero; + y = x - one; + } + + if (x <= 0.5 || x >= 0.6796875) { + if (x == one) + return (zero); + p = p0+y*(p1+y*(p2+y*(p3+y*(p4+y*(p5+y*(p6+y*p7)))))); + q = q0+y*(q1+y*(q2+y*(q3+y*(q4+y*(q5+y*(q6+y* + (q7+y))))))); + return (cr+y*(D1+y*(p/q))); + } else { /* 0.5 < x < 0.6796875: G/H coefficients with cr = -log(x) */ + y = x - one; + p = g0+y*(g1+y*(g2+y*(g3+y*(g4+y*(g5+y*(g6+y*g7)))))); + q = h0+y*(h1+y*(h2+y*(h3+y*(h4+y*(h5+y*(h6+y* + (h7+y))))))); + return (cr+y*(D2+y*(p/q))); + } + } else if (x <= 4.0) { + if (x == 2.0) + return (zero); + y = x - 2.0; + p = g0+y*(g1+y*(g2+y*(g3+y*(g4+y*(g5+y*(g6+y*g7)))))); + q = h0+y*(h1+y*(h2+y*(h3+y*(h4+y*(h5+y*(h6+y*(h7+y))))))); + return (y*(D2+y*(p/q))); + } else if (x <= 12.0) { + y = x - 4.0; + p = u0+y*(u1+y*(u2+y*(u3+y*(u4+y*(u5+y*(u6+y*u7)))))); + q = v0+y*(v1+y*(v2+y*(v3+y*(v4+y*(v5+y*(v6+y*(v7-y))))))); /* leading coefficient is -1 here, unlike the +1 in P/Q and G/H */ +
return (D4+y*(p/q)); + } else if (x <= 1.0e17) { /* x ~< 2**(prec+3) */ + t = one / x; + y = t * t; + p = hln2pi+t*(c0+y*(c1+y*(c2+y*(c3+y*(c4+y*(c5+y*c6)))))); + q = log(x); + return (x*(q-one)-(0.5*q-p)); + } else { /* may overflow */ + return (x * (log(x) - 1.0)); + } +} diff --git a/usr/src/lib/libm/common/C/__libx_errno.c b/usr/src/lib/libm/common/C/__libx_errno.c new file mode 100644 index 0000000000..d0438402ce --- /dev/null +++ b/usr/src/lib/libm/common/C/__libx_errno.c @@ -0,0 +1,34 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +extern int *___errno(void); + +int * +__libm_errno(void) { + return (___errno()); /* thin forwarder to ___errno() declared above */ +} diff --git a/usr/src/lib/libm/common/C/__rem_pio2.c b/usr/src/lib/libm/common/C/__rem_pio2.c new file mode 100644 index 0000000000..c5cbeedc2a --- /dev/null +++ b/usr/src/lib/libm/common/C/__rem_pio2.c @@ -0,0 +1,168 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * __rem_pio2(x, y) passes back a better-than-double-precision + * approximation to x mod pi/2 in y[0]+y[1] and returns an integer + * congruent mod 8 to the integer part of x/(pi/2). + * + * This implementation tacitly assumes that x is finite and at + * least about pi/4 in magnitude. 
+ */ + +#include "libm.h" + +extern const int _TBL_ipio2_inf[]; + +/* INDENT OFF */ +/* + * invpio2: 53 bits of 2/pi + * pio2_1: first 33 bits of pi/2 + * pio2_1t: pi/2 - pio2_1 + * pio2_2: second 33 bits of pi/2 + * pio2_2t: pi/2 - (pio2_1+pio2_2) + * pio2_3: third 33 bits of pi/2 + * pio2_3t: pi/2 - (pio2_1+pio2_2+pio2_3) + */ +static const double + half = 0.5, + invpio2 = 0.636619772367581343075535, /* 2^ -1 * 1.45F306DC9C883 */ + pio2_1 = 1.570796326734125614166, /* 2^ 0 * 1.921FB54400000 */ + pio2_1t = 6.077100506506192601475e-11, /* 2^-34 * 1.0B4611A626331 */ + pio2_2 = 6.077100506303965976596e-11, /* 2^-34 * 1.0B4611A600000 */ + pio2_2t = 2.022266248795950732400e-21, /* 2^-69 * 1.3198A2E037073 */ + pio2_3 = 2.022266248711166455796e-21, /* 2^-69 * 1.3198A2E000000 */ + pio2_3t = 8.478427660368899643959e-32; /* 2^-104 * 1.B839A252049C1 */ +/* INDENT ON */ + +int +__rem_pio2(double x, double *y) { + double w, t, r, fn; + double tx[3]; + int e0, i, j, nx, n, ix, hx, lx; + + hx = ((int *)&x)[HIWORD]; /* high-order 32 bits of x */ + ix = hx & 0x7fffffff; /* sign bit cleared: high word of |x| */ + + if (ix < 0x4002d97c) { + /* |x| < 3pi/4, special case with n=1 */ + t = fabs(x) - pio2_1; + if (ix != 0x3ff921fb) { /* 33+53 bit pi is good enough */ + y[0] = t - pio2_1t; + y[1] = (t - y[0]) - pio2_1t; + } else { /* near pi/2, use 33+33+53 bit pi */ + t -= pio2_2; + y[0] = t - pio2_2t; + y[1] = (t - y[0]) - pio2_2t; + } + if (hx < 0) { /* negative x: negate both pieces and the quadrant */ + y[0] = -y[0]; + y[1] = -y[1]; + return (-1); + } + return (1); + } + + if (ix <= 0x413921fb) { + /* |x| <= 2^19 pi */ + t = fabs(x); + n = (int)(t * invpio2 + half); /* nearest multiple of pi/2 */ + fn = (double)n; + r = t - fn * pio2_1; + j = ix >> 20; /* biased exponent of |x| */ + w = fn * pio2_1t; /* 1st round good to 85 bit */ + y[0] = r - w; + i = j -
((((int *)y)[HIWORD] >> 20) & 0x7ff); /* i = exponent of x minus exponent of y[0] */ + if (i > 16) { /* 2nd iteration (rare) */ + /* 2nd round good to 118 bit */ + if (i < 35) { + t = r; /* r-fn*pio2_2 may not be exact */ + w = fn * pio2_2; + r = t - w; + w = fn * pio2_2t - ((t - r) - w); + y[0] = r - w; + } else { + r -= fn * pio2_2; + w = fn * pio2_2t; + y[0] = r - w; + i = j - ((((int *)y)[HIWORD] >> 20) & 0x7ff); /* re-measure the exponent gap */ + if (i > 49) { + /* 3rd iteration (extremely rare) */ + if (i < 68) { + t = r; + w = fn * pio2_3; + r = t - w; + w = fn * pio2_3t - + ((t - r) - w); + y[0] = r - w; + } else { + /* + * 3rd round good to 151 bits; + * covered all possible cases + */ + r -= fn * pio2_3; + w = fn * pio2_3t; + y[0] = r - w; + } + } + } + } + y[1] = (r - y[0]) - w; /* tail of the reduced argument */ + if (hx < 0) { + y[0] = -y[0]; + y[1] = -y[1]; + return (-n); + } + return (n); + } + + e0 = (ix >> 20) - 1046; /* e0 = ilogb(x)-23; */ + + /* break x into three 24 bit pieces */ + lx = ((int *)&x)[LOWORD]; + i = (lx & 0x1f) << 19; + tx[2] = (double)i; + j = (lx >> 5) & 0xffffff; + tx[1] = (double)j; + tx[0] = (double)((((ix & 0xfffff) | 0x100000) << 3) | + ((unsigned)lx >> 29)); + nx = 3; + if (i == 0) { + /* skip zero term */ + nx--; + if (j == 0) + nx--; + } + n = __rem_pio2m(tx, y, e0, nx, 2, _TBL_ipio2_inf); /* prec = 2: result comes back in y[0] and y[1] */ + if (hx < 0) { + y[0] = -y[0]; + y[1] = -y[1]; + return (-n); + } + return (n); +} diff --git a/usr/src/lib/libm/common/C/__rem_pio2m.c b/usr/src/lib/libm/common/C/__rem_pio2m.c new file mode 100644 index 0000000000..e9b4589e95 --- /dev/null +++ b/usr/src/lib/libm/common/C/__rem_pio2m.c @@ -0,0 +1,363 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * int __rem_pio2m(x,y,e0,nx,prec,ipio2) + * double x[],y[]; int e0,nx,prec; const int ipio2[]; + * + * __rem_pio2m returns the last three bits of N (N mod 8) with + * y = x - N*pi/2 + * so that |y| < pi/4. + * + * The method is to compute the integer (mod 8) and fraction parts of + * (2/pi)*x without doing the full multiplication. In general we + * skip the parts of the product that are known to be a huge integer ( + * more accurately, = 0 mod 8 ). Thus the number of operations is + * independent of the exponent of the input. + * + * (2/PI) is represented by an array of 24-bit integers in ipio2[]. + * Here PI could as well be a machine value pi. + * + * Input parameters: + * x[] The input value (must be positive) is broken into nx + * pieces of 24-bit integers in double precision format. + * x[i] will be the i-th 24 bit of x. The scaled exponent + * of x[0] is given in input parameter e0 (i.e., x[0]*2^e0 + * matches x's up to 24 bits). + * + * Example of breaking a double z into x[0]+x[1]+x[2]: + * e0 = ilogb(z)-23 + * z = scalbn(z,-e0) + * for i = 0,1,2 + * x[i] = floor(z) + * z = (z-x[i])*2**24 + * + * + * y[] output result in an array of double precision numbers. + * The dimension of y[] is: + * 24-bit precision 1 + * 53-bit precision 2 + * 64-bit precision 2 + * 113-bit precision 3 + * The actual value is the sum of them.
Thus for 113-bit + * precision, one may have to do something like: + * + * long double t,w,r_head, r_tail; + * t = (long double)y[2] + (long double)y[1]; + * w = (long double)y[0]; + * r_head = t+w; + * r_tail = w - (r_head - t); + * + * e0 The exponent of x[0] + * + * nx dimension of x[] + * + * prec an integer indicating the precision: + * 0 24 bits (single) + * 1 53 bits (double) + * 2 64 bits (extended) + * 3 113 bits (quad) + * + * ipio2[] + * integer array, contains the (24*i)-th to (24*i+23)-th + * bit of 2/pi or 2/PI after binary point. The corresponding + * floating value is + * + * ipio2[i] * 2^(-24(i+1)). + * + * External function: + * double scalbn( ), floor( ); + * + * + * Here is the description of some local variables: + * + * jk jk+1 is the initial number of terms of ipio2[] needed + * in the computation. The recommended value is 3,4,4, + * 6 for single, double, extended, and quad. + * + * jz local integer variable indicating the number of + * terms of ipio2[] used. + * + * jx nx - 1 + * + * jv index for pointing to the suitable ipio2[] for the + * computation. In general, we want + * ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8 + * is an integer. Thus + * e0-3-24*jv >= 0 or (e0-3)/24 >= jv + * Hence jv = max(0,(e0-3)/24). + * + * jp jp+1 is the number of terms in pio2[] needed, jp = jk. + * + * q[] double array with integral value, representing the + * 24-bits chunk of the product of x and 2/pi. + * + * q0 the corresponding exponent of q[0]. Note that the + * exponent for q[i] would be q0-24*i. + * + * pio2[] double precision array, obtained by cutting pi/2 + * into 24 bits chunks. + * + * f[] ipio2[] in floating point + * + * iq[] integer array by breaking up q[] in 24-bits chunk. + * + * fq[] final product of x*(2/pi) in fq[0],..,fq[jk] + * + * ih integer. If >0 it indicates q[] is >= 0.5, hence + * it also indicates the *sign* of the result.
+ * + */ + +#include "libm.h" + +#if defined(__i386) && !defined(__amd64) +extern int __swapRP(int); +#endif + +static const int init_jk[] = { 3, 4, 4, 6 }; /* initial value for jk */ + +static const double pio2[] = { + 1.57079625129699707031e+00, + 7.54978941586159635335e-08, + 5.39030252995776476554e-15, + 3.28200341580791294123e-22, + 1.27065575308067607349e-29, + 1.22933308981111328932e-36, + 2.73370053816464559624e-44, + 2.16741683877804819444e-51, +}; + +static const double + zero = 0.0, + one = 1.0, + half = 0.5, + eight = 8.0, + eighth = 0.125, + two24 = 16777216.0, + twon24 = 5.960464477539062500E-8; + +int +__rem_pio2m(double *x, double *y, int e0, int nx, int prec, const int *ipio2) +{ + int jz, jx, jv, jp, jk, carry, n, iq[20]; + int i, j, k, m, q0, ih; + double z, fw, f[20], fq[20], q[20]; +#if defined(__i386) && !defined(__amd64) + int rp; + + rp = __swapRP(fp_extended); /* previous setting, restored before returning */ +#endif + + /* initialize jk */ + jp = jk = init_jk[prec]; /* NOTE(review): prec indexes a 4-entry table unchecked; callers must pass 0..3 */ + + /* determine jx,jv,q0, note that 3>q0 */ + jx = nx - 1; + jv = (e0 - 3) / 24; + if (jv < 0) + jv = 0; + q0 = e0 - 24 * (jv + 1); + + /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */ + j = jv - jx; + m = jx + jk; + for (i = 0; i <= m; i++, j++) + f[i] = (j < 0)?
zero : (double)ipio2[j]; + + /* compute q[0],q[1],...q[jk] */ + for (i = 0; i <= jk; i++) { + for (j = 0, fw = zero; j <= jx; j++) + fw += x[j] * f[jx+i-j]; + q[i] = fw; + } + + jz = jk; +recompute: /* re-entered by goto after further terms of ipio2[] have been appended to q[] */ + /* distill q[] into iq[] reversingly */ + for (i = 0, j = jz, z = q[jz]; j > 0; i++, j--) { + fw = (double)((int)(twon24 * z)); + iq[i] = (int)(z - two24 * fw); + z = q[j-1] + fw; + } + + /* compute n */ + z = scalbn(z, q0); /* actual value of z */ + z -= eight * floor(z * eighth); /* trim off integer >= 8 */ + n = (int)z; + z -= (double)n; + ih = 0; + if (q0 > 0) { /* need iq[jz-1] to determine n */ + i = (iq[jz-1] >> (24 - q0)); + n += i; + iq[jz-1] -= i << (24 - q0); + ih = iq[jz-1] >> (23 - q0); + } else if (q0 == 0) { + ih = iq[jz-1] >> 23; + } else if (z >= half) { + ih = 2; + } + + if (ih > 0) { /* q > 0.5 */ + n += 1; + carry = 0; + for (i = 0; i < jz; i++) { /* compute 1-q */ + j = iq[i]; + if (carry == 0) { + if (j != 0) { + carry = 1; + iq[i] = 0x1000000 - j; + } + } else { + iq[i] = 0xffffff - j; + } + } + if (q0 > 0) { /* rare case: chance is 1 in 12 */ + switch (q0) { + case 1: + iq[jz-1] &= 0x7fffff; + break; + case 2: + iq[jz-1] &= 0x3fffff; + break; + } + } + if (ih == 2) { + z = one - z; + if (carry != 0) + z -= scalbn(one, q0); + } + } + + /* check if recomputation is needed */ + if (z == zero) { + j = 0; + for (i = jz - 1; i >= jk; i--) + j |= iq[i]; + if (j == 0) { /* need recomputation */ + /* set k to no.
of terms needed */ + for (k = 1; iq[jk-k] == 0; k++) + ; + + /* add q[jz+1] to q[jz+k] */ + for (i = jz + 1; i <= jz + k; i++) { + f[jx+i] = (double)ipio2[jv+i]; + for (j = 0, fw = zero; j <= jx; j++) + fw += x[j] * f[jx+i-j]; + q[i] = fw; + } + jz += k; + goto recompute; + } + } + + /* cut out zero terms */ + if (z == zero) { + jz -= 1; + q0 -= 24; + while (iq[jz] == 0) { + jz--; + q0 -= 24; + } + } else { /* break z into 24-bit if necessary */ + z = scalbn(z, -q0); + if (z >= two24) { + fw = (double)((int)(twon24 * z)); + iq[jz] = (int)(z - two24 * fw); + jz += 1; + q0 += 24; + iq[jz] = (int)fw; + } else { + iq[jz] = (int)z; + } + } + + /* convert integer "bit" chunk to floating-point value */ + fw = scalbn(one, q0); + for (i = jz; i >= 0; i--) { + q[i] = fw * (double)iq[i]; + fw *= twon24; + } + + /* compute pio2[0,...,jp]*q[jz,...,0] */ + for (i = jz; i >= 0; i--) { + for (fw = zero, k = 0; k <= jp && k <= jz - i; k++) + fw += pio2[k] * q[i+k]; + fq[jz-i] = fw; + } + + /* compress fq[] into y[] */ + switch (prec) { + case 0: + fw = zero; + for (i = jz; i >= 0; i--) + fw += fq[i]; + y[0] = (ih == 0)? fw : -fw; + break; + + case 1: + case 2: + fw = zero; + for (i = jz; i >= 0; i--) + fw += fq[i]; + y[0] = (ih == 0)? fw : -fw; + fw = fq[0] - fw; + for (i = 1; i <= jz; i++) + fw += fq[i]; + y[1] = (ih == 0)?
fw : -fw; + break; + + default: /* any other prec (3 = quad per the header comment): result in y[0..2] */ + for (i = jz; i > 0; i--) { + fw = fq[i-1] + fq[i]; + fq[i] += fq[i-1] - fw; + fq[i-1] = fw; + } + for (i = jz; i > 1; i--) { + fw = fq[i-1] + fq[i]; + fq[i] += fq[i-1] - fw; + fq[i-1] = fw; + } + for (fw = zero, i = jz; i >= 2; i--) + fw += fq[i]; + if (ih == 0) { + y[0] = fq[0]; + y[1] = fq[1]; + y[2] = fw; + } else { + y[0] = -fq[0]; + y[1] = -fq[1]; + y[2] = -fw; + } + } + +#if defined(__i386) && !defined(__amd64) + (void) __swapRP(rp); +#endif + return (n & 7); /* only N mod 8 is reported */ +} diff --git a/usr/src/lib/libm/common/C/__sin.c b/usr/src/lib/libm/common/C/__sin.c new file mode 100644 index 0000000000..323468709d --- /dev/null +++ b/usr/src/lib/libm/common/C/__sin.c @@ -0,0 +1,129 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* INDENT OFF */ +/* + * __k_sin( double x; double y ) + * kernel sin function on [-pi/4, pi/4], pi/4 ~ 0.785398164 + * Input x is assumed to be bounded by ~pi/4 in magnitude. + * Input y is the tail of x. + * + * Accurate Table look-up algorithm by K.C.
Ng, May, 1995. + * + * Algorithm: see __sincos.c + * + * Unlike __k_cos, every path here folds the tail y into the result. + */ + +#include "libm.h" + +static const double sc[] = { +/* ONE = */ 1.0, +/* NONE = */ -1.0, +/* + * |sin(x) - (x+pp1*x^3+pp2*x^5)| <= 2^-58.79 for |x| < 0.008 + */ +/* PP1 = */ -0.166666666666316558867252052378889521480627858683055567, +/* PP2 = */ .008333315652997472323564894248466758248475374977974017927, +/* + * |(sin(x) - (x+p1*x^3+...+p4*x^9)| + * |------------------------------ | <= 2^-57.63 for |x| < 0.1953125 + * | x | + */ +/* P1 = */ -1.666666666666629669805215138920301589656e-0001, +/* P2 = */ 8.333333332390951295683993455280336376663e-0003, +/* P3 = */ -1.984126237997976692791551778230098403960e-0004, +/* P4 = */ 2.753403624854277237649987622848330351110e-0006, +/* + * |cos(x) - (1+qq1*x^2+qq2*x^4)| <= 2^-55.99 for |x| <= 0.008 (0x3f80624d) + */ +/* QQ1 = */ -0.4999999999975492381842911981948418542742729, +/* QQ2 = */ 0.041666542904352059294545209158357640398771740, +/* + * |cos(x) - (1+q1*x^2+...+q4*x^8)| <= 2^-55.86 for |x| <= 0.1640625 (10.5/64) + */ +/* Q1 = */ -0.5, +/* Q2 = */ 4.166666666500350703680945520860748617445e-0002, +/* Q3 = */ -1.388888596436972210694266290577848696006e-0003, +/* Q4 = */ 2.478563078858589473679519517892953492192e-0005, +}; +/* INDENT ON */ + +#define ONE sc[0] +#define NONE sc[1] +#define PP1 sc[2] +#define PP2 sc[3] +#define P1 sc[4] +#define P2 sc[5] +#define P3 sc[6] +#define P4 sc[7] +#define QQ1 sc[8] +#define QQ2 sc[9] +#define Q1 sc[10] +#define Q2 sc[11] +#define Q3 sc[12] +#define Q4 sc[13] + +extern const double _TBL_sincos[], _TBL_sincosx[]; + +double +__k_sin(double x, double y) { + double z, w, s, v, p, q; + int i, j, n, hx, ix; + + hx = ((int *)&x)[HIWORD]; /* high-order 32 bits of x */ + ix = hx & ~0x80000000; /* sign bit cleared: high word of |x| */ + + if (ix <= 0x3fc50000) { /* |x| < 10.5/64 = 0.164062500 */ + if (ix < 0x3e400000) /* |x| < 2**-27 */ + if ((int)x == 0) /* always true for such x; presumably kept to raise inexact -- TODO confirm */ + return (x + y); + z = x * x; + if (ix < 0x3f800000) /* |x| < 0.008 */ + p = (x * z) * (PP1 + z * PP2) + y; + else + p = (x * z) * ((P1 + z * P2) + (z *
z) * (P3 + + z * P4)) + y; + return (x + p); + } else { /* 0.164062500 < |x| < ~pi/4 */ + n = ix >> 20; /* biased exponent of |x| */ + i = (((ix >> 12) & 0xff) | 0x100) >> (0x401 - n); /* i = 64*|x| chopped */ + j = i - 10; /* breakpoint index, as in step 2 of __sincos.c */ + if (hx < 0) + v = -y - (_TBL_sincosx[j] + x); + else + v = y - (_TBL_sincosx[j] - x); + s = v * v; /* v = |x + y| - _TBL_sincosx[j]; s = v^2 */ + j <<= 1; /* _TBL_sincos is read as consecutive pairs */ + w = _TBL_sincos[j]; /* presumably sin at the breakpoint -- inferred from the __sincos.c formula */ + z = _TBL_sincos[j+1]; /* presumably cos at the breakpoint -- same caveat */ + p = s * (PP1 + s * PP2); + q = s * (QQ1 + s * QQ2); /* q ~ cos(v) - 1 */ + p = v + v * p; /* p ~ sin(v) */ + s = w * q + z * p; + return ((hx >= 0)? w + s : -(w + s)); /* result for |x|, sign restored from x */ + } +} diff --git a/usr/src/lib/libm/common/C/__sincos.c b/usr/src/lib/libm/common/C/__sincos.c new file mode 100644 index 0000000000..f4158ab3ab --- /dev/null +++ b/usr/src/lib/libm/common/C/__sincos.c @@ -0,0 +1,164 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* INDENT OFF */ +/* + * double __k_sincos(double x, double y, double *c); + * kernel sincos function on [-pi/4, pi/4], pi/4 ~ 0.785398164 + * Input x is assumed to be bounded by ~pi/4 in magnitude. + * Input y is the tail of x.
+ * return sin(x) with *c = cos(x) + * + * Accurate Table look-up algorithm by K.C. Ng, May, 1995. + * + * 1. Reduce x to x>0 by sin(-x)=-sin(x),cos(-x)=cos(x). + * 2. For 0<= x < pi/4, let i = (64*x chopped)-10. Let d = x - a[i], where + * a[i] is a double that is close to (i+10.5)/64 and such that + * sin(a[i]) and cos(a[i]) are each close to a double (with error less + * than 2**-8 ulp). Then + * cos(x) = cos(a[i]+d) = cos(a[i])cos(d) - sin(a[i])*sin(d) + * = TBL_cos_a[i]*(1+QQ1*d^2+QQ2*d^4) - + * TBL_sin_a[i]*(d+PP1*d^3+PP2*d^5) + * = TBL_cos_a[i] + (TBL_cos_a[i]*d^2*(QQ1+QQ2*d^2) - + * TBL_sin_a[i]*(d+PP1*d^3+PP2*d^5)) + * sin(x) = sin(a[i]+d) = sin(a[i])cos(d) + cos(a[i])*sin(d) + * = TBL_sin_a[i]*(1+QQ1*d^2+QQ2*d^4) + + * TBL_cos_a[i]*(d+PP1*d^3+PP2*d^5) + * = TBL_sin_a[i] + (TBL_sin_a[i]*d^2*(QQ1+QQ2*d^2) + + * TBL_cos_a[i]*(d+PP1*d^3+PP2*d^5)) + * + * For |y| less than 10.5/64 = 0.1640625, use + * sin(y) = y + y^3*(p1+y^2*(p2+y^2*(p3+y^2*p4))) + * cos(y) = 1 + y^2*(q1+y^2*(q2+y^2*(q3+y^2*q4))) + * + * For |y| less than 0.008, use + * sin(y) = y + y^3*(pp1+y^2*pp2) + * cos(y) = 1 + y^2*(qq1+y^2*qq2) + * + * Accuracy: + * TRIG(x) returns trig(x) nearly rounded (less than 1 ulp) + */ + +#include "libm.h" + +static const double sc[] = { +/* ONE = */ 1.0, +/* NONE = */ -1.0, +/* + * |sin(x) - (x+pp1*x^3+pp2*x^5)| <= 2^-58.79 for |x| < 0.008 + */ +/* PP1 = */ -0.166666666666316558867252052378889521480627858683055567, +/* PP2 = */ .008333315652997472323564894248466758248475374977974017927, +/* + * |(sin(x) - (x+p1*x^3+...+p4*x^9)| + * |------------------------------ | <= 2^-57.63 for |x| < 0.1953125 + * | x | + */ +/* P1 = */ -1.666666666666629669805215138920301589656e-0001, +/* P2 = */ 8.333333332390951295683993455280336376663e-0003, +/* P3 = */ -1.984126237997976692791551778230098403960e-0004, +/* P4 = */ 2.753403624854277237649987622848330351110e-0006, +/* + * |cos(x) - (1+qq1*x^2+qq2*x^4)| <= 2^-55.99 for |x| <= 0.008 (0x3f80624d) + */ +/* QQ1 = */
-0.4999999999975492381842911981948418542742729, +/* QQ2 = */ 0.041666542904352059294545209158357640398771740, +/* + * |cos(x) - (1+q1*x^2+...+q4*x^8)| <= 2^-55.86 for |x| <= 0.1640625 (10.5/64) + */ +/* Q1 = */ -0.5, +/* Q2 = */ 4.166666666500350703680945520860748617445e-0002, +/* Q3 = */ -1.388888596436972210694266290577848696006e-0003, +/* Q4 = */ 2.478563078858589473679519517892953492192e-0005, +}; +/* INDENT ON */ + +#define ONE sc[0] +#define NONE sc[1] +#define PP1 sc[2] +#define PP2 sc[3] +#define P1 sc[4] +#define P2 sc[5] +#define P3 sc[6] +#define P4 sc[7] +#define QQ1 sc[8] +#define QQ2 sc[9] +#define Q1 sc[10] +#define Q2 sc[11] +#define Q3 sc[12] +#define Q4 sc[13] + +extern const double _TBL_sincos[], _TBL_sincosx[]; + +double +__k_sincos(double x, double y, double *c) { + double z, w, s, v, p, q; + int i, j, n, hx, ix; + + hx = ((int *)&x)[HIWORD]; /* high-order 32 bits of x */ + ix = hx & ~0x80000000; /* sign bit cleared: high word of |x| */ + + if (ix <= 0x3fc50000) { /* |x| < 10.5/64 = 0.164062500 */ + if (ix < 0x3e400000) { /* |x| < 2**-27 */ + if ((int)x == 0) /* always true for |x| < 2**-27, so *c is always stored */ + *c = ONE; + return (x + y); + } else { + z = x * x; + if (ix < 0x3f800000) { /* |x| < 0.008 */ + q = z * (QQ1 + z * QQ2); + p = (x * z) * (PP1 + z * PP2) + y; + } else { + q = z * ((Q1 + z * Q2) + (z * z) * (Q3 + + z * Q4)); + p = (x * z) * ((P1 + z * P2) + (z * z) * (P3 + + z * P4)) + y; + } + *c = ONE + q; /* the cosine polynomial uses x alone; y enters only the sine */ + return (x + p); + } + } else { /* 0.164062500 < |x| < ~pi/4 */ + n = ix >> 20; /* biased exponent of |x| */ + i = (((ix >> 12) & 0xff) | 0x100) >> (0x401 - n); /* i = 64*|x| chopped */ + j = i - 10; /* breakpoint index, step 2 of the header comment */ + if (hx < 0) + v = -y - (_TBL_sincosx[j] + x); + else + v = y - (_TBL_sincosx[j] - x); + s = v * v; /* v = |x + y| - _TBL_sincosx[j]; s = v^2 */ + j <<= 1; /* _TBL_sincos is read as consecutive pairs */ + w = _TBL_sincos[j]; /* presumably TBL_sin_a of the header formula -- inferred from its use below */ + z = _TBL_sincos[j+1]; /* presumably TBL_cos_a -- same caveat */ + p = s * (PP1 + s * PP2); + q = s * (QQ1 + s * QQ2); /* q ~ cos(v) - 1 */ + p = v + v * p; /* p ~ sin(v) */ + *c = z - (w * p - z * q); + s = w * q + z * p; + return ((hx >= 0)?
w + s : -(w + s)); /* result for |x|, sign restored from x */ + } +} diff --git a/usr/src/lib/libm/common/C/__tan.c b/usr/src/lib/libm/common/C/__tan.c new file mode 100644 index 0000000000..6e111b6be2 --- /dev/null +++ b/usr/src/lib/libm/common/C/__tan.c @@ -0,0 +1,195 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* INDENT OFF */ +/* + * __k_tan( double x; double y; int k ) + * kernel tan/cotan function on [-pi/4, pi/4], pi/4 ~ 0.785398164 + * Input x is assumed to be bounded by ~pi/4 in magnitude. + * Input y is the tail of x. + * Input k indicates -- tan if k=0; else -1/tan + * + * Table look up algorithm + * 1. by tan(-x) = -tan(x), need only to consider positive x + * 2. if x < 5/32 = [0x3fc40000, 0] = 0.15625 , then + * if x < 2^-27 (hx < 0x3e400000 0), set w=x with inexact if x != 0 + * else + * z = x*x; + * w = x + (y+(x*z)*(t1+z*(t2+z*(t3+z*(t4+z*(t5+z*t6)))))) + * return (k == 0)? w: 1/w; + * 3.
else + * ht = (hx + 0x4000)&0x7fff8000 (round x to a break point t) + * lt = 0 + * i = (ht-0x3fc40000)>>15; (i<=64) + * x' = (x - t)+y (|x'| ~<= 2^-7) + * By + * tan(t+x') + * = (tan(t)+tan(x'))/(1-tan(x')tan(t)) + * We have + * sin(x')+tan(t)*(tan(t)*sin(x')) + * = tan(t) + ------------------------------- for k=0 + * cos(x') - tan(t)*sin(x') + * + * cos(x') - tan(t)*sin(x') + * = - -------------------------------------- for k=1 + * tan(t) + tan(t)*(cos(x')-1) + sin(x') + * + * + * where tan(t) is from the table, + * sin(x') = pp1*x'*(pp2 + x'^2*(pp3 + x'^2)) + * cos(x') = 1 + qq1*x'^2*(qq2 + x'^2) + */ + +#include "libm.h" + +extern const double _TBL_tan_hi[], _TBL_tan_lo[]; +static const double q[] = { +/* one = */ 1.0, +/* + * |sin(x) - pp1*x*(pp2+x^2*(pp3+x^2))| <= 2^-59.56 for |x|<1/64 + */ +/* pp1 = */ 8.33326120969096230395312119298978359438478946686e-0003, +/* pp2 = */ 1.20001038589438965215025680596868692381425944526e+0002, +/* pp3 = */ -2.00001730975089451192161504877731204032897949219e+0001, + +/* + * |cos(x) - (1+qq1*x^2*(qq2+x^2))| <= 2^-56.19 for |x|<=1/128 + */ +/* qq1 = */ 4.16665486385721928197511942926212213933467864990e-0002, +/* qq2 = */ -1.20000339921340035687080671777948737144470214844e+0001, + +/* + * |tan(x) - PF(x)| + * |--------------| <= 2^-58.57 for |x|<0.15625 + * | x | + * + * where (let z = x*x) + * PF(x) = x + (t1*x*z)(t2 + z(t3 + z))(t4 + z)(t5 + z(t6 + z)) + */ +/* t1 = */ 3.71923358986516816929168705030406272271648049355e-0003, +/* t2 = */ 6.02645120354857866118436504621058702468872070312e+0000, +/* t3 = */ 2.42627327587398156083509093150496482849121093750e+0000, +/* t4 = */ 2.44968983934252770851003333518747240304946899414e+0000, +/* t5 = */ 6.07089252571767978849948121933266520500183105469e+0000, +/* t6 = */ -2.49403756995593761658369658107403665781021118164e+0000, +}; + + +#define one q[0] +#define pp1 q[1] +#define pp2 q[2] +#define pp3 q[3] +#define qq1 q[4] +#define qq2 q[5] +#define t1 q[6] +#define t2 q[7] +#define t3 q[8] 
+#define t4 q[9] +#define t5 q[10] +#define t6 q[11] + +/* INDENT ON */ + + +double +__k_tan(double x, double y, int k) { + double a, t, z, w = 0.0L, s, c, r, rh, xh, xl; + int i, j, hx, ix; + + t = one; + hx = ((int *) &x)[HIWORD]; + ix = hx & 0x7fffffff; + if (ix < 0x3fc40000) { /* 0.15625 */ + if (ix < 0x3e400000) { /* 2^-27 */ + if ((i = (int) x) == 0) /* generate inexact */ + w = x; + t = y; + } else { + z = x * x; + t = y + (((t1 * x) * z) * (t2 + z * (t3 + z))) * + ((t4 + z) * (t5 + z * (t6 + z))); + w = x + t; + } + if (k == 0) + return (w); + /* + * Compute -1/(x+T) with great care + * Let r = -1/(x+T), rh = r chopped to 20 bits. + * Also let xh = x+T chopped to 20 bits, xl = (x-xh)+T. Then + * -1/(x+T) = rh + (-1/(x+T)-rh) = rh + r*(1+rh*(x+T)) + * = rh + r*((1+rh*xh)+rh*xl). + */ + rh = r = -one / w; + ((int *) &rh)[LOWORD] = 0; + xh = w; + ((int *) &xh)[LOWORD] = 0; + xl = (x - xh) + t; + return (rh + r * ((one + rh * xh) + rh * xl)); + } + j = (ix + 0x4000) & 0x7fff8000; + i = (j - 0x3fc40000) >> 15; + ((int *) &t)[HIWORD] = j; + if (hx > 0) + x = y - (t - x); + else + x = -y - (t + x); + a = _TBL_tan_hi[i]; + z = x * x; + s = (pp1 * x) * (pp2 + z * (pp3 + z)); /* sin(x) */ + t = (qq1 * z) * (qq2 + z); /* cos(x) - 1 */ + if (k == 0) { + w = a * s; + t = _TBL_tan_lo[i] + (s + a * w) / (one - (w - t)); + return (hx < 0 ? -a - t : a + t); + } else { + w = s + a * t; + c = w + _TBL_tan_lo[i]; + t = a * s - t; + /* + * Now try to compute [(1-T)/(a+c)] accurately + * + * Let r = 1/(a+c), rh = (1-T)*r chopped to 20 bits. + * Also let xh = a+c chopped to 20 bits, xl = (a-xh)+c. Then + * (1-T)/(a+c) = rh + ((1-T)/(a+c)-rh) + * = rh + r*(1-T-rh*(a+c)) + * = rh + r*((1-T-rh*xh)-rh*xl) + * = rh + r*(((1-rh*xh)-T)-rh*xl) + */ + r = one / (a + c); + rh = (one - t) * r; + ((int *) &rh)[LOWORD] = 0; + xh = a + c; + ((int *) &xh)[LOWORD] = 0; + xl = (a - xh) + c; + z = rh + r * (((one - rh * xh) - t) - rh * xl); + return (hx >= 0 ? 
-z : z); + } +} diff --git a/usr/src/lib/libm/common/C/__xpg6.c b/usr/src/lib/libm/common/C/__xpg6.c new file mode 100644 index 0000000000..a6a9580a90 --- /dev/null +++ b/usr/src/lib/libm/common/C/__xpg6.c @@ -0,0 +1,54 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/*LINTLIBRARY*/ + +/* + * See /ws/unix200x-gate/usr/src/lib/libc/port/gen/xpg6.c for libc default. + * __xpg6 (C99/SUSv3) is first included in Solaris 10 libc and libm + * as well as the K2 (S1S8) libsunmath and libmopt. + * + * The default setting, _C99SUSv3_mode_OFF, means to retain current Solaris + * behavior which is NOT C99/SUSv3 compliant. This is normal. These libraries + * determine which standard to use based on how applications are built. These + * libraries at runtime determine which behavior to choose based on the value + * of __xpg6. By default they retain their original Solaris behavior. + * + * __xpg6 is used to control certain behaviors between the C99 standard, the + * SUSv3 standard, and Solaris. 
More explanation in lib/libc/inc/xpg6.h. + * The XPG6 C compiler utility (c99) will add an object file that contains + * an alternate definition for __xpg6. The symbol interposition provided + * by the linker will allow these libraries to find that symbol instead. + * + * Possible settings are available and documented in lib/libc/inc/xpg6.h. + */ + +#include "xpg6.h" + +unsigned int __xpg6 = _C99SUSv3_mode_OFF; diff --git a/usr/src/lib/libm/common/C/_lib_version.c b/usr/src/lib/libm/common/C/_lib_version.c new file mode 100644 index 0000000000..5548104d70 --- /dev/null +++ b/usr/src/lib/libm/common/C/_lib_version.c @@ -0,0 +1,38 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * values-{X,x}?.o should define + initialize an *actual* symbol _lib_version. 
+ */ + +#include <math.h> + +#pragma weak _lib_version = __libm_lib_version + +const enum version __libm_lib_version = libm_ieee; diff --git a/usr/src/lib/libm/common/C/acos.c b/usr/src/lib/libm/common/C/acos.c new file mode 100644 index 0000000000..523217a470 --- /dev/null +++ b/usr/src/lib/libm/common/C/acos.c @@ -0,0 +1,154 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak acos = __acos + +/* INDENT OFF */ +/* + * acos(x) + * Method : + * acos(x) = pi/2 - asin(x) + * acos(-x) = pi/2 + asin(x) + * For |x|<=0.5 + * acos(x) = pi/2 - (x + x*x^2*R(x^2)) (see asin.c) + * For x>0.5 + * acos(x) = pi/2 - (pi/2 - 2asin(sqrt((1-x)/2))) + * = 2asin(sqrt((1-x)/2)) + * = 2s + 2s*z*R(z) ...z=(1-x)/2, s=sqrt(z) + * = 2f + (2c + 2s*z*R(z)) + * where f=hi part of s, and c = (z-f*f)/(s+f) is the correction term + * for f so that f+c ~ sqrt(z). 
+ * For x<-0.5 + * acos(x) = pi - 2asin(sqrt((1-|x|)/2)) + * = pi - 2*(s+s*z*R(z)), where z=(1-|x|)/2,s=sqrt(z) + * + * Special cases: + * if x is NaN, return x itself; + * if |x|>1, return NaN with invalid signal. + * + * Function needed: sqrt + */ +/* INDENT ON */ + +#include "libm_synonyms.h" /* __acos, __sqrt, __isnan */ +#include "libm_protos.h" /* _SVID_libm_err */ +#include "libm_macros.h" +#include <math.h> + +/* INDENT OFF */ +static const double xxx[] = { +/* one */ 1.00000000000000000000e+00, /* 3FF00000, 00000000 */ +/* pi */ 3.14159265358979311600e+00, /* 400921FB, 54442D18 */ +/* pio2_hi */ 1.57079632679489655800e+00, /* 3FF921FB, 54442D18 */ +/* pio2_lo */ 6.12323399573676603587e-17, /* 3C91A626, 33145C07 */ +/* pS0 */ 1.66666666666666657415e-01, /* 3FC55555, 55555555 */ +/* pS1 */ -3.25565818622400915405e-01, /* BFD4D612, 03EB6F7D */ +/* pS2 */ 2.01212532134862925881e-01, /* 3FC9C155, 0E884455 */ +/* pS3 */ -4.00555345006794114027e-02, /* BFA48228, B5688F3B */ +/* pS4 */ 7.91534994289814532176e-04, /* 3F49EFE0, 7501B288 */ +/* pS5 */ 3.47933107596021167570e-05, /* 3F023DE1, 0DFDF709 */ +/* qS1 */ -2.40339491173441421878e+00, /* C0033A27, 1C8A2D4B */ +/* qS2 */ 2.02094576023350569471e+00, /* 40002AE5, 9C598AC8 */ +/* qS3 */ -6.88283971605453293030e-01, /* BFE6066C, 1B8D0159 */ +/* qS4 */ 7.70381505559019352791e-02 /* 3FB3B8C5, B12E9282 */ +}; +#define one xxx[0] +#define pi xxx[1] +#define pio2_hi xxx[2] +#define pio2_lo xxx[3] +#define pS0 xxx[4] +#define pS1 xxx[5] +#define pS2 xxx[6] +#define pS3 xxx[7] +#define pS4 xxx[8] +#define pS5 xxx[9] +#define qS1 xxx[10] +#define qS2 xxx[11] +#define qS3 xxx[12] +#define qS4 xxx[13] +/* INDENT ON */ + +double +acos(double x) { + double z, p, q, r, w, s, c, df; + int hx, ix; + + hx = ((int *) &x)[HIWORD]; + ix = hx & 0x7fffffff; + if (ix >= 0x3ff00000) { /* |x| >= 1 */ + if (((ix - 0x3ff00000) | ((int *) &x)[LOWORD]) == 0) { + /* |x| == 1 */ + if (hx > 0) /* acos(1) = 0 */ + return (0.0); + else /* 
acos(-1) = pi */ + return (pi + 2.0 * pio2_lo); + } else if (isnan(x)) +#if defined(FPADD_TRAPS_INCOMPLETE_ON_NAN) + return (ix >= 0x7ff80000 ? x : (x - x) / (x - x)); + /* assumes sparc-like QNaN */ +#else + return (x - x) / (x - x); /* acos(|x|>1) is NaN */ +#endif + else + return (_SVID_libm_err(x, x, 1)); + } + if (ix < 0x3fe00000) { /* |x| < 0.5 */ + if (ix <= 0x3c600000) + return (pio2_hi + pio2_lo); /* if |x| < 2**-57 */ + z = x * x; + p = z * (pS0 + z * (pS1 + z * (pS2 + z * (pS3 + + z * (pS4 + z * pS5))))); + q = one + z * (qS1 + z * (qS2 + z * (qS3 + z * qS4))); + r = p / q; + return (pio2_hi - (x - (pio2_lo - x * r))); + } else if (hx < 0) { + /* x < -0.5 */ + z = (one + x) * 0.5; + p = z * (pS0 + z * (pS1 + z * (pS2 + z * (pS3 + + z * (pS4 + z * pS5))))); + q = one + z * (qS1 + z * (qS2 + z * (qS3 + z * qS4))); + s = sqrt(z); + r = p / q; + w = r * s - pio2_lo; + return (pi - 2.0 * (s + w)); + } else { + /* x > 0.5 */ + z = (one - x) * 0.5; + s = sqrt(z); + df = s; + ((int *) &df)[LOWORD] = 0; + c = (z - df * df) / (s + df); + p = z * (pS0 + z * (pS1 + z * (pS2 + z * (pS3 + + z * (pS4 + z * pS5))))); + q = one + z * (qS1 + z * (qS2 + z * (qS3 + z * qS4))); + r = p / q; + w = r * s + c; + return (2.0 * (df + w)); + } +} diff --git a/usr/src/lib/libm/common/C/acosh.c b/usr/src/lib/libm/common/C/acosh.c new file mode 100644 index 0000000000..7eecfe635e --- /dev/null +++ b/usr/src/lib/libm/common/C/acosh.c @@ -0,0 +1,96 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak acosh = __acosh + +/* INDENT OFF */ +/* + * acosh(x) + * Method : + * Based on + * acosh(x) = log [ x + sqrt(x*x-1) ] + * we have + * acosh(x) := log(x)+ln2, if x is large; else + * acosh(x) := log(2x-1/(sqrt(x*x-1)+x)) if x > 2; else + * acosh(x) := log1p(t+sqrt(2.0*t+t*t)); where t = x-1. + * + * Special cases: + * acosh(x) is NaN with signal if x < 1. + * acosh(NaN) is NaN without signal. + */ +/* INDENT ON */ + +#include "libm_synonyms.h" /* __acosh, __log, __log1p */ +#include "libm_protos.h" /* _SVID_libm_err */ +#include "libm_macros.h" +#include <math.h> + +static const double + one = 1.0, + ln2 = 6.93147180559945286227e-01; /* 3FE62E42, FEFA39EF */ + +double +acosh(double x) { + double t; + int hx; + + hx = ((int *) &x)[HIWORD]; + if (hx < 0x3ff00000) { /* x < 1 */ + if (isnan(x)) +#if defined(FPADD_TRAPS_INCOMPLETE_ON_NAN) + return (hx >= 0xfff80000 ? x : (x - x) / (x - x)); + /* assumes sparc-like QNaN */ +#else + return (x - x) / (x - x); +#endif + else + return (_SVID_libm_err(x, x, 29)); + } else if (hx >= 0x41b00000) { + /* x >= 2**28 */ + if (hx >= 0x7ff00000) { /* x is inf or NaN */ +#if defined(FPADD_TRAPS_INCOMPLETE_ON_NAN) + return (hx >= 0x7ff80000 ? 
x : x + x); + /* assumes sparc-like QNaN */ +#else + return (x + x); +#endif + } else /* acosh(huge)=log(2x) */ + return (log(x) + ln2); + } else if (((hx - 0x3ff00000) | ((int *) &x)[LOWORD]) == 0) { + return (0.0); /* acosh(1) = 0 */ + } else if (hx > 0x40000000) { + /* 2**28 > x > 2 */ + t = x * x; + return (log(2.0 * x - one / (x + sqrt(t - one)))); + } else { + /* 1 < x < 2 */ + t = x - one; + return (log1p(t + sqrt(2.0 * t + t * t))); + } +} diff --git a/usr/src/lib/libm/common/C/asin.c b/usr/src/lib/libm/common/C/asin.c new file mode 100644 index 0000000000..04578a6fac --- /dev/null +++ b/usr/src/lib/libm/common/C/asin.c @@ -0,0 +1,158 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak asin = __asin + +/* INDENT OFF */ +/* + * asin(x) + * Method : + * Since asin(x) = x + x^3/6 + x^5*3/40 + x^7*15/336 + ... 
+ * we approximate asin(x) on [0,0.5] by + * asin(x) = x + x*x^2*R(x^2) + * where + * R(x^2) is a rational approximation of (asin(x)-x)/x^3 + * and its remez error is bounded by + * |(asin(x)-x)/x^3 - R(x^2)| < 2^(-58.75) + * + * For x in [0.5,1] + * asin(x) = pi/2-2*asin(sqrt((1-x)/2)) + * Let y = (1-x), z = y/2, s := sqrt(z), and pio2_hi+pio2_lo=pi/2; + * then for x>0.975 + * asin(x) = pi/2 - 2*(s+s*z*R(z)) + * = pio2_hi - (2*(s+s*z*R(z)) - pio2_lo) + * For x<=0.975, let pio4_hi = pio2_hi/2, then + * f = hi part of s; + * c = sqrt(z) - f = (z-f*f)/(s+f) ...f+c=sqrt(z) + * and + * asin(x) = pi/2 - 2*(s+s*z*R(z)) + * = pio4_hi+(pio4_hi-2s)-(2s*z*R(z)-pio2_lo) + * = pio4_hi+(pio4_hi-2f)-(2s*z*R(z)-(pio2_lo-2c)) + * + * Special cases: + * if x is NaN, return x itself; + * if |x|>1, return NaN with invalid signal. + * + */ +/* INDENT ON */ + +#include "libm_synonyms.h" /* __asin, __sqrt, __isnan */ +#include "libm_protos.h" /* _SVID_libm_err */ +#include "libm_macros.h" +#include <math.h> + +/* INDENT OFF */ +static const double xxx[] = { +/* one */ 1.00000000000000000000e+00, /* 3FF00000, 00000000 */ +/* huge */ 1.000e+300, +/* pio2_hi */ 1.57079632679489655800e+00, /* 3FF921FB, 54442D18 */ +/* pio2_lo */ 6.12323399573676603587e-17, /* 3C91A626, 33145C07 */ +/* pio4_hi */ 7.85398163397448278999e-01, /* 3FE921FB, 54442D18 */ +/* coefficient for R(x^2) */ +/* pS0 */ 1.66666666666666657415e-01, /* 3FC55555, 55555555 */ +/* pS1 */ -3.25565818622400915405e-01, /* BFD4D612, 03EB6F7D */ +/* pS2 */ 2.01212532134862925881e-01, /* 3FC9C155, 0E884455 */ +/* pS3 */ -4.00555345006794114027e-02, /* BFA48228, B5688F3B */ +/* pS4 */ 7.91534994289814532176e-04, /* 3F49EFE0, 7501B288 */ +/* pS5 */ 3.47933107596021167570e-05, /* 3F023DE1, 0DFDF709 */ +/* qS1 */ -2.40339491173441421878e+00, /* C0033A27, 1C8A2D4B */ +/* qS2 */ 2.02094576023350569471e+00, /* 40002AE5, 9C598AC8 */ +/* qS3 */ -6.88283971605453293030e-01, /* BFE6066C, 1B8D0159 */ +/* qS4 */ 7.70381505559019352791e-02 /* 3FB3B8C5, 
B12E9282 */ +}; +#define one xxx[0] +#define huge xxx[1] +#define pio2_hi xxx[2] +#define pio2_lo xxx[3] +#define pio4_hi xxx[4] +#define pS0 xxx[5] +#define pS1 xxx[6] +#define pS2 xxx[7] +#define pS3 xxx[8] +#define pS4 xxx[9] +#define pS5 xxx[10] +#define qS1 xxx[11] +#define qS2 xxx[12] +#define qS3 xxx[13] +#define qS4 xxx[14] +/* INDENT ON */ + +double +asin(double x) { + double t, w, p, q, c, r, s; + int hx, ix, i; + + hx = ((int *) &x)[HIWORD]; + ix = hx & 0x7fffffff; + if (ix >= 0x3ff00000) { /* |x| >= 1 */ + if (((ix - 0x3ff00000) | ((int *) &x)[LOWORD]) == 0) + /* asin(1)=+-pi/2 with inexact */ + return (x * pio2_hi + x * pio2_lo); + else if (isnan(x)) +#if defined(FPADD_TRAPS_INCOMPLETE_ON_NAN) + return (ix >= 0x7ff80000 ? x : (x - x) / (x - x)); + /* assumes sparc-like QNaN */ +#else + return (x - x) / (x - x); /* asin(|x|>1) is NaN */ +#endif + else + return (_SVID_libm_err(x, x, 2)); + } else if (ix < 0x3fe00000) { /* |x| < 0.5 */ + if (ix < 0x3e400000) { /* if |x| < 2**-27 */ + if ((i = (int) x) == 0) + /* return x with inexact if x != 0 */ + return (x); + } + t = x * x; + p = t * (pS0 + t * (pS1 + t * (pS2 + t * (pS3 + + t * (pS4 + t * pS5))))); + q = one + t * (qS1 + t * (qS2 + t * (qS3 + t * qS4))); + w = p / q; + return (x + x * w); + } + /* 1 > |x| >= 0.5 */ + w = one - fabs(x); + t = w * 0.5; + p = t * (pS0 + t * (pS1 + t * (pS2 + t * (pS3 + t * (pS4 + t * pS5))))); + q = one + t * (qS1 + t * (qS2 + t * (qS3 + t * qS4))); + s = sqrt(t); + if (ix >= 0x3FEF3333) { /* if |x| > 0.975 */ + w = p / q; + t = pio2_hi - (2.0 * (s + s * w) - pio2_lo); + } else { + w = s; + ((int *) &w)[LOWORD] = 0; + c = (t - w * w) / (s + w); + r = p / q; + p = 2.0 * s * r - (pio2_lo - 2.0 * c); + q = pio4_hi - 2.0 * w; + t = pio4_hi - (p - q); + } + return (hx > 0 ? 
t : -t); +} diff --git a/usr/src/lib/libm/common/C/asinh.c b/usr/src/lib/libm/common/C/asinh.c new file mode 100644 index 0000000000..6d6b852009 --- /dev/null +++ b/usr/src/lib/libm/common/C/asinh.c @@ -0,0 +1,89 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma weak asinh = __asinh + +/* INDENT OFF */ +/* + * asinh(x) + * Method : + * Based on + * asinh(x) = sign(x) * log [ |x| + sqrt(x*x+1) ] + * we have + * asinh(x) := x if 1+x*x == 1, + * := sign(x)*(log(x)+ln2)) for large |x|, else + * := sign(x)*log(2|x|+1/(|x|+sqrt(x*x+1))) if|x| > 2, else + * := sign(x)*log1p(|x|+x^2/(1+sqrt(1+x^2))) + */ +/* INDENT ON */ + +#include "libm_synonyms.h" /* __asinh */ +#include "libm_macros.h" +#include <math.h> + +static const double xxx[] = { +/* one */ 1.00000000000000000000e+00, /* 3FF00000, 00000000 */ +/* ln2 */ 6.93147180559945286227e-01, /* 3FE62E42, FEFA39EF */ +/* huge */ 1.00000000000000000000e+300 +}; +#define one xxx[0] +#define ln2 xxx[1] +#define huge xxx[2] + +double +asinh(double x) { + double t, w; + int hx, ix; + + hx = ((int *) &x)[HIWORD]; + ix = hx & 0x7fffffff; + if (ix >= 0x7ff00000) +#if defined(FPADD_TRAPS_INCOMPLETE_ON_NAN) + return (ix >= 0x7ff80000 ? x : x + x); + /* assumes sparc-like QNaN */ +#else + return (x + x); /* x is inf or NaN */ +#endif + if (ix < 0x3e300000) { /* |x|<2**-28 */ + if (huge + x > one) + return (x); /* return x inexact except 0 */ + } + if (ix > 0x41b00000) { /* |x| > 2**28 */ + w = log(fabs(x)) + ln2; + } else if (ix > 0x40000000) { + /* 2**28 > |x| > 2.0 */ + t = fabs(x); + w = log(2.0 * t + one / (sqrt(x * x + one) + t)); + } else { + /* 2.0 > |x| > 2**-28 */ + t = x * x; + w = log1p(fabs(x) + t / (one + sqrt(one + t))); + } + return (hx > 0 ? w : -w); +} diff --git a/usr/src/lib/libm/common/C/atan.c b/usr/src/lib/libm/common/C/atan.c new file mode 100644 index 0000000000..f19b7e1fdd --- /dev/null +++ b/usr/src/lib/libm/common/C/atan.c @@ -0,0 +1,198 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak atan = __atan + +/* INDENT OFF */ +/* + * atan(x) + * Accurate Table look-up algorithm with polynomial approximation in + * partially product form. + * + * -- K.C. Ng, October 17, 2004 + * + * Algorithm + * + * (1). Purge off Inf and NaN and 0 + * (2). Reduce x to positive by atan(x) = -atan(-x). + * (3). For x <= 1/8 and let z = x*x, return + * (2.1) if x < 2^(-prec/2), atan(x) = x with inexact flag raised + * (2.2) if x < 2^(-prec/4-1), atan(x) = x-(x/3)(x*x) + * (2.3) if x < 2^(-prec/6-2), atan(x) = x+(z-5/3)(z*x/5) + * (2.4) Otherwise + * atan(x) = poly1(x) = x + A * B, + * where + * A = (p1*x*z) * (p2+z(p3+z)) + * B = ((p4+z)+z*z) * (p5+z(p6+z)) + * Note: (i) domain of poly1 is [0, 1/8], (ii) remez relative + * approximation error of poly1 is bounded by + * |(atan(x)-poly1(x))/x| <= 2^-57.61 + * (4). 
For x >= 8 then + * (3.1) if x >= 2^prec, atan(x) = atan(inf) - pio2lo + * (3.2) if x >= 2^(prec/3), atan(x) = atan(inf) - 1/x + * (3.3) if x <= 65, atan(x) = atan(inf) - poly1(1/x) + * (3.4) otherwise atan(x) = atan(inf) - poly2(1/x) + * where + * poly2(r) = (q1*r) * (q2+z(q3+z)) * (q4+z), + * its domain is [0, 0.0154]; and its remez absolute + * approximation error is bounded by + * |atan(x)-poly2(x)|<= 2^-59.45 + * + * (5). Now x is in (0.125, 8). + * Recall identity + * atan(x) = atan(y) + atan((x-y)/(1+x*y)). + * Let j = (ix - 0x3fc00000) >> 16, 0 <= j < 96, where ix is the high + * part of x in IEEE double format. Then + * atan(x) = atan(y[j]) + poly2((x-y[j])/(1+x*y[j])) + * where y[j] are carefully chosen so that it matches x to around 4.5 + * bits and at the same time atan(y[j]) is very close to an IEEE double + * floating point number. Calculation indicates that + * max|(x-y[j])/(1+x*y[j])| < 0.0154 + * j,x + * + * Accuracy: Maximum error observed is bounded by 0.6 ulp after testing + * more than 10 million random arguments + */ +/* INDENT ON */ + +#include "libm.h" +#include "libm_synonyms.h" +#include "libm_protos.h" + +extern const double _TBL_atan[]; +static const double g[] = { +/* one = */ 1.0, +/* p1 = */ 8.02176624254765935351230154992663301527500152588e-0002, +/* p2 = */ 1.27223421700559402580665846471674740314483642578e+0000, +/* p3 = */ -1.20606901800503640842521235754247754812240600586e+0000, +/* p4 = */ -2.36088967922325565496066701598465442657470703125e+0000, +/* p5 = */ 1.38345799501389166152875986881554126739501953125e+0000, +/* p6 = */ 1.06742368078953453469637224770849570631980895996e+0000, +/* q1 = */ -1.42796626333911796935538518482644576579332351685e-0001, +/* q2 = */ 3.51427110447873227059810477159863497078605962912e+0000, +/* q3 = */ 5.92129112708164262457444237952586263418197631836e-0001, +/* q4 = */ -1.99272234785683144409063061175402253866195678711e+0000, +/* pio2hi */ 1.570796326794896558e+00, +/* pio2lo */ 
6.123233995736765886e-17, +/* t1 = */ -0.333333333333333333333333333333333, +/* t2 = */ 0.2, +/* t3 = */ -1.666666666666666666666666666666666, +}; + +#define one g[0] +#define p1 g[1] +#define p2 g[2] +#define p3 g[3] +#define p4 g[4] +#define p5 g[5] +#define p6 g[6] +#define q1 g[7] +#define q2 g[8] +#define q3 g[9] +#define q4 g[10] +#define pio2hi g[11] +#define pio2lo g[12] +#define t1 g[13] +#define t2 g[14] +#define t3 g[15] + + +double +atan(double x) { + double y, z, r, p, s; + int ix, lx, hx, j; + + hx = ((int *) &x)[HIWORD]; + lx = ((int *) &x)[LOWORD]; + ix = hx & ~0x80000000; + j = ix >> 20; + + /* for |x| < 1/8 */ + if (j < 0x3fc) { + if (j < 0x3f5) { /* when |x| < 2**(-prec/6-2) */ + if (j < 0x3e3) { /* if |x| < 2**(-prec/2-2) */ + return ((int) x == 0 ? x : one); + } + if (j < 0x3f1) { /* if |x| < 2**(-prec/4-1) */ + return (x + (x * t1) * (x * x)); + } else { /* if |x| < 2**(-prec/6-2) */ + z = x * x; + s = t2 * x; + return (x + (t3 + z) * (s * z)); + } + } + z = x * x; s = p1 * x; + return (x + ((s * z) * (p2 + z * (p3 + z))) * + (((p4 + z) + z * z) * (p5 + z * (p6 + z)))); + } + + /* for |x| >= 8.0 */ + if (j >= 0x402) { + if (j < 0x436) { + r = one / x; + if (hx >= 0) { + y = pio2hi; p = pio2lo; + } else { + y = -pio2hi; p = -pio2lo; + } + if (ix < 0x40504000) { /* x < 65 */ + z = r * r; + s = p1 * r; + return (y + ((p - r) - ((s * z) * + (p2 + z * (p3 + z))) * + (((p4 + z) + z * z) * + (p5 + z * (p6 + z))))); + } else if (j < 0x412) { + z = r * r; + return (y + (p - ((q1 * r) * (q4 + z)) * + (q2 + z * (q3 + z)))); + } else + return (y + (p - r)); + } else { + if (j >= 0x7ff) /* x is inf or NaN */ + if (((ix - 0x7ff00000) | lx) != 0) +#if defined(FPADD_TRAPS_INCOMPLETE_ON_NAN) + return (ix >= 0x7ff80000 ? x : x - x); + /* assumes sparc-like QNaN */ +#else + return (x - x); +#endif + y = -pio2lo; + return (hx >= 0 ? 
pio2hi - y : y - pio2hi); + } + } else { /* now x is between 1/8 and 8 */ + double *w, w0, w1, s, z; + w = (double *) _TBL_atan + (((ix - 0x3fc00000) >> 16) << 1); + w0 = (hx >= 0)? w[0] : -w[0]; + s = (x - w0) / (one + x * w0); + w1 = (hx >= 0)? w[1] : -w[1]; + z = s * s; + return (((q1 * s) * (q4 + z)) * (q2 + z * (q3 + z)) + w1); + } +} diff --git a/usr/src/lib/libm/common/C/atan2.c b/usr/src/lib/libm/common/C/atan2.c new file mode 100644 index 0000000000..c85f13e649 --- /dev/null +++ b/usr/src/lib/libm/common/C/atan2.c @@ -0,0 +1,499 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak atan2 = __atan2 + +#include "libm.h" + +/* + * Let t(0) = 1 and for i = 1, ..., 160, let t(i) be the slope of + * the line bisecting the conical hull of the set of points (x,y) + * where x and y are positive normal floating point numbers and + * the high order words hx and hy of their binary representations + * satisfy |hx - hy - i * 0x8000| <= 0x4000. 
Then: + * + * TBL[4*i+2] is t(i) rounded to 21 significant bits (i.e., the + * low order word is zero), and + * + * TBL[4*i] + TBL[4*i+1] form a doubled-double approximation to + * atan(TBL[4*i+2]). + * + * Finally, TBL[4*161] = TBL[4*161+1] = TBL[4*161+2] = 0. + * + * Now for any (x,y) with 0 < y <= x and any 0 < t <= 1, we have + * atan(y/x) = atan(t) + atan((y-t*x)/(x+t*y)). By choosing t = + * TBL[4*i+2], where i is the multiple of 0x8000 nearest hx - hy, + * if this multiple is less than 161, and i = 161 otherwise, we + * find that |(y-t*x)/(x+t*y)| <~ 2^-5. + */ +static const double TBL[] = { + 7.8539816339744827900e-01, +3.0616169978683830179e-17, + 1.0000000000000000000e+00, +0, + 7.7198905126506112140e-01, +2.6989956960083153652e-16, + 9.7353506088256835938e-01, +0, + 7.6068143954461309164e-01, -3.5178810518941914972e-16, + 9.5174932479858398438e-01, +0, + 7.4953661876353638860e-01, -3.2548100004524337476e-16, + 9.3073129653930664062e-01, +0, + 7.3854614984728339522e-01, -2.0775571023910406668e-16, + 9.1042709350585937500e-01, +0, + 7.2770146962041337702e-01, +3.8883249403168348802e-16, + 8.9078664779663085938e-01, +0, + 7.1699492488093774512e-01, -4.0468841511547224071e-16, + 8.7176513671875000000e-01, +0, + 7.0641813488653149022e-01, +5.6902424353981484031e-17, + 8.5331964492797851562e-01, +0, + 6.9596351101035658360e-01, +2.8245513321075021303e-16, + 8.3541154861450195312e-01, +0, + 6.8562363680534943455e-01, -4.2316970721658854064e-16, + 8.1800508499145507812e-01, +0, + 6.7539055666438230219e-01, +4.3535917281300047233e-16, + 8.0106592178344726562e-01, +0, + 6.6525763346931832132e-01, +1.1830431602404727977e-17, + 7.8456401824951171875e-01, +0, + 6.5521767574310185722e-01, -1.7435923100651044208e-16, + 7.6847028732299804688e-01, +0, + 6.4526390999481897381e-01, -1.4741927403093983947e-16, + 7.5275802612304687500e-01, +0, + 6.3538979894204850041e-01, +1.5734535069995660853e-16, + 7.3740243911743164062e-01, +0, + 6.2558914346942717799e-01, 
-2.8175588856316910960e-16, + 7.2238063812255859375e-01, +0, + 6.1585586476157949676e-01, -4.3056167357725226449e-16, + 7.0767116546630859375e-01, +0, + 6.0618408027576098362e-01, +1.5018013918429320289e-16, + 6.9325399398803710938e-01, +0, + 5.9656817827486730010e-01, +5.5271942033557644157e-17, + 6.7911052703857421875e-01, +0, + 5.8700289083426504533e-01, -8.2411369282676383293e-17, + 6.6522359848022460938e-01, +0, + 5.7748303053627658699e-01, +4.9400383775709159558e-17, + 6.5157699584960937500e-01, +0, + 5.6800353968303252117e-01, +2.9924431103311109543e-16, + 6.3815546035766601562e-01, +0, + 5.5855953863493823519e-01, -2.0306003403868777403e-16, + 6.2494468688964843750e-01, +0, + 5.4914706708329674711e-01, +2.8255378613779667461e-17, + 6.1193227767944335938e-01, +0, + 5.3976176660618069292e-01, +1.6370248781078747995e-16, + 5.9910583496093750000e-01, +0, + 5.3039888601412332747e-01, -7.6196097360093680134e-17, + 5.8645296096801757812e-01, +0, + 5.2105543924318808990e-01, -2.2400815668154739561e-16, + 5.7396411895751953125e-01, +0, + 5.1172778873967050828e-01, -3.6888136019899681185e-16, + 5.6162929534912109375e-01, +0, + 5.0241199666452196482e-01, -2.5412891474397011281e-16, + 5.4943847656250000000e-01, +0, + 4.9310493954293743712e-01, +4.4132186128251152229e-16, + 5.3738307952880859375e-01, +0, + 4.8380436844750995817e-01, -2.7844387907776656488e-16, + 5.2545595169067382812e-01, +0, + 4.7450670361463753721e-01, -2.0494355197368286028e-16, + 5.1364850997924804688e-01, +0, + 4.6367660027976320691e-01, +3.1709878607954760668e-16, + 5.0003623962402343750e-01, +0, + 4.5304753104003925301e-01, +3.3593436122420574865e-16, + 4.8681926727294921875e-01, +0, + 4.4423658037407065535e-01, +2.1987183192008082015e-17, + 4.7596645355224609375e-01, +0, + 4.3567016972500294258e-01, +3.0118422805369552650e-16, + 4.6550178527832031250e-01, +0, + 4.2733152672544871820e-01, -3.2667693224866479909e-16, + 4.5539522171020507812e-01, +0, + 4.1920540176693954493e-01, 
-2.2454273841113897647e-16, + 4.4561982154846191406e-01, +0, + 4.1127722812701872357e-01, -3.1620568973494653391e-16, + 4.3615055084228515625e-01, +0, + 4.0353384063084263289e-01, -3.5932009901481421723e-16, + 4.2696499824523925781e-01, +0, + 3.9596319345246833166e-01, -4.0281533417458698585e-16, + 4.1804289817810058594e-01, +0, + 3.8855405220339722661e-01, +1.6132231486045176674e-16, + 4.0936565399169921875e-01, +0, + 3.8129566313738116889e-01, +1.7684657060650804570e-16, + 4.0091586112976074219e-01, +0, + 3.7417884791401867517e-01, +2.6897604227426977619e-16, + 3.9267849922180175781e-01, +0, + 3.6719421967585041955e-01, -4.5886151448673745001e-17, + 3.8463878631591796875e-01, +0, + 3.6033388248727771241e-01, +1.5804115573136074946e-16, + 3.7678408622741699219e-01, +0, + 3.5358982224579182940e-01, +1.2624619863035782939e-16, + 3.6910200119018554688e-01, +0, + 3.4695498404186952968e-01, +9.3221684607372865177e-17, + 3.6158156394958496094e-01, +0, + 3.4042268308109679964e-01, +2.7697913559445449137e-16, + 3.5421252250671386719e-01, +0, + 3.3398684598563566084e-01, +3.6085337449716011085e-16, + 3.4698557853698730469e-01, +0, + 3.2764182824591436827e-01, +2.0581506352606456186e-16, + 3.3989214897155761719e-01, +0, + 3.2138200938788497041e-01, -1.9015787485430693661e-16, + 3.3292388916015625000e-01, +0, + 3.1520245348069497737e-01, +2.6961839659264087022e-16, + 3.2607340812683105469e-01, +0, + 3.0909871873117023000e-01, -1.5641891686756272625e-16, + 3.1933403015136718750e-01, +0, + 3.0306644308947827682e-01, +2.8801634211591956223e-16, + 3.1269931793212890625e-01, +0, + 2.9710135482774191473e-01, -4.3148994478973365819e-16, + 3.0616307258605957031e-01, +0, + 2.9120015759141004708e-01, -6.8539854790808585159e-17, + 2.9972028732299804688e-01, +0, + 2.8535879880370362827e-01, -1.2231638445300492682e-16, + 2.9336524009704589844e-01, +0, + 2.7957422506893880865e-01, -4.6707752931043135528e-17, + 2.8709340095520019531e-01, +0, + 2.7384352102802367313e-01, 
-4.1215636366229625876e-16, + 2.8090047836303710938e-01, +0, + 2.6816369484161040049e-01, -2.3700583122400495333e-16, + 2.7478218078613281250e-01, +0, + 2.6253212627627764419e-01, +2.3123213692190889610e-16, + 2.6873469352722167969e-01, +0, + 2.5694635355759309903e-01, -4.0638513814701264145e-16, + 2.6275444030761718750e-01, +0, + 2.5140385572454615470e-01, -3.4795333793554943723e-16, + 2.5683784484863281250e-01, +0, + 2.4500357070096612233e-01, +6.6542334848010259289e-17, + 2.5002646446228027344e-01, +0, + 2.3877766609573036760e-01, -2.7756633678549343650e-16, + 2.4342155456542968750e-01, +0, + 2.3365669377188336142e-01, +3.2700803838522067998e-16, + 2.3800384998321533203e-01, +0, + 2.2870810463931334766e-01, -4.4279127662219799521e-16, + 2.3278105258941650391e-01, +0, + 2.2391820542294382790e-01, +3.7558889374284208052e-16, + 2.2773718833923339844e-01, +0, + 2.1927501815429550902e-01, -1.4829838176513811186e-16, + 2.2285830974578857422e-01, +0, + 2.1476740847367459253e-01, -2.0535381496063397578e-17, + 2.1813154220581054688e-01, +0, + 2.1038568111737454558e-01, -4.2826767738736168650e-16, + 2.1354568004608154297e-01, +0, + 2.0612057974373865221e-01, +4.2108051749502232359e-16, + 2.0909011363983154297e-01, +0, + 2.0196410359405447821e-01, +3.5157118083511092869e-16, + 2.0475566387176513672e-01, +0, + 1.9790861144712756925e-01, +3.7894950972257700994e-16, + 2.0053362846374511719e-01, +0, + 1.9394752160084305359e-01, +2.8270367403478935534e-16, + 1.9641649723052978516e-01, +0, + 1.9007440763641536563e-01, -2.0842758095683676397e-16, + 1.9239699840545654297e-01, +0, + 1.8628369629742813629e-01, +3.4710917040399448932e-16, + 1.8846881389617919922e-01, +0, + 1.8256998712939509488e-01, +1.1053834120570125251e-16, + 1.8462586402893066406e-01, +0, + 1.7892875067284830237e-01, +3.0486232913366680305e-16, + 1.8086302280426025391e-01, +0, + 1.7535529778449010507e-01, -2.3810135019970148624e-16, + 1.7717504501342773438e-01, +0, + 1.7184559192514736736e-01, 
+5.1432582846210893916e-17, + 1.7355740070343017578e-01, +0, + 1.6839590847744290159e-01, +3.1605623296041433586e-18, + 1.7000591754913330078e-01, +0, + 1.6500283902547518977e-01, +1.5405422268770998251e-16, + 1.6651678085327148438e-01, +0, + 1.6166306303174859949e-01, +4.0042241517254928672e-16, + 1.6308629512786865234e-01, +0, + 1.5837358268281231943e-01, -2.2786616251622967291e-16, + 1.5971112251281738281e-01, +0, + 1.5513160990288810126e-01, -3.7547723514797166336e-16, + 1.5638816356658935547e-01, +0, + 1.5193468535499299321e-01, +4.3497510505554267446e-16, + 1.5311467647552490234e-01, +0, + 1.4878033155427861089e-01, -2.3102860235324261895e-16, + 1.4988791942596435547e-01, +0, + 1.4566628729590647140e-01, +9.9227592950040279415e-17, + 1.4670538902282714844e-01, +0, + 1.4259050967286590605e-01, -3.3869909683813096906e-18, + 1.4356482028961181641e-01, +0, + 1.3955105903633846509e-01, +1.5500435650773331566e-17, + 1.4046406745910644531e-01, +0, + 1.3654610022831903393e-01, +3.3965918616682805753e-16, + 1.3740110397338867188e-01, +0, + 1.3357402082462854764e-01, +2.7572431581527535421e-16, + 1.3437414169311523438e-01, +0, + 1.3063319828908959153e-01, -3.4667213797076707331e-16, + 1.3138139247894287109e-01, +0, + 1.2772200049776749609e-01, +3.1089261947725651968e-16, + 1.2842106819152832031e-01, +0, + 1.2436931430778752627e-01, -4.0654251891464630059e-16, + 1.2501454353332519531e-01, +0, + 1.2111683701666819957e-01, -3.9381654342464836012e-16, + 1.2171256542205810547e-01, +0, + 1.1844801833536511282e-01, -3.6673155595150283444e-16, + 1.1900508403778076172e-01, +0, + 1.1587365536613614125e-01, -1.5026628801318421951e-16, + 1.1639505624771118164e-01, +0, + 1.1338607085741525538e-01, +1.2886806274050538880e-16, + 1.1387449502944946289e-01, +0, + 1.1097844020819369604e-01, +2.3848343623577768044e-16, + 1.1143630743026733398e-01, +0, + 1.0864456107308662069e-01, +4.2065430313285469408e-16, + 1.0907405614852905273e-01, +0, + 1.0637891628473727934e-01, 
-4.6883543790348472687e-18, + 1.0678201913833618164e-01, +0, + 1.0417650062205296990e-01, +1.4774925414624453292e-16, + 1.0455501079559326172e-01, +0, + 1.0203276464730581807e-01, -1.5677032794816452332e-16, + 1.0238832235336303711e-01, +0, + 9.9943617083734892503e-02, +3.4511310907979792828e-16, + 1.0027772188186645508e-01, +0, + 9.7905249824711049200e-02, +3.4489485563461708496e-16, + 9.8219275474548339844e-02, +0, + 9.5914316649349906641e-02, -1.3214510886789011569e-17, + 9.6209526062011718750e-02, +0, + 9.3967698614664918466e-02, +1.1048427091217964090e-16, + 9.4245254993438720703e-02, +0, + 9.2062564267554769515e-02, -3.7297463814697759309e-16, + 9.2323541641235351562e-02, +0, + 9.0196252506350660383e-02, -3.5280143043576718079e-16, + 9.0441644191741943359e-02, +0, + 8.8366391663268650802e-02, -6.1140673227541621183e-17, + 8.8597118854522705078e-02, +0, + 8.6570782100201526532e-02, -2.0998844594957629702e-16, + 8.6787700653076171875e-02, +0, + 8.4807337678923566671e-02, +3.9530981588194673068e-16, + 8.5011243820190429688e-02, +0, + 8.3074323040850828193e-02, -4.3022503210464894539e-17, + 8.3265960216522216797e-02, +0, + 8.1369880712663267275e-02, -6.3063867569127169744e-18, + 8.1549942493438720703e-02, +0, + 7.9692445771216036121e-02, -5.0787623072962671502e-17, + 7.9861581325531005859e-02, +0, + 7.8040568735575632786e-02, -3.8810063021216721741e-16, + 7.8199386596679687500e-02, +0, + 7.6412797391314235540e-02, +4.1246529500495762995e-16, + 7.6561868190765380859e-02, +0, + 7.4807854772808823896e-02, -3.7025599052186724156e-16, + 7.4947714805603027344e-02, +0, + 7.3224639528778112663e-02, +4.2209138483206712401e-17, + 7.3355793952941894531e-02, +0, + 7.1661929761571485642e-02, -3.2074473649855177622e-16, + 7.1784853935241699219e-02, +0, + 7.0118738881148168218e-02, -2.5371257235753296804e-16, + 7.0233881473541259766e-02, +0, + 6.8594137996416115755e-02, +3.3796987842548399135e-16, + 6.8701922893524169922e-02, +0, + 6.7087137393172291411e-02, 
+5.5061492696328852397e-17, + 6.7187964916229248047e-02, +0, + 6.5596983299946565182e-02, -2.1580863111502565280e-16, + 6.5691232681274414062e-02, +0, + 6.4122802037412718335e-02, -3.1315661827469233434e-16, + 6.4210832118988037109e-02, +0, + 6.2426231582525915087e-02, -2.5758980071296622188e-16, + 6.2507450580596923828e-02, +0, + 6.0781559928021700046e-02, +1.3736899336217710591e-16, + 6.0856521129608154297e-02, +0, + 5.9432882624005145544e-02, +2.2246097394328856474e-16, + 5.9502959251403808594e-02, +0, + 5.8132551274581167888e-02, -6.2525053236379489390e-18, + 5.8198124170303344727e-02, +0, + 5.6876611930681164608e-02, -2.6589930995607417149e-16, + 5.6938022375106811523e-02, +0, + 5.5661522654748551986e-02, -4.2736362859832186197e-16, + 5.5719077587127685547e-02, +0, + 5.4484124463757943602e-02, -1.6708067365310384253e-16, + 5.4538100957870483398e-02, +0, + 5.3341582449436764080e-02, +3.3271673004611311850e-17, + 5.3392231464385986328e-02, +0, + 5.2231267345892007370e-02, -3.5593396674200571616e-16, + 5.2278816699981689453e-02, +0, + 5.1150874758829623090e-02, +1.4432815841187114832e-16, + 5.1195532083511352539e-02, +0, + 5.0098306612679444072e-02, +9.4680943793589404083e-17, + 5.0140261650085449219e-02, +0, + 4.9071641675614507960e-02, +2.1131168520301896817e-16, + 4.9111068248748779297e-02, +0, + 4.8069135772851545596e-02, +1.6035336741307516296e-16, + 4.8106193542480468750e-02, +0, + 4.7089192241088539959e-02, -2.2491738698796901479e-16, + 4.7124028205871582031e-02, +0, + 4.6130362086062248750e-02, -1.5111423469578965206e-16, + 4.6163111925125122070e-02, +0, + 4.5191314382707403752e-02, +4.1989325207399786612e-16, + 4.5222103595733642578e-02, +0, + 4.4270836390474244126e-02, -4.1432635292331004454e-16, + 4.4299781322479248047e-02, +0, + 4.3367774164955186222e-02, -3.0615383054587355892e-16, + 4.3394982814788818359e-02, +0, + 4.2481121875321825598e-02, -3.6730166956273555173e-16, + 4.2506694793701171875e-02, +0, + 4.1609902899457651415e-02, 
-4.4226425958068821782e-16,
+	 4.1633933782577514648e-02, +0,
+	 4.0753259129372665370e-02, +1.9801161516527046872e-16,
+	 4.0775835514068603516e-02, +0,
+	 3.9910361780060910064e-02, +8.2560620036613164573e-18,
+	 3.9931565523147583008e-02, +0,
+	 3.9080441183869218946e-02, +3.9908991939242971628e-17,
+	 3.9100348949432373047e-02, +0,
+	 3.8262816593271686827e-02, +9.5182237812195590276e-17,
+	 3.8281500339508056641e-02, +0,
+	 3.7456806948784837630e-02, +1.5213508760679563439e-16,
+	 3.7474334239959716797e-02, +0,
+	 3.6661849947035918262e-02, +7.3335516005184616486e-17,
+	 3.6678284406661987305e-02, +0,
+	 3.5877353272533163420e-02, -1.3007348019891714540e-16,
+	 3.5892754793167114258e-02, +0,
+	 3.5102754135096780885e-02, -2.9903662298950558656e-16,
+	 3.5117179155349731445e-02, +0,
+	 3.4337638360670830195e-02, +2.9656295131966114331e-16,
+	 3.4351140260696411133e-02, +0,
+	 3.3581472523789734907e-02, +3.4810947205572817820e-16,
+	 3.3594101667404174805e-02, +0,
+	 3.2833871859357266487e-02, -3.8885440174405159838e-16,
+	 3.2845675945281982422e-02, +0,
+	 3.2094421679560447558e-02, +5.8805134853032009978e-17,
+	 3.2105445861816406250e-02, +0,
+	 3.1243584858944295490e-02, +2.8737383773884313066e-17,
+	 3.1253755092620849609e-02, +0,
+	 0, 0, 0, 0
+};
+
+static const double C[] = {
+	 0.0,
+	 0.125,
+	 1.2980742146337069071e+33,
+	 7.8539816339744827900e-01,
+	 1.5707963267948965580e+00,
+	 6.1232339957367658860e-17,
+	 -3.1415926535897931160e+00,
+	 -1.2246467991473531772e-16,
+	 -3.33333333333327571893331786354179101074860633009e-0001,
+	 +1.99999999942671624230086497610394721817438631379e-0001,
+	 -1.42856965565428636896183013324727205980484158356e-0001,
+	 +1.10894981496317081405107718475040168084164825641e-0001,
+};
+
+#define	zero	C[0]
+#define	twom3	C[1]
+#define	two110	C[2]
+#define	pio4	C[3]
+#define	pio2	C[4]
+#define	pio2_lo	C[5]
+#define	mpi	C[6]
+#define	mpi_lo	C[7]
+#define	p1	C[8]
+#define	p2	C[9]
+#define	p3	C[10]
+#define	p4	C[11]
+/*
+ * atan2(oy, ox) -- computed as +-(ah + al + atan(y/x)), where x is the
+ * operand with the larger magnitude bits and y the other one (both
+ * made non-negative with fabs), and (ah, al) is a head/tail offset:
+ * zero, mpi/mpi_lo, or +-pio2/pio2_lo, chosen from the operand order
+ * and the sign bit of ox.  sy carries the sign applied to the result.
+ *
+ * atan(y/x) itself is TBL[k] + TBL[k+1] plus a polynomial in the
+ * reduced argument z = (y - t*x)/(x + t*y) with t = TBL[k+2]; the
+ * comment on TBL explains the choice of k.
+ *
+ * Cases settled before the table lookup:
+ *   - x is NaN (a NaN operand always ends up in x): returns ox * oy;
+ *   - x is infinite: returns +-ah, after adding pio4 to ah when the
+ *     high word of y is 0x7ff00000 or above as well;
+ *   - the high words differ by 0x03600000 or more: returns +-ah, or
+ *     +-(y / x) when (int)ah is 0;
+ *   - y is zero: returns +-ah, or _SVID_libm_err(ox, oy, 3) when x is
+ *     zero too;
+ *   - otherwise, when hx >= 0x7fe00000 both operands are multiplied by
+ *     twom3, and when y is subnormal both are multiplied by two110.
+ *
+ * NOTE(review): HIWORD, LOWORD and _SVID_libm_err are declared outside
+ * this file; the writes to `inexact' presumably exist only to force
+ * the (int) conversion and hence the inexact flag -- confirm.
+ */
+double
+atan2(double oy, double ox) {
+	double ah, al, t, xh, x, y, z;
+	int i, k, hx, hy, sx, sy;
+#ifndef lint
+	volatile int inexact;	/* only ever written, never read */
+#endif
+
+	hy = ((int *)&oy)[HIWORD];
+	sy = hy & 0x80000000;		/* sign bit of oy */
+	hy &= ~0x80000000;		/* high word of |oy| */
+
+	hx = ((int *)&ox)[HIWORD];
+	sx = hx & 0x80000000;		/* sign bit of ox */
+	hx &= ~0x80000000;		/* high word of |ox| */
+
+	if (hy > hx || (hy == hx && ((unsigned *)&oy)[LOWORD] >
+	    ((unsigned *)&ox)[LOWORD])) {	/* oy has the larger bits */
+		i = hx;			/* swap hx and hy */
+		hx = hy;
+		hy = i;
+		x = fabs(oy);
+		y = fabs(ox);
+		if (sx) {		/* ox has its sign bit set */
+			ah = pio2;
+			al = pio2_lo;
+		} else {
+			ah = -pio2;
+			al = -pio2_lo;
+			sy ^= 0x80000000;	/* negative offset: flip result sign */
+		}
+	} else {
+		x = fabs(ox);
+		y = fabs(oy);
+		if (sx) {		/* ox has its sign bit set */
+			ah = mpi;
+			al = mpi_lo;
+			sy ^= 0x80000000;	/* negative offset: flip result sign */
+		} else {
+			ah = al = zero;
+		}
+	}
+
+	if (hx >= 0x7fe00000 || hx - hy >= 0x03600000) {
+		if (hx >= 0x7ff00000) {	/* x is inf or NaN */
+			if (((hx ^ 0x7ff00000) | ((int *)&x)[LOWORD]) != 0)
+				return (ox * oy);	/* fraction bits set: NaN */
+			if (hy >= 0x7ff00000)	/* y has an all-ones exponent too */
+				ah += pio4;
+#ifndef lint
+			inexact = (int)ah;	/* inexact if ah != 0 */
+#endif
+			return ((sy)? -ah : ah);
+		}
+		if (hx - hy >= 0x03600000) {
+			if ((int)ah == 0)	/* no offset: the ratio is the result */
+				ah = y / x;
+			return ((sy)? -ah : ah);
+		}
+		y *= twom3;		/* reached only with hx >= 0x7fe00000 */
+		x *= twom3;
+		hy -= 0x00300000;	/* keep hy and hx in step with the scaling */
+		hx -= 0x00300000;
+	} else if (hy < 0x00100000) {	/* y is subnormal or zero */
+		if ((hy | ((int *)&y)[LOWORD]) == 0) {	/* y is zero */
+			if ((hx | ((int *)&x)[LOWORD]) == 0)	/* x is zero too */
+				return (_SVID_libm_err(ox, oy, 3));
+#ifndef lint
+			inexact = (int)ah;	/* inexact if ah != 0 */
+#endif
+			return ((sy)? -ah : ah);
+		}
+		y *= two110;
+		x *= two110;
+		hy = ((int *)&y)[HIWORD];	/* re-read after scaling */
+		hx = ((int *)&x)[HIWORD];
+	}
+
+	k = (((hx - hy) + 0x00004000) >> 13) & ~0x3;	/* 4 * rounded (hx-hy)/0x8000 */
+	if (k > 644)		/* 644 = 4*161, the all-zero last row */
+		k = 644;
+	ah += TBL[k];
+	al += TBL[k+1];
+	t = TBL[k+2];
+
+	xh = x;
+	((int *)&xh)[LOWORD] = 0;	/* xh: x with its LOWORD word cleared */
+	z = ((y - t * xh) - t * (x - xh)) / (x + y * t);
+	x = z * z;		/* x is reused for z^2 from here on */
+	t = ah + (z + (al + (z * x) * (p1 + x * (p2 + x * (p3 + x * p4)))));
+	return ((sy)? -t : t);
+}
diff --git a/usr/src/lib/libm/common/C/atan2pi.c b/usr/src/lib/libm/common/C/atan2pi.c
new file mode 100644
index 0000000000..759dd1f489
--- /dev/null
+++ b/usr/src/lib/libm/common/C/atan2pi.c
@@ -0,0 +1,51 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma weak atan2pi = __atan2pi
+
+/*
+ * atan2pi(y, x) = atan2(y, x)/pi
+ *
+ * The quotient is formed by multiplying atan2(y, x) by the constant
+ * invpi.  When both arguments compare equal to zero, atan2 is not
+ * called: y itself is returned if the HIWORD word of x is non-negative
+ * as an int, otherwise 1.0 or -1.0 is returned according to the same
+ * test on y.
+ *
+ * NOTE(review): HIWORD comes from libm.h and is assumed to select the
+ * word that carries the sign bit -- confirm.
+ */
+
+#include "libm.h"
+
+static const double invpi = 0.3183098861837906715377675;
+
+double
+atan2pi(double y, double x) {
+	int ix, iy;
+
+	if (x == 0.0 && y == 0.0) {	/* true for zeros of either sign */
+		ix = ((int *)&x)[HIWORD];
+		iy = ((int *)&y)[HIWORD];
+		if (ix >= 0)		/* sign bit of x clear */
+			return (y);
+		return ((iy >= 0)? 1.0 : -1.0);	/* sign follows y */
+	}
+	return (atan2(y, x) * invpi);
+}
diff --git a/usr/src/lib/libm/common/C/atanh.c b/usr/src/lib/libm/common/C/atanh.c
new file mode 100644
index 0000000000..48ff800e65
--- /dev/null
+++ b/usr/src/lib/libm/common/C/atanh.c
@@ -0,0 +1,71 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma weak atanh = __atanh
+
+/* INDENT OFF */
+/*
+ * atanh(x)
+ * Code originated from 4.3bsd.
+ * Modified by K.C. Ng for SUN 4.0 libm.
+ * Method :
+ *                  1              2x                          x
+ *     atanh(x) = --- * log(1 + -------) = 0.5 * log1p(2 * --------)
+ *                  2             1 - x                      1 - x
+ * Note: to guarantee atanh(-x) = -atanh(x), we use
+ *                sign(x)             |x|
+ *     atanh(x) = ------- * log1p(2*-------).
+ *                   2              1 - |x|
+ *
+ * Special cases:
+ *	atanh(x) is NaN if |x| > 1 with signal;
+ *	atanh(NaN) is that NaN with no signal;
+ *	atanh(+-1) is +-INF with signal.
+ */ +/* INDENT ON */ + +#include "libm.h" +#include "libm_synonyms.h" +#include "libm_protos.h" +#include <math.h> + +double +atanh(double x) { + double t; + + if (isnan(x)) + return (x * x); /* switched from x + x for Cheetah */ + t = fabs(x); + if (t > 1.0) + return (_SVID_libm_err(x, x, 30)); /* sNaN */ + if (t == 1.0) + return (_SVID_libm_err(x, x, 31)); /* x/0; */ + t = t / (1.0 - t); + return (copysign(0.5, x) * log1p(t + t)); +} diff --git a/usr/src/lib/libm/common/C/cbrt.c b/usr/src/lib/libm/common/C/cbrt.c new file mode 100644 index 0000000000..52749061d0 --- /dev/null +++ b/usr/src/lib/libm/common/C/cbrt.c @@ -0,0 +1,272 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include "libm_macros.h" + +/* INDENT OFF */ + +/* + * cbrt: double precision cube root + * + * Algorithm: bit hacking, table lookup, and polynomial approximation + * + * For normal x, write x = s*2^(3j)*z where s = +/-1, j is an integer, + * and 1 <= z < 8. Let y := s*2^j. 
From a table, find u such that + * u^3 is computable exactly and |(z-u^3)/u^3| <~ 2^-8. We construct + * y, z, and the table index from x by a few integer operations. + * + * Now cbrt(x) = y*u*(1+t)^(1/3) where t = (z-u^3)/u^3. We approximate + * (1+t)^(1/3) by a polynomial 1+p(t), where p(t) := t*(p1+t*(p2+...+ + * (p5+t*p6))). By computing the result as y*(u+u*p(t)), we can bound + * the worst case error by .51 ulp. + * + * Notes: + * + * 1. For subnormal x, we scale x by 2^54, compute the cube root, and + * scale the result by 2^-18. + * + * 2. cbrt(+/-inf) = +/-inf and cbrt(NaN) is NaN. + */ + +/* + * for i = 0, ..., 385 + * form x(i) with high word 0x3ff00000 + (i << 13) and low word 0; + * then TBL[i] = cbrt(x(i)) rounded to 17 significant bits + */ +static const double __libm_TBL_cbrt[] = { + 1.00000000000000000e+00, 1.00259399414062500e+00, 1.00518798828125000e+00, + 1.00775146484375000e+00, 1.01031494140625000e+00, 1.01284790039062500e+00, + 1.01538085937500000e+00, 1.01791381835937500e+00, 1.02041625976562500e+00, + 1.02290344238281250e+00, 1.02539062500000000e+00, 1.02786254882812500e+00, + 1.03031921386718750e+00, 1.03277587890625000e+00, 1.03520202636718750e+00, + 1.03762817382812500e+00, 1.04003906250000000e+00, 1.04244995117187500e+00, + 1.04483032226562500e+00, 1.04721069335937500e+00, 1.04959106445312500e+00, + 1.05194091796875000e+00, 1.05429077148437500e+00, 1.05662536621093750e+00, + 1.05895996093750000e+00, 1.06127929687500000e+00, 1.06358337402343750e+00, + 1.06587219238281250e+00, 1.06816101074218750e+00, 1.07044982910156250e+00, + 1.07270812988281250e+00, 1.07496643066406250e+00, 1.07722473144531250e+00, + 1.07945251464843750e+00, 1.08168029785156250e+00, 1.08390808105468750e+00, + 1.08612060546875000e+00, 1.08831787109375000e+00, 1.09051513671875000e+00, + 1.09269714355468750e+00, 1.09487915039062500e+00, 1.09704589843750000e+00, + 1.09921264648437500e+00, 1.10136413574218750e+00, 1.10350036621093750e+00, + 1.10563659667968750e+00, 
1.10775756835937500e+00, 1.10987854003906250e+00, + 1.11198425292968750e+00, 1.11408996582031250e+00, 1.11618041992187500e+00, + 1.11827087402343750e+00, 1.12034606933593750e+00, 1.12242126464843750e+00, + 1.12448120117187500e+00, 1.12654113769531250e+00, 1.12858581542968750e+00, + 1.13063049316406250e+00, 1.13265991210937500e+00, 1.13468933105468750e+00, + 1.13670349121093750e+00, 1.13871765136718750e+00, 1.14073181152343750e+00, + 1.14273071289062500e+00, 1.14471435546875000e+00, 1.14669799804687500e+00, + 1.14868164062500000e+00, 1.15065002441406250e+00, 1.15260314941406250e+00, + 1.15457153320312500e+00, 1.15650939941406250e+00, 1.15846252441406250e+00, + 1.16040039062500000e+00, 1.16232299804687500e+00, 1.16424560546875000e+00, + 1.16616821289062500e+00, 1.16807556152343750e+00, 1.16998291015625000e+00, + 1.17189025878906250e+00, 1.17378234863281250e+00, 1.17567443847656250e+00, + 1.17755126953125000e+00, 1.17942810058593750e+00, 1.18128967285156250e+00, + 1.18315124511718750e+00, 1.18501281738281250e+00, 1.18685913085937500e+00, + 1.18870544433593750e+00, 1.19055175781250000e+00, 1.19238281250000000e+00, + 1.19421386718750000e+00, 1.19602966308593750e+00, 1.19786071777343750e+00, + 1.19966125488281250e+00, 1.20147705078125000e+00, 1.20327758789062500e+00, + 1.20507812500000000e+00, 1.20686340332031250e+00, 1.20864868164062500e+00, + 1.21043395996093750e+00, 1.21220397949218750e+00, 1.21397399902343750e+00, + 1.21572875976562500e+00, 1.21749877929687500e+00, 1.21925354003906250e+00, + 1.22099304199218750e+00, 1.22274780273437500e+00, 1.22448730468750000e+00, + 1.22621154785156250e+00, 1.22795104980468750e+00, 1.22967529296875000e+00, + 1.23138427734375000e+00, 1.23310852050781250e+00, 1.23481750488281250e+00, + 1.23652648925781250e+00, 1.23822021484375000e+00, 1.23991394042968750e+00, + 1.24160766601562500e+00, 1.24330139160156250e+00, 1.24497985839843750e+00, + 1.24665832519531250e+00, 1.24833679199218750e+00, 1.25000000000000000e+00, + 
1.25166320800781250e+00, 1.25332641601562500e+00, 1.25497436523437500e+00, + 1.25663757324218750e+00, 1.25828552246093750e+00, 1.25991821289062500e+00, + 1.26319885253906250e+00, 1.26644897460937500e+00, 1.26968383789062500e+00, + 1.27290344238281250e+00, 1.27612304687500000e+00, 1.27931213378906250e+00, + 1.28248596191406250e+00, 1.28564453125000000e+00, 1.28878784179687500e+00, + 1.29191589355468750e+00, 1.29502868652343750e+00, 1.29812622070312500e+00, + 1.30120849609375000e+00, 1.30427551269531250e+00, 1.30732727050781250e+00, + 1.31036376953125000e+00, 1.31340026855468750e+00, 1.31640625000000000e+00, + 1.31941223144531250e+00, 1.32238769531250000e+00, 1.32536315917968750e+00, + 1.32832336425781250e+00, 1.33126831054687500e+00, 1.33419799804687500e+00, + 1.33712768554687500e+00, 1.34002685546875000e+00, 1.34292602539062500e+00, + 1.34580993652343750e+00, 1.34867858886718750e+00, 1.35153198242187500e+00, + 1.35437011718750000e+00, 1.35720825195312500e+00, 1.36003112792968750e+00, + 1.36283874511718750e+00, 1.36564636230468750e+00, 1.36842346191406250e+00, + 1.37120056152343750e+00, 1.37396240234375000e+00, 1.37672424316406250e+00, + 1.37945556640625000e+00, 1.38218688964843750e+00, 1.38491821289062500e+00, + 1.38761901855468750e+00, 1.39031982421875000e+00, 1.39302062988281250e+00, + 1.39569091796875000e+00, 1.39836120605468750e+00, 1.40101623535156250e+00, + 1.40367126464843750e+00, 1.40631103515625000e+00, 1.40893554687500000e+00, + 1.41156005859375000e+00, 1.41416931152343750e+00, 1.41676330566406250e+00, + 1.41935729980468750e+00, 1.42193603515625000e+00, 1.42449951171875000e+00, + 1.42706298828125000e+00, 1.42962646484375000e+00, 1.43215942382812500e+00, + 1.43469238281250000e+00, 1.43722534179687500e+00, 1.43974304199218750e+00, + 1.44224548339843750e+00, 1.44474792480468750e+00, 1.44723510742187500e+00, + 1.44972229003906250e+00, 1.45219421386718750e+00, 1.45466613769531250e+00, + 1.45712280273437500e+00, 1.45956420898437500e+00, 1.46200561523437500e+00, 
+ 1.46444702148437500e+00, 1.46687316894531250e+00, 1.46928405761718750e+00, + 1.47169494628906250e+00, 1.47409057617187500e+00, 1.47648620605468750e+00, + 1.47886657714843750e+00, 1.48124694824218750e+00, 1.48361206054687500e+00, + 1.48597717285156250e+00, 1.48834228515625000e+00, 1.49067687988281250e+00, + 1.49302673339843750e+00, 1.49536132812500000e+00, 1.49768066406250000e+00, + 1.50000000000000000e+00, 1.50230407714843750e+00, 1.50460815429687500e+00, + 1.50691223144531250e+00, 1.50920104980468750e+00, 1.51148986816406250e+00, + 1.51376342773437500e+00, 1.51603698730468750e+00, 1.51829528808593750e+00, + 1.52055358886718750e+00, 1.52279663085937500e+00, 1.52503967285156250e+00, + 1.52728271484375000e+00, 1.52951049804687500e+00, 1.53173828125000000e+00, + 1.53395080566406250e+00, 1.53616333007812500e+00, 1.53836059570312500e+00, + 1.54055786132812500e+00, 1.54275512695312500e+00, 1.54493713378906250e+00, + 1.54711914062500000e+00, 1.54928588867187500e+00, 1.55145263671875000e+00, + 1.55361938476562500e+00, 1.55577087402343750e+00, 1.55792236328125000e+00, + 1.56005859375000000e+00, 1.56219482421875000e+00, 1.56433105468750000e+00, + 1.56645202636718750e+00, 1.56857299804687500e+00, 1.57069396972656250e+00, + 1.57279968261718750e+00, 1.57490539550781250e+00, 1.57699584960937500e+00, + 1.57908630371093750e+00, 1.58117675781250000e+00, 1.58325195312500000e+00, + 1.58532714843750000e+00, 1.58740234375000000e+00, 1.59152221679687500e+00, + 1.59562683105468750e+00, 1.59970092773437500e+00, 1.60375976562500000e+00, + 1.60780334472656250e+00, 1.61183166503906250e+00, 1.61582946777343750e+00, + 1.61981201171875000e+00, 1.62376403808593750e+00, 1.62770080566406250e+00, + 1.63162231445312500e+00, 1.63552856445312500e+00, 1.63941955566406250e+00, + 1.64328002929687500e+00, 1.64714050292968750e+00, 1.65097045898437500e+00, + 1.65476989746093750e+00, 1.65856933593750000e+00, 1.66235351562500000e+00, + 1.66610717773437500e+00, 1.66986083984375000e+00, 
1.67358398437500000e+00, + 1.67729187011718750e+00, 1.68098449707031250e+00, 1.68466186523437500e+00, + 1.68832397460937500e+00, 1.69197082519531250e+00, 1.69560241699218750e+00, + 1.69921875000000000e+00, 1.70281982421875000e+00, 1.70640563964843750e+00, + 1.70997619628906250e+00, 1.71353149414062500e+00, 1.71707153320312500e+00, + 1.72059631347656250e+00, 1.72410583496093750e+00, 1.72760009765625000e+00, + 1.73109436035156250e+00, 1.73455810546875000e+00, 1.73800659179687500e+00, + 1.74145507812500000e+00, 1.74488830566406250e+00, 1.74829101562500000e+00, + 1.75169372558593750e+00, 1.75508117675781250e+00, 1.75846862792968750e+00, + 1.76182556152343750e+00, 1.76516723632812500e+00, 1.76850891113281250e+00, + 1.77183532714843750e+00, 1.77514648437500000e+00, 1.77844238281250000e+00, + 1.78173828125000000e+00, 1.78500366210937500e+00, 1.78826904296875000e+00, + 1.79151916503906250e+00, 1.79476928710937500e+00, 1.79798889160156250e+00, + 1.80120849609375000e+00, 1.80441284179687500e+00, 1.80760192871093750e+00, + 1.81079101562500000e+00, 1.81396484375000000e+00, 1.81712341308593750e+00, + 1.82026672363281250e+00, 1.82341003417968750e+00, 1.82653808593750000e+00, + 1.82965087890625000e+00, 1.83276367187500000e+00, 1.83586120605468750e+00, + 1.83894348144531250e+00, 1.84201049804687500e+00, 1.84507751464843750e+00, + 1.84812927246093750e+00, 1.85118103027343750e+00, 1.85421752929687500e+00, + 1.85723876953125000e+00, 1.86026000976562500e+00, 1.86326599121093750e+00, + 1.86625671386718750e+00, 1.86924743652343750e+00, 1.87222290039062500e+00, + 1.87518310546875000e+00, 1.87814331054687500e+00, 1.88108825683593750e+00, + 1.88403320312500000e+00, 1.88696289062500000e+00, 1.88987731933593750e+00, + 1.89279174804687500e+00, 1.89569091796875000e+00, 1.89859008789062500e+00, + 1.90147399902343750e+00, 1.90435791015625000e+00, 1.90722656250000000e+00, + 1.91007995605468750e+00, 1.91293334960937500e+00, 1.91577148437500000e+00, + 1.91860961914062500e+00, 
1.92143249511718750e+00, 1.92425537109375000e+00, + 1.92706298828125000e+00, 1.92985534667968750e+00, 1.93264770507812500e+00, + 1.93544006347656250e+00, 1.93821716308593750e+00, 1.94097900390625000e+00, + 1.94374084472656250e+00, 1.94650268554687500e+00, 1.94924926757812500e+00, + 1.95198059082031250e+00, 1.95471191406250000e+00, 1.95742797851562500e+00, + 1.96014404296875000e+00, 1.96286010742187500e+00, 1.96556091308593750e+00, + 1.96824645996093750e+00, 1.97093200683593750e+00, 1.97361755371093750e+00, + 1.97628784179687500e+00, 1.97894287109375000e+00, 1.98159790039062500e+00, + 1.98425292968750000e+00, 1.98689270019531250e+00, 1.98953247070312500e+00, + 1.99215698242187500e+00, 1.99478149414062500e+00, 1.99739074707031250e+00, + 2.00000000000000000e+00, +}; + +/* + * The polynomial p(x) := p1*x + p2*x^2 + ... + p6*x^6 satisfies + * + * |(1+x)^(1/3) - 1 - p(x)| < 2^-63 for |x| < 0.003914 + */ +static const double C[] = { + 3.33333333333333340735623180707664400321413178600e-0001, + -1.11111111111111111992797989129069515334791432304e-0001, + 6.17283950578506695710302115234720605072083379082e-0002, + -4.11522633731005164138964638666647311514892319010e-0002, + 3.01788343105268728151735586597807324859173704847e-0002, + -2.34723340038386971009665073968507263074215090751e-0002, + 18014398509481984.0 +}; + +#define p1 C[0] +#define p2 C[1] +#define p3 C[2] +#define p4 C[3] +#define p5 C[4] +#define p6 C[5] +#define two54 C[6] + +/* INDENT ON */ + +#pragma weak cbrt = __cbrt + +double __cbrt(double x) +{ + union { + unsigned int i[2]; + double d; + } xx, yy; + double t, u, w; + unsigned int hx, sx, ex, j, offset; + + xx.d = x; + hx = xx.i[HIWORD] & ~0x80000000; + sx = xx.i[HIWORD] & 0x80000000; + + /* handle special cases */ + if (hx >= 0x7ff00000) /* x is inf or nan */ +#if defined(FPADD_TRAPS_INCOMPLETE_ON_NAN) + return hx >= 0x7ff80000 ? 
x : x + x; + /* assumes sparc-like QNaN */ +#else + return x + x; +#endif + + if (hx < 0x00100000) { /* x is subnormal or zero */ + if ((hx | xx.i[LOWORD]) == 0) + return x; + + /* scale x to normal range */ + xx.d = x * two54; + hx = xx.i[HIWORD] & ~0x80000000; + offset = 0x29800000; + } + else + offset = 0x2aa00000; + + ex = hx & 0x7ff00000; + j = (ex >> 2) + (ex >> 4) + (ex >> 6); + j = j + (j >> 6); + j = 0x7ff00000 & (j + 0x2aa00); /* j is ex/3 */ + hx -= (j + j + j); + xx.i[HIWORD] = 0x3ff00000 + hx; + + u = __libm_TBL_cbrt[(hx + 0x1000) >> 13]; + w = u * u * u; + t = (xx.d - w) / w; + + yy.i[HIWORD] = sx | (j + offset); + yy.i[LOWORD] = 0; + + w = t * t; + return yy.d * (u + u * (t * (p1 + t * p2 + w * p3) + + (w * w) * (p4 + t * p5 + w * p6))); +} diff --git a/usr/src/lib/libm/common/C/ceil.c b/usr/src/lib/libm/common/C/ceil.c new file mode 100644 index 0000000000..4d772a4c7b --- /dev/null +++ b/usr/src/lib/libm/common/C/ceil.c @@ -0,0 +1,65 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma weak ceil = __ceil + +/* + * ceil(x) returns the least integral value bigger than or equal to x. + * NOTE: ceil(x) returns result with the same sign as x's, including 0. + * + * Modified 8/4/04 for performance. + */ + +#include "libm.h" + +static const double + zero = 0.0, + one = 1.0, + two52 = 4503599627370496.0; + +double +ceil(double x) { + double t, w; + int hx, lx, ix; + + hx = ((int *)&x)[HIWORD]; + lx = ((int *)&x)[LOWORD]; + ix = hx & ~0x80000000; + if (ix >= 0x43300000) /* return x if |x| >= 2^52, or x is NaN */ + return (x * one); + t = (hx >= 0)? two52 : -two52; + w = x + t; + t = w - t; + if (ix < 0x3ff00000) { + if ((ix | lx) == 0) + return (x); + else + return ((hx < 0)? -zero : one); + } + return ((t >= x)? t : t + one); +} diff --git a/usr/src/lib/libm/common/C/copysign.c b/usr/src/lib/libm/common/C/copysign.c new file mode 100644 index 0000000000..bf8f0caf12 --- /dev/null +++ b/usr/src/lib/libm/common/C/copysign.c @@ -0,0 +1,43 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#if defined(ELFOBJ) +#pragma weak copysign = __copysign +#endif + +#include "libm.h" + +double +copysign(double x, double y) { + int hx, hy; + + hx = ((int *) &x)[HIWORD]; + hy = ((int *) &y)[HIWORD]; + return (hx ^ hy) >= 0 ? (x) : (-x); +} diff --git a/usr/src/lib/libm/common/C/cos.c b/usr/src/lib/libm/common/C/cos.c new file mode 100644 index 0000000000..1558872b70 --- /dev/null +++ b/usr/src/lib/libm/common/C/cos.c @@ -0,0 +1,223 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak cos = __cos + +/* INDENT OFF */ +/* + * cos(x) + * Accurate Table look-up algorithm by K.C. Ng, May, 1995. 
+ * + * Algorithm: see sincos.c + */ + +#include "libm.h" + +static const double sc[] = { +/* ONE = */ 1.0, +/* NONE = */ -1.0, +/* + * |sin(x) - (x+pp1*x^3+pp2*x^5)| <= 2^-58.79 for |x| < 0.008 + */ +/* PP1 = */ -0.166666666666316558867252052378889521480627858683055567, +/* PP2 = */ .008333315652997472323564894248466758248475374977974017927, +/* + * |(sin(x) - (x+p1*x^3+...+p4*x^9)| + * |------------------------------ | <= 2^-57.63 for |x| < 0.1953125 + * | x | + */ +/* P1 = */ -1.666666666666629669805215138920301589656e-0001, +/* P2 = */ 8.333333332390951295683993455280336376663e-0003, +/* P3 = */ -1.984126237997976692791551778230098403960e-0004, +/* P4 = */ 2.753403624854277237649987622848330351110e-0006, +/* + * |cos(x) - (1+qq1*x^2+qq2*x^4)| <= 2^-55.99 for |x| <= 0.008 (0x3f80624d) + */ +/* QQ1 = */ -0.4999999999975492381842911981948418542742729, +/* QQ2 = */ 0.041666542904352059294545209158357640398771740, +/* Q1 = */ -0.5, +/* Q2 = */ 4.166666666500350703680945520860748617445e-0002, +/* Q3 = */ -1.388888596436972210694266290577848696006e-0003, +/* Q4 = */ 2.478563078858589473679519517892953492192e-0005, +/* PIO2_H = */ 1.570796326794896557999, +/* PIO2_L = */ 6.123233995736765886130e-17, +/* PIO2_L0 = */ 6.123233995727922165564e-17, +/* PIO2_L1 = */ 8.843720566135701120255e-29, +/* PI3O2_H = */ 4.712388980384689673997, +/* PI3O2_L = */ 1.836970198721029765839e-16, +/* PI3O2_L0 = */ 1.836970198720396133587e-16, +/* PI3O2_L1 = */ 6.336322524749201142226e-29, +/* PI5O2_H = */ 7.853981633974482789995, +/* PI5O2_L = */ 3.061616997868382943065e-16, +/* PI5O2_L0 = */ 3.061616997861941598865e-16, +/* PI5O2_L1 = */ 6.441344200433640781982e-28, +}; +/* INDENT ON */ + +#define ONE sc[0] +#define PP1 sc[2] +#define PP2 sc[3] +#define P1 sc[4] +#define P2 sc[5] +#define P3 sc[6] +#define P4 sc[7] +#define QQ1 sc[8] +#define QQ2 sc[9] +#define Q1 sc[10] +#define Q2 sc[11] +#define Q3 sc[12] +#define Q4 sc[13] +#define PIO2_H sc[14] +#define PIO2_L sc[15] +#define PIO2_L0 
sc[16] +#define PIO2_L1 sc[17] +#define PI3O2_H sc[18] +#define PI3O2_L sc[19] +#define PI3O2_L0 sc[20] +#define PI3O2_L1 sc[21] +#define PI5O2_H sc[22] +#define PI5O2_L sc[23] +#define PI5O2_L0 sc[24] +#define PI5O2_L1 sc[25] + +extern const double _TBL_sincos[], _TBL_sincosx[]; + +double +cos(double x) { + double z, y[2], w, s, v, p, q; + int i, j, n, hx, ix, lx; + + hx = ((int *)&x)[HIWORD]; + lx = ((int *)&x)[LOWORD]; + ix = hx & ~0x80000000; + + if (ix <= 0x3fc50000) { /* |x| < 10.5/64 = 0.164062500 */ + if (ix < 0x3e400000) { /* |x| < 2**-27 */ + if ((int)x == 0) + return (ONE); + } + z = x * x; + if (ix < 0x3f800000) /* |x| < 0.008 */ + w = z * (QQ1 + z * QQ2); + else + w = z * ((Q1 + z * Q2) + (z * z) * (Q3 + z * Q4)); + return (ONE + w); + } + + /* for 0.164062500 < x < M, */ + n = ix >> 20; + if (n < 0x402) { /* x < 8 */ + i = (((ix >> 12) & 0xff) | 0x100) >> (0x401 - n); + j = i - 10; + x = fabs(x); + v = x - _TBL_sincosx[j]; + if (((j - 81) ^ (j - 101)) < 0) { + /* near pi/2, cos(pi/2-x)=sin(x) */ + p = PIO2_H - x; + i = ix - 0x3ff921fb; + x = p + PIO2_L; + if ((i | ((lx - 0x54442D00) & 0xffffff00)) == 0) { + /* very close to pi/2 */ + x = p + PIO2_L0; + return (x + PIO2_L1); + } + z = x * x; + if (((ix - 0x3ff92000) >> 12) == 0) { + /* |pi/2-x|<2**-8 */ + w = PIO2_L + (z * x) * (PP1 + z * PP2); + } else { + w = PIO2_L + (z * x) * ((P1 + z * P2) + + (z * z) * (P3 + z * P4)); + } + return (p + w); + } + s = v * v; + if (((j - 282) ^ (j - 302)) < 0) { + /* near 3/2pi, cos(x-3/2pi)=sin(x) */ + p = x - PI3O2_H; + i = ix - 0x4012D97C; + x = p - PI3O2_L; + if ((i | ((lx - 0x7f332100) & 0xffffff00)) == 0) { + /* very close to 3/2pi */ + x = p - PI3O2_L0; + return (x - PI3O2_L1); + } + z = x * x; + if (((ix - 0x4012D800) >> 9) == 0) { + /* |x-3/2pi|<2**-8 */ + w = (z * x) * (PP1 + z * PP2) - PI3O2_L; + } else { + w = (z * x) * ((P1 + z * P2) + (z * z) + * (P3 + z * P4)) - PI3O2_L; + } + return (p + w); + } + if (((j - 483) ^ (j - 503)) < 0) { + /* near 5pi/2, 
cos(5pi/2-x)=sin(x) */ + p = PI5O2_H - x; + i = ix - 0x401F6A7A; + x = p + PI5O2_L; + if ((i | ((lx - 0x29553800) & 0xffffff00)) == 0) { + /* very close to pi/2 */ + x = p + PI5O2_L0; + return (x + PI5O2_L1); + } + z = x * x; + if (((ix - 0x401F6A7A) >> 7) == 0) { + /* |pi/2-x|<2**-8 */ + w = PI5O2_L + (z * x) * (PP1 + z * PP2); + } else { + w = PI5O2_L + (z * x) * ((P1 + z * P2) + + (z * z) * (P3 + z * P4)); + } + return (p + w); + } + j <<= 1; + w = _TBL_sincos[j]; + z = _TBL_sincos[j+1]; + p = v + (v * s) * (PP1 + s * PP2); + q = s * (QQ1 + s * QQ2); + return (z - (w * p - z * q)); + } + + if (ix >= 0x7ff00000) /* cos(Inf or NaN) is NaN */ + return (x / x); + + /* argument reduction needed */ + n = __rem_pio2(x, y); + switch (n & 3) { + case 0: + return (__k_cos(y[0], y[1])); + case 1: + return (-__k_sin(y[0], y[1])); + case 2: + return (-__k_cos(y[0], y[1])); + default: + return (__k_sin(y[0], y[1])); + } +} diff --git a/usr/src/lib/libm/common/C/cosh.c b/usr/src/lib/libm/common/C/cosh.c new file mode 100644 index 0000000000..91573149e6 --- /dev/null +++ b/usr/src/lib/libm/common/C/cosh.c @@ -0,0 +1,90 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak cosh = __cosh + +/* INDENT OFF */ +/* + * cosh(x) + * Code originated from 4.3bsd. + * Modified by K.C. Ng for SUN 4.0 libm. + * Method : + * 1. Replace x by |x| (cosh(x) = cosh(-x)). + * 2. + * [ exp(x) - 1 ]^2 + * 0 <= x <= 0.3465 : cosh(x) := 1 + ------------------- + * 2*exp(x) + * + * exp(x) + 1/exp(x) + * 0.3465 <= x <= 22 : cosh(x) := ------------------- + * 2 + * 22 <= x <= lnovft : cosh(x) := exp(x)/2 + * lnovft <= x < INF : cosh(x) := scalbn(exp(x-1024*ln2),1023) + * + * Note: .3465 is a number near one half of ln2. + * + * Special cases: + * cosh(x) is |x| if x is +INF, -INF, or NaN. + * only cosh(0)=1 is exact for finite x. 
+ */ +/* INDENT ON */ + +#include "libm.h" + +static const double + ln2 = 6.93147180559945286227e-01, + ln2hi = 6.93147180369123816490e-01, + ln2lo = 1.90821492927058770002e-10, + lnovft = 7.09782712893383973096e+02; + +double +cosh(double x) { + double t, w; + + w = fabs(x); + if (!finite(w)) + return (w * w); + if (w < 0.3465) { + t = expm1(w); + w = 1.0 + t; + if (w != 1.0) + w = 1.0 + (t * t) / (w + w); + return (w); + } else if (w < 22.0) { + t = exp(w); + return (0.5 * (t + 1.0 / t)); + } else if (w <= lnovft) { + return (0.5 * exp(w)); + } else { + w = (w - 1024 * ln2hi) - 1024 * ln2lo; + if (w >= ln2) + return (_SVID_libm_err(x, x, 5)); + else + return (scalbn(exp(w), 1023)); + } +} diff --git a/usr/src/lib/libm/common/C/erf.c b/usr/src/lib/libm/common/C/erf.c new file mode 100644 index 0000000000..780876e3e5 --- /dev/null +++ b/usr/src/lib/libm/common/C/erf.c @@ -0,0 +1,427 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma weak erf = __erf +#pragma weak erfc = __erfc + +/* INDENT OFF */ +/* + * double erf(double x) + * double erfc(double x) + * x + * 2 |\ + * erf(x) = --------- | exp(-t*t)dt + * sqrt(pi) \| + * 0 + * + * erfc(x) = 1-erf(x) + * Note that + * erf(-x) = -erf(x) + * erfc(-x) = 2 - erfc(x) + * + * Method: + * 1. For |x| in [0, 0.84375] + * erf(x) = x + x*R(x^2) + * erfc(x) = 1 - erf(x) if x in [-.84375,0.25] + * = 0.5 + ((0.5-x)-x*R) if x in [0.25,0.84375] + * where R = P/Q where P is an odd poly of degree 8 and + * Q is an odd poly of degree 10. + * -57.90 + * | R - (erf(x)-x)/x | <= 2 + * + * + * Remark. The formula is derived by noting + * erf(x) = (2/sqrt(pi))*(x - x^3/3 + x^5/10 - x^7/42 + ....) + * and that + * 2/sqrt(pi) = 1.128379167095512573896158903121545171688 + * is close to one. The interval is chosen because the fix + * point of erf(x) is near 0.6174 (i.e., erf(x)=x when x is + * near 0.6174), and by some experiment, 0.84375 is chosen to + * guarantee the error is less than one ulp for erf. + * + * 2. For |x| in [0.84375,1.25], let s = |x| - 1, and + * c = 0.84506291151 rounded to single (24 bits) + * erf(x) = sign(x) * (c + P1(s)/Q1(s)) + * erfc(x) = (1-c) - P1(s)/Q1(s) if x > 0 + * 1+(c+P1(s)/Q1(s)) if x < 0 + * |P1/Q1 - (erf(|x|)-c)| <= 2**-59.06 + * Remark: here we use the taylor series expansion at x=1. + * erf(1+s) = erf(1) + s*Poly(s) + * = 0.845.. + P1(s)/Q1(s) + * That is, we use rational approximation to approximate + * erf(1+s) - (c = (single)0.84506291151) + * Note that |P1/Q1|< 0.078 for x in [0.84375,1.25] + * where + * P1(s) = degree 6 poly in s + * Q1(s) = degree 6 poly in s + * + * 3. For x in [1.25,1/0.35(~2.857143)], + * erfc(x) = (1/x)*exp(-x*x-0.5625+R1/S1) + * erf(x) = 1 - erfc(x) + * where + * R1(z) = degree 7 poly in z, (z=1/x^2) + * S1(z) = degree 8 poly in z + * + * 4. 
For x in [1/0.35,28] + * erfc(x) = (1/x)*exp(-x*x-0.5625+R2/S2) if x > 0 + * = 2.0 - (1/x)*exp(-x*x-0.5625+R2/S2) if -6<x<0 + * = 2.0 - tiny (if x <= -6) + * erf(x) = sign(x)*(1.0 - erfc(x)) if x < 6, else + * erf(x) = sign(x)*(1.0 - tiny) + * where + * R2(z) = degree 6 poly in z, (z=1/x^2) + * S2(z) = degree 7 poly in z + * + * Note1: + * To compute exp(-x*x-0.5625+R/S), let s be a single + * precision number and s := x; then + * -x*x = -s*s + (s-x)*(s+x) + * exp(-x*x-0.5626+R/S) = + * exp(-s*s-0.5625)*exp((s-x)*(s+x)+R/S); + * Note2: + * Here 4 and 5 make use of the asymptotic series + * exp(-x*x) + * erfc(x) ~ ---------- * ( 1 + Poly(1/x^2) ) + * x*sqrt(pi) + * We use rational approximation to approximate + * g(s)=f(1/x^2) = log(erfc(x)*x) - x*x + 0.5625 + * Here is the error bound for R1/S1 and R2/S2 + * |R1/S1 - f(x)| < 2**(-62.57) + * |R2/S2 - f(x)| < 2**(-61.52) + * + * 5. For inf > x >= 28 + * erf(x) = sign(x) *(1 - tiny) (raise inexact) + * erfc(x) = tiny*tiny (raise underflow) if x > 0 + * = 2 - tiny if x<0 + * + * 7. 
Special case: + * erf(0) = 0, erf(inf) = 1, erf(-inf) = -1, + * erfc(0) = 1, erfc(inf) = 0, erfc(-inf) = 2, + * erfc/erf(NaN) is NaN + */ +/* INDENT ON */ + +#include "libm_synonyms.h" /* __erf, __erfc, __exp */ +#include "libm_macros.h" +#include <math.h> + +static const double xxx[] = { +/* tiny */ 1e-300, +/* half */ 5.00000000000000000000e-01, /* 3FE00000, 00000000 */ +/* one */ 1.00000000000000000000e+00, /* 3FF00000, 00000000 */ +/* two */ 2.00000000000000000000e+00, /* 40000000, 00000000 */ +/* erx */ 8.45062911510467529297e-01, /* 3FEB0AC1, 60000000 */ +/* + * Coefficients for approximation to erf on [0,0.84375] + */ +/* efx */ 1.28379167095512586316e-01, /* 3FC06EBA, 8214DB69 */ +/* efx8 */ 1.02703333676410069053e+00, /* 3FF06EBA, 8214DB69 */ +/* pp0 */ 1.28379167095512558561e-01, /* 3FC06EBA, 8214DB68 */ +/* pp1 */ -3.25042107247001499370e-01, /* BFD4CD7D, 691CB913 */ +/* pp2 */ -2.84817495755985104766e-02, /* BF9D2A51, DBD7194F */ +/* pp3 */ -5.77027029648944159157e-03, /* BF77A291, 236668E4 */ +/* pp4 */ -2.37630166566501626084e-05, /* BEF8EAD6, 120016AC */ +/* qq1 */ 3.97917223959155352819e-01, /* 3FD97779, CDDADC09 */ +/* qq2 */ 6.50222499887672944485e-02, /* 3FB0A54C, 5536CEBA */ +/* qq3 */ 5.08130628187576562776e-03, /* 3F74D022, C4D36B0F */ +/* qq4 */ 1.32494738004321644526e-04, /* 3F215DC9, 221C1A10 */ +/* qq5 */ -3.96022827877536812320e-06, /* BED09C43, 42A26120 */ +/* + * Coefficients for approximation to erf in [0.84375,1.25] + */ +/* pa0 */ -2.36211856075265944077e-03, /* BF6359B8, BEF77538 */ +/* pa1 */ 4.14856118683748331666e-01, /* 3FDA8D00, AD92B34D */ +/* pa2 */ -3.72207876035701323847e-01, /* BFD7D240, FBB8C3F1 */ +/* pa3 */ 3.18346619901161753674e-01, /* 3FD45FCA, 805120E4 */ +/* pa4 */ -1.10894694282396677476e-01, /* BFBC6398, 3D3E28EC */ +/* pa5 */ 3.54783043256182359371e-02, /* 3FA22A36, 599795EB */ +/* pa6 */ -2.16637559486879084300e-03, /* BF61BF38, 0A96073F */ +/* qa1 */ 1.06420880400844228286e-01, /* 3FBB3E66, 18EEE323 */ +/* qa2 
*/ 5.40397917702171048937e-01, /* 3FE14AF0, 92EB6F33 */ +/* qa3 */ 7.18286544141962662868e-02, /* 3FB2635C, D99FE9A7 */ +/* qa4 */ 1.26171219808761642112e-01, /* 3FC02660, E763351F */ +/* qa5 */ 1.36370839120290507362e-02, /* 3F8BEDC2, 6B51DD1C */ +/* qa6 */ 1.19844998467991074170e-02, /* 3F888B54, 5735151D */ +/* + * Coefficients for approximation to erfc in [1.25,1/0.35] + */ +/* ra0 */ -9.86494403484714822705e-03, /* BF843412, 600D6435 */ +/* ra1 */ -6.93858572707181764372e-01, /* BFE63416, E4BA7360 */ +/* ra2 */ -1.05586262253232909814e+01, /* C0251E04, 41B0E726 */ +/* ra3 */ -6.23753324503260060396e+01, /* C04F300A, E4CBA38D */ +/* ra4 */ -1.62396669462573470355e+02, /* C0644CB1, 84282266 */ +/* ra5 */ -1.84605092906711035994e+02, /* C067135C, EBCCABB2 */ +/* ra6 */ -8.12874355063065934246e+01, /* C0545265, 57E4D2F2 */ +/* ra7 */ -9.81432934416914548592e+00, /* C023A0EF, C69AC25C */ +/* sa1 */ 1.96512716674392571292e+01, /* 4033A6B9, BD707687 */ +/* sa2 */ 1.37657754143519042600e+02, /* 4061350C, 526AE721 */ +/* sa3 */ 4.34565877475229228821e+02, /* 407B290D, D58A1A71 */ +/* sa4 */ 6.45387271733267880336e+02, /* 40842B19, 21EC2868 */ +/* sa5 */ 4.29008140027567833386e+02, /* 407AD021, 57700314 */ +/* sa6 */ 1.08635005541779435134e+02, /* 405B28A3, EE48AE2C */ +/* sa7 */ 6.57024977031928170135e+00, /* 401A47EF, 8E484A93 */ +/* sa8 */ -6.04244152148580987438e-02, /* BFAEEFF2, EE749A62 */ +/* + * Coefficients for approximation to erfc in [1/.35,28] + */ +/* rb0 */ -9.86494292470009928597e-03, /* BF843412, 39E86F4A */ +/* rb1 */ -7.99283237680523006574e-01, /* BFE993BA, 70C285DE */ +/* rb2 */ -1.77579549177547519889e+01, /* C031C209, 555F995A */ +/* rb3 */ -1.60636384855821916062e+02, /* C064145D, 43C5ED98 */ +/* rb4 */ -6.37566443368389627722e+02, /* C083EC88, 1375F228 */ +/* rb5 */ -1.02509513161107724954e+03, /* C0900461, 6A2E5992 */ +/* rb6 */ -4.83519191608651397019e+02, /* C07E384E, 9BDC383F */ +/* sb1 */ 3.03380607434824582924e+01, /* 403E568B, 261D5190 */ 
+/* sb2 */ 3.25792512996573918826e+02, /* 40745CAE, 221B9F0A */ +/* sb3 */ 1.53672958608443695994e+03, /* 409802EB, 189D5118 */ +/* sb4 */ 3.19985821950859553908e+03, /* 40A8FFB7, 688C246A */ +/* sb5 */ 2.55305040643316442583e+03, /* 40A3F219, CEDF3BE6 */ +/* sb6 */ 4.74528541206955367215e+02, /* 407DA874, E79FE763 */ +/* sb7 */ -2.24409524465858183362e+01 /* C03670E2, 42712D62 */ +}; + +#define tiny xxx[0] +#define half xxx[1] +#define one xxx[2] +#define two xxx[3] +#define erx xxx[4] +/* + * Coefficients for approximation to erf on [0,0.84375] + */ +#define efx xxx[5] +#define efx8 xxx[6] +#define pp0 xxx[7] +#define pp1 xxx[8] +#define pp2 xxx[9] +#define pp3 xxx[10] +#define pp4 xxx[11] +#define qq1 xxx[12] +#define qq2 xxx[13] +#define qq3 xxx[14] +#define qq4 xxx[15] +#define qq5 xxx[16] +/* + * Coefficients for approximation to erf in [0.84375,1.25] + */ +#define pa0 xxx[17] +#define pa1 xxx[18] +#define pa2 xxx[19] +#define pa3 xxx[20] +#define pa4 xxx[21] +#define pa5 xxx[22] +#define pa6 xxx[23] +#define qa1 xxx[24] +#define qa2 xxx[25] +#define qa3 xxx[26] +#define qa4 xxx[27] +#define qa5 xxx[28] +#define qa6 xxx[29] +/* + * Coefficients for approximation to erfc in [1.25,1/0.35] + */ +#define ra0 xxx[30] +#define ra1 xxx[31] +#define ra2 xxx[32] +#define ra3 xxx[33] +#define ra4 xxx[34] +#define ra5 xxx[35] +#define ra6 xxx[36] +#define ra7 xxx[37] +#define sa1 xxx[38] +#define sa2 xxx[39] +#define sa3 xxx[40] +#define sa4 xxx[41] +#define sa5 xxx[42] +#define sa6 xxx[43] +#define sa7 xxx[44] +#define sa8 xxx[45] +/* + * Coefficients for approximation to erfc in [1/.35,28] + */ +#define rb0 xxx[46] +#define rb1 xxx[47] +#define rb2 xxx[48] +#define rb3 xxx[49] +#define rb4 xxx[50] +#define rb5 xxx[51] +#define rb6 xxx[52] +#define sb1 xxx[53] +#define sb2 xxx[54] +#define sb3 xxx[55] +#define sb4 xxx[56] +#define sb5 xxx[57] +#define sb6 xxx[58] +#define sb7 xxx[59] + +double +erf(double x) { + int hx, ix, i; + double R, S, P, Q, s, y, z, r; + + hx = 
((int *) &x)[HIWORD]; + ix = hx & 0x7fffffff; + if (ix >= 0x7ff00000) { /* erf(nan)=nan */ +#if defined(FPADD_TRAPS_INCOMPLETE_ON_NAN) + if (ix >= 0x7ff80000) /* assumes sparc-like QNaN */ + return (x); +#endif + i = ((unsigned) hx >> 31) << 1; + return ((double) (1 - i) + one / x); /* erf(+-inf)=+-1 */ + } + + if (ix < 0x3feb0000) { /* |x|<0.84375 */ + if (ix < 0x3e300000) { /* |x|<2**-28 */ + if (ix < 0x00800000) /* avoid underflow */ + return (0.125 * (8.0 * x + efx8 * x)); + return (x + efx * x); + } + z = x * x; + r = pp0 + z * (pp1 + z * (pp2 + z * (pp3 + z * pp4))); + s = one + + z *(qq1 + z * (qq2 + z * (qq3 + z * (qq4 + z * qq5)))); + y = r / s; + return (x + x * y); + } + if (ix < 0x3ff40000) { /* 0.84375 <= |x| < 1.25 */ + s = fabs(x) - one; + P = pa0 + s * (pa1 + s * (pa2 + s * (pa3 + s * (pa4 + + s * (pa5 + s * pa6))))); + Q = one + s * (qa1 + s * (qa2 + s * (qa3 + s * (qa4 + + s * (qa5 + s * qa6))))); + if (hx >= 0) + return (erx + P / Q); + else + return (-erx - P / Q); + } + if (ix >= 0x40180000) { /* inf > |x| >= 6 */ + if (hx >= 0) + return (one - tiny); + else + return (tiny - one); + } + x = fabs(x); + s = one / (x * x); + if (ix < 0x4006DB6E) { /* |x| < 1/0.35 */ + R = ra0 + s * (ra1 + s * (ra2 + s * (ra3 + s * (ra4 + + s * (ra5 + s * (ra6 + s * ra7)))))); + S = one + s * (sa1 + s * (sa2 + s * (sa3 + s * (sa4 + + s * (sa5 + s * (sa6 + s * (sa7 + s * sa8))))))); + } else { /* |x| >= 1/0.35 */ + R = rb0 + s * (rb1 + s * (rb2 + s * (rb3 + s * (rb4 + + s * (rb5 + s * rb6))))); + S = one + s * (sb1 + s * (sb2 + s * (sb3 + s * (sb4 + + s * (sb5 + s * (sb6 + s * sb7)))))); + } + z = x; + ((int *) &z)[LOWORD] = 0; + r = exp(-z * z - 0.5625) * exp((z - x) * (z + x) + R / S); + if (hx >= 0) + return (one - r / x); + else + return (r / x - one); +} + +double +erfc(double x) { + int hx, ix; + double R, S, P, Q, s, y, z, r; + + hx = ((int *) &x)[HIWORD]; + ix = hx & 0x7fffffff; + if (ix >= 0x7ff00000) { /* erfc(nan)=nan */ +#if 
defined(FPADD_TRAPS_INCOMPLETE_ON_NAN) + if (ix >= 0x7ff80000) /* assumes sparc-like QNaN */ + return (x); +#endif + /* erfc(+-inf)=0,2 */ + return ((double) (((unsigned) hx >> 31) << 1) + one / x); + } + + if (ix < 0x3feb0000) { /* |x| < 0.84375 */ + if (ix < 0x3c700000) /* |x| < 2**-56 */ + return (one - x); + z = x * x; + r = pp0 + z * (pp1 + z * (pp2 + z * (pp3 + z * pp4))); + s = one + + z * (qq1 + z * (qq2 + z * (qq3 + z * (qq4 + z * qq5)))); + y = r / s; + if (hx < 0x3fd00000) { /* x < 1/4 */ + return (one - (x + x * y)); + } else { + r = x * y; + r += (x - half); + return (half - r); + } + } + if (ix < 0x3ff40000) { /* 0.84375 <= |x| < 1.25 */ + s = fabs(x) - one; + P = pa0 + s * (pa1 + s * (pa2 + s * (pa3 + s * (pa4 + + s * (pa5 + s * pa6))))); + Q = one + s * (qa1 + s * (qa2 + s * (qa3 + s * (qa4 + + s * (qa5 + s * qa6))))); + if (hx >= 0) { + z = one - erx; + return (z - P / Q); + } else { + z = erx + P / Q; + return (one + z); + } + } + if (ix < 0x403c0000) { /* |x|<28 */ + x = fabs(x); + s = one / (x * x); + if (ix < 0x4006DB6D) { /* |x| < 1/.35 ~ 2.857143 */ + R = ra0 + s * (ra1 + s * (ra2 + s * (ra3 + s * (ra4 + + s * (ra5 + s * (ra6 + s * ra7)))))); + S = one + s * (sa1 + s * (sa2 + s * (sa3 + s * (sa4 + + s * (sa5 + s * (sa6 + s * (sa7 + s * sa8))))))); + } else { + /* |x| >= 1/.35 ~ 2.857143 */ + if (hx < 0 && ix >= 0x40180000) + return (two - tiny); /* x < -6 */ + + R = rb0 + s * (rb1 + s * (rb2 + s * (rb3 + s * (rb4 + + s * (rb5 + s * rb6))))); + S = one + s * (sb1 + s * (sb2 + s * (sb3 + s * (sb4 + + s * (sb5 + s * (sb6 + s * sb7)))))); + } + z = x; + ((int *) &z)[LOWORD] = 0; + r = exp(-z * z - 0.5625) * exp((z - x) * (z + x) + R / S); + if (hx > 0) + return (r / x); + else + return (two - r / x); + } else { + if (hx > 0) + return (tiny * tiny); + else + return (two - tiny); + } +} diff --git a/usr/src/lib/libm/common/C/exp.c b/usr/src/lib/libm/common/C/exp.c new file mode 100644 index 0000000000..9245c36c5c --- /dev/null +++ 
b/usr/src/lib/libm/common/C/exp.c @@ -0,0 +1,357 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak exp = __exp + +/* + * exp(x) + * Hybrid algorithm of Peter Tang's Table driven method (for large + * arguments) and an accurate table (for small arguments). + * Written by K.C. Ng, November 1988. + * Method (large arguments): + * 1. Argument Reduction: given the input x, find r and integer k + * and j such that + * x = (k+j/32)*(ln2) + r, |r| <= (1/64)*ln2 + * + * 2. exp(x) = 2^k * (2^(j/32) + 2^(j/32)*expm1(r)) + * a. expm1(r) is approximated by a polynomial: + * expm1(r) ~ r + t1*r^2 + t2*r^3 + ... + t5*r^6 + * Here t1 = 1/2 exactly. + * b. 2^(j/32) is represented to twice double precision + * as TBL[2j]+TBL[2j+1]. 
+ * + * Note: If divide were fast enough, we could use another approximation + * in 2.a: + * expm1(r) ~ (2r)/(2-R), R = r - r^2*(t1 + t2*r^2) + * (for the same t1 and t2 as above) + * + * Special cases: + * exp(INF) is INF, exp(NaN) is NaN; + * exp(-INF)= 0; + * for finite argument, only exp(0)=1 is exact. + * + * Accuracy: + * According to an error analysis, the error is always less than + * an ulp (unit in the last place). The largest errors observed + * are less than 0.55 ulp for normal results and less than 0.75 ulp + * for subnormal results. + * + * Misc. info. + * For IEEE double + * if x > 7.09782712893383973096e+02 then exp(x) overflow + * if x < -7.45133219101941108420e+02 then exp(x) underflow + */ + +#include "libm.h" + +static const double TBL[] = { + 1.00000000000000000000e+00, 0.00000000000000000000e+00, + 1.02189714865411662714e+00, 5.10922502897344389359e-17, + 1.04427378242741375480e+00, 8.55188970553796365958e-17, + 1.06714040067682369717e+00, -7.89985396684158212226e-17, + 1.09050773266525768967e+00, -3.04678207981247114697e-17, + 1.11438674259589243221e+00, 1.04102784568455709549e-16, + 1.13878863475669156458e+00, 8.91281267602540777782e-17, + 1.16372485877757747552e+00, 3.82920483692409349872e-17, + 1.18920711500272102690e+00, 3.98201523146564611098e-17, + 1.21524735998046895524e+00, -7.71263069268148813091e-17, + 1.24185781207348400201e+00, 4.65802759183693679123e-17, + 1.26905095719173321989e+00, 2.66793213134218609523e-18, + 1.29683955465100964055e+00, 2.53825027948883149593e-17, + 1.32523664315974132322e+00, -2.85873121003886075697e-17, + 1.35425554693689265129e+00, 7.70094837980298946162e-17, + 1.38390988196383202258e+00, -6.77051165879478628716e-17, + 1.41421356237309514547e+00, -9.66729331345291345105e-17, + 1.44518080697704665027e+00, -3.02375813499398731940e-17, + 1.47682614593949934623e+00, -3.48399455689279579579e-17, + 1.50916442759342284141e+00, -1.01645532775429503911e-16, + 1.54221082540794074411e+00, 7.94983480969762085616e-17, 
+ 1.57598084510788649659e+00, -1.01369164712783039808e-17, + 1.61049033194925428347e+00, 2.47071925697978878522e-17, + 1.64575547815396494578e+00, -1.01256799136747726038e-16, + 1.68179283050742900407e+00, 8.19901002058149652013e-17, + 1.71861929812247793414e+00, -1.85138041826311098821e-17, + 1.75625216037329945351e+00, 2.96014069544887330703e-17, + 1.79470907500310716820e+00, 1.82274584279120867698e-17, + 1.83400808640934243066e+00, 3.28310722424562658722e-17, + 1.87416763411029996256e+00, -6.12276341300414256164e-17, + 1.91520656139714740007e+00, -1.06199460561959626376e-16, + 1.95714412417540017941e+00, 8.96076779103666776760e-17, +}; + +/* + * For i = 0, ..., 66, + * TBL2[2*i] is a double precision number near (i+1)*2^-6, and + * TBL2[2*i+1] = exp(TBL2[2*i]) to within a relative error less + * than 2^-60. + * + * For i = 67, ..., 133, + * TBL2[2*i] is a double precision number near -(i+1)*2^-6, and + * TBL2[2*i+1] = exp(TBL2[2*i]) to within a relative error less + * than 2^-60. + */ +static const double TBL2[] = { + 1.56249999999984491572e-02, 1.01574770858668417262e+00, + 3.12499999999998716305e-02, 1.03174340749910253834e+00, + 4.68750000000011102230e-02, 1.04799100201663386578e+00, + 6.24999999999990632493e-02, 1.06449445891785843266e+00, + 7.81249999999999444888e-02, 1.08125780744903954300e+00, + 9.37500000000013322676e-02, 1.09828514030782731226e+00, + 1.09375000000001346145e-01, 1.11558061464248226002e+00, + 1.24999999999999417133e-01, 1.13314845306682565607e+00, + 1.40624999999995337063e-01, 1.15099294469117108264e+00, + 1.56249999999996141975e-01, 1.16911844616949989195e+00, + 1.71874999999992894573e-01, 1.18752938276309216725e+00, + 1.87500000000000888178e-01, 1.20623024942098178158e+00, + 2.03124999999361649516e-01, 1.22522561187652545556e+00, + 2.18750000000000416334e-01, 1.24452010776609567344e+00, + 2.34375000000003524958e-01, 1.26411844775347081971e+00, + 2.50000000000006328271e-01, 1.28402541668774961003e+00, + 2.65624999999982791543e-01, 
1.30424587476761533189e+00, + 2.81249999999993727240e-01, 1.32478475872885725906e+00, + 2.96875000000003275158e-01, 1.34564708304941493822e+00, + 3.12500000000002886580e-01, 1.36683794117380030819e+00, + 3.28124999999993394173e-01, 1.38836250675661765364e+00, + 3.43749999999998612221e-01, 1.41022603492570874906e+00, + 3.59374999999992450483e-01, 1.43243386356506730017e+00, + 3.74999999999991395772e-01, 1.45499141461818881638e+00, + 3.90624999999997613020e-01, 1.47790419541173490003e+00, + 4.06249999999991895372e-01, 1.50117780000011058483e+00, + 4.21874999999996613820e-01, 1.52481791053132154090e+00, + 4.37500000000004607426e-01, 1.54883029863414023453e+00, + 4.53125000000004274359e-01, 1.57322082682725961078e+00, + 4.68750000000008326673e-01, 1.59799544995064657371e+00, + 4.84374999999985456078e-01, 1.62316021661928200359e+00, + 4.99999999999997335465e-01, 1.64872127070012375327e+00, + 5.15625000000000222045e-01, 1.67468485281178436352e+00, + 5.31250000000003441691e-01, 1.70105730184840653330e+00, + 5.46874999999999111822e-01, 1.72784505652716169344e+00, + 5.62499999999999333866e-01, 1.75505465696029738787e+00, + 5.78124999999993338662e-01, 1.78269274625180318417e+00, + 5.93749999999999666933e-01, 1.81076607211938656050e+00, + 6.09375000000003441691e-01, 1.83928148854178719063e+00, + 6.24999999999995559108e-01, 1.86824595743221411048e+00, + 6.40625000000009103829e-01, 1.89766655033813602671e+00, + 6.56249999999993782751e-01, 1.92755045016753268072e+00, + 6.71875000000002109424e-01, 1.95790495294292221651e+00, + 6.87499999999992450483e-01, 1.98873746958227681780e+00, + 7.03125000000004996004e-01, 2.02005552770870666635e+00, + 7.18750000000007105427e-01, 2.05186677348799140219e+00, + 7.34375000000008770762e-01, 2.08417897349558689513e+00, + 7.49999999999983901766e-01, 2.11700001661264058939e+00, + 7.65624999999997002398e-01, 2.15033791595229351046e+00, + 7.81250000000005884182e-01, 2.18420081081563077774e+00, + 7.96874999999991451283e-01, 2.21859696867912603579e+00, 
+ 8.12500000000000000000e-01, 2.25353478721320854561e+00, + 8.28125000000008215650e-01, 2.28902279633221983346e+00, + 8.43749999999997890576e-01, 2.32506966027711614586e+00, + 8.59374999999999444888e-01, 2.36168417973090827289e+00, + 8.75000000000003219647e-01, 2.39887529396710563745e+00, + 8.90625000000013433699e-01, 2.43665208303232461162e+00, + 9.06249999999980571097e-01, 2.47502376996297712708e+00, + 9.21874999999984456878e-01, 2.51399972303748420188e+00, + 9.37500000000001887379e-01, 2.55358945806293169412e+00, + 9.53125000000003330669e-01, 2.59380264069854327147e+00, + 9.68749999999989119814e-01, 2.63464908881560244680e+00, + 9.84374999999997890576e-01, 2.67613877489447116176e+00, + 1.00000000000001154632e+00, 2.71828182845907662113e+00, + 1.01562499999999333866e+00, 2.76108853855008318234e+00, + 1.03124999999995980993e+00, 2.80456935623711389738e+00, + 1.04687499999999933387e+00, 2.84873489717039740654e+00, + -1.56249999999999514277e-02, 9.84496437005408453480e-01, + -3.12499999999955972718e-02, 9.69233234476348348707e-01, + -4.68749999999993824384e-02, 9.54206665969188905230e-01, + -6.24999999999976130205e-02, 9.39413062813478028090e-01, + -7.81249999999989314103e-02, 9.24848813216205822840e-01, + -9.37499999999995975442e-02, 9.10510361380034494161e-01, + -1.09374999999998584466e-01, 8.96394206635151680196e-01, + -1.24999999999998556710e-01, 8.82496902584596676355e-01, + -1.40624999999999361622e-01, 8.68815056262843721235e-01, + -1.56249999999999111822e-01, 8.55345327307423297647e-01, + -1.71874999999924144012e-01, 8.42084427143446223596e-01, + -1.87499999999996752598e-01, 8.29029118180403035154e-01, + -2.03124999999988037347e-01, 8.16176213022349550386e-01, + -2.18749999999995947686e-01, 8.03522573689063990265e-01, + -2.34374999999996419531e-01, 7.91065110850298847112e-01, + -2.49999999999996280753e-01, 7.78800783071407765057e-01, + -2.65624999999999888978e-01, 7.66726596070820165529e-01, + -2.81249999999989397370e-01, 7.54839601989015340777e-01, + 
-2.96874999999996114219e-01, 7.43136898668761203268e-01, + -3.12499999999999555911e-01, 7.31615628946642115871e-01, + -3.28124999999993782751e-01, 7.20272979955444259126e-01, + -3.43749999999997946087e-01, 7.09106182437399867879e-01, + -3.59374999999994337863e-01, 6.98112510068129799023e-01, + -3.74999999999994615418e-01, 6.87289278790975899369e-01, + -3.90624999999999000799e-01, 6.76633846161729612945e-01, + -4.06249999999947264406e-01, 6.66143610703522903727e-01, + -4.21874999999988453681e-01, 6.55816011271509125002e-01, + -4.37499999999999111822e-01, 6.45648526427892610613e-01, + -4.53124999999999278355e-01, 6.35638673826052436056e-01, + -4.68749999999999278355e-01, 6.25784009604591573428e-01, + -4.84374999999992894573e-01, 6.16082127790682609891e-01, + -4.99999999999998168132e-01, 6.06530659712634534486e-01, + -5.15625000000000000000e-01, 5.97127273421627413619e-01, + -5.31249999999989785948e-01, 5.87869673122352498496e-01, + -5.46874999999972688514e-01, 5.78755598612500032907e-01, + -5.62500000000000000000e-01, 5.69782824730923009859e-01, + -5.78124999999992339461e-01, 5.60949160814475100700e-01, + -5.93749999999948707696e-01, 5.52252450163048691500e-01, + -6.09374999999552580121e-01, 5.43690569513243682209e-01, + -6.24999999999984789945e-01, 5.35261428518998383375e-01, + -6.40624999999983457677e-01, 5.26962969243379708573e-01, + -6.56249999999998334665e-01, 5.18793165653890220312e-01, + -6.71874999999943378626e-01, 5.10750023129039609771e-01, + -6.87499999999997002398e-01, 5.02831577970942467104e-01, + -7.03124999999991118216e-01, 4.95035896926202978463e-01, + -7.18749999999991340260e-01, 4.87361076713623331269e-01, + -7.34374999999985678123e-01, 4.79805243559684402310e-01, + -7.49999999999997335465e-01, 4.72366552741015965911e-01, + -7.65624999999993782751e-01, 4.65043188134059204408e-01, + -7.81249999999863220523e-01, 4.57833361771676883301e-01, + -7.96874999999998112621e-01, 4.50735313406363247157e-01, + -8.12499999999990119015e-01, 
4.43747310081084256339e-01, + -8.28124999999996003197e-01, 4.36867645705559026759e-01, + -8.43749999999988120614e-01, 4.30094640640067360504e-01, + -8.59374999999994115818e-01, 4.23426641285265303871e-01, + -8.74999999999977129406e-01, 4.16862019678517936594e-01, + -8.90624999999983346655e-01, 4.10399173096376801428e-01, + -9.06249999999991784350e-01, 4.04036523663345414903e-01, + -9.21874999999994004796e-01, 3.97772517966614058693e-01, + -9.37499999999994337863e-01, 3.91605626676801210628e-01, + -9.53124999999999444888e-01, 3.85534344174578935682e-01, + -9.68749999999986677324e-01, 3.79557188183094640355e-01, + -9.84374999999992339461e-01, 3.73672699406045860648e-01, + -9.99999999999995892175e-01, 3.67879441171443832825e-01, + -1.01562499999994315658e+00, 3.62175999080846300338e-01, + -1.03124999999991096011e+00, 3.56560980663978732697e-01, + -1.04687499999999067413e+00, 3.51033015038813400732e-01, +}; + +static const double C[] = { + 0.5, + 4.61662413084468283841e+01, /* 0x40471547, 0x652b82fe */ + 2.16608493865351192653e-02, /* 0x3f962e42, 0xfee00000 */ + 5.96317165397058656257e-12, /* 0x3d9a39ef, 0x35793c76 */ + 1.6666666666526086527e-1, /* 3fc5555555548f7c */ + 4.1666666666226079285e-2, /* 3fa5555555545d4e */ + 8.3333679843421958056e-3, /* 3f811115b7aa905e */ + 1.3888949086377719040e-3, /* 3f56c1728d739765 */ + 1.0, + 0.0, + 7.09782712893383973096e+02, /* 0x40862E42, 0xFEFA39EF */ + 7.45133219101941108420e+02, /* 0x40874910, 0xD52D3051 */ + 5.55111512312578270212e-17, /* 0x3c900000, 0x00000000 */ +}; + +#define half C[0] +#define invln2_32 C[1] +#define ln2_32hi C[2] +#define ln2_32lo C[3] +#define t2 C[4] +#define t3 C[5] +#define t4 C[6] +#define t5 C[7] +#define one C[8] +#define zero C[9] +#define threshold1 C[10] +#define threshold2 C[11] +#define twom54 C[12] + +double +exp(double x) { + double y, z, t; + int hx, ix, k, j, m; + + ix = ((int *)&x)[HIWORD]; + hx = ix & ~0x80000000; + + if (hx < 0x3ff0a2b2) { /* |x| < 3/2 ln 2 */ + if (hx < 0x3f862e42) { /* 
|x| < 1/64 ln 2 */ + if (hx < 0x3ed00000) { /* |x| < 2^-18 */ + volatile int dummy; + + dummy = (int)x; /* raise inexact if x != 0 */ +#ifdef lint + dummy = dummy; +#endif + if (hx < 0x3e300000) + return (one + x); + return (one + x * (one + half * x)); + } + t = x * x; + y = x + (t * (half + x * t2) + + (t * t) * (t3 + x * t4 + t * t5)); + return (one + y); + } + + /* find the multiple of 2^-6 nearest x */ + k = hx >> 20; + j = (0x00100000 | (hx & 0x000fffff)) >> (0x40c - k); + j = (j - 1) & ~1; + if (ix < 0) + j += 134; + z = x - TBL2[j]; + t = z * z; + y = z + (t * (half + z * t2) + + (t * t) * (t3 + z * t4 + t * t5)); + return (TBL2[j+1] + TBL2[j+1] * y); + } + + if (hx >= 0x40862e42) { /* x is large, infinite, or nan */ + if (hx >= 0x7ff00000) { + if (ix == 0xfff00000 && ((int *)&x)[LOWORD] == 0) + return (zero); + return (x * x); + } + if (x > threshold1) + return (_SVID_libm_err(x, x, 6)); + if (-x > threshold2) + return (_SVID_libm_err(x, x, 7)); + } + + t = invln2_32 * x; + if (ix < 0) + t -= half; + else + t += half; + k = (int)t; + j = (k & 0x1f) << 1; + m = k >> 5; + z = (x - k * ln2_32hi) - k * ln2_32lo; + + /* z is now in primary range */ + t = z * z; + y = z + (t * (half + z * t2) + (t * t) * (t3 + z * t4 + t * t5)); + y = TBL[j] + (TBL[j+1] + TBL[j] * y); + if (m < -1021) { + ((int *)&y)[HIWORD] += (m + 54) << 20; + return (twom54 * y); + } + ((int *)&y)[HIWORD] += m << 20; + return (y); +} diff --git a/usr/src/lib/libm/common/C/exp10.c b/usr/src/lib/libm/common/C/exp10.c new file mode 100644 index 0000000000..f259ed0a9a --- /dev/null +++ b/usr/src/lib/libm/common/C/exp10.c @@ -0,0 +1,110 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak exp10 = __exp10 + +/* INDENT OFF */ +/* + * exp10(x) + * Code by K.C. Ng for SUN 4.0 libm. + * Method : + * n = nint(x*(log10/log2)); + * exp10(x) = 10**x = exp(x*ln(10)) = exp(n*ln2+(x*ln10-n*ln2)) + * = 2**n*exp(ln10*(x-n*log2/log10))) + * If x is an integer < 23 then use repeat multiplication. For + * 10**22 is the largest representable integer. + */ +/* INDENT ON */ + +#include "libm.h" + +static const double C[] = { + 3.3219280948736234787, /* log(10)/log(2) */ + 2.3025850929940456840, /* log(10) */ + 3.0102999565860955045E-1, /* log(2)/log(10) high */ + 5.3716447674669983622E-12, /* log(2)/log(10) low */ + 0.0, + 0.5, + 1.0, + 10.0, + 1.0e300, + 1.0e-300, +}; + +#define lg10 C[0] +#define ln10 C[1] +#define logt2hi C[2] +#define logt2lo C[3] +#define zero C[4] +#define half C[5] +#define one C[6] +#define ten C[7] +#define huge C[8] +#define tiny C[9] + +double +exp10(double x) { + double t, pt; + int ix, hx, k; + + ix = ((int *)&x)[HIWORD]; + hx = ix & ~0x80000000; + + if (hx >= 0x4074a000) { /* |x| >= 330 or x is nan */ + if (hx >= 0x7ff00000) { /* x is inf or nan */ + if (ix == 0xfff00000 && ((int *)&x)[LOWORD] == 0) + return (zero); + return (x * x); + } + t = (ix < 0)? 
tiny : huge; + return (t * t); + } + + if (hx < 0x3c000000) + return (one + x); + + k = (int)x; + if (0 <= k && k < 23 && (double)k == x) { + /* x is a small positive integer */ + t = one; + pt = ten; + if (k & 1) + t = ten; + k >>= 1; + while (k) { + pt *= pt; + if (k & 1) + t *= pt; + k >>= 1; + } + return (t); + } + t = x * lg10; + k = (int)((ix < 0)? t - half : t + half); + return (scalbn(exp(ln10 * ((x - k * logt2hi) - k * logt2lo)), k)); +} diff --git a/usr/src/lib/libm/common/C/exp2.c b/usr/src/lib/libm/common/C/exp2.c new file mode 100644 index 0000000000..f447890f9e --- /dev/null +++ b/usr/src/lib/libm/common/C/exp2.c @@ -0,0 +1,88 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak exp2 = __exp2 + +/* INDENT OFF */ +/* + * exp2(x) + * Code by K.C. Ng for SUN 4.0 libm. 
+ * Method : + * exp2(x) = 2**x = 2**((x-anint(x))+anint(x)) + * = 2**anint(x)*2**(x-anint(x)) + * = 2**anint(x)*exp((x-anint(x))*ln2) + */ +/* INDENT ON */ + +#include "libm.h" + +static const double C[] = { + 0.0, + 1.0, + 0.5, + 6.93147180559945286227e-01, + 1.0e300, + 1.0e-300, +}; + +#define zero C[0] +#define one C[1] +#define half C[2] +#define ln2 C[3] +#define huge C[4] +#define tiny C[5] + +double +exp2(double x) { + int ix, hx, k; + double t; + + ix = ((int *)&x)[HIWORD]; + hx = ix & ~0x80000000; + + if (hx >= 0x4090e000) { /* |x| >= 1080 or x is nan */ + if (hx >= 0x7ff00000) { /* x is inf or nan */ + if (ix == 0xfff00000 && ((int *)&x)[LOWORD] == 0) + return (zero); + return (x * x); + } + t = (ix < 0)? tiny : huge; + return (t * t); + } + + if (hx < 0x3fe00000) { /* |x| < 0.5 */ + if (hx < 0x3c000000) + return (one + x); + return (exp(ln2 * x)); + } + + k = (int)x; + if (x != (double)k) + k = (int)((ix < 0)? x - half : x + half); + return (scalbn(exp(ln2 * (x - (double)k)), k)); +} diff --git a/usr/src/lib/libm/common/C/expm1.c b/usr/src/lib/libm/common/C/expm1.c new file mode 100644 index 0000000000..fad9d55bc7 --- /dev/null +++ b/usr/src/lib/libm/common/C/expm1.c @@ -0,0 +1,266 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak expm1 = __expm1 + +/* INDENT OFF */ +/* + * expm1(x) + * Returns exp(x)-1, the exponential of x minus 1. + * + * Method + * 1. Argument reduction: + * Given x, find r and integer k such that + * + * x = k*ln2 + r, |r| <= 0.5*ln2 ~ 0.34658 + * + * Here a correction term c will be computed to compensate + * the error in r when rounded to a floating-point number. + * + * 2. Approximating expm1(r) by a special rational function on + * the interval [0,0.34658]: + * Since + * r*(exp(r)+1)/(exp(r)-1) = 2+ r^2/6 - r^4/360 + ... + * we define R1(r*r) by + * r*(exp(r)+1)/(exp(r)-1) = 2+ r^2/6 * R1(r*r) + * That is, + * R1(r**2) = 6/r *((exp(r)+1)/(exp(r)-1) - 2/r) + * = 6/r * ( 1 + 2.0*(1/(exp(r)-1) - 1/r)) + * = 1 - r^2/60 + r^4/2520 - r^6/100800 + ... + * We use a special Remez algorithm on [0,0.347] to generate + * a polynomial of degree 5 in r*r to approximate R1. The + * maximum error of this polynomial approximation is bounded + * by 2**-61.
In other words, + * R1(z) ~ 1.0 + Q1*z + Q2*z**2 + Q3*z**3 + Q4*z**4 + Q5*z**5 + * where Q1 = -1.6666666666666567384E-2, + * Q2 = 3.9682539681370365873E-4, + * Q3 = -9.9206344733435987357E-6, + * Q4 = 2.5051361420808517002E-7, + * Q5 = -6.2843505682382617102E-9; + * (where z=r*r, and the values of Q1 to Q5 are listed below) + * with error bounded by + * | 5 | -61 + * | 1.0+Q1*z+...+Q5*z - R1(z) | <= 2 + * | | + * + * expm1(r) = exp(r)-1 is then computed by the following + * specific way which minimize the accumulation rounding error: + * 2 3 + * r r [ 3 - (R1 + R1*r/2) ] + * expm1(r) = r + --- + --- * [--------------------] + * 2 2 [ 6 - r*(3 - R1*r/2) ] + * + * To compensate the error in the argument reduction, we use + * expm1(r+c) = expm1(r) + c + expm1(r)*c + * ~ expm1(r) + c + r*c + * Thus c+r*c will be added in as the correction terms for + * expm1(r+c). Now rearrange the term to avoid optimization + * screw up: + * ( 2 2 ) + * ({ ( r [ R1 - (3 - R1*r/2) ] ) } r ) + * expm1(r+c)~r - ({r*(--- * [--------------------]-c)-c} - --- ) + * ({ ( 2 [ 6 - r*(3 - R1*r/2) ] ) } 2 ) + * ( ) + * + * = r - E + * 3. Scale back to obtain expm1(x): + * From step 1, we have + * expm1(x) = either 2^k*[expm1(r)+1] - 1 + * = or 2^k*[expm1(r) + (1-2^-k)] + * 4. Implementation notes: + * (A). To save one multiplication, we scale the coefficient Qi + * to Qi*2^i, and replace z by (x^2)/2. + * (B). To achieve maximum accuracy, we compute expm1(x) by + * (i) if x < -56*ln2, return -1.0, (raise inexact if x != inf) + * (ii) if k=0, return r-E + * (iii) if k=-1, return 0.5*(r-E)-0.5 + * (iv) if k=1 if r < -0.25, return 2*((r+0.5)- E) + * else return 1.0+2.0*(r-E); + * (v) if (k<-2||k>56) return 2^k(1-(E-r)) - 1 (or exp(x)-1) + * (vi) if k <= 20, return 2^k((1-2^-k)-(E-r)), else + * (vii) return 2^k(1-((E+2^-k)-r)) + * + * Special cases: + * expm1(INF) is INF, expm1(NaN) is NaN; + * expm1(-INF) is -1, and + * for finite argument, only expm1(0)=0 is exact. 
+ * + * Accuracy: + * according to an error analysis, the error is always less than + * 1 ulp (unit in the last place). + * + * Misc. info. + * For IEEE double + * if x > 7.09782712893383973096e+02 then expm1(x) overflow + * + * Constants: + * The hexadecimal values are the intended ones for the following + * constants. The decimal values may be used, provided that the + * compiler will convert from decimal to binary accurately enough + * to produce the hexadecimal values shown. + */ +/* INDENT ON */ + +#include "libm_synonyms.h" /* __expm1 */ +#include "libm_macros.h" +#include <math.h> + +static const double xxx[] = { +/* one */ 1.0, +/* huge */ 1.0e+300, +/* tiny */ 1.0e-300, +/* o_threshold */ 7.09782712893383973096e+02, /* 40862E42 FEFA39EF */ +/* ln2_hi */ 6.93147180369123816490e-01, /* 3FE62E42 FEE00000 */ +/* ln2_lo */ 1.90821492927058770002e-10, /* 3DEA39EF 35793C76 */ +/* invln2 */ 1.44269504088896338700e+00, /* 3FF71547 652B82FE */ +/* scaled coefficients related to expm1 */ +/* Q1 */ -3.33333333333331316428e-02, /* BFA11111 111110F4 */ +/* Q2 */ 1.58730158725481460165e-03, /* 3F5A01A0 19FE5585 */ +/* Q3 */ -7.93650757867487942473e-05, /* BF14CE19 9EAADBB7 */ +/* Q4 */ 4.00821782732936239552e-06, /* 3ED0CFCA 86E65239 */ +/* Q5 */ -2.01099218183624371326e-07 /* BE8AFDB7 6E09C32D */ +}; +#define one xxx[0] +#define huge xxx[1] +#define tiny xxx[2] +#define o_threshold xxx[3] +#define ln2_hi xxx[4] +#define ln2_lo xxx[5] +#define invln2 xxx[6] +#define Q1 xxx[7] +#define Q2 xxx[8] +#define Q3 xxx[9] +#define Q4 xxx[10] +#define Q5 xxx[11] + +double +expm1(double x) { + double y, hi, lo, c = 0.0L, t, e, hxs, hfx, r1; + int k, xsb; + unsigned hx; + + hx = ((unsigned *) &x)[HIWORD]; /* high word of x */ + xsb = hx & 0x80000000; /* sign bit of x */ + if (xsb == 0) + y = x; + else + y = -x; /* y = |x| */ + hx &= 0x7fffffff; /* high word of |x| */ + + /* filter out huge and non-finite argument */ + /* for example exp(38)-1 is approximately 3.1855932e+16 */ + if 
(hx >= 0x4043687A) { + /* if |x|>=56*ln2 (~38.8162...) */ + if (hx >= 0x40862E42) { /* if |x|>=709.78... -> inf */ + if (hx >= 0x7ff00000) { + if (((hx & 0xfffff) | ((int *) &x)[LOWORD]) + != 0) + return (x * x); /* + -> * for Cheetah */ + else + /* exp(+-inf)={inf,-1} */ + return (xsb == 0 ? x : -1.0); + } + if (x > o_threshold) + return (huge * huge); /* overflow */ + } + if (xsb != 0) { /* x < -56*ln2, return -1.0 w/inexact */ + if (x + tiny < 0.0) /* raise inexact */ + return (tiny - one); /* return -1 */ + } + } + + /* argument reduction */ + if (hx > 0x3fd62e42) { /* if |x| > 0.5 ln2 */ + if (hx < 0x3FF0A2B2) { /* and |x| < 1.5 ln2 */ + if (xsb == 0) { /* positive number */ + hi = x - ln2_hi; + lo = ln2_lo; + k = 1; + } else { + /* negative number */ + hi = x + ln2_hi; + lo = -ln2_lo; + k = -1; + } + } else { + /* |x| > 1.5 ln2 */ + k = (int) (invln2 * x + (xsb == 0 ? 0.5 : -0.5)); + t = k; + hi = x - t * ln2_hi; /* t*ln2_hi is exact here */ + lo = t * ln2_lo; + } + x = hi - lo; + c = (hi - x) - lo; /* still at |x| > 0.5 ln2 */ + } else if (hx < 0x3c900000) { + /* when |x|<2**-54, return x */ + t = huge + x; /* return x w/inexact when x != 0 */ + return (x - (t - (huge + x))); + } else + /* |x| <= 0.5 ln2 */ + k = 0; + + /* x is now in primary range */ + hfx = 0.5 * x; + hxs = x * hfx; + r1 = one + hxs * (Q1 + hxs * (Q2 + hxs * (Q3 + hxs * (Q4 + hxs * Q5)))); + t = 3.0 - r1 * hfx; + e = hxs * ((r1 - t) / (6.0 - x * t)); + if (k == 0) /* |x| <= 0.5 ln2 */ + return (x - (x * e - hxs)); + else { /* |x| > 0.5 ln2 */ + e = (x * (e - c) - c); + e -= hxs; + if (k == -1) + return (0.5 * (x - e) - 0.5); + if (k == 1) { + if (x < -0.25) + return (-2.0 * (e - (x + 0.5))); + else + return (one + 2.0 * (x - e)); + } + if (k <= -2 || k > 56) { /* suffice to return exp(x)-1 */ + y = one - (e - x); + ((int *) &y)[HIWORD] += k << 20; + return (y - one); + } + t = one; + if (k < 20) { + ((int *) &t)[HIWORD] = 0x3ff00000 - (0x200000 >> k); + /* t = 1 - 2^-k */ + y = t - (e - 
x); + ((int *) &y)[HIWORD] += k << 20; + } else { + ((int *) &t)[HIWORD] = (0x3ff - k) << 20; /* 2^-k */ + y = x - (e + t); + y += one; + ((int *) &y)[HIWORD] += k << 20; + } + } + return (y); +} diff --git a/usr/src/lib/libm/common/C/fabs.c b/usr/src/lib/libm/common/C/fabs.c new file mode 100644 index 0000000000..4c3b431c7c --- /dev/null +++ b/usr/src/lib/libm/common/C/fabs.c @@ -0,0 +1,43 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma weak fabs = __fabs + +#include "libm.h" +#include "libm_synonyms.h" +#include "libm_macros.h" +#include <math.h> + +double +fabs(double x) { + int *px = (int *) &x; + + px[HIWORD] &= ~0x80000000; + return (x); +} diff --git a/usr/src/lib/libm/common/C/floor.c b/usr/src/lib/libm/common/C/floor.c new file mode 100644 index 0000000000..6c3a612c21 --- /dev/null +++ b/usr/src/lib/libm/common/C/floor.c @@ -0,0 +1,65 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak floor = __floor + +/* + * floor(x) returns the biggest integral value less than or equal to x. + * NOTE: floor(x) returns result with the same sign as x's, including 0. + * + * Modified 8/4/04 for performance. 
+ */ + +#include "libm.h" + +static const double + zero = 0.0, + one = 1.0, + two52 = 4503599627370496.0; + +double +floor(double x) { + double t, w; + int hx, lx, ix; + + hx = ((int *)&x)[HIWORD]; + lx = ((int *)&x)[LOWORD]; + ix = hx & ~0x80000000; + if (ix >= 0x43300000) /* return x if |x| >= 2^52, or x is NaN */ + return (x * one); + t = (hx >= 0)? two52 : -two52; + w = x + t; + t = w - t; + if (ix < 0x3ff00000) { + if ((ix | lx) == 0) + return (x); + else + return ((hx < 0)? -one : zero); + } + return ((t <= x)? t : t - one); +} diff --git a/usr/src/lib/libm/common/C/fmod.c b/usr/src/lib/libm/common/C/fmod.c new file mode 100644 index 0000000000..f10d027a02 --- /dev/null +++ b/usr/src/lib/libm/common/C/fmod.c @@ -0,0 +1,127 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak fmod = __fmod + +#include "libm.h" + +static const double zero = 0.0; + +/* + * The following implementation assumes fast 64-bit integer arith- + * metic. This is fine for sparc because we build libm in v8plus + * mode. 
It's also fine for sparcv9 and amd64, although we have + * assembly code on amd64. For x86, it would be better to use + * 32-bit code, but we have assembly for x86, too. + */ +double +fmod(double x, double y) { + double w; + long long hx, ix, iy, iz; + int nd, k, ny; + + hx = *(long long *)&x; + ix = hx & ~0x8000000000000000ull; + iy = *(long long *)&y & ~0x8000000000000000ull; + + /* handle special cases */ + if (iy == 0ll) + return (_SVID_libm_err(x, y, 27)); + + if (ix >= 0x7ff0000000000000ll || iy > 0x7ff0000000000000ll) + return ((x * y) * zero); + + if (ix <= iy) + return ((ix < iy)? x : x * zero); + + /* + * Set: + * ny = true exponent of y + * nd = true exponent of x minus true exponent of y + * ix = normalized significand of x + * iy = normalized significand of y + */ + ny = iy >> 52; + k = ix >> 52; + if (ny == 0) { + /* y is subnormal, x could be normal or subnormal */ + ny = 1; + while (iy < 0x0010000000000000ll) { + ny -= 1; + iy += iy; + } + nd = k - ny; + if (k == 0) { + nd += 1; + while (ix < 0x0010000000000000ll) { + nd -= 1; + ix += ix; + } + } else { + ix = 0x0010000000000000ll | (ix & 0x000fffffffffffffll); + } + } else { + /* both x and y are normal */ + nd = k - ny; + ix = 0x0010000000000000ll | (ix & 0x000fffffffffffffll); + iy = 0x0010000000000000ll | (iy & 0x000fffffffffffffll); + } + + /* perform fixed point mod */ + while (nd--) { + iz = ix - iy; + if (iz >= 0) + ix = iz; + ix += ix; + } + iz = ix - iy; + if (iz >= 0) + ix = iz; + + /* convert back to floating point and restore the sign */ + if (ix == 0ll) + return (x * zero); + while (ix < 0x0010000000000000ll) { + ix += ix; + ny -= 1; + } + while (ix > 0x0020000000000000ll) { /* XXX can this ever happen? 
*/ + ny += 1; + ix >>= 1; + } + if (ny <= 0) { + /* result is subnormal */ + k = -ny + 1; + ix >>= k; + *(long long *)&w = (hx & 0x8000000000000000ull) | ix; + return (w); + } + *(long long *)&w = (hx & 0x8000000000000000ull) | + ((long long)ny << 52) | (ix & 0x000fffffffffffffll); + return (w); +} diff --git a/usr/src/lib/libm/common/C/gamma.c b/usr/src/lib/libm/common/C/gamma.c new file mode 100644 index 0000000000..3e8073946c --- /dev/null +++ b/usr/src/lib/libm/common/C/gamma.c @@ -0,0 +1,52 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
/*
 * gamma(x): returns __k_lgamma(x, &signgam), with the exceptional
 * cases routed as follows:
 *   - x not finite: returns x * x without touching signgam;
 *   - x a non-positive integer: signgam is set to 1 and the result
 *     comes from _SVID_libm_err() with code 41;
 *   - a non-finite result from __k_lgamma(): replaced by the value of
 *     _SVID_libm_err() with code 40.
 * The sign indicator is stored in the global signgam.
 */
extern int signgam;

double
gamma(double x) {
	double nearest, result;

	if (finite(x) == 0)
		return (x * x);

	/* rint() is evaluated unconditionally, as before */
	nearest = rint(x);
	if (x <= 0.0 && nearest == x) {
		signgam = 1;
		return (_SVID_libm_err(x, x, 41));
	}

	result = __k_lgamma(x, &signgam);
	if (finite(result))
		return (result);
	return (_SVID_libm_err(x, x, 40));
}
/*
 * gamma_r(x, signgamp): forwards both arguments unchanged to
 * lgamma_r() and returns its result; whatever lgamma_r() stores
 * through signgamp is what the caller sees.
 */
double
gamma_r(double x, int *signgamp) {
	double value;

	value = lgamma_r(x, signgamp);
	return (value);
}
if x <= 2y use + * x2h*yh+((x-y)*(x-y)+(x2h*(y-yh)+(x2-x2h)*y)) + * where x2 = 2*x, x2h = 2x with lower 32 bits cleared, yh = y with + * lower 32 bits chopped. + * + * B. When rounding is not rounded-to-nearest: + * The following (magic) formula will yield an error less than 1 ulp. + * z = sqrt(x * x + y * y) + * hypot(x, y) = x + (y / ((x + z) / y)) + * + * NOTE: DO NOT remove parenthsis! + * + * Special cases: + * hypot(x, y) is INF if x or y is +INF or -INF; else + * hypot(x, y) is NAN if x or y is NAN. + * + * Accuracy: + * hypot(x, y) returns sqrt(x^2+y^2) with error less than 1 ulps + * (units in the last place) + */ + +#include "libm.h" + +static const double + zero = 0.0, + onep1u = 1.00000000000000022204e+00, /* 0x3ff00000 1 = 1+2**-52 */ + twom53 = 1.11022302462515654042e-16, /* 0x3ca00000 0 = 2**-53 */ + twom768 = 6.441148769597133308e-232, /* 2^-768 */ + two768 = 1.552518092300708935e+231; /* 2^768 */ + +/* INDENT ON */ + +double +hypot(double x, double y) { + double xh, yh, w, ax, ay; + int i, j, nx, ny, ix, iy, iscale = 0; + unsigned lx, ly; + + ix = ((int *) &x)[HIWORD] & ~0x80000000; + lx = ((int *) &x)[LOWORD]; + iy = ((int *) &y)[HIWORD] & ~0x80000000; + ly = ((int *) &y)[LOWORD]; +/* + * Force ax = |x| ~>~ ay = |y| + */ + if (iy > ix) { + ax = fabs(y); + ay = fabs(x); + i = ix; + ix = iy; + iy = i; + i = lx; + lx = ly; + ly = i; + } else { + ax = fabs(x); + ay = fabs(y); + } + nx = ix >> 20; + ny = iy >> 20; + j = nx - ny; +/* + * x >= 2^500 (x*x or y*y may overflow) + */ + if (nx >= 0x5f3) { + if (nx == 0x7ff) { /* inf or NaN, signal of sNaN */ + if (((ix - 0x7ff00000) | lx) == 0) + return (ax == ay ? ay : ax); + else if (((iy - 0x7ff00000) | ly) == 0) + return (ay == ax ? 
ax : ay); + else + return (ax * ay); /* + -> * for Cheetah */ + } else if (j > 32) { /* x >> y */ + if (j <= 53) + ay *= twom53; + ax += ay; + if (((int *) &ax)[HIWORD] == 0x7ff00000) + ax = _SVID_libm_err(x, y, 4); + return (ax); + } + ax *= twom768; + ay *= twom768; + iscale = 2; + ix -= 768 << 20; + iy -= 768 << 20; + } +/* + * y < 2^-450 (x*x or y*y may underflow) + */ + else if (ny < 0x23d) { + if ((ix | lx) == 0) + return (ay); + if ((iy | ly) == 0) + return (ax); + if (j > 53) /* x >> y */ + return (ax + ay); + iscale = 1; + ax *= two768; + ay *= two768; + if (nx == 0) { + if (ax == zero) /* guard subnormal flush to zero */ + return (ax); + ix = ((int *) &ax)[HIWORD]; + } else + ix += 768 << 20; + if (ny == 0) { + if (ay == zero) /* guard subnormal flush to zero */ + return (ax * twom768); + iy = ((int *) &ay)[HIWORD]; + } else + iy += 768 << 20; + j = (ix >> 20) - (iy >> 20); + if (j > 32) { /* x >> y */ + if (j <= 53) + ay *= twom53; + return ((ax + ay) * twom768); + } + } else if (j > 32) { /* x >> y */ + if (j <= 53) + ay *= twom53; + return (ax + ay); + } +/* + * Medium range ax and ay with max{|ax/ay|,|ay/ax|} bounded by 2^32 + * First check rounding mode by comparing onep1u*onep1u with onep1u+twom53. + * Make sure the computation is done at run-time. 
+ */ + if (((lx | ly) << 5) == 0) { + ay = ay * ay; + ax += ay / (ax + sqrt(ax * ax + ay)); + } else + if (onep1u * onep1u != onep1u + twom53) { + /* round-to-zero, positive, negative mode */ + /* magic formula with less than an ulp error */ + w = sqrt(ax * ax + ay * ay); + ax += ay / ((ax + w) / ay); + } else { + /* round-to-nearest mode */ + w = ax - ay; + if (w > ay) { + ((int *) &xh)[HIWORD] = ix; + ((int *) &xh)[LOWORD] = 0; + ay = ay * ay + (ax - xh) * (ax + xh); + ax = sqrt(xh * xh + ay); + } else { + ax = ax + ax; + ((int *) &xh)[HIWORD] = ix + 0x00100000; + ((int *) &xh)[LOWORD] = 0; + ((int *) &yh)[HIWORD] = iy; + ((int *) &yh)[LOWORD] = 0; + ay = w * w + ((ax - xh) * yh + (ay - yh) * ax); + ax = sqrt(xh * yh + ay); + } + } + if (iscale > 0) { + if (iscale == 1) + ax *= twom768; + else { + ax *= two768; /* must generate side effect here */ + if (((int *) &ax)[HIWORD] == 0x7ff00000) + ax = _SVID_libm_err(x, y, 4); + } + } + return (ax); +} diff --git a/usr/src/lib/libm/common/C/ilogb.c b/usr/src/lib/libm/common/C/ilogb.c new file mode 100644 index 0000000000..e302bc74a6 --- /dev/null +++ b/usr/src/lib/libm/common/C/ilogb.c @@ -0,0 +1,94 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#if defined(ELFOBJ) +#pragma weak ilogb = __ilogb +#endif + +#include "libm.h" +#include "xpg6.h" /* __xpg6 */ + +#if defined(USE_FPSCALE) || defined(__x86) +static const double two52 = 4503599627370496.0; +#else +/* + * v: high part of a non-zero subnormal |x|; w: low part of |x| + */ +static int +ilogb_subnormal(unsigned v, unsigned w) { + int r = -1022 - 52; + + if (v) + r += 32; + else + v = w; + if (v & 0xffff0000) + r += 16, v >>= 16; + if (v & 0xff00) + r += 8, v >>= 8; + if (v & 0xf0) + r += 4, v >>= 4; + v <<= 1; + return (r + ((0xffffaa50 >> v) & 0x3)); +} +#endif /* defined(USE_FPSCALE) */ + +static int +raise_invalid(int v) { /* SUSv3 requires ilogb(0,+/-Inf,NaN) raise invalid */ +#ifndef lint + if ((__xpg6 & _C99SUSv3_ilogb_0InfNaN_raises_invalid) != 0) { + static const double zero = 0.0; + volatile double dummy; + + dummy = zero / zero; + } +#endif + return (v); +} + +int +ilogb(double x) { + int *px = (int *) &x, k = px[HIWORD] & ~0x80000000; + + if (k < 0x00100000) { + if ((px[LOWORD] | k) == 0) + return (raise_invalid(0x80000001)); + else { +#if defined(USE_FPSCALE) || defined(__x86) + x *= two52; + return (((px[HIWORD] & 0x7ff00000) >> 20) - 1075); +#else + return (ilogb_subnormal(k, px[LOWORD])); +#endif + } + } else if (k < 0x7ff00000) + return ((k >> 20) - 1023); + else + return (raise_invalid(0x7fffffff)); +} diff --git a/usr/src/lib/libm/common/C/isnan.c b/usr/src/lib/libm/common/C/isnan.c new file mode 100644 index 0000000000..5df577c863 --- /dev/null +++ b/usr/src/lib/libm/common/C/isnan.c @@ -0,0 +1,48 @@ +/* + * CDDL 
/*
 * __isnan(x): returns 1 when x is a NaN and 0 otherwise.
 *
 * A double is a NaN exactly when its bit image, with the sign bit
 * cleared, is greater than the image of +Inf (0x7ff0000000000000).
 *
 * The following implementation assumes fast 64-bit integer arith-
 * metic.  The image is fetched through a union; the previous
 * *(long long *)&x cast read the double through an incompatible
 * lvalue, which ISO C leaves undefined.
 */
int
__isnan(double x) {
	union {
		double d;
		unsigned long long u;
	} bits;

	bits.d = x;
	return ((bits.u & 0x7fffffffffffffffull) > 0x7ff0000000000000ull);
}
+ * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * Floating point Bessel's function of the first and second kinds + * of order zero: j0(x),y0(x); + * + * Special cases: + * y0(0)=y1(0)=yn(n,0) = -inf with division by zero signal; + * y0(-ve)=y1(-ve)=yn(n,-ve) are NaN with invalid signal. 
+ */ + +#pragma weak j0 = __j0 +#pragma weak y0 = __y0 + +#include "libm.h" +#include "libm_synonyms.h" +#include "libm_protos.h" +#include <math.h> +#include <values.h> + +#define GENERIC double +static const GENERIC +zero = 0.0, +small = 1.0e-5, +tiny = 1.0e-18, +one = 1.0, +eight = 8.0, +invsqrtpi = 5.641895835477562869480794515607725858441e-0001, +tpi = 0.636619772367581343075535053490057448; + +static GENERIC pzero(GENERIC), qzero(GENERIC); +static const GENERIC r0[4] = { /* [1.e-5, 1.28] */ + -2.500000000000003622131880894830476755537e-0001, + 1.095597547334830263234433855932375353303e-0002, + -1.819734750463320921799187258987098087697e-0004, + 9.977001946806131657544212501069893930846e-0007, +}; +static const GENERIC s0[4] = { /* [1.e-5, 1.28] */ + 1.0, + 1.867609810662950169966782360588199673741e-0002, + 1.590389206181565490878430827706972074208e-0004, + 6.520867386742583632375520147714499522721e-0007, +}; +static const GENERIC r1[9] = { /* [1.28,8] */ + 9.999999999999999942156495584397047660949e-0001, + -2.389887722731319130476839836908143731281e-0001, + 1.293359476138939027791270393439493640570e-0002, + -2.770985642343140122168852400228563364082e-0004, + 2.905241575772067678086738389169625218912e-0006, + -1.636846356264052597969042009265043251279e-0008, + 5.072306160724884775085431059052611737827e-0011, + -8.187060730684066824228914775146536139112e-0014, + 5.422219326959949863954297860723723423842e-0017, +}; +static const GENERIC s1[9] = { /* [1.28,8] */ + 1.0, + 1.101122772686807702762104741932076228349e-0002, + 6.140169310641649223411427764669143978228e-0005, + 2.292035877515152097976946119293215705250e-0007, + 6.356910426504644334558832036362219583789e-0010, + 1.366626326900219555045096999553948891401e-0012, + 2.280399586866739522891837985560481180088e-0015, + 2.801559820648939665270492520004836611187e-0018, + 2.073101088320349159764410261466350732968e-0021, +}; + +GENERIC +j0(GENERIC x) { + GENERIC z, s, c, ss, cc, r, u, v, ox; + int i; + + if 
(isnan(x)) + return (x*x); /* + -> * for Cheetah */ + ox = x; + x = fabs(x); + if (x > 8.0) { + if (!finite(x)) + return (zero); + s = sin(x); + c = cos(x); + /* + * j0(x) = sqrt(2/(pi*x))*(p0(x)*cos(x0)-q0(x)*sin(x0)) + * where x0 = x-pi/4 + * Better formula: + * cos(x0) = cos(x)cos(pi/4)+sin(x)sin(pi/4) + * = 1/sqrt(2) * (cos(x) + sin(x)) + * sin(x0) = sin(x)cos(pi/4)-cos(x)sin(pi/4) + * = 1/sqrt(2) * (sin(x) - cos(x)) + * To avoid cancellation, use + * sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x)) + * to compute the worse one. + */ + if (x > 8.9e307) { /* x+x may overflow */ + ss = s-c; + cc = s+c; + } else if (signbit(s) != signbit(c)) { + ss = s - c; + cc = -cos(x+x)/ss; + } else { + cc = s + c; + ss = -cos(x+x)/cc; + } + /* + * j0(x) = 1/sqrt(pi) * (P(0,x)*cc - Q(0,x)*ss) / sqrt(x) + * y0(x) = 1/sqrt(pi) * (P(0,x)*ss + Q(0,x)*cc) / sqrt(x) + */ + if (x > 1.0e40) z = (invsqrtpi*cc)/sqrt(x); + else { + u = pzero(x); v = qzero(x); + z = invsqrtpi*(u*cc-v*ss)/sqrt(x); + } + /* force to pass SVR4 even the result is wrong (sign) */ + if (x > X_TLOSS) + return (_SVID_libm_err(ox, z, 34)); + else + return (z); + } + if (x <= small) { + if (x <= tiny) + return (one-x); + else + return (one-x*x*0.25); + } + z = x*x; + if (x <= 1.28) { + r = r0[0]+z*(r0[1]+z*(r0[2]+z*r0[3])); + s = s0[0]+z*(s0[1]+z*(s0[2]+z*s0[3])); + return (one + z*(r/s)); + } else { + for (r = r1[8], s = s1[8], i = 7; i >= 0; i--) { + r = r*z + r1[i]; + s = s*z + s1[i]; + } + return (r/s); + } +} + +static const GENERIC u0[13] = { + -7.380429510868722526754723020704317641941e-0002, + 1.772607102684869924301459663049874294814e-0001, + -1.524370666542713828604078090970799356306e-0002, + 4.650819100693891757143771557629924591915e-0004, + -7.125768872339528975036316108718239946022e-0006, + 6.411017001656104598327565004771515257146e-0008, + -3.694275157433032553021246812379258781665e-0010, + 1.434364544206266624252820889648445263842e-0012, + -3.852064731859936455895036286874139896861e-0015, + 
7.182052899726138381739945881914874579696e-0018, + -9.060556574619677567323741194079797987200e-0021, + 7.124435467408860515265552217131230511455e-0024, + -2.709726774636397615328813121715432044771e-0027, +}; +static const GENERIC v0[5] = { + 1.0, + 4.678678931512549002587702477349214886475e-0003, + 9.486828955529948534822800829497565178985e-0006, + 1.001495929158861646659010844136682454906e-0008, + 4.725338116256021660204443235685358593611e-0012, +}; + +GENERIC +y0(GENERIC x) { + GENERIC z, /* d, */ s, c, ss, cc, u, v; + int i; + + if (isnan(x)) + return (x*x); /* + -> * for Cheetah */ + if (x <= zero) { + if (x == zero) + /* d= -one/(x-x); */ + return (_SVID_libm_err(x, x, 8)); + else + /* d = zero/(x-x); */ + return (_SVID_libm_err(x, x, 9)); + } + if (x > 8.0) { + if (!finite(x)) + return (zero); + s = sin(x); + c = cos(x); + /* + * j0(x) = sqrt(2/(pi*x))*(p0(x)*cos(x0)-q0(x)*sin(x0)) + * where x0 = x-pi/4 + * Better formula: + * cos(x0) = cos(x)cos(pi/4)+sin(x)sin(pi/4) + * = 1/sqrt(2) * (cos(x) + sin(x)) + * sin(x0) = sin(x)cos(pi/4)-cos(x)sin(pi/4) + * = 1/sqrt(2) * (sin(x) - cos(x)) + * To avoid cancellation, use + * sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x)) + * to compute the worse one. 
+ */ + if (x > 8.9e307) { /* x+x may overflow */ + ss = s-c; + cc = s+c; + } else if (signbit(s) != signbit(c)) { + ss = s - c; + cc = -cos(x+x)/ss; + } else { + cc = s + c; + ss = -cos(x+x)/cc; + } + /* + * j0(x) = 1/sqrt(pi*x) * (P(0,x)*cc - Q(0,x)*ss) + * y0(x) = 1/sqrt(pi*x) * (P(0,x)*ss + Q(0,x)*cc) + */ + if (x > 1.0e40) + z = (invsqrtpi*ss)/sqrt(x); + else + z = invsqrtpi*(pzero(x)*ss+qzero(x)*cc)/sqrt(x); + if (x > X_TLOSS) + return (_SVID_libm_err(x, z, 35)); + else + return (z); + + } + if (x <= tiny) { + return (u0[0] + tpi*log(x)); + } + z = x*x; + for (u = u0[12], i = 11; i >= 0; i--) u = u*z + u0[i]; + v = v0[0]+z*(v0[1]+z*(v0[2]+z*(v0[3]+z*v0[4]))); + return (u/v + tpi*(j0(x)*log(x))); +} + +static const GENERIC pr[7] = { /* [8 -- inf] pzero 6550 */ + .4861344183386052721391238447e5, + .1377662549407112278133438945e6, + .1222466364088289731869114004e6, + .4107070084315176135583353374e5, + .5026073801860637125889039915e4, + .1783193659125479654541542419e3, + .88010344055383421691677564e0, +}; +static const GENERIC ps[7] = { /* [8 -- inf] pzero 6550 */ + .4861344183386052721414037058e5, + .1378196632630384670477582699e6, + .1223967185341006542748936787e6, + .4120150243795353639995862617e5, + .5068271181053546392490184353e4, + .1829817905472769960535671664e3, + 1.0, +}; +static const GENERIC huge = 1.0e10; + +static GENERIC +pzero(GENERIC x) { + GENERIC s, r, t, z; + int i; + if (x > huge) + return (one); + t = eight/x; z = t*t; + r = pr[5]+z*pr[6]; + s = ps[5]+z; + for (i = 4; i >= 0; i--) { + r = r*z + pr[i]; + s = s*z + ps[i]; + } + return (r/s); +} + +static const GENERIC qr[7] = { /* [8 -- inf] qzero 6950 */ + -.1731210995701068539185611951e3, + -.5522559165936166961235240613e3, + -.5604935606637346590614529613e3, + -.2200430300226009379477365011e3, + -.323869355375648849771296746e2, + -.14294979207907956223499258e1, + -.834690374102384988158918e-2, +}; +static const GENERIC qs[7] = { /* [8 -- inf] qzero 6950 */ + .1107975037248683865326709645e5, + 
.3544581680627082674651471873e5, + .3619118937918394132179019059e5, + .1439895563565398007471485822e5, + .2190277023344363955930226234e4, + .106695157020407986137501682e3, + 1.0, +}; + +static GENERIC +qzero(GENERIC x) { + GENERIC s, r, t, z; + int i; + if (x > huge) + return (-0.125/x); + t = eight/x; z = t*t; + r = qr[5]+z*qr[6]; + s = qs[5]+z; + for (i = 4; i >= 0; i--) { + r = r*z + qr[i]; + s = s*z + qs[i]; + } + return (t*(r/s)); +} diff --git a/usr/src/lib/libm/common/C/j1.c b/usr/src/lib/libm/common/C/j1.c new file mode 100644 index 0000000000..ba85f028bd --- /dev/null +++ b/usr/src/lib/libm/common/C/j1.c @@ -0,0 +1,352 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * floating point Bessel's function of the first and second kinds + * of order zero: j1(x),y1(x); + * + * Special cases: + * y0(0)=y1(0)=yn(n,0) = -inf with division by zero signal; + * y0(-ve)=y1(-ve)=yn(n,-ve) are NaN with invalid signal. 
+ */ + +#pragma weak j1 = __j1 +#pragma weak y1 = __y1 + +#include "libm.h" +#include "libm_synonyms.h" +#include "libm_protos.h" +#include <math.h> +#include <values.h> + +#define GENERIC double +static const GENERIC +zero = 0.0, +small = 1.0e-5, +tiny = 1.0e-20, +one = 1.0, +invsqrtpi = 5.641895835477562869480794515607725858441e-0001, +tpi = 0.636619772367581343075535053490057448; + +static GENERIC pone(GENERIC), qone(GENERIC); +static const GENERIC r0[4] = { + -6.250000000000002203053200981413218949548e-0002, + 1.600998455640072901321605101981501263762e-0003, + -1.963888815948313758552511884390162864930e-0005, + 8.263917341093549759781339713418201620998e-0008, +}; +static const GENERIC s0[7] = { + 1.0e0, + 1.605069137643004242395356851797873766927e-0002, + 1.149454623251299996428500249509098499383e-0004, + 3.849701673735260970379681807910852327825e-0007, +}; +static const GENERIC r1[12] = { + 4.999999999999999995517408894340485471724e-0001, + -6.003825028120475684835384519945468075423e-0002, + 2.301719899263321828388344461995355419832e-0003, + -4.208494869238892934859525221654040304068e-0005, + 4.377745135188837783031540029700282443388e-0007, + -2.854106755678624335145364226735677754179e-0009, + 1.234002865443952024332943901323798413689e-0011, + -3.645498437039791058951273508838177134310e-0014, + 7.404320596071797459925377103787837414422e-0017, + -1.009457448277522275262808398517024439084e-0019, + 8.520158355824819796968771418801019930585e-0023, + -3.458159926081163274483854614601091361424e-0026, +}; +static const GENERIC s1[5] = { + 1.0e0, + 4.923499437590484879081138588998986303306e-0003, + 1.054389489212184156499666953501976688452e-0005, + 1.180768373106166527048240364872043816050e-0008, + 5.942665743476099355323245707680648588540e-0012, +}; + +GENERIC +j1(GENERIC x) { + GENERIC z, d, s, c, ss, cc, r; + int i, sgn; + + if (!finite(x)) + return (one/x); + sgn = signbit(x); + x = fabs(x); + if (x > 8.00) { + s = sin(x); + c = cos(x); + /* + * j1(x) = 
sqrt(2/(pi*x))*(p1(x)*cos(x0)-q1(x)*sin(x0)) + * where x0 = x-3pi/4 + * Better formula: + * cos(x0) = cos(x)cos(3pi/4)+sin(x)sin(3pi/4) + * = 1/sqrt(2) * (sin(x) - cos(x)) + * sin(x0) = sin(x)cos(3pi/4)-cos(x)sin(3pi/4) + * = -1/sqrt(2) * (cos(x) + sin(x)) + * To avoid cancellation, use + * sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x)) + * to compute the worse one. + */ + if (x > 8.9e307) { /* x+x may overflow */ + ss = -s-c; + cc = s-c; + } else if (signbit(s) != signbit(c)) { + cc = s - c; + ss = cos(x+x)/cc; + } else { + ss = -s-c; + cc = cos(x+x)/ss; + } + /* + * j1(x) = 1/sqrt(pi*x) * (P(1,x)*cc - Q(1,x)*ss) + * y1(x) = 1/sqrt(pi*x) * (P(1,x)*ss + Q(1,x)*cc) + */ + if (x > 1.0e40) + d = (invsqrtpi*cc)/sqrt(x); + else + d = invsqrtpi*(pone(x)*cc-qone(x)*ss)/sqrt(x); + + if (x > X_TLOSS) { + if (sgn != 0) { d = -d; x = -x; } + return (_SVID_libm_err(x, d, 36)); + } else + if (sgn == 0) + return (d); + else + return (-d); + } + if (x <= small) { + if (x <= tiny) + d = 0.5*x; + else + d = x*(0.5-x*x*0.125); + if (sgn == 0) + return (d); + else + return (-d); + } + z = x*x; + if (x < 1.28) { + r = r0[3]; + s = s0[3]; + for (i = 2; i >= 0; i--) { + r = r*z + r0[i]; + s = s*z + s0[i]; + } + d = x*0.5+x*(z*(r/s)); + } else { + r = r1[11]; + for (i = 10; i >= 0; i--) r = r*z + r1[i]; + s = s1[0]+z*(s1[1]+z*(s1[2]+z*(s1[3]+z*s1[4]))); + d = x*(r/s); + } + if (sgn == 0) + return (d); + else + return (-d); +} + +static const GENERIC u0[4] = { + -1.960570906462389461018983259589655961560e-0001, + 4.931824118350661953459180060007970291139e-0002, + -1.626975871565393656845930125424683008677e-0003, + 1.359657517926394132692884168082224258360e-0005, +}; +static const GENERIC v0[5] = { + 1.0e0, + 2.565807214838390835108224713630901653793e-0002, + 3.374175208978404268650522752520906231508e-0004, + 2.840368571306070719539936935220728843177e-0006, + 1.396387402048998277638900944415752207592e-0008, +}; +static const GENERIC u1[12] = { + 
-1.960570906462389473336339614647555351626e-0001, + 5.336268030335074494231369159933012844735e-0002, + -2.684137504382748094149184541866332033280e-0003, + 5.737671618979185736981543498580051903060e-0005, + -6.642696350686335339171171785557663224892e-0007, + 4.692417922568160354012347591960362101664e-0009, + -2.161728635907789319335231338621412258355e-0011, + 6.727353419738316107197644431844194668702e-0014, + -1.427502986803861372125234355906790573422e-0016, + 2.020392498726806769468143219616642940371e-0019, + -1.761371948595104156753045457888272716340e-0022, + 7.352828391941157905175042420249225115816e-0026, +}; +static const GENERIC v1[5] = { + 1.0e0, + 5.029187436727947764916247076102283399442e-0003, + 1.102693095808242775074856548927801750627e-0005, + 1.268035774543174837829534603830227216291e-0008, + 6.579416271766610825192542295821308730206e-0012, +}; + + +GENERIC +y1(GENERIC x) { + GENERIC z, d, s, c, ss, cc, u, v; + int i; + + if (isnan(x)) + return (x*x); /* + -> * for Cheetah */ + if (x <= zero) { + if (x == zero) + /* return -one/zero; */ + return (_SVID_libm_err(x, x, 10)); + else + /* return zero/zero; */ + return (_SVID_libm_err(x, x, 11)); + } + if (x > 8.0) { + if (!finite(x)) + return (zero); + s = sin(x); + c = cos(x); + /* + * j1(x) = sqrt(2/(pi*x))*(p1(x)*cos(x0)-q1(x)*sin(x0)) + * where x0 = x-3pi/4 + * Better formula: + * cos(x0) = cos(x)cos(3pi/4)+sin(x)sin(3pi/4) + * = 1/sqrt(2) * (sin(x) - cos(x)) + * sin(x0) = sin(x)cos(3pi/4)-cos(x)sin(3pi/4) + * = -1/sqrt(2) * (cos(x) + sin(x)) + * To avoid cancellation, use + * sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x)) + * to compute the worse one. 
+ */ + if (x > 8.9e307) { /* x+x may overflow */ + ss = -s-c; + cc = s-c; + } else if (signbit(s) != signbit(c)) { + cc = s - c; + ss = cos(x+x)/cc; + } else { + ss = -s-c; + cc = cos(x+x)/ss; + } + /* + * j1(x) = 1/sqrt(pi*x) * (P(1,x)*cc - Q(1,x)*ss) + * y1(x) = 1/sqrt(pi*x) * (P(1,x)*ss + Q(1,x)*cc) + */ + if (x > 1.0e91) + d = (invsqrtpi*ss)/sqrt(x); + else + d = invsqrtpi*(pone(x)*ss+qone(x)*cc)/sqrt(x); + + if (x > X_TLOSS) + return (_SVID_libm_err(x, d, 37)); + else + return (d); + } + if (x <= tiny) { + return (-tpi/x); + } + z = x*x; + if (x < 1.28) { + u = u0[3]; v = v0[3]+z*v0[4]; + for (i = 2; i >= 0; i--) { + u = u*z + u0[i]; + v = v*z + v0[i]; + } + } else { + for (u = u1[11], i = 10; i >= 0; i--) u = u*z+u1[i]; + v = v1[0]+z*(v1[1]+z*(v1[2]+z*(v1[3]+z*v1[4]))); + } + return (x*(u/v) + tpi*(j1(x)*log(x)-one/x)); +} + +static const GENERIC pr0[6] = { + -.4435757816794127857114720794e7, + -.9942246505077641195658377899e7, + -.6603373248364939109255245434e7, + -.1523529351181137383255105722e7, + -.1098240554345934672737413139e6, + -.1611616644324610116477412898e4, +}; +static const GENERIC ps0[6] = { + -.4435757816794127856828016962e7, + -.9934124389934585658967556309e7, + -.6585339479723087072826915069e7, + -.1511809506634160881644546358e7, + -.1072638599110382011903063867e6, + -.1455009440190496182453565068e4, +}; +static const GENERIC huge = 1.0e10; + +static GENERIC +pone(GENERIC x) { + GENERIC s, r, t, z; + int i; + /* assume x > 8 */ + if (x > huge) + return (one); + + t = 8.0/x; z = t*t; + r = pr0[5]; s = ps0[5]+z; + for (i = 4; i >= 0; i--) { + r = z*r + pr0[i]; + s = z*s + ps0[i]; + } + return (r/s); +} + + +static const GENERIC qr0[6] = { + 0.3322091340985722351859704442e5, + 0.8514516067533570196555001171e5, + 0.6617883658127083517939992166e5, + 0.1849426287322386679652009819e5, + 0.1706375429020768002061283546e4, + 0.3526513384663603218592175580e2, +}; +static const GENERIC qs0[6] = { + 0.7087128194102874357377502472e6, + 
0.1819458042243997298924553839e7, + 0.1419460669603720892855755253e7, + 0.4002944358226697511708610813e6, + 0.3789022974577220264142952256e5, + 0.8638367769604990967475517183e3, +}; + +static GENERIC +qone(GENERIC x) { + GENERIC s, r, t, z; + int i; + if (x > huge) + return (0.375/x); + + t = 8.0/x; z = t*t; + /* assume x > 8 */ + r = qr0[5]; s = qs0[5]+z; + for (i = 4; i >= 0; i--) { + r = z*r + qr0[i]; + s = z*s + qs0[i]; + } + return (t*(r/s)); +} diff --git a/usr/src/lib/libm/common/C/jn.c b/usr/src/lib/libm/common/C/jn.c new file mode 100644 index 0000000000..b8d507dd59 --- /dev/null +++ b/usr/src/lib/libm/common/C/jn.c @@ -0,0 +1,306 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak jn = __jn +#pragma weak yn = __yn + +/* + * floating point Bessel's function of the 1st and 2nd kind + * of order n: jn(n,x),yn(n,x); + * + * Special cases: + * y0(0)=y1(0)=yn(n,0) = -inf with division by zero signal; + * y0(-ve)=y1(-ve)=yn(n,-ve) are NaN with invalid signal. + * Note 2. 
About jn(n,x), yn(n,x) + * For n=0, j0(x) is called, + * for n=1, j1(x) is called, + * for n<x, forward recursion is used starting + * from values of j0(x) and j1(x). + * for n>x, a continued fraction approximation to + * j(n,x)/j(n-1,x) is evaluated and then backward + * recursion is used starting from a supposed value + * for j(n,x). The resulting value of j(0,x) is + * compared with the actual value to correct the + * supposed value of j(n,x). + * + * yn(n,x) is similar in all respects, except + * that forward recursion is used for all + * values of n>1. + * + */ + +#include "libm.h" +#include <float.h> /* DBL_MAX */ +#include <values.h> /* X_TLOSS */ +#include "xpg6.h" /* __xpg6 */ + +#define GENERIC double + +static const GENERIC + invsqrtpi = 5.641895835477562869480794515607725858441e-0001, + two = 2.0, + zero = 0.0, + one = 1.0; +/* jn(n, x): Bessel function of the first kind of integer order n; n < 0 is folded onto n >= 0 through J(-n,x) = J(n,-x), a NaN x is propagated, and for odd n the result carries the sign of x. */ +GENERIC +jn(int n, GENERIC x) { + int i, sgn; + GENERIC a, b, temp = 0; + GENERIC z, w, ox, on; + + /* + * J(-n,x) = (-1)^n * J(n, x), J(n, -x) = (-1)^n * J(n, x) + * Thus, J(-n,x) = J(n,-x) + */ + ox = x; on = (GENERIC)n; + if (n < 0) { + n = -n; + x = -x; + } + if (isnan(x)) + return (x*x); /* + -> * for Cheetah */ + if (!((int) _lib_version == libm_ieee || + (__xpg6 & _C99SUSv3_math_errexcept) != 0)) { + if (fabs(x) > X_TLOSS) + return (_SVID_libm_err(on, ox, 38)); + } + if (n == 0) + return (j0(x)); + if (n == 1) + return (j1(x)); + if ((n&1) == 0) + sgn = 0; /* even n */ + else + sgn = signbit(x); /* odd n: nonzero when x is negative */ + x = fabs(x); + if (x == zero||!finite(x)) b = zero; + else if ((GENERIC)n <= x) { + /* + * Safe to use + * J(n+1,x)=2n/x *J(n,x)-J(n-1,x) + */ + if (x > 1.0e91) { + /* + * x >> n**2 + * Jn(x) = cos(x-(2n+1)*pi/4)*sqrt(2/(x*pi)) + * Yn(x) = sin(x-(2n+1)*pi/4)*sqrt(2/(x*pi)) + * Let s=sin(x), c=cos(x), + * xn=x-(2n+1)*pi/4, sqt2 = sqrt(2),then + * + * n sin(xn)*sqt2 cos(xn)*sqt2 + * ---------------------------------- + * 0 s-c c+s + * 1 -s-c -c+s + * 2 -s+c -c-s + * 3 s+c c-s + */ + switch (n&3) { + case 0: temp = 
cos(x)+sin(x); break; + case 1: temp = -cos(x)+sin(x); break; + case 2: temp = -cos(x)-sin(x); break; + case 3: temp = cos(x)-sin(x); break; + } + b = invsqrtpi*temp/sqrt(x); + } else { + a = j0(x); + b = j1(x); + for (i = 1; i < n; i++) { + temp = b; + b = b*((GENERIC)(i+i)/x) - a; /* avoid underflow */ + a = temp; + } + } + } else { + if (x < 1e-9) { /* use J(n,x) = 1/n!*(x/2)^n */ + b = pow(0.5*x, (GENERIC) n); + if (b != zero) { + for (a = one, i = 1; i <= n; i++) a *= (GENERIC)i; + b = b/a; + } + } else { + /* + * use backward recurrence + * x x^2 x^2 + * J(n,x)/J(n-1,x) = ---- ------ ------ ..... + * 2n - 2(n+1) - 2(n+2) + * + * 1 1 1 + * (for large x) = ---- ------ ------ ..... + * 2n 2(n+1) 2(n+2) + * -- - ------ - ------ - + * x x x + * + * Let w = 2n/x and h = 2/x, then the above quotient + * is equal to the continued fraction: + * 1 + * = ----------------------- + * 1 + * w - ----------------- + * 1 + * w+h - --------- + * w+2h - ... + * + * To determine how many terms needed, let + * Q(0) = w, Q(1) = w(w+h) - 1, + * Q(k) = (w+k*h)*Q(k-1) - Q(k-2), + * When Q(k) > 1e4 good for single + * When Q(k) > 1e9 good for double + * When Q(k) > 1e17 good for quadruple + */ + /* determine k */ + GENERIC t, v; + double q0, q1, h, tmp; int k, m; + w = (n+n)/(double)x; h = 2.0/(double)x; + q0 = w; z = w + h; q1 = w*z - 1.0; k = 1; + while (q1 < 1.0e9) { + k += 1; z += h; + tmp = z*q1 - q0; + q0 = q1; + q1 = tmp; + } + m = n+n; + for (t = zero, i = 2*(n+k); i >= m; i -= 2) t = one/(i/x-t); + a = t; + b = one; + /* + * estimate log((2/x)^n*n!) = n*log(2/x)+n*ln(n) + * hence, if n*(log(2n/x)) > ... 
+ * single 8.8722839355e+01 + * double 7.09782712893383973096e+02 + * long double 1.1356523406294143949491931077970765006170e+04 + * then recurrent value may overflow and the result is + * likely underflow to zero + */ + tmp = n; + v = two/x; + tmp = tmp*log(fabs(v*tmp)); + if (tmp < 7.09782712893383973096e+02) { + for (i = n-1; i > 0; i--) { + temp = b; + b = ((i+i)/x)*b - a; + a = temp; + } + } else { + for (i = n-1; i > 0; i--) { + temp = b; + b = ((i+i)/x)*b - a; + a = temp; + if (b > 1e100) { + a /= b; + t /= b; + b = 1.0; + } + } + } + b = (t*j0(x)/b); + } + } + if (sgn != 0) /* signbit() only promises a nonzero value, not 1 */ + return (-b); + else + return (b); +} +/* yn(n, x): Bessel function of the second kind of integer order n; x == 0 and x < 0 are reported through _SVID_libm_err (codes 12 and 13), a NaN x is propagated, and n < 0 uses Y(-n,x) = (-1)^n Y(n,x). */ +GENERIC +yn(int n, GENERIC x) { + int i; + int sign; + GENERIC a, b, temp = 0, ox, on; + + ox = x; on = (GENERIC)n; + if (isnan(x)) + return (x*x); /* + -> * for Cheetah */ + if (x <= zero) { + if (x == zero) { + /* return -one/zero; */ + return (_SVID_libm_err((GENERIC)n, x, 12)); + } else { + /* return zero/zero; */ + return (_SVID_libm_err((GENERIC)n, x, 13)); + } + } + if (!((int) _lib_version == libm_ieee || + (__xpg6 & _C99SUSv3_math_errexcept) != 0)) { + if (x > X_TLOSS) + return (_SVID_libm_err(on, ox, 39)); + } + sign = 1; + if (n < 0) { + n = -n; + if ((n&1) == 1) sign = -1; + } + if (n == 0) + return (y0(x)); + if (n == 1) + return (sign*y1(x)); + if (!finite(x)) + return (zero); + + if (x > 1.0e91) { + /* + * x >> n**2 + * Jn(x) = cos(x-(2n+1)*pi/4)*sqrt(2/(x*pi)) + * Yn(x) = sin(x-(2n+1)*pi/4)*sqrt(2/(x*pi)) + * Let s = sin(x), c = cos(x), + * xn = x-(2n+1)*pi/4, sqt2 = sqrt(2), then + * + * n sin(xn)*sqt2 cos(xn)*sqt2 + * ---------------------------------- + * 0 s-c c+s + * 1 -s-c -c+s + * 2 -s+c -c-s + * 3 s+c c-s + */ + switch (n&3) { + case 0: temp = sin(x)-cos(x); break; + case 1: temp = -sin(x)-cos(x); break; + case 2: temp = -sin(x)+cos(x); break; + case 3: temp = sin(x)+cos(x); break; + } + b = invsqrtpi*temp/sqrt(x); + } else { + a = y0(x); + b = y1(x); + /* + * fix 1262058 and take care of non-default rounding + */ + for (i = 
1; i < n; i++) { + temp = b; + b *= (GENERIC) (i + i) / x; + if (b <= -DBL_MAX) + break; + b -= a; + a = temp; + } + } + if (sign > 0) + return (b); + else + return (-b); +} diff --git a/usr/src/lib/libm/common/C/lgamma.c b/usr/src/lib/libm/common/C/lgamma.c new file mode 100644 index 0000000000..99dbb66ebc --- /dev/null +++ b/usr/src/lib/libm/common/C/lgamma.c @@ -0,0 +1,52 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma weak lgamma = __lgamma + +#include "libm.h" + +extern int signgam; +/* lgamma(x): wrapper that calls __k_lgamma(x, &signgam), passing the global signgam; a non-finite x returns x * x, and an x equal to zero or a negative integer sets signgam = 1 and returns _SVID_libm_err(x, x, 15). */ +double +lgamma(double x) { + double g; + + if (!finite(x)) + return (x * x); + + g = rint(x); + if (x == g && x <= 0.0) { + signgam = 1; + return (_SVID_libm_err(x, x, 15)); + } + /* a non-finite kernel result is replaced by _SVID_libm_err(x, x, 14) */ + g = __k_lgamma(x, &signgam); + if (!finite(g)) + g = _SVID_libm_err(x, x, 14); + return (g); +} diff --git a/usr/src/lib/libm/common/C/lgamma_r.c b/usr/src/lib/libm/common/C/lgamma_r.c new file mode 100644 index 0000000000..a79adca96d --- /dev/null +++ b/usr/src/lib/libm/common/C/lgamma_r.c @@ -0,0 +1,50 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma weak lgamma_r = __lgamma_r + +#include "libm.h" +/* lgamma_r(x, signgamp): same steps as lgamma() but the sign is passed through the caller's signgamp instead of the global signgam; only NaN is short-circuited here, so infinities reach the checks below. NOTE(review): lgamma() tests !finite(x) at this point - confirm the difference is intended. */ +double +lgamma_r(double x, int *signgamp) { + double g; + + if (isnan(x)) + return (x * x); + + g = rint(x); + if (x == g && x <= 0.0) { + *signgamp = 1; + return (_SVID_libm_err(x, x, 15)); + } + + g = __k_lgamma(x, signgamp); + if (!finite(g)) + g = _SVID_libm_err(x, x, 14); + return (g); +} diff --git a/usr/src/lib/libm/common/C/libm.h b/usr/src/lib/libm/common/C/libm.h new file mode 100644 index 0000000000..aa9f722d86 --- /dev/null +++ b/usr/src/lib/libm/common/C/libm.h @@ -0,0 +1,209 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _LIBM_H +#define _LIBM_H + +#include <sys/isa_defs.h> + +#ifdef _ASM +/* BEGIN CSTYLED */ + +/* + * Disable amd64 assembly code profiling for now. 
+ */ +#if defined(__amd64) +#undef PROF +#endif + +#include <sys/asm_linkage.h> + +#define NAME(x) x +#define TEXT .section ".text" +#define DATA .section ".data" +#define RO_DATA .section ".rodata" +#define IDENT(x) .ident x + +#if defined(__sparc) +/* LIBM_ANSI_PRAGMA_WEAK(sym, stype): declare sym as a weak symbol of type stype and set it equal to the __-prefixed name. */ +#define LIBM_ANSI_PRAGMA_WEAK(sym,stype) \ + .weak sym; \ + .type sym,#stype; \ +sym = __/**/sym + +#ifndef SET_FILE +#define SET_FILE(x) \ + .file x +#endif /* !defined(SET_FILE) */ + +#ifdef PIC +/* + * One should *never* pass o7 to PIC_SETUP: the sequence's final add reads %o7 after its own call has written it. + */ +#define PIC_SETUP(via) \ +9: call 8f; \ + sethi %hi(NAME(_GLOBAL_OFFSET_TABLE_)-(9b-.)),%via; \ +8: or %via,%lo(NAME(_GLOBAL_OFFSET_TABLE_)-(9b-.)),%via; \ + add %via,%o7,%via +/* + * Must save/restore %o7 in leaf routines; may *not* use jmpl! + */ +#define PIC_LEAF_SETUP(via) \ + or %g0,%o7,%g1; \ +9: call 8f; \ + sethi %hi(NAME(_GLOBAL_OFFSET_TABLE_)-(9b-.)),%via; \ +8: or %via,%lo(NAME(_GLOBAL_OFFSET_TABLE_)-(9b-.)),%via; \ + add %via,%o7,%via; \ + or %g0,%g1,%o7 +#ifdef __sparcv9 +#define PIC_SET(via,sym,dst) ldx [%via+sym],%dst +#else /* defined(__sparcv9) */ +#define PIC_SET(via,sym,dst) ld [%via+sym],%dst +#endif /* defined(__sparcv9) */ +#else /* defined(PIC) */ +#define PIC_SETUP(via) +#define PIC_LEAF_SETUP(via) +#ifdef __sparcv9 +/* + * g1 is used as scratch register in V9 mode + */ +#define PIC_SET(via,sym,dst) setx sym,%g1,%dst +#else /* defined(__sparcv9) */ +#define PIC_SET(via,sym,dst) set sym,%dst +#endif /* defined(__sparcv9) */ +#endif /* defined(PIC) */ + +/* + * Workaround for 4337025: MCOUNT in asm_linkage.h does not support __sparcv9 + */ +#if defined(PROF) && defined(__sparcv9) + +#undef MCOUNT_SIZE +#undef MCOUNT + +#if !defined(PIC) +#define MCOUNT_SIZE (9*4) /* 9 instructions */ +#define MCOUNT(x) \ + save %sp, -SA(MINFRAME), %sp; \ + sethi %hh(.L_/**/x/**/1), %o0; \ + sethi %lm(.L_/**/x/**/1), %o1; \ + or %o0, %hm(.L_/**/x/**/1), %o0; \ + or %o1, %lo(.L_/**/x/**/1), %o1; \ + sllx %o0, 32, %o0; \ + call _mcount; \ + or %o0, %o1, %o0; \ + 
restore; \ + .common .L_/**/x/**/1, 8, 8 +#elif defined(PIC32) +#define MCOUNT_SIZE (10*4) /* 10 instructions */ +#define MCOUNT(x) \ + save %sp,-SA(MINFRAME),%sp; \ +1: call .+8; \ + sethi %hi(_GLOBAL_OFFSET_TABLE_-(1b-.)),%o0; \ + sethi %hi(.L_/**/x/**/1),%o1; \ + add %o0,%lo(_GLOBAL_OFFSET_TABLE_-(1b-.)),%o0; \ + add %o1,%lo(.L_/**/x/**/1),%o1; \ + add %o0,%o7,%o0; \ + call _mcount; \ + ldx [%o0+%o1],%o0; \ + restore; \ + .common .L_/**/x/**/1,8,8 +#else /* PIC13 */ +#define MCOUNT_SIZE (8*4) /* 8 instructions */ +#define MCOUNT(x) \ + save %sp,-SA(MINFRAME),%sp; \ +1: call .+8; \ + sethi %hi(_GLOBAL_OFFSET_TABLE_-(1b-.)),%o0; \ + add %o0,%lo(_GLOBAL_OFFSET_TABLE_-(1b-.)),%o0; \ + add %o0,%o7,%o0; \ + call _mcount; \ + ldx [%o0+%lo(.L_/**/x/**/1)],%o0; \ + restore; \ + .common .L_/**/x/**/1,8,8 +#endif /* !defined(PIC) */ +#endif /* defined(PROF) && defined(__sparcv9) */ + +#elif defined(__x86) + +#define LIBM_ANSI_PRAGMA_WEAK(sym,stype) \ + .weak sym; \ + .type sym,@stype; \ +sym = __/**/sym + +#ifdef PIC +#if defined(__amd64) +#define PIC_SETUP(x) +#define PIC_WRAPUP +#define PIC_F(x) x@PLT +#define PIC_G(x) x@GOTPCREL(%rip) +#define PIC_L(x) x(%rip) +#define PIC_G_LOAD(insn,sym,dst) \ + movq PIC_G(sym),%dst; \ + insn (%dst),%dst +#else /* not amd64: PIC_SETUP pushes %ebx and loads it from _GLOBAL_OFFSET_TABLE_; PIC_WRAPUP pops it back */ +#define PIC_SETUP(label) \ + pushl %ebx; \ + call .label; \ +.label: popl %ebx; \ + addl $_GLOBAL_OFFSET_TABLE_+[.-.label],%ebx +#define PIC_WRAPUP popl %ebx +#define PIC_F(x) x@PLT +#define PIC_G(x) x@GOT(%ebx) +#define PIC_L(x) x@GOTOFF(%ebx) +#define PIC_G_LOAD(insn,sym,dst) \ + mov PIC_G(sym),%dst; \ + insn (%dst),%dst +#endif +#else /* defined(PIC) */ +#define PIC_SETUP(x) +#define PIC_WRAPUP +#define PIC_F(x) x +#define PIC_G(x) x +#define PIC_L(x) x +#define PIC_G_LOAD(insn,sym,dst) insn sym,%dst +#endif /* defined(PIC) */ + +#else +#error Unknown architecture +#endif + +/* END CSTYLED */ +#else /* defined(_ASM) */ + +#include "libm_macros.h" +#include "libm_synonyms.h" +#include "libm_protos.h" +#include 
"libm_inlines.h" +#include <math.h> +#if defined(__SUNPRO_C) +#include <sunmath.h> +#endif + +#endif /* defined(_ASM) */ + +#endif /* _LIBM_H */ diff --git a/usr/src/lib/libm/common/C/libm_macros.h b/usr/src/lib/libm/common/C/libm_macros.h new file mode 100644 index 0000000000..45f268ef06 --- /dev/null +++ b/usr/src/lib/libm/common/C/libm_macros.h @@ -0,0 +1,79 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _LIBM_MACROS_H +#define _LIBM_MACROS_H + +#include <sys/isa_defs.h> + +#if defined(__sparc) + +#define HIWORD 0 +#define LOWORD 1 +#define HIXWORD 0 /* index of int containing exponent */ +#define XSGNMSK 0x80000000 /* sign bit mask within that int (cleared by ISZEROL before its zero test) */ +#define XBIASED_EXP(x) ((((int *)&x)[HIXWORD] & ~0x80000000) >> 16) +#define ISZEROL(x) (((((int *)&x)[0] & ~XSGNMSK) | ((int *)&x)[1] | \ + ((int *)&x)[2] | ((int *)&x)[3]) == 0) + +#elif defined(__x86) + +#define HIWORD 1 +#define LOWORD 0 +#define HIXWORD 2 +#define XSGNMSK 0x8000 +#define XBIASED_EXP(x) (((int *)&x)[HIXWORD] & 0x7fff) +#define ISZEROL(x) (x == 0.0L) + +#define HANDLE_UNSUPPORTED + +/* + * "convert" the high-order 32 bits of a SPARC quad precision + * value ("I") to the sign, exponent, and high-order bits of an + * x86 extended double precision value ("E"); the low-order bits + * in the 12-byte quantity are left intact (NOTE: expands to two statements, so do not use it as an unbraced if/else or loop body) + */ +#define ITOX(I, E) \ + E[2] = 0xffff & ((I) >> 16); \ + E[1] = (((I) & 0x7fff0000) == 0)? \ + (E[1] & 0x7fff) | (0x7fff8000 & ((I) << 15)) :\ + 0x80000000 | (E[1] & 0x7fff) | (0x7fff8000 & ((I) << 15)) + +/* + * "convert" the sign, exponent, and high-order bits of an x86 + * extended double precision value ("E") to the high-order 32 bits + * of a SPARC quad precision value ("I") + */ +#define XTOI(E, I) \ + I = ((E[2]<<16) | (0xffff & (E[1]>>15))) + +#else +#error Unknown architecture +#endif + +#endif /* _LIBM_MACROS_H */ diff --git a/usr/src/lib/libm/common/C/libm_protos.h b/usr/src/lib/libm/common/C/libm_protos.h new file mode 100644 index 0000000000..3dc248a63c --- /dev/null +++ b/usr/src/lib/libm/common/C/libm_protos.h @@ -0,0 +1,218 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _C_LIBM_PROTOS_H +#define _C_LIBM_PROTOS_H + +#ifdef LIBMOPT_BUILD +#define _TBL_cos __libmopt_TBL_cos +#define _TBL_exp2_512 __libmopt_TBL_exp2_512 +#define _TBL_ipio2_inf __libmopt_TBL_ipio2_inf +#define _TBL_jlog_n1 __libmopt_TBL_jlog_n1 +#define _TBL_jlog_n2 __libmopt_TBL_jlog_n2 +#define _TBL_jlog_p1 __libmopt_TBL_jlog_p1 +#define _TBL_jlog_p2 __libmopt_TBL_jlog_p2 +#define _TBL_log10 __libmopt_TBL_log10 +#define _TBL_log2_14 __libmopt_TBL_log2_14 +#define _TBL_log2_9 __libmopt_TBL_log2_9 +#define _TBL_sin __libmopt_TBL_sin +#define _TBL_sincosx __libmopt_TBL_sincosx +#define _TBL_xexp __libmopt_TBL_xexp +#define _TBL_xlog __libmopt_TBL_xlog +#define __k_cos_ __libmopt__k_cos_ +#define __k_sin_ __libmopt__k_sin_ +#define __k_sincos_ __libmopt__k_sincos_ +#define __reduction __libmopt__reduction +#define __rem_pio2 __libmopt__rem_pio2 +#define __rem_pio2m __libmopt__rem_pio2m +#else /* defined(LIBMOPT_BUILD) */ +#ifdef LIBM_BUILD +#define _SVID_libm_err __libm_SVID_libm_err /* not used by -lsunmath */ +#define _TBL_atan __libm_TBL_atan +#define _TBL_atan1 __libm_TBL_atan1 +#define _TBL_atan_hi __libm_TBL_atan_hi /* not used by -lsunmath */ +#define _TBL_atan_lo 
__libm_TBL_atan_lo /* not used by -lsunmath */ +#define _TBL_exp2_hi __libm_TBL_exp2_hi /* not used by -lsunmath */ +#define _TBL_exp2_lo __libm_TBL_exp2_lo /* not used by -lsunmath */ +#define _TBL_ipio2_inf __libm_TBL_ipio2_inf +#define _TBL_log __libm_TBL_log +#define _TBL_log2_hi __libm_TBL_log2_hi /* not used by -lsunmath */ +#define _TBL_log2_lo __libm_TBL_log2_lo /* not used by -lsunmath */ +#define _TBL_log_hi __libm_TBL_log_hi /* not used by -lsunmath */ +#define _TBL_log_lo __libm_TBL_log_lo /* not used by -lsunmath */ +#define _TBL_sincos __libm_TBL_sincos +#define _TBL_sincosx __libm_TBL_sincosx +#define _TBL_tan_hi __libm_TBL_tan_hi /* not used by -lsunmath */ +#define _TBL_tan_lo __libm_TBL_tan_lo /* not used by -lsunmath */ +#define __k_cexp __libm__k_cexp /* C99 libm */ +#define __k_cexpl __libm__k_cexpl /* C99 libm */ +#define __k_clog_r __libm__k_clog_r /* C99 libm */ +#define __k_clog_rl __libm__k_clog_rl /* C99 libm */ +#define __k_atan2 __libm__k_atan2 /* C99 libm */ +#define __k_atan2l __libm__k_atan2l /* C99 libm */ +#define __k_cos __libm__k_cos +#define __k_lgamma __libm__k_lgamma +#define __k_sin __libm__k_sin +#define __k_sincos __libm__k_sincos +#define __k_tan __libm__k_tan +#define __reduction __libm__reduction /* i386 only */ +#define __rem_pio2 __libm__rem_pio2 +#define __rem_pio2m __libm__rem_pio2m +#define __k_cosf __libm__k_cosf /* C99 libm */ +#define __k_cosl __libm__k_cosl /* C99 libm */ +#define __k_lgammal __libm__k_lgammal /* C99 libm */ +#define __k_sincosf __libm__k_sincosf /* C99 libm */ +#define __k_sincosl __libm__k_sincosl /* C99 libm */ +#define __k_sinf __libm__k_sinf /* C99 libm */ +#define __k_sinl __libm__k_sinl /* C99 libm */ +#define __k_tanf __libm__k_tanf /* C99 libm */ +#define __k_tanl __libm__k_tanl /* C99 libm */ +#define __poly_libmq __libm__poly_libmq /* C99 libm */ +#define __rem_pio2l __libm__rem_pio2l /* C99 libm */ +#define _TBL_atanl_hi __libm_TBL_atanl_hi /* C99 libm */ +#define _TBL_atanl_lo 
__libm_TBL_atanl_lo /* C99 libm */ +#define _TBL_cosl_hi __libm_TBL_cosl_hi /* C99 libm */ +#define _TBL_cosl_lo __libm_TBL_cosl_lo /* C99 libm */ +#define _TBL_expl_hi __libm_TBL_expl_hi /* C99 libm */ +#define _TBL_expl_lo __libm_TBL_expl_lo /* C99 libm */ +#define _TBL_expm1l __libm_TBL_expm1l /* C99 libm */ +#define _TBL_expm1lx __libm_TBL_expm1lx /* C99 libm */ +#define _TBL_ipio2l_inf __libm_TBL_ipio2l_inf /* C99 libm */ +#define _TBL_logl_hi __libm_TBL_logl_hi /* C99 libm */ +#define _TBL_logl_lo __libm_TBL_logl_lo /* C99 libm */ +#define _TBL_r_atan_hi __libm_TBL_r_atan_hi /* C99 libm */ +#define _TBL_r_atan_lo __libm_TBL_r_atan_lo /* C99 libm */ +#define _TBL_sinl_hi __libm_TBL_sinl_hi /* C99 libm */ +#define _TBL_sinl_lo __libm_TBL_sinl_lo /* C99 libm */ +#define _TBL_tanl_hi __libm_TBL_tanl_hi /* C99 libm */ +#define _TBL_tanl_lo __libm_TBL_tanl_lo /* C99 libm */ +#endif /* defined(LIBM_BUILD) */ +#endif /* defined(LIBMOPT_BUILD) */ + +#ifndef _ASM +#ifdef __STDC__ +#define __P(p) p /* __STDC__ defined: keep the parameter list */ +#else +#define __P(p) () /* otherwise: declare with an empty list */ +#endif + +#include <sys/ieeefp.h> + +extern double _SVID_libm_err __P((double, double, int)); +extern double __k_cos __P((double, double)); +extern double __k_cos_ __P((double *)); +extern double __k_lgamma __P((double, int *)); +extern double __k_sin __P((double, double)); +extern double __k_sin_ __P((double *)); +extern double __k_sincos __P((double, double, double *)); +extern double __k_sincos_ __P((double *, double *)); +extern double __k_tan __P((double, double, int)); +extern double __k_cexp __P((double, int *)); +extern long double __k_cexpl __P((long double, int *)); +extern double __k_clog_r __P((double, double, double *)); +extern long double __k_clog_rl __P((long double, long double, long double *)); +extern double __k_atan2 __P((double, double, double *)); +extern long double __k_atan2l __P((long double, long double, long double *)); +extern int __rem_pio2 __P((double, double *)); +extern int __rem_pio2m __P((double *, double *, int, 
int, int, const int *)); + +/* + * entry points that are in-lined + */ +extern double copysign __P((double, double)); +extern int finite __P((double)); +extern enum fp_class_type fp_class __P((double)); +extern double infinity __P((void)); +extern int isinf __P((double)); +extern int signbit __P((double)); + +/* + * new C99 entry points + */ +extern double fdim __P((double, double)); +extern double fma __P((double, double, double)); +extern double fmax __P((double, double)); +extern double fmin __P((double, double)); +extern double frexp __P((double, int *)); +extern double ldexp __P((double, int)); +extern double modf __P((double, double *)); +extern double nan __P((const char *)); +extern double nearbyint __P((double)); +extern double nexttoward __P((double, long double)); +extern double remquo __P((double, double, int *)); +extern double round __P((double)); +extern double scalbln __P((double, long int)); +extern double tgamma __P((double)); +extern double trunc __P((double)); +extern float fdimf __P((float, float)); +extern float fmaf __P((float, float, float)); +extern float fmaxf __P((float, float)); +extern float fminf __P((float, float)); +extern float frexpf __P((float, int *)); +extern float ldexpf __P((float, int)); +extern float modff __P((float, float *)); +extern float nanf __P((const char *)); +extern float nearbyintf __P((float)); +extern float nextafterf __P((float, float)); +extern float nexttowardf __P((float, long double)); +extern float remquof __P((float, float, int *)); +extern float roundf __P((float)); +extern float scalblnf __P((float, long int)); +extern float tgammaf __P((float)); +extern float truncf __P((float)); +extern long double frexpl __P((long double, int *)); +extern long double fdiml __P((long double, long double)); +extern long double fmal __P((long double, long double, long double)); +extern long double fmaxl __P((long double, long double)); +extern long double fminl __P((long double, long double)); +extern long double ldexpl 
__P((long double, int)); +extern long double modfl __P((long double, long double *)); +extern long double nanl __P((const char *)); +extern long double nearbyintl __P((long double)); +extern long double nextafterl __P((long double, long double)); +extern long double nexttowardl __P((long double, long double)); +extern long double remquol __P((long double, long double, int *)); +extern long double roundl __P((long double)); +extern long double scalblnl __P((long double, long int)); +extern long double tgammal __P((long double)); +extern long double truncl __P((long double)); +extern long int lrint __P((double)); +extern long int lrintf __P((float)); +extern long int lrintl __P((long double)); +extern long int lround __P((double)); +extern long int lroundf __P((float)); +extern long int lroundl __P((long double)); +extern long long int llrint __P((double)); +extern long long int llrintf __P((float)); +extern long long int llrintl __P((long double)); +extern long long int llround __P((double)); +extern long long int llroundf __P((float)); +extern long long int llroundl __P((long double)); +#endif /* _ASM */ + +#endif /* _C_LIBM_PROTOS_H */ diff --git a/usr/src/lib/libm/common/C/libm_synonyms.h b/usr/src/lib/libm/common/C/libm_synonyms.h new file mode 100644 index 0000000000..0025a3dfe8 --- /dev/null +++ b/usr/src/lib/libm/common/C/libm_synonyms.h @@ -0,0 +1,749 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _LIBM_SYNONYMS_H +#define _LIBM_SYNONYMS_H + +#if defined(ELFOBJ) && !defined(lint) + +#define cabs __cabs /* C99 <complex.h> */ +#define cabsf __cabsf /* C99 <complex.h> */ +#define cabsl __cabsl /* C99 <complex.h> */ +#define cacos __cacos /* C99 <complex.h> */ +#define cacosf __cacosf /* C99 <complex.h> */ +#define cacosl __cacosl /* C99 <complex.h> */ +#define cacosh __cacosh /* C99 <complex.h> */ +#define cacoshf __cacoshf /* C99 <complex.h> */ +#define cacoshl __cacoshl /* C99 <complex.h> */ +#define carg __carg /* C99 <complex.h> */ +#define cargf __cargf /* C99 <complex.h> */ +#define cargl __cargl /* C99 <complex.h> */ +#define casin __casin /* C99 <complex.h> */ +#define casinf __casinf /* C99 <complex.h> */ +#define casinl __casinl /* C99 <complex.h> */ +#define casinh __casinh /* C99 <complex.h> */ +#define casinhf __casinhf /* C99 <complex.h> */ +#define casinhl __casinhl /* C99 <complex.h> */ +#define catan __catan /* C99 <complex.h> */ +#define catanf __catanf /* C99 <complex.h> */ +#define catanl __catanl /* C99 <complex.h> */ +#define catanh __catanh /* C99 <complex.h> */ +#define catanhf __catanhf /* C99 <complex.h> */ +#define catanhl __catanhl /* C99 <complex.h> */ +#define ccos __ccos /* C99 <complex.h> */ +#define ccosf __ccosf /* C99 <complex.h> */ +#define ccosl __ccosl /* C99 <complex.h> */ +#define ccosh __ccosh /* C99 <complex.h> */ +#define ccoshf __ccoshf /* C99 <complex.h> */ +#define ccoshl __ccoshl /* C99 <complex.h> */ +#define cexp __cexp /* C99 <complex.h> */ +#define cexpf __cexpf /* C99 <complex.h> */ 
+#define cexpl __cexpl /* C99 <complex.h> */ +#define cimag __cimag /* C99 <complex.h> */ +#define cimagf __cimagf /* C99 <complex.h> */ +#define cimagl __cimagl /* C99 <complex.h> */ +#define clog __clog /* C99 <complex.h> */ +#define clogf __clogf /* C99 <complex.h> */ +#define clogl __clogl /* C99 <complex.h> */ +#define conj __conj /* C99 <complex.h> */ +#define conjf __conjf /* C99 <complex.h> */ +#define conjl __conjl /* C99 <complex.h> */ +#define cpow __cpow /* C99 <complex.h> */ +#define cpowf __cpowf /* C99 <complex.h> */ +#define cpowl __cpowl /* C99 <complex.h> */ +#define cproj __cproj /* C99 <complex.h> */ +#define cprojf __cprojf /* C99 <complex.h> */ +#define cprojl __cprojl /* C99 <complex.h> */ +#define creal __creal /* C99 <complex.h> */ +#define crealf __crealf /* C99 <complex.h> */ +#define creall __creall /* C99 <complex.h> */ +#define csin __csin /* C99 <complex.h> */ +#define csinf __csinf /* C99 <complex.h> */ +#define csinl __csinl /* C99 <complex.h> */ +#define csinh __csinh /* C99 <complex.h> */ +#define csinhf __csinhf /* C99 <complex.h> */ +#define csinhl __csinhl /* C99 <complex.h> */ +#define csqrt __csqrt /* C99 <complex.h> */ +#define csqrtf __csqrtf /* C99 <complex.h> */ +#define csqrtl __csqrtl /* C99 <complex.h> */ +#define ctan __ctan /* C99 <complex.h> */ +#define ctanf __ctanf /* C99 <complex.h> */ +#define ctanl __ctanl /* C99 <complex.h> */ +#define ctanh __ctanh /* C99 <complex.h> */ +#define ctanhf __ctanhf /* C99 <complex.h> */ +#define ctanhl __ctanhl /* C99 <complex.h> */ +#define abrupt_underflow_ __abrupt_underflow_ +#define acos __acos +#define acosd __acosd +#define acosdf __acosdf +#define acosdl __acosdl +#define acosf __acosf +#define acosh __acosh +#define acoshf __acoshf +#define acoshl __acoshl +#define acosl __acosl +#define acosp __acosp +#define acospf __acospf +#define acospi __acospi +#define acospif __acospif +#define acospil __acospil +#define acospl __acospl +#define aint __aint +#define aintf __aintf 
+#define aintl __aintl +#define anint __anint +#define anintf __anintf +#define anintl __anintl +#define annuity __annuity +#define annuityf __annuityf +#define annuityl __annuityl +#define asin __asin +#define asind __asind +#define asindf __asindf +#define asindl __asindl +#define asinf __asinf +#define asinh __asinh +#define asinhf __asinhf +#define asinhl __asinhl +#define asinl __asinl +#define asinp __asinp +#define asinpf __asinpf +#define asinpi __asinpi +#define asinpif __asinpif +#define asinpil __asinpil +#define asinpl __asinpl +#define atan __atan +#define atan2 __atan2 +#define atan2d __atan2d +#define atan2df __atan2df +#define atan2dl __atan2dl +#define atan2f __atan2f +#define atan2l __atan2l +#define atan2pi __atan2pi +#define atan2pif __atan2pif +#define atan2pil __atan2pil +#define atand __atand +#define atandf __atandf +#define atandl __atandl +#define atanf __atanf +#define atanh __atanh +#define atanhf __atanhf +#define atanhl __atanhl +#define atanl __atanl +#define atanp __atanp +#define atanpf __atanpf +#define atanpi __atanpi +#define atanpif __atanpif +#define atanpil __atanpil +#define atanpl __atanpl +#define cbrt __cbrt +#define cbrtf __cbrtf +#define cbrtl __cbrtl +#define ceil __ceil +#define ceilf __ceilf +#define ceill __ceill +#define compound __compound +#define compoundf __compoundf +#define compoundl __compoundl +#define convert_external __convert_external +#define convert_external_ __convert_external_ +#define copysign __copysign +#define copysignf __copysignf +#define copysignl __copysignl +#define cos __cos +#define cosd __cosd +#define cosdf __cosdf +#define cosdl __cosdl +#define cosf __cosf +#define cosh __cosh +#define coshf __coshf +#define coshl __coshl +#define cosl __cosl +#define cosp __cosp +#define cospf __cospf +#define cospi __cospi +#define cospif __cospif +#define cospil __cospil +#define cospl __cospl +#define d_acos_ __d_acos_ +#define d_acosd_ __d_acosd_ +#define d_acosh_ __d_acosh_ +#define d_acosp_ 
__d_acosp_ +#define d_acospi_ __d_acospi_ +#define d_addran_ __d_addran_ +#define d_addrans_ __d_addrans_ +#define d_aint_ __d_aint_ +#define d_anint_ __d_anint_ +#define d_annuity_ __d_annuity_ +#define d_asin_ __d_asin_ +#define d_asind_ __d_asind_ +#define d_asinh_ __d_asinh_ +#define d_asinp_ __d_asinp_ +#define d_asinpi_ __d_asinpi_ +#define d_atan2_ __d_atan2_ +#define d_atan2d_ __d_atan2d_ +#define d_atan2pi_ __d_atan2pi_ +#define d_atan_ __d_atan_ +#define d_atand_ __d_atand_ +#define d_atanh_ __d_atanh_ +#define d_atanp_ __d_atanp_ +#define d_atanpi_ __d_atanpi_ +#define d_cbrt_ __d_cbrt_ +#define d_ceil_ __d_ceil_ +#define d_compound_ __d_compound_ +#define d_copysign_ __d_copysign_ +#define d_cos_ __d_cos_ +#define d_cosd_ __d_cosd_ +#define d_cosh_ __d_cosh_ +#define d_cosp_ __d_cosp_ +#define d_cospi_ __d_cospi_ +#define d_erf_ __d_erf_ +#define d_erfc_ __d_erfc_ +#define d_exp10_ __d_exp10_ +#define d_exp2_ __d_exp2_ +#define d_exp_ __d_exp_ +#define d_expm1_ __d_expm1_ +#define d_fabs_ __d_fabs_ +#define d_floor_ __d_floor_ +#define d_fmod_ __d_fmod_ +#define d_get_addrans_ __d_get_addrans_ +#define d_hypot_ __d_hypot_ +#define d_infinity_ __d_infinity_ +#define d_init_addrans_ __d_init_addrans_ +#define d_j0_ __d_j0_ +#define d_j1_ __d_j1_ +#define d_jn_ __d_jn_ +#define d_lcran_ __d_lcran_ +#define d_lcrans_ __d_lcrans_ +#define d_lgamma_ __d_lgamma_ +#define d_lgamma_r_ __d_lgamma_r_ +#define d_log10_ __d_log10_ +#define d_log1p_ __d_log1p_ +#define d_log2_ __d_log2_ +#define d_log_ __d_log_ +#define d_logb_ __d_logb_ +#define d_max_normal_ __d_max_normal_ +#define d_max_subnormal_ __d_max_subnormal_ +#define d_min_normal_ __d_min_normal_ +#define d_min_subnormal_ __d_min_subnormal_ +#define d_mwcran_ __d_mwcran_ +#define d_mwcrans_ __d_mwcrans_ +#define d_nextafter_ __d_nextafter_ +#define d_pow_ __d_pow_ +#define d_quiet_nan_ __d_quiet_nan_ +#define d_remainder_ __d_remainder_ +#define d_rint_ __d_rint_ +#define d_scalb_ __d_scalb_ +#define 
d_scalbn_ __d_scalbn_ +#define d_set_addrans_ __d_set_addrans_ +#define d_shufrans_ __d_shufrans_ +#define d_signaling_nan_ __d_signaling_nan_ +#define d_significand_ __d_significand_ +#define d_sin_ __d_sin_ +#define d_sincos_ __d_sincos_ +#define d_sincosd_ __d_sincosd_ +#define d_sincosp_ __d_sincosp_ +#define d_sincospi_ __d_sincospi_ +#define d_sind_ __d_sind_ +#define d_sinh_ __d_sinh_ +#define d_sinp_ __d_sinp_ +#define d_sinpi_ __d_sinpi_ +#define d_sqrt_ __d_sqrt_ +#define d_tan_ __d_tan_ +#define d_tand_ __d_tand_ +#define d_tanh_ __d_tanh_ +#define d_tanp_ __d_tanp_ +#define d_tanpi_ __d_tanpi_ +#define d_y0_ __d_y0_ +#define d_y1_ __d_y1_ +#define d_yn_ __d_yn_ +#define drem __drem +#define erf __erf +#define erfc __erfc +#define erfcf __erfcf +#define erfcl __erfcl +#define erff __erff +#define erfl __erfl +#define exp __exp +#define exp10 __exp10 +#define exp10f __exp10f +#define exp10l __exp10l +#define exp2 __exp2 +#define exp2f __exp2f +#define exp2l __exp2l +#define expf __expf +#define expl __expl +#define expm1 __expm1 +#define expm1f __expm1f +#define expm1l __expm1l +#define fabs __fabs +#define fabsf __fabsf +#define fabsl __fabsl +#define fdim __fdim /* C99 */ +#define fdimf __fdimf /* C99 */ +#define fdiml __fdiml /* C99 */ +#define finitef __finitef +#define finitel __finitel +#define floor __floor +#define floorf __floorf +#define floorl __floorl +#define fma __fma /* C99 */ +#define fmaf __fmaf /* C99 */ +#define fmal __fmal /* C99 */ +#define fmax __fmax /* C99 */ +#define fmaxf __fmaxf /* C99 */ +#define fmaxl __fmaxl /* C99 */ +#define fmin __fmin /* C99 */ +#define fminf __fminf /* C99 */ +#define fminl __fminl /* C99 */ +#define fmod __fmod +#define fmodf __fmodf +#define fmodl __fmodl +#define fp_class __fp_class +#define fp_classf __fp_classf +#define fp_classl __fp_classl +#define frexp __frexp /* S10 */ +#define frexpf __frexpf /* S10 */ +#define frexpl __frexpl /* S10 */ +#define gamma __gamma +#define gamma_r __gamma_r 
+#define gammaf __gammaf +#define gammaf_r __gammaf_r +#define gammal __gammal +#define gammal_r __gammal_r +#define gradual_underflow_ __gradual_underflow_ +#define hypot __hypot +#define hypotf __hypotf +#define hypotl __hypotl +#define i_addran_ __i_addran_ +#define i_addrans_ __i_addrans_ +#define i_get_addrans_ __i_get_addrans_ +#define i_get_lcrans_ __i_get_lcrans_ +#define i_get_mwcrans_ __i_get_mwcrans_ +#define i_init_addrans_ __i_init_addrans_ +#define i_init_lcrans_ __i_init_lcrans_ +#define i_init_mwcrans_ __i_init_mwcrans_ +#define i_lcran_ __i_lcran_ +#define i_lcrans_ __i_lcrans_ +#define i_llmwcran_ __i_llmwcran_ +#define i_llmwcrans_ __i_llmwcrans_ +#define i_mwcran_ __i_mwcran_ +#define i_mwcrans_ __i_mwcrans_ +#define i_set_addrans_ __i_set_addrans_ +#define i_set_lcrans_ __i_set_lcrans_ +#define i_set_mwcrans_ __i_set_mwcrans_ +#define i_shufrans_ __i_shufrans_ +#define id_finite_ __id_finite_ +#define id_fp_class_ __id_fp_class_ +#define id_ilogb_ __id_ilogb_ +#define id_irint_ __id_irint_ +#define id_isinf_ __id_isinf_ +#define id_isnan_ __id_isnan_ +#define id_isnormal_ __id_isnormal_ +#define id_issubnormal_ __id_issubnormal_ +#define id_iszero_ __id_iszero_ +#define id_nint_ __id_nint_ +#define id_signbit_ __id_signbit_ +#define ieee_flags __ieee_flags +#define ieee_flags_ __ieee_flags_ +#define ieee_handler __ieee_handler +#define ieee_handler_ __ieee_handler_ +#define ieee_handlers __ieee_handlers +#define ieee_retrospective __ieee_retrospective +#define ieee_retrospective_ __ieee_retrospective_ +#define ilogb __ilogb +#define ilogbf __ilogbf +#define ilogbl __ilogbl +#define infinity __infinity +#define infinityf __infinityf +#define infinityl __infinityl +#define iq_finite_ __iq_finite_ +#define iq_fp_class_ __iq_fp_class_ +#define iq_ilogb_ __iq_ilogb_ +#define iq_isinf_ __iq_isinf_ +#define iq_isnan_ __iq_isnan_ +#define iq_isnormal_ __iq_isnormal_ +#define iq_issubnormal_ __iq_issubnormal_ +#define iq_iszero_ __iq_iszero_ +#define 
iq_signbit_ __iq_signbit_ +#define ir_finite_ __ir_finite_ +#define ir_fp_class_ __ir_fp_class_ +#define ir_ilogb_ __ir_ilogb_ +#define ir_irint_ __ir_irint_ +#define ir_isinf_ __ir_isinf_ +#define ir_isnan_ __ir_isnan_ +#define ir_isnormal_ __ir_isnormal_ +#define ir_issubnormal_ __ir_issubnormal_ +#define ir_iszero_ __ir_iszero_ +#define ir_nint_ __ir_nint_ +#define ir_signbit_ __ir_signbit_ +#define irint __irint +#define irintf __irintf +#define irintl __irintl +#define isinf __isinf +#define isinff __isinff +#define isinfl __isinfl +#define isnan __isnan +#define isnanf __isnanf +#define isnanl __isnanl +#define isnormal __isnormal +#define isnormalf __isnormalf +#define isnormall __isnormall +#define issubnormal __issubnormal +#define issubnormalf __issubnormalf +#define issubnormall __issubnormall +#define iszero __iszero +#define iszerof __iszerof +#define iszerol __iszerol +#define j0 __j0 +#define j0f __j0f +#define j0l __j0l +#define j1 __j1 +#define j1f __j1f +#define j1l __j1l +#define jn __jn +#define jnf __jnf +#define jnl __jnl +#define ldexp __ldexp /* S10 */ +#define ldexpf __ldexpf /* S10 */ +#define ldexpl __ldexpl /* S10 */ +#define lgamma __lgamma +#define lgamma_r __lgamma_r +#define lgammaf __lgammaf +#define lgammaf_r __lgammaf_r +#define lgammal __lgammal +#define lgammal_r __lgammal_r +#define llrint __llrint /* C99 */ +#define llrintf __llrintf /* C99 */ +#define llrintl __llrintl /* C99 */ +#define llround __llround /* C99 */ +#define llroundf __llroundf /* C99 */ +#define llroundl __llroundl /* C99 */ +#define lrint __lrint /* C99 */ +#define lrintf __lrintf /* C99 */ +#define lrintl __lrintl /* C99 */ +#define lround __lround /* C99 */ +#define lroundf __lroundf /* C99 */ +#define lroundl __lroundl /* C99 */ +#define log __log +#define log10 __log10 +#define log10f __log10f +#define log10l __log10l +#define log1p __log1p +#define log1pf __log1pf +#define log1pl __log1pl +#define log2 __log2 +#define log2f __log2f +#define log2l 
__log2l +#define logb __logb +#define logbf __logbf +#define logbl __logbl +#define logf __logf +#define logl __logl +#define max_normal __max_normal +#define max_normalf __max_normalf +#define max_normall __max_normall +#define max_subnormal __max_subnormal +#define max_subnormalf __max_subnormalf +#define max_subnormall __max_subnormall +#define min_normal __min_normal +#define min_normalf __min_normalf +#define min_normall __min_normall +#define min_subnormal __min_subnormal +#define min_subnormalf __min_subnormalf +#define min_subnormall __min_subnormall +#define modf __modf /* S10 */ +#define modff __modff /* S10 */ +#define modfl __modfl /* S10 */ +#define nan __nan /* C99 */ +#define nanf __nanf /* C99 */ +#define nanl __nanl /* C99 */ +#define nearbyint __nearbyint /* C99 */ +#define nearbyintf __nearbyintf /* C99 */ +#define nearbyintl __nearbyintl /* C99 */ +#define nextafter __nextafter +#define nextafterf __nextafterf +#define nextafterl __nextafterl +#define nexttoward __nexttoward /* C99 */ +#define nexttowardf __nexttowardf /* C99 */ +#define nexttowardl __nexttowardl /* C99 */ +#define nint __nint +#define nintf __nintf +#define nintl __nintl +#define nonstandard_arithmetic __nonstandard_arithmetic +#define nonstandard_arithmetic_ __nonstandard_arithmetic_ +#define pow __pow +#define pow_di __pow_di +#define pow_li __pow_li +#define pow_ri __pow_ri +#define powf __powf +#define powl __powl +#define q_copysign_ __q_copysign_ +#define q_fabs_ __q_fabs_ +#define q_fmod_ __q_fmod_ +#define q_infinity_ __q_infinity_ +#define q_max_normal_ __q_max_normal_ +#define q_max_subnormal_ __q_max_subnormal_ +#define q_min_normal_ __q_min_normal_ +#define q_min_subnormal_ __q_min_subnormal_ +#define q_nextafter_ __q_nextafter_ +#define q_quiet_nan_ __q_quiet_nan_ +#define q_remainder_ __q_remainder_ +#define q_scalbn_ __q_scalbn_ +#define q_signaling_nan_ __q_signaling_nan_ +#define quiet_nan __quiet_nan +#define quiet_nanf __quiet_nanf +#define quiet_nanl 
__quiet_nanl +#define r_acos_ __r_acos_ +#define r_acosd_ __r_acosd_ +#define r_acosh_ __r_acosh_ +#define r_acosp_ __r_acosp_ +#define r_acospi_ __r_acospi_ +#define r_addran_ __r_addran_ +#define r_addrans_ __r_addrans_ +#define r_aint_ __r_aint_ +#define r_anint_ __r_anint_ +#define r_annuity_ __r_annuity_ +#define r_asin_ __r_asin_ +#define r_asind_ __r_asind_ +#define r_asinh_ __r_asinh_ +#define r_asinp_ __r_asinp_ +#define r_asinpi_ __r_asinpi_ +#define r_atan2_ __r_atan2_ +#define r_atan2d_ __r_atan2d_ +#define r_atan2pi_ __r_atan2pi_ +#define r_atan_ __r_atan_ +#define r_atand_ __r_atand_ +#define r_atanh_ __r_atanh_ +#define r_atanp_ __r_atanp_ +#define r_atanpi_ __r_atanpi_ +#define r_cbrt_ __r_cbrt_ +#define r_ceil_ __r_ceil_ +#define r_compound_ __r_compound_ +#define r_copysign_ __r_copysign_ +#define r_cos_ __r_cos_ +#define r_cosd_ __r_cosd_ +#define r_cosh_ __r_cosh_ +#define r_cosp_ __r_cosp_ +#define r_cospi_ __r_cospi_ +#define r_erf_ __r_erf_ +#define r_erfc_ __r_erfc_ +#define r_exp10_ __r_exp10_ +#define r_exp2_ __r_exp2_ +#define r_exp_ __r_exp_ +#define r_expm1_ __r_expm1_ +#define r_fabs_ __r_fabs_ +#define r_floor_ __r_floor_ +#define r_fmod_ __r_fmod_ +#define r_get_addrans_ __r_get_addrans_ +#define r_hypot_ __r_hypot_ +#define r_infinity_ __r_infinity_ +#define r_init_addrans_ __r_init_addrans_ +#define r_j0_ __r_j0_ +#define r_j1_ __r_j1_ +#define r_jn_ __r_jn_ +#define r_lcran_ __r_lcran_ +#define r_lcrans_ __r_lcrans_ +#define r_lgamma_ __r_lgamma_ +#define r_lgamma_r_ __r_lgamma_r_ +#define r_log10_ __r_log10_ +#define r_log1p_ __r_log1p_ +#define r_log2_ __r_log2_ +#define r_log_ __r_log_ +#define r_logb_ __r_logb_ +#define r_max_normal_ __r_max_normal_ +#define r_max_subnormal_ __r_max_subnormal_ +#define r_min_normal_ __r_min_normal_ +#define r_min_subnormal_ __r_min_subnormal_ +#define r_mwcran_ __r_mwcran_ +#define r_mwcrans_ __r_mwcrans_ +#define r_nextafter_ __r_nextafter_ +#define r_pow_ __r_pow_ +#define r_quiet_nan_ 
__r_quiet_nan_ +#define r_remainder_ __r_remainder_ +#define r_rint_ __r_rint_ +#define r_scalb_ __r_scalb_ +#define r_scalbn_ __r_scalbn_ +#define r_set_addrans_ __r_set_addrans_ +#define r_shufrans_ __r_shufrans_ +#define r_signaling_nan_ __r_signaling_nan_ +#define r_significand_ __r_significand_ +#define r_sin_ __r_sin_ +#define r_sincos_ __r_sincos_ +#define r_sincosd_ __r_sincosd_ +#define r_sincosp_ __r_sincosp_ +#define r_sincospi_ __r_sincospi_ +#define r_sind_ __r_sind_ +#define r_sinh_ __r_sinh_ +#define r_sinp_ __r_sinp_ +#define r_sinpi_ __r_sinpi_ +#define r_sqrt_ __r_sqrt_ +#define r_tan_ __r_tan_ +#define r_tand_ __r_tand_ +#define r_tanh_ __r_tanh_ +#define r_tanp_ __r_tanp_ +#define r_tanpi_ __r_tanpi_ +#define r_y0_ __r_y0_ +#define r_y1_ __r_y1_ +#define r_yn_ __r_yn_ +#define remainder __remainder +#define remainderf __remainderf +#define remainderl __remainderl +#define remquo __remquo /* C99 */ +#define remquof __remquof /* C99 */ +#define remquol __remquol /* C99 */ +#define rint __rint +#define rintf __rintf +#define rintl __rintl +#define round __round /* C99 */ +#define roundf __roundf /* C99 */ +#define roundl __roundl /* C99 */ +#define scalb __scalb +#define scalbf __scalbf +#define scalbl __scalbl +#define scalbln __scalbln /* C99 */ +#define scalblnf __scalblnf /* C99 */ +#define scalblnl __scalblnl /* C99 */ +#define scalbn __scalbn +#define scalbnf __scalbnf +#define scalbnl __scalbnl +#define sigfpe __sigfpe +#define sigfpe_ __sigfpe_ +#define signaling_nan __signaling_nan +#define signaling_nanf __signaling_nanf +#define signaling_nanl __signaling_nanl +#define signbit __signbit +#define signbitf __signbitf +#define signbitl __signbitl +#define signgam __signgam +#define signgamf __signgamf +#define signgaml __signgaml +#define significand __significand +#define significandf __significandf +#define significandl __significandl +#define sin __sin +#define sincos __sincos +#define sincosd __sincosd +#define sincosdf __sincosdf 
+#define sincosdl __sincosdl +#define sincosf __sincosf +#define sincosl __sincosl +#define sincosp __sincosp +#define sincospf __sincospf +#define sincospi __sincospi +#define sincospif __sincospif +#define sincospil __sincospil +#define sincospl __sincospl +#define sind __sind +#define sindf __sindf +#define sindl __sindl +#define sinf __sinf +#define sinh __sinh +#define sinhf __sinhf +#define sinhl __sinhl +#define sinl __sinl +#define sinp __sinp +#define sinpf __sinpf +#define sinpi __sinpi +#define sinpif __sinpif +#define sinpil __sinpil +#define sinpl __sinpl +#define smwcran_ __smwcran_ +#define sqrt __sqrt +#define sqrtf __sqrtf +#define sqrtl __sqrtl +#define standard_arithmetic __standard_arithmetic +#define standard_arithmetic_ __standard_arithmetic_ +#define tan __tan +#define tand __tand +#define tandf __tandf +#define tandl __tandl +#define tanf __tanf +#define tanh __tanh +#define tanhf __tanhf +#define tanhl __tanhl +#define tanl __tanl +#define tanp __tanp +#define tanpf __tanpf +#define tanpi __tanpi +#define tanpif __tanpif +#define tanpil __tanpil +#define tanpl __tanpl +#define tgamma __tgamma /* C99 */ +#define tgammaf __tgammaf /* C99 */ +#define tgammal __tgammal /* C99 */ +#define trunc __trunc /* C99 */ +#define truncf __truncf /* C99 */ +#define truncl __truncl /* C99 */ +#define u_addrans_ __u_addrans_ +#define u_lcrans_ __u_lcrans_ +#define u_llmwcran_ __u_llmwcran_ +#define u_llmwcrans_ __u_llmwcrans_ +#define u_mwcran_ __u_mwcran_ +#define u_mwcrans_ __u_mwcrans_ +#define u_shufrans_ __u_shufrans_ +#define y0 __y0 +#define y0f __y0f +#define y0l __y0l +#define y1 __y1 +#define y1f __y1f +#define y1l __y1l +#define yn __yn +#define ynf __ynf +#define ynl __ynl + +/* + * these are libdl entry points + */ +#define dlclose _dlclose +#define dlopen _dlopen +#define dlsym _dlsym + +/* + * these are libc entry points + */ +#define finite _finite +#define fpclass _fpclass +#define isnand _isnand +#define sigaction _sigaction +#define 
sigemptyset _sigemptyset +#define unordered _unordered +#define write _write +#ifdef _REENTRANT +#define mutex_lock _mutex_lock +#define mutex_unlock _mutex_unlock +#define thr_getspecific _thr_getspecific +#define thr_keycreate _thr_keycreate +#define thr_main _thr_main +#define thr_setspecific _thr_setspecific +#endif + +#endif /* defined(ELFOBJ) && !defined(lint) */ + +#endif /* _LIBM_SYNONYMS_H */ diff --git a/usr/src/lib/libm/common/C/libm_thread.h b/usr/src/lib/libm/common/C/libm_thread.h new file mode 100644 index 0000000000..d22d3d5a5e --- /dev/null +++ b/usr/src/lib/libm/common/C/libm_thread.h @@ -0,0 +1,44 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _LIBM_THREAD_H +#define _LIBM_THREAD_H + +#include <synch.h> +#include <thread.h> + +/* + * -lthread function(s) not prototyped anywhere + */ +extern int thr_main(void); +/* + * function call(s) local to libsunmath + */ +extern void *__tsd_alloc(thread_key_t *, int, int); +#endif /* _LIBM_THREAD_H */ diff --git a/usr/src/lib/libm/common/C/log.c b/usr/src/lib/libm/common/C/log.c new file mode 100644 index 0000000000..7d755b4220 --- /dev/null +++ b/usr/src/lib/libm/common/C/log.c @@ -0,0 +1,220 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak log = __log + +/* INDENT OFF */ +/* + * log(x) + * Table look-up algorithm with product polynomial approximation. + * By K.C. Ng, Oct 23, 2004. Updated Oct 18, 2005. + * + * (a). For x in [1-0.125, 1+0.1328125], using a special approximation: + * Let f = x - 1 and z = f*f. 
+ * return f + ((a1*z) * + * ((a2 + (a3*f)*(a4+f)) + (f*z)*(a5+f))) * + * (((a6 + f*(a7+f)) + (f*z)*(a8+f)) * + * ((a9 + (a10*f)*(a11+f)) + (f*z)*(a12+f))) + * a1 -6.88821452420390473170286327331268694251775741577e-0002, + * a2 1.97493380704769294631262255279580131173133850098e+0000, + * a3 2.24963218866067560242072431719861924648284912109e+0000, + * a4 -9.02975906958474405783476868236903101205825805664e-0001, + * a5 -1.47391630715542865104339398385491222143173217773e+0000, + * a6 1.86846544648220058704168877738993614912033081055e+0000, + * a7 1.82277370459347465292410106485476717352867126465e+0000, + * a8 1.25295479915214102994980294170090928673744201660e+0000, + * a9 1.96709676945198275177517643896862864494323730469e+0000, + * a10 -4.00127989749189894030934055990655906498432159424e-0001, + * a11 3.01675528558798333733648178167641162872314453125e+0000, + * a12 -9.52325445049240770778453679668018594384193420410e-0001, + * + * with remez error |(log(1+f) - P(f))/f| <= 2**-56.81 and + * + * (b). For 0.09375 <= x < 24 + * Use an 8-bit table look-up (3-bit for exponent and 5 bit for + * significand): + * Let ix stands for the high part of x in IEEE double format. + * Since 0.09375 <= x < 24, we have + * 0x3fb80000 <= ix < 0x40380000. + * Let j = (ix - 0x3fb80000) >> 15. Then 0 <= j < 256. Choose + * a Y[j] such that HIWORD(Y[j]) ~ 0x3fb8400 + (j<<15) (the middle + * number between 0x3fb80000 + (j<<15) and 3fb80000 + ((j+1)<<15)), + * and at the same time 1/Y[j] as well as log(Y[j]) are very close + * to 53-bits floating point numbers. + * A table of Y[j], 1/Y[j], and log(Y[j]) are pre-computed and thus + * log(x) = log(Y[j]) + log(1 + (x-Y[j])*(1/Y[j])) + * = log(Y[j]) + log(1 + s) + * where + * s = (x-Y[j])*(1/Y[j]) + * We compute max (x-Y[j])*(1/Y[j]) for the chosen Y[j] and obtain + * |s| < 0.0154. 
By applying remez algorithm with Product Polynomial + * Approximiation, we find the following approximated of log(1+s) + * (b1*s)*(b2+s*(b3+s))*((b4+s*b5)+(s*s)*(b6+s))*(b7+s*(b8+s)) + * with remez error |log(1+s) - P(s)| <= 2**-63.5 + * + * (c). Otherwise, get "n", the exponent of x, and then normalize x to + * z in [1,2). Then similar to (b) find a Y[i] that matches z to 5.5 + * significant bits. Then + * log(x) = n*ln2 + log(Y[i]) + log(z/Y[i]). + * + * Special cases: + * log(x) is NaN with signal if x < 0 (including -INF) ; + * log(+INF) is +INF; log(0) is -INF with signal; + * log(NaN) is that NaN with no signal. + * + * Maximum error observed: less than 0.90 ulp + * + * Constants: + * The hexadecimal values are the intended ones for the following constants. + * The decimal values may be used, provided that the compiler will convert + * from decimal to binary accurately enough to produce the hexadecimal values + * shown. + */ +/* INDENT ON */ + +#include "libm.h" + +extern const double _TBL_log[]; + +static const double P[] = { +/* ONE */ 1.0, +/* TWO52 */ 4503599627370496.0, +/* LN2HI */ 6.93147180369123816490e-01, /* 3fe62e42, fee00000 */ +/* LN2LO */ 1.90821492927058770002e-10, /* 3dea39ef, 35793c76 */ +/* A1 */ -6.88821452420390473170286327331268694251775741577e-0002, +/* A2 */ 1.97493380704769294631262255279580131173133850098e+0000, +/* A3 */ 2.24963218866067560242072431719861924648284912109e+0000, +/* A4 */ -9.02975906958474405783476868236903101205825805664e-0001, +/* A5 */ -1.47391630715542865104339398385491222143173217773e+0000, +/* A6 */ 1.86846544648220058704168877738993614912033081055e+0000, +/* A7 */ 1.82277370459347465292410106485476717352867126465e+0000, +/* A8 */ 1.25295479915214102994980294170090928673744201660e+0000, +/* A9 */ 1.96709676945198275177517643896862864494323730469e+0000, +/* A10 */ -4.00127989749189894030934055990655906498432159424e-0001, +/* A11 */ 3.01675528558798333733648178167641162872314453125e+0000, +/* A12 */ 
-9.52325445049240770778453679668018594384193420410e-0001, +/* B1 */ -1.25041641589283658575482149899471551179885864258e-0001, +/* B2 */ 1.87161713283355151891381127914642725337613123482e+0000, +/* B3 */ -1.89082956295731507978530316904652863740921020508e+0000, +/* B4 */ -2.50562891673640253387134180229622870683670043945e+0000, +/* B5 */ 1.64822828085258366037635369139024987816810607910e+0000, +/* B6 */ -1.24409107065868340669112512841820716857910156250e+0000, +/* B7 */ 1.70534231658220414296067701798165217041969299316e+0000, +/* B8 */ 1.99196833784655646937267192697618156671524047852e+0000, +}; + +#define ONE P[0] +#define TWO52 P[1] +#define LN2HI P[2] +#define LN2LO P[3] +#define A1 P[4] +#define A2 P[5] +#define A3 P[6] +#define A4 P[7] +#define A5 P[8] +#define A6 P[9] +#define A7 P[10] +#define A8 P[11] +#define A9 P[12] +#define A10 P[13] +#define A11 P[14] +#define A12 P[15] +#define B1 P[16] +#define B2 P[17] +#define B3 P[18] +#define B4 P[19] +#define B5 P[20] +#define B6 P[21] +#define B7 P[22] +#define B8 P[23] + +double +log(double x) { + double *tb, dn, dn1, s, z, r, w; + int i, hx, ix, n, lx; + + n = 0; + hx = ((int *)&x)[HIWORD]; + ix = hx & 0x7fffffff; + lx = ((int *)&x)[LOWORD]; + + /* subnormal,0,negative,inf,nan */ + if ((hx + 0x100000) < 0x200000) { + if (ix > 0x7ff00000 || (ix == 0x7ff00000 && lx != 0)) /* nan */ + return (x * x); + if (((hx << 1) | lx) == 0) /* zero */ + return (_SVID_libm_err(x, x, 16)); + if (hx < 0) /* negative */ + return (_SVID_libm_err(x, x, 17)); + if (((hx - 0x7ff00000) | lx) == 0) /* +inf */ + return (x); + + /* x must be positive and subnormal */ + x *= TWO52; + n = -52; + ix = ((int *)&x)[HIWORD]; + lx = ((int *)&x)[LOWORD]; + } + + i = ix >> 19; + if (i >= 0x7f7 && i <= 0x806) { + /* 0.09375 (0x3fb80000) <= x < 24 (0x40380000) */ + if (ix >= 0x3fec0000 && ix < 0x3ff22000) { + /* 0.875 <= x < 1.125 */ + s = x - ONE; + z = s * s; + if (((ix - 0x3ff00000) | lx) == 0) /* x = 1 */ + return (z); + r = (A10 * s) * (A11 + 
s); + w = z * s; + return (s + ((A1 * z) * + (A2 + ((A3 * s) * (A4 + s) + w * (A5 + s)))) * + ((A6 + (s * (A7 + s) + w * (A8 + s))) * + (A9 + (r + w * (A12 + s))))); + } else { + i = (ix - 0x3fb80000) >> 15; + tb = (double *)_TBL_log + (i + i + i); + s = (x - tb[0]) * tb[1]; + return (tb[2] + ((B1 * s) * (B2 + s * (B3 + s))) * + (((B4 + s * B5) + (s * s) * (B6 + s)) * + (B7 + s * (B8 + s)))); + } + } else { + dn = (double)(n + ((ix >> 20) - 0x3ff)); + dn1 = dn * LN2HI; + i = (ix & 0x000fffff) | 0x3ff00000; /* scale x to [1,2] */ + ((int *)&x)[HIWORD] = i; + i = (i - 0x3fb80000) >> 15; + tb = (double *)_TBL_log + (i + i + i); + s = (x - tb[0]) * tb[1]; + dn = dn * LN2LO + tb[2]; + return (dn1 + (dn + ((B1 * s) * (B2 + s * (B3 + s))) * + (((B4 + s * B5) + (s * s) * (B6 + s)) * + (B7 + s * (B8 + s))))); + } +} diff --git a/usr/src/lib/libm/common/C/log10.c b/usr/src/lib/libm/common/C/log10.c new file mode 100644 index 0000000000..edbb230ceb --- /dev/null +++ b/usr/src/lib/libm/common/C/log10.c @@ -0,0 +1,218 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2005 Sun Microsystems, Inc. 
All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak log10 = __log10 + +/* INDENT OFF */ +/* + * log10(x) = log(x)/log10 + * + * Base on Table look-up algorithm with product polynomial + * approximation for log(x). + * + * By K.C. Ng, Nov 29, 2004 + * + * (a). For x in [1-0.125, 1+0.125], from log.c we have + * log(x) = f + ((a1*f^2) * + * ((a2 + (a3*f)*(a4+f)) + (f^3)*(a5+f))) * + * (((a6 + f*(a7+f)) + (f^3)*(a8+f)) * + * ((a9 + (a10*f)*(a11+f)) + (f^3)*(a12+f))) + * where f = x - 1. + * (i) modify a1 <- a1 / log10 + * (ii) 1/log10 = 0.4342944819... + * = 0.4375 - 0.003205518... (7 bit shift) + * Let lgv = 0.4375 - 1/log10, then + * lgv = 0.003205518096748172348871081083395..., + * (iii) f*0.4375 is exact because f has 3 trailing zero. + * (iv) Thus, log10(x) = f*0.4375 - (lgv*f - PPoly) + * + * (b). For 0.09375 <= x < 24 + * Let j = (ix - 0x3fb80000) >> 15. Look up Y[j], 1/Y[j], and log(Y[j]) + * from _TBL_log.c. Then + * log10(x) = log10(Y[j]) + log10(1 + (x-Y[j])*(1/Y[j])) + * = log(Y[j])(1/log10) + log10(1 + s) + * where + * s = (x-Y[j])*(1/Y[j]) + * From log.c, we have log(1+s) = + * 2 2 2 + * (b s) (b + b s + s ) [b + b s + s (b + s)] (b + b s + s ) + * 1 2 3 4 5 6 7 8 + * + * By setting b1 <- b1/log10, we have + * log10(x) = 0.4375 * T - (lgv * T - POLY(s)) + * + * (c). Otherwise, get "n", the exponent of x, and then normalize x to + * z in [1,2). Then similar to (b) find a Y[i] that matches z to 5.5 + * significant bits. Then + * log(x) = n*ln2 + log(Y[i]) + log(z/Y[i]). + * log10(x) = n*(ln2/ln10) + log10(z). + * + * Special cases: + * log10(x) is NaN with signal if x < 0 (including -INF) ; + * log10(+INF) is +INF; log10(0) is -INF with signal; + * log10(NaN) is that NaN with no signal. + * + * Maximum error observed: less than 0.89 ulp + * + * Constants: + * The hexadecimal values are the intended ones for the following constants. 
+ * The decimal values may be used, provided that the compiler will convert + * from decimal to binary accurately enough to produce the hexadecimal values + * shown. + */ +/* INDENT ON */ + +#include "libm.h" + +extern const double _TBL_log[]; + +static const double P[] = { +/* ONE */ 1.0, +/* TWO52 */ 4503599627370496.0, +/* LNAHI */ 3.01029995607677847147e-01, /* 3FD34413 50900000 */ +/* LNALO */ 5.63033480667509769841e-11, /* 3DCEF3FD E623E256 */ +/* A1 */ -2.9142521960136582507385480707044582802184e-02, +/* A2 */ 1.99628461483039965074226529395673424005508422852e+0000, +/* A3 */ 2.26812367662950720159642514772713184356689453125e+0000, +/* A4 */ -9.05030639084976384900471657601883634924888610840e-0001, +/* A5 */ -1.48275767132434044270894446526654064655303955078e+0000, +/* A6 */ 1.88158320939722756293122074566781520843505859375e+0000, +/* A7 */ 1.83309386046986411145098827546462416648864746094e+0000, +/* A8 */ 1.24847063988317086291601754055591300129890441895e+0000, +/* A9 */ 1.98372421445537705508854742220137268304824829102e+0000, +/* A10 */ -3.94711735767898475035764249696512706577777862549e-0001, +/* A11 */ 3.07890395362954372160402272129431366920471191406e+0000, +/* A12 */ -9.60099585275022149311041630426188930869102478027e-0001, +/* B1 */ -5.4304894950350052960838096752491540286689e-02, +/* B2 */ 1.87161713283355151891381127914642725337613123482e+0000, +/* B3 */ -1.89082956295731507978530316904652863740921020508e+0000, +/* B4 */ -2.50562891673640253387134180229622870683670043945e+0000, +/* B5 */ 1.64822828085258366037635369139024987816810607910e+0000, +/* B6 */ -1.24409107065868340669112512841820716857910156250e+0000, +/* B7 */ 1.70534231658220414296067701798165217041969299316e+0000, +/* B8 */ 1.99196833784655646937267192697618156671524047852e+0000, +/* LGH */ 0.4375, +/* LGL */ 0.003205518096748172348871081083395, +/* LG10V */ 0.43429448190325182765112891891660509576226, +}; + +#define ONE P[0] +#define TWO52 P[1] +#define LNAHI P[2] +#define LNALO P[3] 
+#define A1 P[4] +#define A2 P[5] +#define A3 P[6] +#define A4 P[7] +#define A5 P[8] +#define A6 P[9] +#define A7 P[10] +#define A8 P[11] +#define A9 P[12] +#define A10 P[13] +#define A11 P[14] +#define A12 P[15] +#define B1 P[16] +#define B2 P[17] +#define B3 P[18] +#define B4 P[19] +#define B5 P[20] +#define B6 P[21] +#define B7 P[22] +#define B8 P[23] +#define LGH P[24] +#define LGL P[25] +#define LG10V P[26] +/* + * log10(x): base-10 logarithm of x. NaN returns x * x and +inf returns x; + * zero and negative arguments are handed to _SVID_libm_err (codes 18, 19); + * positive subnormals are first scaled by 2**52 (n = -52 compensates). + */ +double +log10(double x) { + double *tb, dn, dn1, s, z, r, w; + int i, hx, ix, n, lx; + + n = 0; + hx = ((int *)&x)[HIWORD]; + ix = hx & 0x7fffffff; + lx = ((int *)&x)[LOWORD]; + + /* subnormal,0,negative,inf,nan */ + if ((hx + 0x100000) < 0x200000) { + if (ix > 0x7ff00000 || (ix == 0x7ff00000 && lx != 0)) /* nan */ + return (x * x); + if (((hx << 1) | lx) == 0) /* zero */ + return (_SVID_libm_err(x, x, 18)); + if (hx < 0) /* negative */ + return (_SVID_libm_err(x, x, 19)); + if (((hx - 0x7ff00000) | lx) == 0) /* +inf */ + return (x); + + /* x must be positive and subnormal */ + x *= TWO52; + n = -52; + ix = ((int *)&x)[HIWORD]; + lx = ((int *)&x)[LOWORD]; + } + + i = ix >> 19; + if (i >= 0x7f7 && i <= 0x806) { + /* 0.09375 (0x3fb80000) <= x < 24 (0x40380000) */ + if (ix >= 0x3fec0000 && ix < 0x3ff20000) { + /* 0.875 <= x < 1.125 */ + s = x - ONE; + z = s * s; + if (((ix - 0x3ff00000) | lx) == 0) /* x = 1 */ + return (z); + r = (A10 * s) * (A11 + s); + w = z * s; + return (LGH * s - (LGL * s - ((A1 * z) * + ((A2 + (A3 * s) * (A4 + s)) + w * (A5 + s))) * + (((A6 + s * (A7 + s)) + w * (A8 + s)) * + ((A9 + r) + w * (A12 + s))))); + } else { + i = (ix - 0x3fb80000) >> 15; + tb = (double *)_TBL_log + (i + i + i); + s = (x - tb[0]) * tb[1]; + return (LGH * tb[2] - (LGL * tb[2] - ((B1 * s) * + (B2 + s * (B3 + s))) * + (((B4 + s * B5) + (s * s) * (B6 + s)) * + (B7 + s * (B8 + s))))); + } + } else { + dn = (double)(n + ((ix >> 20) - 0x3ff)); + dn1 = dn * LNAHI; + i = (ix & 0x000fffff) | 0x3ff00000; /* scale x to [1,2) */ + ((int *)&x)[HIWORD] = i; + i = (i - 
0x3fb80000) >> 15; + tb = (double *)_TBL_log + (i + i + i); + s = (x - tb[0]) * tb[1]; + dn = dn * LNALO + tb[2] * LG10V; + return (dn1 + (dn + ((B1 * s) * + (B2 + s * (B3 + s))) * + (((B4 + s * B5) + (s * s) * (B6 + s)) * + (B7 + s * (B8 + s))))); + } +} diff --git a/usr/src/lib/libm/common/C/log1p.c b/usr/src/lib/libm/common/C/log1p.c new file mode 100644 index 0000000000..00a19fdaf5 --- /dev/null +++ b/usr/src/lib/libm/common/C/log1p.c @@ -0,0 +1,204 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak log1p = __log1p + +/* INDENT OFF */ +/* + * Method : + * 1. Argument Reduction: find k and f such that + * 1+x = 2^k * (1+f), + * where sqrt(2)/2 < 1+f < sqrt(2) . + * + * Note. If k=0, then f=x is exact. However, if k != 0, then f + * may not be representable exactly. In that case, a correction + * term is needed. Let u=1+x rounded. Let c = (1+x)-u, then + * log(1+x) - log(u) ~ c/u. Thus, we proceed to compute log(u), + * and add back the correction term c/u. 
+ * (Note: when x > 2**53, one can simply return log(x)) + * + * 2. Approximation of log1p(f). + * Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s) + * = 2s + 2/3 s**3 + 2/5 s**5 + ....., + * = 2s + s*R + * We use a special Remez algorithm on [0,0.1716] to generate + * a polynomial of degree 14 to approximate R. The maximum error + * of this polynomial approximation is bounded by 2**-58.45. In + * other words, + * R(z) ~ Lp1*s**2 +Lp2*s**4 +Lp3*s**6 +Lp4*s**8 + * +Lp5*s**10 +Lp6*s**12 +Lp7*s**14 + * (the values of Lp1 to Lp7 are listed in the program) + * and + * | Lp1*s**2 +...+Lp7*s**14 - R(z) | <= 2**-58.45 + * Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2. + * In order to guarantee error in log below 1ulp, we compute log + * by + * log1p(f) = f - (hfsq - s*(hfsq+R)). + * + * 3. Finally, log1p(x) = k*ln2 + log1p(f). + * = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo))) + * Here ln2 is split into two floating point numbers: + * ln2_hi + ln2_lo, + * where n*ln2_hi is always exact for |n| < 2000. + * + * Special cases: + * log1p(x) is NaN with signal if x < -1 (including -INF) ; + * log1p(+INF) is +INF; log1p(-1) is -INF with signal; + * log1p(NaN) is that NaN with no signal. + * + * Accuracy: + * according to an error analysis, the error is always less than + * 1 ulp (unit in the last place). + * + * Constants: + * The hexadecimal values are the intended ones for the following + * constants. The decimal values may be used, provided that the + * compiler will convert from decimal to binary accurately enough + * to produce the hexadecimal values shown. + * + * Note: Assuming log() returns an accurate answer, the following + * algorithm can be used to compute log1p(x) to within a few ULP: + * + * u = 1+x; + * if (u == 1.0) return x ; else + * return log(u)*(x/(u-1.0)); + * + * See HP-15C Advanced Functions Handbook, p.193. 
+ */ +/* INDENT ON */ + +#include "libm.h" + +static const double xxx[] = { +/* ln2_hi */ 6.93147180369123816490e-01, /* 3fe62e42 fee00000 */ +/* ln2_lo */ 1.90821492927058770002e-10, /* 3dea39ef 35793c76 */ +/* two54 */ 1.80143985094819840000e+16, /* 43500000 00000000 */ +/* Lp1 */ 6.666666666666735130e-01, /* 3FE55555 55555593 */ +/* Lp2 */ 3.999999999940941908e-01, /* 3FD99999 9997FA04 */ +/* Lp3 */ 2.857142874366239149e-01, /* 3FD24924 94229359 */ +/* Lp4 */ 2.222219843214978396e-01, /* 3FCC71C5 1D8E78AF */ +/* Lp5 */ 1.818357216161805012e-01, /* 3FC74664 96CB03DE */ +/* Lp6 */ 1.531383769920937332e-01, /* 3FC39A09 D078C69F */ +/* Lp7 */ 1.479819860511658591e-01, /* 3FC2F112 DF3E5244 */ +/* zero */ 0.0 +}; +#define ln2_hi xxx[0] +#define ln2_lo xxx[1] +#define two54 xxx[2] +#define Lp1 xxx[3] +#define Lp2 xxx[4] +#define Lp3 xxx[5] +#define Lp4 xxx[6] +#define Lp5 xxx[7] +#define Lp6 xxx[8] +#define Lp7 xxx[9] +#define zero xxx[10] + +double +log1p(double x) { + double hfsq, f, c = 0.0, s, z, R, u; + int k, hx, hu, ax; + + hx = ((int *)&x)[HIWORD]; /* high word of x */ + ax = hx & 0x7fffffff; + + if (ax >= 0x7ff00000) { /* x is inf or nan */ + if (((hx - 0xfff00000) | ((int *)&x)[LOWORD]) == 0) /* -inf */ + return (_SVID_libm_err(x, x, 44)); + return (x * x); + } + + k = 1; + if (hx < 0x3FDA827A) { /* x < 0.41422 */ + if (ax >= 0x3ff00000) /* x <= -1.0 */ + return (_SVID_libm_err(x, x, x == -1.0 ? 43 : 44)); + if (ax < 0x3e200000) { /* |x| < 2**-29 */ + if (two54 + x > zero && /* raise inexact */ + ax < 0x3c900000) /* |x| < 2**-54 */ + return (x); + else + return (x - x * x * 0.5); + } + if (hx > 0 || hx <= (int)0xbfd2bec3) { /* -0.2929<x<0.41422 */ + k = 0; + f = x; + hu = 1; + } + } + /* We will initialize 'c' here. */ + if (k != 0) { + if (hx < 0x43400000) { + u = 1.0 + x; + hu = ((int *)&u)[HIWORD]; /* high word of u */ + k = (hu >> 20) - 1023; + /* + * correction term + */ + c = k > 0 ? 
1.0 - (u - x) : x - (u - 1.0); + c /= u; + } else { + u = x; + hu = ((int *)&u)[HIWORD]; /* high word of u */ + k = (hu >> 20) - 1023; + c = 0; + } + hu &= 0x000fffff; + if (hu < 0x6a09e) { /* normalize u */ + ((int *)&u)[HIWORD] = hu | 0x3ff00000; + } else { /* normalize u/2 */ + k += 1; + ((int *)&u)[HIWORD] = hu | 0x3fe00000; + hu = (0x00100000 - hu) >> 2; + } + f = u - 1.0; + } + hfsq = 0.5 * f * f; + if (hu == 0) { /* |f| < 2**-20 */ + if (f == zero) { + if (k == 0) + return (zero); + /* We already initialized 'c' before, when (k != 0) */ + c += k * ln2_lo; + return (k * ln2_hi + c); + } + R = hfsq * (1.0 - 0.66666666666666666 * f); + if (k == 0) + return (f - R); + return (k * ln2_hi - ((R - (k * ln2_lo + c)) - f)); + } + s = f / (2.0 + f); + z = s * s; + R = z * (Lp1 + z * (Lp2 + z * (Lp3 + z * (Lp4 + z * (Lp5 + + z * (Lp6 + z * Lp7)))))); + if (k == 0) + return (f - (hfsq - s * (hfsq + R))); + return (k * ln2_hi - ((hfsq - (s * (hfsq + R) + + (k * ln2_lo + c))) - f)); +} diff --git a/usr/src/lib/libm/common/C/log2.c b/usr/src/lib/libm/common/C/log2.c new file mode 100644 index 0000000000..6b588f17f4 --- /dev/null +++ b/usr/src/lib/libm/common/C/log2.c @@ -0,0 +1,227 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak log2 = __log2 + +/* INDENT OFF */ +/* + * log2(x) = log(x)/log2 + * + * Based on a table look-up algorithm with product polynomial + * approximation for log(x). + * + * By K.C. Ng, Nov 29, 2004 + * + * (a). For x in [1-0.125, 1+0.125], from log.c we have + * log(x) = f + ((a1*f^2) * + * ((a2 + (a3*f)*(a4+f)) + (f^3)*(a5+f))) * + * (((a6 + f*(a7+f)) + (f^3)*(a8+f)) * + * ((a9 + (a10*f)*(a11+f)) + (f^3)*(a12+f))) + * where f = x - 1. + * (i) modify a1 <- a1 / log2 + * (ii) 1/log2 = 1.4426950408889634... + * = 1.5 - 0.057304959... (4 bit shift) + * Let lv = 1.5 - 1/log2, then + * lv = 0.057304959111036592640075318998107956665325, + * (iii) f*1.5 is exact because f has 3 trailing zeros. + * (iv) Thus, log2(x) = f*1.5 - (lv*f - PPoly) + * + * (b). For 0.09375 <= x < 24 + * Let j = (ix - 0x3fb80000) >> 15. Look up Y[j], 1/Y[j], and log(Y[j]) + * from _TBL_log.c. Then + * log2(x) = log2(Y[j]) + log2(1 + (x-Y[j])*(1/Y[j])) + * = log(Y[j])(1/log2) + log2(1 + s) + * where + * s = (x-Y[j])*(1/Y[j]) + * From log.c, we have log(1+s) = + * (b1*s) * (b2 + b3*s + s^2) * [b4 + b5*s + s^2*(b6 + s)] * + * (b7 + b8*s + s^2) + * + * By setting b1 <- b1/log2, we have + * log2(x) = 1.5 * T - (lv * T - POLY(s)) + * + * (c). Otherwise, get "n", the exponent of x, and then normalize x to + * z in [1,2). Then similar to (b) find a Y[i] that matches z to 5.5 + * significant bits. Then + * log2(x) = n + log2(z). 
+ * + * Special cases: + * log2(x) is NaN with signal if x < 0 (including -INF) ; + * log2(+INF) is +INF; log2(0) is -INF with signal; + * log2(NaN) is that NaN with no signal. + * + * Maximum error observed: less than 0.84 ulp + * + * Constants: + * The hexadecimal values are the intended ones for the following constants. + * The decimal values may be used, provided that the compiler will convert + * from decimal to binary accurately enough to produce the hexadecimal values + * shown. + */ +/* INDENT ON */ + +#include "libm.h" +#include "libm_synonyms.h" +#include "libm_protos.h" + +extern const double _TBL_log[]; + +static const double P[] = { +/* ONE */ 1.0, +/* TWO52 */ 4503599627370496.0, +/* LN10V */ 1.4426950408889634073599246810018920433347, /* 1/log(2) = log2(e) */ +/* ZERO */ 0.0, +/* A1 */ -9.6809362455249638217841932228967194640116e-02, +/* A2 */ 1.99628461483039965074226529395673424005508422852e+0000, +/* A3 */ 2.26812367662950720159642514772713184356689453125e+0000, +/* A4 */ -9.05030639084976384900471657601883634924888610840e-0001, +/* A5 */ -1.48275767132434044270894446526654064655303955078e+0000, +/* A6 */ 1.88158320939722756293122074566781520843505859375e+0000, +/* A7 */ 1.83309386046986411145098827546462416648864746094e+0000, +/* A8 */ 1.24847063988317086291601754055591300129890441895e+0000, +/* A9 */ 1.98372421445537705508854742220137268304824829102e+0000, +/* A10 */ -3.94711735767898475035764249696512706577777862549e-0001, +/* A11 */ 3.07890395362954372160402272129431366920471191406e+0000, +/* A12 */ -9.60099585275022149311041630426188930869102478027e-0001, +/* B1 */ -1.8039695622547469514898963204616532885451e-01, +/* B2 */ 1.87161713283355151891381127914642725337613123482e+0000, +/* B3 */ -1.89082956295731507978530316904652863740921020508e+0000, +/* B4 */ -2.50562891673640253387134180229622870683670043945e+0000, +/* B5 */ 1.64822828085258366037635369139024987816810607910e+0000, +/* B6 */ -1.24409107065868340669112512841820716857910156250e+0000, +/* B7 
*/ 1.70534231658220414296067701798165217041969299316e+0000, +/* B8 */ 1.99196833784655646937267192697618156671524047852e+0000, +/* LGH */ 1.5, +/* LGL */ 0.057304959111036592640075318998107956665325, +}; + +#define ONE P[0] +#define TWO52 P[1] +#define LN10V P[2] +#define ZERO P[3] +#define A1 P[4] +#define A2 P[5] +#define A3 P[6] +#define A4 P[7] +#define A5 P[8] +#define A6 P[9] +#define A7 P[10] +#define A8 P[11] +#define A9 P[12] +#define A10 P[13] +#define A11 P[14] +#define A12 P[15] +#define B1 P[16] +#define B2 P[17] +#define B3 P[18] +#define B4 P[19] +#define B5 P[20] +#define B6 P[21] +#define B7 P[22] +#define B8 P[23] +#define LGH P[24] +#define LGL P[25] + +double +log2(double x) { + int i, hx, ix, n, lx; + + n = 0; + hx = ((int *) &x)[HIWORD]; ix = hx & 0x7fffffff; + lx = ((int *) &x)[LOWORD]; + + /* subnormal,0,negative,inf,nan */ + if ((hx + 0x100000) < 0x200000) { +#if defined(FPADD_TRAPS_INCOMPLETE_ON_NAN) + if (ix >= 0x7ff80000) /* assumes sparc-like QNaN */ + return (x); /* for Cheetah when x is QNaN */ +#endif + if (((hx << 1) | lx) == 0) /* log(0.0) = -inf */ + return (A5 / fabs(x)); /* A5 < 0, so negative / 0 gives -inf */ + if (hx < 0) { /* x < 0 */ + if (ix >= 0x7ff00000) + return (x - x); /* x is -inf or NaN */ + else + return (ZERO / (x - x)); + } + if (((hx - 0x7ff00000) | lx) == 0) /* log(inf) = inf */ + return (x); + if (ix >= 0x7ff00000) /* log(NaN) = NaN */ + return (x - x); + x *= TWO52; /* positive subnormal: scale by 2**52 */ + n = -52; + hx = ((int *) &x)[HIWORD]; ix = hx & 0x7fffffff; + lx = ((int *) &x)[LOWORD]; + } + + /* 0.09375 (0x3fb80000) <= x < 24 (0x40380000) */ + i = ix >> 19; + if (i >= 0x7f7 && i <= 0x806) { + /* 0.875 <= x < 1.125 */ + if (ix >= 0x3fec0000 && ix < 0x3ff20000) { + double s, z, r, w; + s = x - ONE; z = s * s; r = (A10 * s) * (A11 + s); + w = z * s; + if (((ix << 12) | lx) == 0) /* x == 1: z is exactly 0 */ + return (z); + else + return (LGH * s - (LGL * s - ((A1 * z) * + ((A2 + (A3 * s) * (A4 + s)) + w * (A5 + s))) * + (((A6 + s * (A7 + s)) + w * (A8 + s)) * + ((A9 + r) + w * (A12 + s))))); + } else { + 
double *tb, s; + i = (ix - 0x3fb80000) >> 15; + tb = (double *) _TBL_log + (i + i + i); + if (((ix << 12) | lx) == 0) /* 2's power */ + return ((double) ((ix >> 20) - 0x3ff)); + s = (x - tb[0]) * tb[1]; + return (LGH * tb[2] - (LGL * tb[2] - ((B1 * s) * + (B2 + s * (B3 + s))) * + (((B4 + s * B5) + (s * s) * (B6 + s)) * + (B7 + s * (B8 + s))))); + } + } else { + double *tb, dn, s; + dn = (double) (n + ((ix >> 20) - 0x3ff)); + ix <<= 12; + if ((ix | lx) == 0) /* fraction is zero: x is a power of 2 */ + return (dn); + i = ((unsigned) ix >> 12) | 0x3ff00000; /* scale x to [1,2) */ + ((int *) &x)[HIWORD] = i; + i = (i - 0x3fb80000) >> 15; + tb = (double *) _TBL_log + (i + i + i); + s = (x - tb[0]) * tb[1]; + return (dn + (tb[2] * LN10V + ((B1 * s) * + (B2 + s * (B3 + s))) * + (((B4 + s * B5) + (s * s) * (B6 + s)) * + (B7 + s * (B8 + s))))); + } +} diff --git a/usr/src/lib/libm/common/C/logb.c b/usr/src/lib/libm/common/C/logb.c new file mode 100644 index 0000000000..71c31e069c --- /dev/null +++ b/usr/src/lib/libm/common/C/logb.c @@ -0,0 +1,85 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 
+ * Use is subject to license terms. + */ + +#if defined(ELFOBJ) +#pragma weak logb = __logb +#pragma weak _logb = __logb +#endif + +#include "libm.h" +#include "xpg6.h" /* __xpg6 */ +#define _C99SUSv3_logb _C99SUSv3_logb_subnormal_is_like_ilogb + +#if defined(USE_FPSCALE) || defined(__x86) +static const double two52 = 4503599627370496.0; +#else +/* + * v: high part of a non-zero subnormal |x|; w: low part of |x| + * Returns -1074 plus the index of the highest set bit of the fraction. + * The shifts below narrow v to a 4-bit value; 0xffffaa50 is a packed table + * of 2-bit floor(log2(v)) entries indexed by that final value. + */ +static int +ilogb_subnormal(unsigned v, unsigned w) { + int r = -1022 - 52; + + if (v) + r += 32; + else + v = w; + if (v & 0xffff0000) + r += 16, v >>= 16; + if (v & 0xff00) + r += 8, v >>= 8; + if (v & 0xf0) + r += 4, v >>= 4; + v <<= 1; + return (r + ((0xffffaa50 >> v) & 0x3)); +} +#endif /* defined(USE_FPSCALE) || defined(__x86) */ +/* + * logb(x): exponent of x as a double. Zero is handed to _SVID_libm_err + * (code 45); a subnormal yields its true exponent when __xpg6 has + * _C99SUSv3_logb set and -1022.0 otherwise; inf and NaN return x * x. + */ +double +logb(double x) { + int *px = (int *) &x, k = px[HIWORD] & ~0x80000000; + + if (k < 0x00100000) { + if ((px[LOWORD] | k) == 0) + return (_SVID_libm_err(x, x, 45)); + else if ((__xpg6 & _C99SUSv3_logb) != 0) { +#if defined(USE_FPSCALE) || defined(__x86) + x *= two52; /* scale into the normal range; 1075 = 1023 + 52 undoes it */ + return ((double) (((px[HIWORD] & 0x7ff00000) >> 20) + - 1075)); +#else + return ((double) ilogb_subnormal(k, px[LOWORD])); +#endif + } else + return (-1022.0); + } else if (k < 0x7ff00000) + return ((double) ((k >> 20) - 1023)); + else + return (x * x); +} diff --git a/usr/src/lib/libm/common/C/matherr.c b/usr/src/lib/libm/common/C/matherr.c new file mode 100644 index 0000000000..de39843d16 --- /dev/null +++ b/usr/src/lib/libm/common/C/matherr.c @@ -0,0 +1,38 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak matherr = __matherr + +#include "libm.h" + +/* ARGSUSED0 */ +int +__matherr(struct exception *x) { + return (0); +} diff --git a/usr/src/lib/libm/common/C/nextafter.c b/usr/src/lib/libm/common/C/nextafter.c new file mode 100644 index 0000000000..9e3505fc54 --- /dev/null +++ b/usr/src/lib/libm/common/C/nextafter.c @@ -0,0 +1,87 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma weak nextafter = __nextafter +#pragma weak _nextafter = __nextafter + +#include "libm.h" +#include <float.h> /* DBL_MIN */ +/* + * nextafter(x, y): the double adjacent to x in the direction of y, built by + * stepping the 64-bit pattern of x by one. x == y returns y and a NaN + * argument returns x * y. A result whose exponent field is all ones is + * handed to _SVID_libm_err (code 46). + */ +double +nextafter(double x, double y) { + int hx, hy, k; + double ans; + unsigned lx; + volatile double dummy; + + hx = ((int *)&x)[HIWORD]; + lx = ((int *)&x)[LOWORD]; + hy = ((int *)&y)[HIWORD]; + k = (hx & ~0x80000000) | lx; + + if (x == y) + return (y); /* C99 requirement */ + if (x != x || y != y) + return (x * y); + if (k == 0) { /* x = 0 */ + k = hy & 0x80000000; + ((int *)&ans)[HIWORD] = k; + ((int *)&ans)[LOWORD] = 1; + } else if (hx >= 0) { /* x > 0 */ + if (x > y) { /* step toward zero */ + ((int *)&ans)[LOWORD] = lx - 1; + k = (lx == 0)? hx - 1 : hx; + ((int *)&ans)[HIWORD] = k; + } else { /* step away from zero */ + ((int *)&ans)[LOWORD] = lx + 1; + k = (lx == 0xffffffff)? hx + 1 : hx; + ((int *)&ans)[HIWORD] = k; + } + } else { /* x < 0 */ + if (x < y) { /* step toward zero */ + ((int *)&ans)[LOWORD] = lx - 1; + k = (lx == 0)? hx - 1 : hx; + ((int *)&ans)[HIWORD] = k; + } else { /* step away from zero */ + ((int *)&ans)[LOWORD] = lx + 1; + k = (lx == 0xffffffff)? hx + 1 : hx; + ((int *)&ans)[HIWORD] = k; + } + } + k = (k >> 20) & 0x7ff; + if (k == 0x7ff) { + /* overflow */ + return (_SVID_libm_err(x, y, 46)); +#if !defined(__lint) + } else if (k == 0) { + /* underflow */ + /* dummy is volatile so the multiply is not discarded; value unused */ + dummy = DBL_MIN * copysign(DBL_MIN, x); +#endif + } + return (ans); +} diff --git a/usr/src/lib/libm/common/C/pow.c b/usr/src/lib/libm/common/C/pow.c new file mode 100644 index 0000000000..2a790821b6 --- /dev/null +++ b/usr/src/lib/libm/common/C/pow.c @@ -0,0 +1,343 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#if defined(ELFOBJ) +#pragma weak pow = __pow +#endif + +/* + * pow(x,y) return x**y + * Method: Let x = 2**n * (1+f) + * 1. Compute and return log2(x) in two pieces: + * log2(x) = w1 + w2, + * where w1 is rounded to float precision (24 significant bits). + * 2. Perform y*log2(x) by simulating multi-precision arithmetic + * 3. Return x**y = exp2(y*log2(x)) + * + * Special cases: + * 1. (anything) ** +-0 is 1 + * 1'. 1 ** (anything) is 1 (C99; 1 ** +-INF/NAN used to be NAN) + * 2. (anything) ** 1 is itself + * 3. (anything except 1) ** NAN is NAN ("except 1" is C99) + * 4. NAN ** (anything except 0) is NAN + * 5. +-(|x| > 1) ** +INF is +INF + * 6. +-(|x| > 1) ** -INF is +0 + * 7. +-(|x| < 1) ** +INF is +0 + * 8. +-(|x| < 1) ** -INF is +INF + * 9. -1 ** +-INF is 1 (C99; -1 ** +-INF used to be NAN) + * 10. +0 ** (+anything except 0, NAN) is +0 + * 11. -0 ** (+anything except 0, NAN, odd integer) is +0 + * 12. +0 ** (-anything except 0, NAN) is +INF + * 13. -0 ** (-anything except 0, NAN, odd integer) is +INF + * 14. -0 ** (odd integer) = -( +0 ** (odd integer) ) + * 15. +INF ** (+anything except 0,NAN) is +INF + * 16. +INF ** (-anything except 0,NAN) is +0 + * 17. -INF ** (anything) = -0 ** (-anything) + * 18. (-anything) ** (integer) is (-1)**(integer)*(+anything**integer) + * 19. (-anything except 0 and inf) ** (non-integer) is NAN + * + * Accuracy: + * pow(x,y) returns x**y nearly rounded. 
In particular + * pow(integer,integer) + * always returns the correct integer provided it is representable. + */ + +#include "libm.h" +#include "xpg6.h" /* __xpg6 */ +#define _C99SUSv3_pow _C99SUSv3_pow_treats_Inf_as_an_even_int + +static const double zero = 0.0, one = 1.0, two = 2.0; + +extern const double _TBL_log2_hi[], _TBL_log2_lo[]; +static const double + two53 = 9007199254740992.0, + A1_hi = 2.8853900432586669921875, + A1_lo = 3.8519259825035041963606002e-8, + A1 = 2.885390081777926817222541963606002026086e+0000, + A2 = 9.617966939207270828380543979852286255862e-0001, + A3 = 5.770807680887875964868853124873696201995e-0001, + B0_hi = 2.8853900432586669921875, + B0_lo = 3.8519259822532793056374320585e-8, + B0 = 2.885390081777926814720293056374320585689e+0000, + B1 = 9.617966939259755138949202350396200257632e-0001, + B2 = 5.770780163585687000782112776448797953382e-0001, + B3 = 4.121985488948771523290174512461778354953e-0001, + B4 = 3.207590534812432970433641789022666850193e-0001; +/* + * log2_x(x, w): return log2(x) in two pieces. The return value is the + * leading piece, rounded to float precision; the remainder is stored in *w. + * x == 1 yields exactly zero for both pieces. + */ +static double +log2_x(double x, double *w) { + double f, s, z, qn, h, t; + int *px = (int *) &x; + int *pz = (int *) &z; + int i, j, ix, n; + + n = 0; + ix = px[HIWORD]; + if (ix >= 0x3fef03f1 && ix < 0x3ff08208) { /* 65/63 > x > 63/65 */ + double f1, v; + f = x - one; + if (((ix - 0x3ff00000) | px[LOWORD]) == 0) { + *w = zero; + return (zero); /* log2(1)= +0 */ + } + qn = one / (two + f); + s = f * qn; /* |s|<2**-6 */ + v = s * s; + h = (double) ((float) s); + f1 = (double) ((float) f); + t = qn * (((f - two * h) - h * f1) - h * (f - f1)); + /* s = h+t */ + f1 = h * B0_lo + s * (v * (B1 + v * (B2 + v * (B3 + v * B4)))); + t = f1 + t * B0; + h *= B0_hi; + s = (double) ((float) (h + t)); + *w = t - (s - h); + return (s); + } + if (ix < 0x00100000) { /* subnormal x */ + x *= two53; + n = -53; + ix = px[HIWORD]; + } + /* LARGE N */ + n += ((ix + 0x1000) >> 20) - 0x3ff; + ix = (ix & 0x000fffff) | 0x3ff00000; /* scale x to [1,2] */ + px[HIWORD] = ix; + i = ix + 0x1000; + pz[HIWORD] = i & 
0xffffe000; + pz[LOWORD] = 0; + qn = one / (x + z); + f = x - z; + s = f * qn; + h = (double) ((float) s); + t = qn * ((f - (h + h) * z) - h * f); + j = (i >> 13) & 0x7f; + f = s * s; + t = t * A1 + h * A1_lo; + t += (s * f) * (A2 + f * A3); + qn = h * A1_hi; + s = n + _TBL_log2_hi[j]; + h = qn + s; + t += _TBL_log2_lo[j] - ((h - s) - qn); + f = (double) ((float) (h + t)); + *w = t - (f - h); + return (f); +} + +extern const double _TBL_exp2_hi[], _TBL_exp2_lo[]; +static const double /* poly app of 2^x-1 on [-1e-10,2^-7+1e-10] */ + E1 = 6.931471805599453100674958533810346197328e-0001, + E2 = 2.402265069587779347846769151717493815979e-0001, + E3 = 5.550410866475410512631124892773937864699e-0002, + E4 = 9.618143209991026824853712740162451423355e-0003, + E5 = 1.333357676549940345096774122231849082991e-0003; + +double +pow(double x, double y) { + double z, ax; + double y1, y2, w1, w2; + int sbx, sby, j, k, yisint; + int hx, hy, ahx, ahy; + unsigned lx, ly; + int *pz = (int *) &z; + + hx = ((int *) &x)[HIWORD]; + lx = ((unsigned *) &x)[LOWORD]; + hy = ((int *) &y)[HIWORD]; + ly = ((unsigned *) &y)[LOWORD]; + ahx = hx & ~0x80000000; + ahy = hy & ~0x80000000; + if ((ahy | ly) == 0) { /* y==zero */ + if ((ahx | lx) == 0) + z = _SVID_libm_err(x, y, 20); /* +-0**+-0 */ + else if ((ahx | (((lx | -lx) >> 31) & 1)) > 0x7ff00000) + z = _SVID_libm_err(x, y, 42); /* NaN**+-0 */ + else + z = one; /* x**+-0 = 1 */ + return (z); + } else if (hx == 0x3ff00000 && lx == 0 && + (__xpg6 & _C99SUSv3_pow) != 0) + return (one); /* C99: 1**anything = 1 */ + else if (ahx > 0x7ff00000 || (ahx == 0x7ff00000 && lx != 0) || + ahy > 0x7ff00000 || (ahy == 0x7ff00000 && ly != 0)) + return (x * y); /* +-NaN return x*y; + -> * for Cheetah */ + /* includes Sun: 1**NaN = NaN */ + sbx = (unsigned) hx >> 31; + sby = (unsigned) hy >> 31; + ax = fabs(x); + + /* + * determine if y is an odd int when x < 0 + * yisint = 0 ... y is not an integer + * yisint = 1 ... y is an odd int + * yisint = 2 ... 
y is an even int + */ + yisint = 0; + if (sbx) { + if (ahy >= 0x43400000) + yisint = 2; /* even integer y */ + else if (ahy >= 0x3ff00000) { + k = (ahy >> 20) - 0x3ff; /* exponent */ + if (k > 20) { + j = ly >> (52 - k); + if ((j << (52 - k)) == ly) + yisint = 2 - (j & 1); + } else if (ly == 0) { + j = ahy >> (20 - k); + if ((j << (20 - k)) == ahy) + yisint = 2 - (j & 1); + } + } + } + /* special value of y */ + if (ly == 0) { + if (ahy == 0x7ff00000) { /* y is +-inf */ + if (((ahx - 0x3ff00000) | lx) == 0) { + if ((__xpg6 & _C99SUSv3_pow) != 0) + return (one); + /* C99: (-1)**+-inf = 1 */ + else + return (y - y); + /* Sun: (+-1)**+-inf = NaN */ + } else if (ahx >= 0x3ff00000) + /* (|x|>1)**+,-inf = inf,0 */ + return (sby == 0 ? y : zero); + else /* (|x|<1)**-,+inf = inf,0 */ + return (sby != 0 ? -y : zero); + } + if (ahy == 0x3ff00000) { /* y is +-1 */ + if (sby != 0) { /* y is -1 */ + if (x == zero) /* divided by zero */ + return (_SVID_libm_err(x, y, 23)); + else if (ahx < 0x40000 || ((ahx - 0x40000) | + lx) == 0) /* overflow */ + return (_SVID_libm_err(x, y, 21)); + else + return (one / x); + } else + return (x); + } + if (hy == 0x40000000) { /* y is 2 */ + if (ahx >= 0x5ff00000 && ahx < 0x7ff00000) + return (_SVID_libm_err(x, y, 21)); + /* x*x overflow */ + else if ((ahx < 0x1e56a09e && (ahx | lx) != 0) || + (ahx == 0x1e56a09e && lx < 0x667f3bcd)) + return (_SVID_libm_err(x, y, 22)); + /* x*x underflow */ + else + return (x * x); + } + if (hy == 0x3fe00000) { + if (!((ahx | lx) == 0 || ((ahx - 0x7ff00000) | lx) == + 0 || sbx == 1)) + return (sqrt(x)); /* y is 0.5 and x > 0 */ + } + } + /* special value of x */ + if (lx == 0) { + if (ahx == 0x7ff00000 || ahx == 0 || ahx == 0x3ff00000) { + /* x is +-0,+-inf,+-1 (+1 only when the C99 early return is off) */ + z = ax; + if (sby == 1) { + z = one / z; /* z = |x|**y */ + if (ahx == 0) + return (_SVID_libm_err(x, y, 23)); + } + if (sbx == 1) { + if (ahx == 0x3ff00000 && yisint == 0) + z = _SVID_libm_err(x, y, 24); + /* neg**non-integral is NaN + invalid */ + 
else if (yisint == 1) + z = -z; /* (x<0)**odd = -(|x|**odd) */ + } + return (z); + } + } + /* (x<0)**(non-int) is NaN */ + if (sbx == 1 && yisint == 0) + return (_SVID_libm_err(x, y, 24)); + /* Now ax is finite, y is finite */ + /* first compute log2(ax) = w1+w2, with 24 bits w1 */ + w1 = log2_x(ax, &w2); + + /* split up y into y1+y2 and compute (y1+y2)*(w1+w2) */ + if (((ly & 0x07ffffff) == 0) || ahy >= 0x47e00000 || + ahy <= 0x38100000) { + /* no need to split if y is short or too large or too small */ + y1 = y * w1; + y2 = y * w2; + } else { + y1 = (double) ((float) y); + y2 = (y - y1) * w1 + y * w2; + y1 *= w1; + } + z = y1 + y2; + j = pz[HIWORD]; + if (j >= 0x40900000) { /* z >= 1024 */ + if (!(j == 0x40900000 && pz[LOWORD] == 0)) /* z > 1024 */ + return (_SVID_libm_err(x, y, 21)); /* overflow */ + else { + w2 = y1 - z; + w2 += y2; + /* rounded to inf */ + if (w2 >= -8.008566259537296567160e-17) + return (_SVID_libm_err(x, y, 21)); + /* overflow */ + } + } else if ((j & ~0x80000000) >= 0x4090cc00) { /* z <= -1075 */ + if (!(j == 0xc090cc00 && pz[LOWORD] == 0)) /* z < -1075 */ + return (_SVID_libm_err(x, y, 22)); /* underflow */ + else { + w2 = y1 - z; + w2 += y2; + if (w2 <= zero) /* underflow */ + return (_SVID_libm_err(x, y, 22)); + } + } + /* + * compute 2**(k+f[j]+g) + */ + k = (int) (z * 64.0 + (((hy ^ (ahx - 0x3ff00000)) > 0) ? 
0.5 : -0.5)); + j = k & 63; + w1 = y2 - ((double) k * 0.015625 - y1); + w2 = _TBL_exp2_hi[j]; + z = _TBL_exp2_lo[j] + (w2 * w1) * (E1 + w1 * (E2 + w1 * (E3 + w1 * + (E4 + w1 * E5)))); + z += w2; + k >>= 6; + if (k < -1021) /* possibly subnormal output */ + z = scalbn(z, k); + else /* normal output: add k to the exponent field directly */ + pz[HIWORD] += k << 20; + if (sbx == 1 && yisint == 1) + z = -z; /* (-ve)**(odd int) */ + return (z); +} diff --git a/usr/src/lib/libm/common/C/remainder.c b/usr/src/lib/libm/common/C/remainder.c new file mode 100644 index 0000000000..ababc45cfb --- /dev/null +++ b/usr/src/lib/libm/common/C/remainder.c @@ -0,0 +1,87 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak remainder = __remainder + +/* + * remainder(x,p) + * Code originated from 4.3bsd. + * Modified by K.C. Ng for SUN 4.0 libm. + * Return : + * returns x REM p = x - [x/p]*p as if in infinitely precise arithmetic, + * where [x/p] is the (infinite bit) integer nearest x/p (in half way + * case choose the even one). 
+ * Method : + * Based on fmod() return x-[x/p]chopped*p exactly. + */ + +#include "libm.h" + +static const double zero = 0.0, half = 0.5; + +double +remainder(double x, double p) { + double halfp; + int ix, hx, hp; + + ix = ((int *)&x)[HIWORD]; + hx = ix & ~0x80000000; + hp = ((int *)&p)[HIWORD] & ~0x80000000; + + if (hp > 0x7ff00000 || (hp == 0x7ff00000 && ((int *)&p)[LOWORD] != 0)) + return (x * p); + if (hx > 0x7ff00000 || (hx == 0x7ff00000 && ((int *)&x)[LOWORD] != 0)) + return (x * p); + + if ((hp | ((int *)&p)[LOWORD]) == 0 || hx == 0x7ff00000) + return (_SVID_libm_err(x, p, 28)); + + p = fabs(p); + if (hp < 0x7fe00000) + x = fmod(x, p + p); + x = fabs(x); + if (hp < 0x00200000) { + if (x + x > p) { + if (x == p) /* avoid x-x=-0 in RM mode */ + return ((ix < 0)? -zero : zero); + x -= p; + if (x + x >= p) + x -= p; + } + } else { + halfp = half * p; + if (x > halfp) { + if (x == p) /* avoid x-x=-0 in RM mode */ + return ((ix < 0)? -zero : zero); + x -= p; + if (x >= halfp) + x -= p; + } + } + return ((ix < 0)? -x : x); +} diff --git a/usr/src/lib/libm/common/C/rint.c b/usr/src/lib/libm/common/C/rint.c new file mode 100644 index 0000000000..f272969286 --- /dev/null +++ b/usr/src/lib/libm/common/C/rint.c @@ -0,0 +1,73 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak rint = __rint + +/* + * rint(x) return x rounded to integral according to the rounding direction + * rint(x) returns result with the same sign as x's, including 0.0. + */ + +#include "libm.h" + +#if defined(__i386) && !defined(__amd64) && (!defined(__FLT_EVAL_METHOD__) || \ + __FLT_EVAL_METHOD__ != 0) +extern enum fp_precision_type __swapRP(enum fp_precision_type); +#define DECLRP(x) enum fp_precision_type x; +#define SWAPRP(new, x) x = __swapRP(new); +#define RESTRP(x) (void) __swapRP(x); +#else +#define DECLRP(x) +#define SWAPRP(new, x) +#define RESTRP(x) +#endif + +static const double + two52 = 4503599627370496.0, + zero = 0.0, + one = 1.0; + +double +rint(double x) { + DECLRP(rp) + double t, w; + int ix, hx; + + ix = ((int *)&x)[HIWORD]; + hx = ix & ~0x80000000; + + if (hx >= 0x43300000) + return (x * one); + t = (ix < 0)? -two52 : two52; + SWAPRP(fp_double, rp) /* set precision mode to double */ + w = x + t; /* x+sign(x)*2**52 rounded */ + RESTRP(rp) /* restore precision mode */ + if (w == t) + return ((ix < 0)? -zero : zero); + return (w - t); +} diff --git a/usr/src/lib/libm/common/C/scalb.c b/usr/src/lib/libm/common/C/scalb.c new file mode 100644 index 0000000000..ea5555266a --- /dev/null +++ b/usr/src/lib/libm/common/C/scalb.c @@ -0,0 +1,73 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak scalb = __scalb +#pragma weak _scalb = __scalb + +#include "libm.h" + +double +scalb(double x, double fn) { + int hn, in, n; + double z; + + if (isnan(x) || isnan(fn)) + return (x * fn); + + in = ((int *)&fn)[HIWORD]; + hn = in & ~0x80000000; + if (hn == 0x7ff00000) /* fn is inf */ + return (_SVID_libm_err(x, fn, 47)); + + /* see if fn is an integer without raising inexact */ + if (hn >= 0x43300000) { + /* |fn| >= 2^52, so it must be an integer */ + n = (in < 0)? -65000 : 65000; + } else if (hn < 0x3ff00000) { + /* |fn| < 1, so it must be zero or non-integer */ + return ((fn == 0.0)? x : (x - x) / (x - x)); + } else if (hn < 0x41400000) { + /* |fn| < 2^21 */ + if ((hn & ((1 << (0x413 - (hn >> 20))) - 1)) + | ((int *)&fn)[LOWORD]) + return ((x - x) / (x - x)); + n = (int)fn; + } else { + if (((int *)&fn)[LOWORD] & ((1 << (0x433 - (hn >> 20))) - 1)) + return ((x - x) / (x - x)); + n = (in < 0)? 
-65000 : 65000; + } + z = scalbn(x, n); + if (z != x) { + if (z == 0.0) + return (_SVID_libm_err(x, fn, 33)); + if (!finite(z)) + return (_SVID_libm_err(x, fn, 32)); + } + return (z); +} diff --git a/usr/src/lib/libm/common/C/scalbn.c b/usr/src/lib/libm/common/C/scalbn.c new file mode 100644 index 0000000000..1d344b8cbe --- /dev/null +++ b/usr/src/lib/libm/common/C/scalbn.c @@ -0,0 +1,120 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma weak scalbn = __scalbn + +#include "libm.h" + +static const double + one = 1.0, + huge = 1.0e300, + tiny = 1.0e-300, + twom54 = 5.5511151231257827021181583404541015625e-17; + +#if defined(USE_FPSCALE) || defined(__x86) +static const double two52 = 4503599627370496.0; +#else +/* + * Normalize non-zero subnormal x and return biased exponent of x in [-51,0] + */ +static int +ilogb_biased(unsigned *px) { + int s = 52; + unsigned v = px[HIWORD] & ~0x80000000, w = px[LOWORD], t = v; + + if (t) + s -= 32; + else + t = w; + if (t & 0xffff0000) + s -= 16, t >>= 16; + if (t & 0xff00) + s -= 8, t >>= 8; + if (t & 0xf0) + s -= 4, t >>= 4; + t <<= 1; + s -= (0xffffaa50 >> t) & 0x3; + if (s < 32) { + v = (v << s) | w >> (32 - s); + w <<= s; + } else { + v = w << (s - 32); + w = 0; + } + px[HIWORD] = (px[HIWORD] & 0x80000000) | v; + px[LOWORD] = w; + return (1 - s); +} +#endif /* defined(USE_FPSCALE) */ + +double +scalbn(double x, int n) { + int *px, ix, hx, k; + + px = (int *)&x; + ix = px[HIWORD]; + hx = ix & ~0x80000000; + k = hx >> 20; + + if (k == 0x7ff) /* x is inf or NaN */ + return (x * one); + + if (k == 0) { + if ((hx | px[LOWORD]) == 0 || n == 0) + return (x); +#if defined(USE_FPSCALE) || defined(__x86) + x *= two52; + ix = px[HIWORD]; + k = ((ix & ~0x80000000) >> 20) - 52; +#else + k = ilogb_biased((unsigned *)px); + ix = px[HIWORD]; +#endif + /* now k is in the range -51..0 */ + k += n; + if (k > n) /* integer overflow occurred */ + k = -100; + } else { + /* k is in the range 1..1023 */ + k += n; + if (k < n) /* integer overflow occurred */ + k = 0x7ff; + } + + if (k > 0x7fe) + return (huge * ((ix < 0)? -huge : huge)); + if (k < 1) { + if (k <= -54) + return (tiny * ((ix < 0)? 
-tiny : tiny)); + k += 54; + px[HIWORD] = (ix & ~0x7ff00000) | (k << 20); + return (x * twom54); + } + px[HIWORD] = (ix & ~0x7ff00000) | (k << 20); + return (x); +} diff --git a/usr/src/lib/libm/common/C/signgam.c b/usr/src/lib/libm/common/C/signgam.c new file mode 100644 index 0000000000..8eaf8cda22 --- /dev/null +++ b/usr/src/lib/libm/common/C/signgam.c @@ -0,0 +1,35 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak signgam = __signgam + +#include "libm_synonyms.h" +#include <math.h> + +int signgam = 0; diff --git a/usr/src/lib/libm/common/C/significand.c b/usr/src/lib/libm/common/C/significand.c new file mode 100644 index 0000000000..4f2e3f95a6 --- /dev/null +++ b/usr/src/lib/libm/common/C/significand.c @@ -0,0 +1,50 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#if defined(ELFOBJ) +#pragma weak significand = __significand +#endif + +#include "libm.h" + +double +significand(double x) { + int ix = ((int *) &x)[HIWORD] & ~0x80000000; + + /* weed out 0/+-Inf/NaN because C99 ilogb raises invalid on them */ + if ((ix | ((int *) &x)[LOWORD]) == 0 || ix >= 0x7ff00000) +#if defined(FPADD_TRAPS_INCOMPLETE_ON_NAN) + return ((ix & 0x80000) != 0 ? x : x + x); + /* assumes sparc-like QNaN */ +#else + return (x + x); +#endif + else + return (scalbn(x, -ilogb(x))); +} diff --git a/usr/src/lib/libm/common/C/sin.c b/usr/src/lib/libm/common/C/sin.c new file mode 100644 index 0000000000..d15f54f1b5 --- /dev/null +++ b/usr/src/lib/libm/common/C/sin.c @@ -0,0 +1,189 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak sin = __sin + +/* INDENT OFF */ +/* + * sin(x) + * Accurate Table look-up algorithm by K.C. Ng, May, 1995. + * + * Algorithm: see sincos.c + */ + +#include "libm.h" + +static const double sc[] = { +/* ONE = */ 1.0, +/* NONE = */ -1.0, +/* + * |sin(x) - (x+pp1*x^3+pp2*x^5)| <= 2^-58.79 for |x| < 0.008 + */ +/* PP1 = */ -0.166666666666316558867252052378889521480627858683055567, +/* PP2 = */ .008333315652997472323564894248466758248475374977974017927, +/* + * |(sin(x) - (x+p1*x^3+...+p4*x^9)| + * |------------------------------ | <= 2^-57.63 for |x| < 0.1953125 + * | x | + */ +/* P1 = */ -1.666666666666629669805215138920301589656e-0001, +/* P2 = */ 8.333333332390951295683993455280336376663e-0003, +/* P3 = */ -1.984126237997976692791551778230098403960e-0004, +/* P4 = */ 2.753403624854277237649987622848330351110e-0006, +/* + * |cos(x) - (1+qq1*x^2+qq2*x^4)| <= 2^-55.99 for |x| <= 0.008 (0x3f80624d) + */ +/* QQ1 = */ -0.4999999999975492381842911981948418542742729, +/* QQ2 = */ 0.041666542904352059294545209158357640398771740, +/* PI_H = */ 3.1415926535897931159979634685, +/* PI_L = */ 1.22464679914735317722606593227425e-16, +/* PI_L0 = */ 1.22464679914558443311283879205095e-16, +/* PI_L1 = */ 1.768744113227140223300005233735517376e-28, +/* PI2_H = */ 6.2831853071795862319959269370, +/* PI2_L = */ 2.44929359829470635445213186454850e-16, +/* PI2_L0 = */ 2.44929359829116886622567758410190e-16, +/* PI2_L1 = 
*/ 3.537488226454280446600010467471034752e-28, +}; +/* INDENT ON */ + +#define ONEA sc +#define ONE sc[0] +#define NONE sc[1] +#define PP1 sc[2] +#define PP2 sc[3] +#define P1 sc[4] +#define P2 sc[5] +#define P3 sc[6] +#define P4 sc[7] +#define QQ1 sc[8] +#define QQ2 sc[9] +#define PI_H sc[10] +#define PI_L sc[11] +#define PI_L0 sc[12] +#define PI_L1 sc[13] +#define PI2_H sc[14] +#define PI2_L sc[15] +#define PI2_L0 sc[16] +#define PI2_L1 sc[17] + +extern const double _TBL_sincos[], _TBL_sincosx[]; + +double +sin(double x) { + double z, y[2], w, s, v, p, q; + int i, j, n, hx, ix, lx; + + hx = ((int *)&x)[HIWORD]; + lx = ((int *)&x)[LOWORD]; + ix = hx & ~0x80000000; + + if (ix <= 0x3fc50000) { /* |x| < .1640625 */ + if (ix < 0x3e400000) /* |x| < 2**-27 */ + if ((int)x == 0) + return (x); + z = x * x; + if (ix < 0x3f800000) /* |x| < 2**-8 */ + w = (z * x) * (PP1 + z * PP2); + else + w = (x * z) * ((P1 + z * P2) + (z * z) * (P3 + z * P4)); + return (x + w); + } + + /* for .1640625 < x < M, */ + n = ix >> 20; + if (n < 0x402) { /* x < 8 */ + i = (((ix >> 12) & 0xff) | 0x100) >> (0x401 - n); + j = i - 10; + x = fabs(x); + v = x - _TBL_sincosx[j]; + if (((j - 181) ^ (j - 201)) < 0) { + /* near pi, sin(x) = sin(pi-x) */ + p = PI_H - x; + i = ix - 0x400921fb; + x = p + PI_L; + if ((i | ((lx - 0x54442D00) & 0xffffff00)) == 0) { + /* very close to pi */ + x = p + PI_L0; + return ((hx >= 0)? x + PI_L1 : -(x + PI_L1)); + } + z = x * x; + if (((ix - 0x40092000) >> 11) == 0) { + /* |pi-x|<2**-8 */ + w = PI_L + (z * x) * (PP1 + z * PP2); + } else { + w = PI_L + (z * x) * ((P1 + z * P2) + + (z * z) * (P3 + z * P4)); + } + return ((hx >= 0)? p + w : -p - w); + } + s = v * v; + if (((j - 382) ^ (j - 402)) < 0) { + /* near 2pi, sin(x) = sin(x-2pi) */ + p = x - PI2_H; + i = ix - 0x401921fb; + x = p - PI2_L; + if ((i | ((lx - 0x54442D00) & 0xffffff00)) == 0) { + /* very close to 2pi */ + x = p - PI2_L0; + return ((hx >= 0)? 
x - PI2_L1 : -(x - PI2_L1)); + } + z = x * x; + if (((ix - 0x40192000) >> 10) == 0) { + /* |x-2pi|<2**-8 */ + w = (z * x) * (PP1 + z * PP2) - PI2_L; + } else { + w = (z * x) * ((P1 + z * P2) + + (z * z) * (P3 + z * P4)) - PI2_L; + } + return ((hx >= 0)? p + w : -p - w); + } + j <<= 1; + w = _TBL_sincos[j+1]; + z = _TBL_sincos[j]; + p = v + (v * s) * (PP1 + s * PP2); + q = s * (QQ1 + s * QQ2); + v = w * p + z * q; + return ((hx >= 0)? z + v : -z - v); + } + + if (ix >= 0x7ff00000) /* sin(Inf or NaN) is NaN */ + return (x / x); + + /* argument reduction needed */ + n = __rem_pio2(x, y); + switch (n & 3) { + case 0: + return (__k_sin(y[0], y[1])); + case 1: + return (__k_cos(y[0], y[1])); + case 2: + return (-__k_sin(y[0], y[1])); + default: + return (-__k_cos(y[0], y[1])); + } +} diff --git a/usr/src/lib/libm/common/C/sincos.c b/usr/src/lib/libm/common/C/sincos.c new file mode 100644 index 0000000000..461ba30f7f --- /dev/null +++ b/usr/src/lib/libm/common/C/sincos.c @@ -0,0 +1,368 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 
+ * Use is subject to license terms. + */ + +#pragma weak sincos = __sincos + +/* INDENT OFF */ +/* + * sincos(x,s,c) + * Accurate Table look-up algorithm by K.C. Ng, 2000. + * + * 1. Reduce x to x>0 by cos(-x)=cos(x), sin(-x)=-sin(x). + * 2. For 0<= x < 8, let i = (64*x chopped)-10. Let d = x - a[i], where + * a[i] is a double that is close to (i+10.5)/64 (and hence |d|< 10.5/64) + * and such that sin(a[i]) and cos(a[i]) is close to a double (with error + * less than 2**-8 ulp). Then + * + * cos(x) = cos(a[i]+d) = cos(a[i])cos(d) - sin(a[i])*sin(d) + * = TBL_cos_a[i]*(1+QQ1*d^2+QQ2*d^4) - + * TBL_sin_a[i]*(d+PP1*d^3+PP2*d^5) + * = TBL_cos_a[i] + (TBL_cos_a[i]*d^2*(QQ1+QQ2*d^2) - + * TBL_sin_a[i]*(d+PP1*d^3+PP2*d^5)) + * + * sin(x) = sin(a[i]+d) = sin(a[i])cos(d) + cos(a[i])*sin(d) + * = TBL_sin_a[i]*(1+QQ1*d^2+QQ2*d^4) + + * TBL_cos_a[i]*(d+PP1*d^3+PP2*d^5) + * = TBL_sin_a[i] + (TBL_sin_a[i]*d^2*(QQ1+QQ2*d^2) + + * TBL_cos_a[i]*(d+PP1*d^3+PP2*d^5)) + * + * Note: for x close to n*pi/2, special treatment is need for either + * sin or cos: + * i in [81, 100] ( pi/2 +-10.5/64 => tiny cos(x) = sin(pi/2-x) + * i in [181,200] ( pi +-10.5/64 => tiny sin(x) = sin(pi-x) + * i in [282,301] ( 3pi/2+-10.5/64 => tiny cos(x) = sin(x-3pi/2) + * i in [382,401] ( 2pi +-10.5/64 => tiny sin(x) = sin(x-2pi) + * i in [483,502] ( 5pi/2+-10.5/64 => tiny cos(x) = sin(5pi/2-x) + * + * 3. For x >= 8.0, use kernel function __rem_pio2 to perform argument + * reduction and call __k_sincos_ to compute sin and cos. + * + * kernel function: + * __rem_pio2 ... argument reduction routine + * __k_sincos_ ... sine and cosine function on [-pi/4,pi/4] + * + * Method. + * Let S and C denote the sin and cos respectively on [-PI/4, +PI/4]. + * 1. Assume the argument x is reduced to y1+y2 = x-k*pi/2 in + * [-pi/2 , +pi/2], and let n = k mod 4. + * 2. Let S=S(y1+y2), C=C(y1+y2). 
Depending on n, we have + * + * n sin(x) cos(x) tan(x) + * ---------------------------------------------------------- + * 0 S C S/C + * 1 C -S -C/S + * 2 -S -C S/C + * 3 -C S -C/S + * ---------------------------------------------------------- + * + * Special cases: + * Let trig be any of sin, cos, or tan. + * trig(+-INF) is NaN, with signals; + * trig(NaN) is that NaN; + * + * Accuracy: + * TRIG(x) returns trig(x) nearly rounded (less than 1 ulp) + */ + +#include "libm.h" + +static const double sc[] = { +/* ONE = */ 1.0, +/* NONE = */ -1.0, +/* + * |sin(x) - (x+pp1*x^3+pp2*x^5)| <= 2^-58.79 for |x| < 0.008 + */ +/* PP1 = */ -0.166666666666316558867252052378889521480627858683055567, +/* PP2 = */ .008333315652997472323564894248466758248475374977974017927, +/* + * |(sin(x) - (x+p1*x^3+...+p4*x^9)| + * |------------------------------ | <= 2^-57.63 for |x| < 0.1953125 + * | x | + */ +/* P1 = */ -1.666666666666629669805215138920301589656e-0001, +/* P2 = */ 8.333333332390951295683993455280336376663e-0003, +/* P3 = */ -1.984126237997976692791551778230098403960e-0004, +/* P4 = */ 2.753403624854277237649987622848330351110e-0006, +/* + * |cos(x) - (1+qq1*x^2+qq2*x^4)| <= 2^-55.99 for |x| <= 0.008 (0x3f80624d) + */ +/* QQ1 = */ -0.4999999999975492381842911981948418542742729, +/* QQ2 = */ 0.041666542904352059294545209158357640398771740, +/* Q1 = */ -0.5, +/* Q2 = */ 4.166666666500350703680945520860748617445e-0002, +/* Q3 = */ -1.388888596436972210694266290577848696006e-0003, +/* Q4 = */ 2.478563078858589473679519517892953492192e-0005, +/* PIO2_H = */ 1.570796326794896557999, +/* PIO2_L = */ 6.123233995736765886130e-17, +/* PIO2_L0 = */ 6.123233995727922165564e-17, +/* PIO2_L1 = */ 8.843720566135701120255e-29, +/* PI_H = */ 3.1415926535897931159979634685, +/* PI_L = */ 1.22464679914735317722606593227425e-16, +/* PI_L0 = */ 1.22464679914558443311283879205095e-16, +/* PI_L1 = */ 1.768744113227140223300005233735517376e-28, +/* PI3O2_H = */ 4.712388980384689673997, +/* PI3O2_L = */ 
1.836970198721029765839e-16, +/* PI3O2_L0 = */ 1.836970198720396133587e-16, +/* PI3O2_L1 = */ 6.336322524749201142226e-29, +/* PI2_H = */ 6.2831853071795862319959269370, +/* PI2_L = */ 2.44929359829470635445213186454850e-16, +/* PI2_L0 = */ 2.44929359829116886622567758410190e-16, +/* PI2_L1 = */ 3.537488226454280446600010467471034752e-28, +/* PI5O2_H = */ 7.853981633974482789995, +/* PI5O2_L = */ 3.061616997868382943065e-16, +/* PI5O2_L0 = */ 3.061616997861941598865e-16, +/* PI5O2_L1 = */ 6.441344200433640781982e-28, +}; +/* INDENT ON */ + +#define ONE sc[0] +#define PP1 sc[2] +#define PP2 sc[3] +#define P1 sc[4] +#define P2 sc[5] +#define P3 sc[6] +#define P4 sc[7] +#define QQ1 sc[8] +#define QQ2 sc[9] +#define Q1 sc[10] +#define Q2 sc[11] +#define Q3 sc[12] +#define Q4 sc[13] +#define PIO2_H sc[14] +#define PIO2_L sc[15] +#define PIO2_L0 sc[16] +#define PIO2_L1 sc[17] +#define PI_H sc[18] +#define PI_L sc[19] +#define PI_L0 sc[20] +#define PI_L1 sc[21] +#define PI3O2_H sc[22] +#define PI3O2_L sc[23] +#define PI3O2_L0 sc[24] +#define PI3O2_L1 sc[25] +#define PI2_H sc[26] +#define PI2_L sc[27] +#define PI2_L0 sc[28] +#define PI2_L1 sc[29] +#define PI5O2_H sc[30] +#define PI5O2_L sc[31] +#define PI5O2_L0 sc[32] +#define PI5O2_L1 sc[33] +#define PoS(x, z) ((x * z) * (PP1 + z * PP2)) +#define PoL(x, z) ((x * z) * ((P1 + z * P2) + (z * z) * (P3 + z * P4))) + +extern const double _TBL_sincos[], _TBL_sincosx[]; + +void +sincos(double x, double *s, double *c) { + double z, y[2], w, t, v, p, q; + int i, j, n, hx, ix, lx; + + hx = ((int *)&x)[HIWORD]; + lx = ((int *)&x)[LOWORD]; + ix = hx & ~0x80000000; + + if (ix <= 0x3fc50000) { /* |x| < 10.5/64 = 0.164062500 */ + if (ix < 0x3e400000) { /* |x| < 2**-27 */ + if ((int)x == 0) + *c = ONE; + *s = x; + } else { + z = x * x; + if (ix < 0x3f800000) { /* |x| < 0.008 */ + q = z * (QQ1 + z * QQ2); + p = PoS(x, z); + } else { + q = z * ((Q1 + z * Q2) + (z * z) * + (Q3 + z * Q4)); + p = PoL(x, z); + } + *c = ONE + q; + *s = x + p; + 
} + return; + } + + n = ix >> 20; + i = (((ix >> 12) & 0xff) | 0x100) >> (0x401 - n); + j = i - 10; + if (n < 0x402) { /* |x| < 8 */ + x = fabs(x); + v = x - _TBL_sincosx[j]; + t = v * v; + w = _TBL_sincos[(j<<1)]; + z = _TBL_sincos[(j<<1)+1]; + p = v + PoS(v, t); + q = t * (QQ1 + t * QQ2); + if ((((j - 81) ^ (j - 101)) | + ((j - 282) ^ (j - 302)) | + ((j - 483) ^ (j - 503)) | + ((j - 181) ^ (j - 201)) | + ((j - 382) ^ (j - 402))) < 0) { + if (j <= 101) { + /* near pi/2, cos(x) = sin(pi/2-x) */ + t = w * q + z * p; + *s = (hx >= 0)? w + t : -w - t; + p = PIO2_H - x; + i = ix - 0x3ff921fb; + x = p + PIO2_L; + if ((i | ((lx - 0x54442D00) & + 0xffffff00)) == 0) { + /* very close to pi/2 */ + x = p + PIO2_L0; + *c = x + PIO2_L1; + } else { + z = x * x; + if (((ix - 0x3ff92000) >> 12) == 0) { + /* |pi/2-x|<2**-8 */ + w = PIO2_L + PoS(x, z); + } else { + w = PIO2_L + PoL(x, z); + } + *c = p + w; + } + } else if (j <= 201) { + /* near pi, sin(x) = sin(pi-x) */ + *c = z - (w * p - z * q); + p = PI_H - x; + i = ix - 0x400921fb; + x = p + PI_L; + if ((i | ((lx - 0x54442D00) & + 0xffffff00)) == 0) { + /* very close to pi */ + x = p + PI_L0; + *s = (hx >= 0)? x + PI_L1 : + -(x + PI_L1); + } else { + z = x * x; + if (((ix - 0x40092000) >> 11) == 0) { + /* |pi-x|<2**-8 */ + w = PI_L + PoS(x, z); + } else { + w = PI_L + PoL(x, z); + } + *s = (hx >= 0)? p + w : -p - w; + } + } else if (j <= 302) { + /* near 3/2pi, cos(x)=sin(x-3/2pi) */ + t = w * q + z * p; + *s = (hx >= 0)? 
w + t : -w - t; + p = x - PI3O2_H; + i = ix - 0x4012D97C; + x = p - PI3O2_L; + if ((i | ((lx - 0x7f332100) & + 0xffffff00)) == 0) { + /* very close to 3/2pi */ + x = p - PI3O2_L0; + *c = x - PI3O2_L1; + } else { + z = x * x; + if (((ix - 0x4012D800) >> 9) == 0) { + /* |3/2pi-x|<2**-8 */ + w = PoS(x, z) - PI3O2_L; + } else { + w = PoL(x, z) - PI3O2_L; + } + *c = p + w; + } + } else if (j <= 402) { + /* near 2pi, sin(x)=sin(x-2pi) */ + *c = z - (w * p - z * q); + p = x - PI2_H; + i = ix - 0x401921fb; + x = p - PI2_L; + if ((i | ((lx - 0x54442D00) & + 0xffffff00)) == 0) { + /* very close to 2pi */ + x = p - PI2_L0; + *s = (hx >= 0)? x - PI2_L1 : + -(x - PI2_L1); + } else { + z = x * x; + if (((ix - 0x40192000) >> 10) == 0) { + /* |x-2pi|<2**-8 */ + w = PoS(x, z) - PI2_L; + } else { + w = PoL(x, z) - PI2_L; + } + *s = (hx >= 0)? p + w : -p - w; + } + } else { + /* near 5pi/2, cos(x) = sin(5pi/2-x) */ + t = w * q + z * p; + *s = (hx >= 0)? w + t : -w - t; + p = PI5O2_H - x; + i = ix - 0x401F6A7A; + x = p + PI5O2_L; + if ((i | ((lx - 0x29553800) & + 0xffffff00)) == 0) { + /* very close to pi/2 */ + x = p + PI5O2_L0; + *c = x + PI5O2_L1; + } else { + z = x * x; + if (((ix - 0x401F6A7A) >> 7) == 0) { + /* |5pi/2-x|<2**-8 */ + w = PI5O2_L + PoS(x, z); + } else { + w = PI5O2_L + PoL(x, z); + } + *c = p + w; + } + } + } else { + *c = z - (w * p - z * q); + t = w * q + z * p; + *s = (hx >= 0)? 
w + t : -w - t; + } + return; + } + + if (ix >= 0x7ff00000) { + *s = *c = x / x; + return; + } + + /* argument reduction needed */ + n = __rem_pio2(x, y); + switch (n & 3) { + case 0: + *s = __k_sincos(y[0], y[1], c); + break; + case 1: + *c = -__k_sincos(y[0], y[1], s); + break; + case 2: + *s = -__k_sincos(y[0], y[1], c); + *c = -*c; + break; + default: + *c = __k_sincos(y[0], y[1], s); + *s = -*s; + } +} diff --git a/usr/src/lib/libm/common/C/sincospi.c b/usr/src/lib/libm/common/C/sincospi.c new file mode 100644 index 0000000000..66c3821dcc --- /dev/null +++ b/usr/src/lib/libm/common/C/sincospi.c @@ -0,0 +1,191 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak sincospi = __sincospi + +/* INDENT OFF */ +/* + * void sincospi(double x, double *s, double *c) + * *s = sin(pi*x); *c = cos(pi*x); + * + * Algorithm, 10/17/2002, K.C. Ng + * ------------------------------ + * Let y = |4x|, z = floor(y), and n = (int)(z mod 8.0) (displayed in binary). + * 1. If y == z, then x is a multiple of pi/4. 
Return the following values: + * --------------------------------------------------- + * n x mod 2 sin(x*pi) cos(x*pi) tan(x*pi) + * --------------------------------------------------- + * 000 0.00 +0 +1 +0 + * 001 0.25 +sqrt(0.5) +sqrt(0.5) +1 + * 010 0.50 +1 +0 +inf + * 011 0.75 +sqrt(0.5) -sqrt(0.5) -1 + * 100 1.00 -0 -1 +0 + * 101 1.25 -sqrt(0.5) -sqrt(0.5) +1 + * 110 1.50 -1 -0 +inf + * 111 1.75 -sqrt(0.5) +sqrt(0.5) -1 + * --------------------------------------------------- + * 2. Otherwise, + * --------------------------------------------------- + * n t sin(x*pi) cos(x*pi) tan(x*pi) + * --------------------------------------------------- + * 000 (y-z)/4 sinpi(t) cospi(t) tanpi(t) + * 001 (z+1-y)/4 cospi(t) sinpi(t) 1/tanpi(t) + * 010 (y-z)/4 cospi(t) -sinpi(t) -1/tanpi(t) + * 011 (z+1-y)/4 sinpi(t) -cospi(t) -tanpi(t) + * 100 (y-z)/4 -sinpi(t) -cospi(t) tanpi(t) + * 101 (z+1-y)/4 -cospi(t) -sinpi(t) 1/tanpi(t) + * 110 (y-z)/4 -cospi(t) sinpi(t) -1/tanpi(t) + * 111 (z+1-y)/4 -sinpi(t) cospi(t) -tanpi(t) + * --------------------------------------------------- + * + * NOTE. This program computes sinpi/cospi(t<0.25) by __k_sin/cos(pi*t, 0.0). + * This will return a result with error slightly more than one ulp (but less + * than 2 ulp). If one wants an accurate result, one may break up pi*t into + * high (tpi_h) and low (tpi_l) parts and call __k_sin/cos(tpi_h, tpi_l) + * instead. 
+ */ + +#include "libm.h" +#include "libm_synonyms.h" +#include "libm_protos.h" +#include "libm_macros.h" +#include <math.h> +#if defined(__SUNPRO_C) +#include <sunmath.h> +#endif + +static const double + pi = 3.14159265358979323846, /* 400921FB,54442D18 */ + sqrth_h = 0.70710678118654757273731092936941422522068023681640625, + sqrth_l = -4.8336466567264565185935844299127932213411660131004e-17; +/* INDENT ON */ + +void +sincospi(double x, double *s, double *c) { + double y, z, t; + int n, ix, k; + int hx = ((int *) &x)[HIWORD]; + unsigned h, lx = ((unsigned *) &x)[LOWORD]; + + ix = hx & ~0x80000000; + n = (ix >> 20) - 0x3ff; + if (n >= 51) { /* |x| >= 2**51 */ + if (n >= 1024) +#if defined(FPADD_TRAPS_INCOMPLETE_ON_NAN) + *s = *c = ix >= 0x7ff80000 ? x : x - x; + /* assumes sparc-like QNaN */ +#else + *s = *c = x - x; +#endif + else { + if (n >= 53) { + *s = 0.0; + *c = 1.0; + } + else if (n == 52) { + if ((lx & 1) == 0) { + *s = 0.0; + *c = 1.0; + } + else { + *s = -0.0; + *c = -1.0; + } + } + else { /* n == 51 */ + if ((lx & 1) == 0) { + *s = 0.0; + *c = 1.0; + } + else { + *s = 1.0; + *c = 0.0; + } + if ((lx & 2) != 0) { + *s = -*s; + *c = -*c; + } + } + } + } + else if (n < -2) /* |x| < 0.25 */ + *s = __k_sincos(pi * fabs(x), 0.0, c); + else { + /* y = |4x|, z = floor(y), and n = (int)(z mod 8.0) */ + if (ix < 0x41C00000) { /* |x| < 2**29 */ + y = 4.0 * fabs(x); + n = (int) y; /* exact */ + z = (double) n; + k = z == y; + t = (y - z) * 0.25; + } + else { /* 2**29 <= |x| < 2**51 */ + y = fabs(x); + k = 50 - n; + n = lx >> k; + h = n << k; + ((unsigned *) &z)[LOWORD] = h; + ((int *) &z)[HIWORD] = ix; + k = h == lx; + t = y - z; + } + if (k) { /* x = N/4 */ + if ((n & 1) != 0) + *s = *c = sqrth_h + sqrth_l; + else + if ((n & 2) == 0) { + *s = 0.0; + *c = 1.0; + } + else { + *s = 1.0; + *c = 0.0; + } + y = (n & 2) == 0 ? 
0.0 : 1.0; + if ((n & 4) != 0) + *s = -*s; + if (((n + 1) & 4) != 0) + *c = -*c; + } + else { + if ((n & 1) != 0) + t = 0.25 - t; + if (((n + (n & 1)) & 2) == 0) + *s = __k_sincos(pi * t, 0.0, c); + else + *c = __k_sincos(pi * t, 0.0, s); + if ((n & 4) != 0) + *s = -*s; + if (((n + 2) & 4) != 0) + *c = -*c; + } + } + if (hx < 0) + *s = -*s; +} diff --git a/usr/src/lib/libm/common/C/sinh.c b/usr/src/lib/libm/common/C/sinh.c new file mode 100644 index 0000000000..8c690a7ef2 --- /dev/null +++ b/usr/src/lib/libm/common/C/sinh.c @@ -0,0 +1,79 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak sinh = __sinh + +/* INDENT OFF */ +/* + * sinh(x) + * Code originated from 4.3bsd. + * Modified by K.C. Ng for SUN 4.0 libm. + * Method : + * 1. reduce x to non-negative by sinh(-x) = - sinh(x). + * 2. 
/*
 * sinh(x)
 *
 * Hyperbolic sine, computed on |x| with the sign of x restored afterwards:
 *
 *	0 <= |x| < lnovft  : (E + E/(E+1))/2 with E = expm1(|x|)
 *	lnovft <= |x|      : exp(|x| - 1024*ln2) * 2**1023
 *
 * Special cases:
 *	sinh(x) is x if x is +INF, -INF, or NaN.
 *	A non-finite result for a finite argument is passed to
 *	_SVID_libm_err() with error code 25.
 */
static const double
	ln2hi	= 6.93147180369123816490e-01,
	ln2lo	= 1.90821492927058770002e-10,
	lnovft	= 7.09782712893383973096e+02;

double
sinh(double x) {
	double	arg, mag, result, em1;

	arg = x;		/* keep the caller's value for error reporting */
	mag = fabs(x);

	/* +-Inf and NaN are returned (as x * |x|) */
	if (!finite(x))
		return (x * mag);

	if (mag >= lnovft) {
		/*
		 * exp(|x|) alone would overflow: scale the exponent down by
		 * 1024*ln2 (subtracted in two pieces) and back up by 2**1023.
		 * For |x| >= 1000 x itself is scaled.
		 */
		if (mag < 1000.0)
			x = copysign(exp((mag - 1024 * ln2hi) - 1024 * ln2lo),
			    x);
		result = scalbn(x, 1023);
	} else {
		em1 = expm1(mag);
		result = copysign((em1 + em1 / (1.0 + em1)) * 0.5, x);
	}

	if (!finite(result))
		result = _SVID_libm_err(arg, arg, 25);
	return (result);
}
+ */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak sqrt = __sqrt + +#include "libm.h" + +#ifdef __INLINE + +extern double __inline_sqrt(double); + +double +sqrt(double x) { + double z = __inline_sqrt(x); + + if (isnan(x)) + return (z); + return ((x < 0.0)? _SVID_libm_err(x, x, 26) : z); +} + +#else /* defined(__INLINE) */ + +/* + * Warning: This correctly rounded sqrt is extremely slow because it computes + * the sqrt bit by bit using integer arithmetic. + */ + +static const double big = 1.0e30, small = 1.0e-30; + +double +sqrt(double x) +{ + double z; + unsigned r, t1, s1, ix1, q1; + int ix0, s0, j, q, m, n, t; + int *px = (int *)&x, *pz = (int *)&z; + + ix0 = px[HIWORD]; + ix1 = px[LOWORD]; + if ((ix0 & 0x7ff00000) == 0x7ff00000) { /* x is inf or NaN */ + if (ix0 == 0xfff00000 && ix1 == 0) + return (_SVID_libm_err(x, x, 26)); + return (x + x); + } + if (((ix0 & 0x7fffffff) | ix1) == 0) /* x is zero */ + return (x); + + /* extract exponent and significand */ + m = ilogb(x); + z = scalbn(x, -m); + ix0 = (pz[HIWORD] & 0x000fffff) | 0x00100000; + ix1 = pz[LOWORD]; + n = m >> 1; + if (n + n != m) { + ix0 = (ix0 << 1) | (ix1 >> 31); + ix1 <<= 1; + m -= 1; + } + + /* generate sqrt(x) bit by bit */ + ix0 = (ix0 << 1) | (ix1 >> 31); + ix1 <<= 1; + q = q1 = s0 = s1 = 0; + r = 0x00200000; + + for (j = 1; j <= 22; j++) { + t = s0 + r; + if (t <= ix0) { + s0 = t + r; + ix0 -= t; + q += r; + } + ix0 = (ix0 << 1) | (ix1 >> 31); + ix1 <<= 1; + r >>= 1; + } + + r = 0x80000000; + for (j = 1; j <= 32; j++) { + t1 = s1 + r; + t = s0; + if (t < ix0 || (t == ix0 && t1 <= ix1)) { + s1 = t1 + r; + if ((t1 & 0x80000000) == 0x80000000 && + (s1 & 0x80000000) == 0) + s0 += 1; + ix0 -= t; + if (ix1 < t1) + ix0 -= 1; + ix1 -= t1; + q1 += r; + } + ix0 = (ix0 << 1) | (ix1 >> 31); + ix1 <<= 1; + r >>= 1; + } + + /* round */ + if ((ix0 | ix1) == 0) + goto done; + z = big - small; /* trigger inexact flag */ + if (z < 
big) + goto done; + if (q1 == 0xffffffff) { + q1 = 0; + q += 1; + goto done; + } + z = big + small; + if (z > big) { + if (q1 == 0xfffffffe) + q += 1; + q1 += 2; + goto done; + } + q1 += (q1 & 1); +done: + pz[HIWORD] = (q >> 1) + 0x3fe00000; + pz[LOWORD] = q1 >> 1; + if ((q & 1) == 1) + pz[LOWORD] |= 0x80000000; + return (scalbn(z, n)); +} + +#endif /* defined(__INLINE) */ diff --git a/usr/src/lib/libm/common/C/tan.c b/usr/src/lib/libm/common/C/tan.c new file mode 100644 index 0000000000..7462e8f6b2 --- /dev/null +++ b/usr/src/lib/libm/common/C/tan.c @@ -0,0 +1,76 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak tan = __tan + +/* INDENT OFF */ +/* + * tan(x) + * Table look-up algorithm by K.C. Ng, November, 1989. + * + * kernel function: + * __k_tan ... tangent function on [-pi/4,pi/4] + * __rem_pio2 ... 
argument reduction routine + */ +/* INDENT ON */ + +#include "libm.h" +#include "libm_synonyms.h" +#include "libm_protos.h" +#include <math.h> + +double +tan(double x) { + double y[2], z = 0.0; + int n, ix; + + /* high word of x */ + ix = ((int *) &x)[HIWORD]; + + /* |x| ~< pi/4 */ + ix &= 0x7fffffff; + if (ix <= 0x3fe921fb) + return (__k_tan(x, z, 0)); + + /* tan(Inf or NaN) is NaN */ + else if (ix >= 0x7ff00000) { +#if defined(FPADD_TRAPS_INCOMPLETE_ON_NAN) + return (ix >= 0x7ff80000 ? x : x - x); /* NaN */ + /* assumes sparc-like QNaN */ +#else + return (x - x); /* NaN */ +#endif + } + + /* argument reduction needed */ + else { + n = __rem_pio2(x, y); + return (__k_tan(y[0], y[1], n & 1)); + } +} diff --git a/usr/src/lib/libm/common/C/tanh.c b/usr/src/lib/libm/common/C/tanh.c new file mode 100644 index 0000000000..aa45520ae0 --- /dev/null +++ b/usr/src/lib/libm/common/C/tanh.c @@ -0,0 +1,101 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
/*
 * tanh(x)
 *
 * Hyperbolic tangent, computed on t = |x| with the sign of x restored:
 *
 *	t <= 1.e-10	   : tanh(x) := x (INEXACT raised via t + big)
 *	1.e-10 < t <= 1	   : -expm1(-2t) / (expm1(-2t) + 2)
 *	1 < t <= 22	   : 1 - 2 / (expm1(2t) + 2)
 *	22 < t < INF	   : +-(1 - small*small)
 *	t = INF		   : +-1
 *
 * Special cases:
 *	tanh(NaN) is NaN.
 */
static const double
	one	= 1.0,
	two	= 2.0,
	small	= 1.0e-10,
	big	= 1.0e10;

double
tanh(double x) {
	double t, y, z;
	int signx;
	volatile double dummy;

	if (isnan(x))
		return (x * x);	/* + -> * for Cheetah */
	/*
	 * signbit() is only guaranteed to be nonzero for a negative value,
	 * so signx is tested against zero rather than compared with 1.
	 */
	signx = signbit(x);
	t = fabs(x);
	z = one;
	if (t <= 22.0) {
		if (t > one)
			z = one - two / (expm1(t + t) + two);
		else if (t > small) {
			y = expm1(-t - t);
			z = -y / (y + two);
		} else {
			/* raise the INEXACT flag for non-zero t */
			dummy = t + big;
#ifdef lint
			dummy = dummy;
#endif
			return (x);
		}
	} else if (!finite(t))
		return (copysign(1.0, x));
	else
		return (signx != 0 ? -z + small * small : z - small * small);

	return (signx != 0 ? -z : z);
}
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _XPG6_H +#define _XPG6_H + +/* + * The bits in lib/libc/inc/xpg6.h fpgroup may use as per PSARC/2003/486. + */ + +/* + * If set, math library entry points present in SUSv2 deal with exceptional + * cases as per SUSv3 spec where math_errhandling is set to MATH_ERREXCEPT; + * otherwise they behave as per SUSv2 spec. + */ +#define _C99SUSv3_math_errexcept 0x00000400 +/* + * If set, pow(+/-1,+/-Inf) & pow(1,NaN) return 1; otherwise NaN is returned. + * Analogous comment applies to powf and powl. + */ +#define _C99SUSv3_pow_treats_Inf_as_an_even_int 0x00000080 +/* + * If set, logb(subnormal) returns (double) ilogb(subnormal); otherwise + * logb(subnormal) returns logb(DBL_MIN). Analogous comment applies to + * logbf and logbl. + */ +#define _C99SUSv3_logb_subnormal_is_like_ilogb 0x00000040 +/* + * If set, ilogb(0/+Inf/-Inf/NaN) raises FE_INVALID as per SUSv3; otherwise + * no exception is raised. Analogous comment applies to ilogbf and ilogbl. + */ +#define _C99SUSv3_ilogb_0InfNaN_raises_invalid 0x00000020 + +/* + * __xpg6 = _C99SUSv3_mode_OFF disables C99/SUSv3 standards conformance mode. 
+ */ +#define _C99SUSv3_mode_OFF 0xFFFF0000 + +#if !defined(_ASM) +extern unsigned int __xpg6; +#endif + +#endif /* _XPG6_H */ diff --git a/usr/src/lib/libm/common/LD/_TBL_cosl.c b/usr/src/lib/libm/common/LD/_TBL_cosl.c new file mode 100644 index 0000000000..aa39f6b480 --- /dev/null +++ b/usr/src/lib/libm/common/LD/_TBL_cosl.c @@ -0,0 +1,195 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * For i = 0, ..., 74 let x(i) be the extended precision number + * whose exponent is given by 0x3ffc + ((i + 8) >> 5) and whose + * five most significant fraction bits are given by (i + 8) & 0x1f. + * (The remaining fraction bits are zero and the integer bit is 1.) + * Then _TBL_cosl_hi[i] := cos(x(i)) rounded to extended precision, + * and _TBL_cosl_lo[i] ~ cos(x(i)) - _TBL_cosl_hi[i]. 
+ */ + +#include "libm.h" + +const long double _TBL_cosl_hi[] = { + 9.8781778381647194407734133e-01L, + 9.8720237785483049041453801e-01L, + 9.8657190839949758873065125e-01L, + 9.8592638507066143575569700e-01L, + 9.8526581771821381618451860e-01L, + 9.8459021642159980601798769e-01L, + 9.8389959148966397219646454e-01L, + 9.8319395346049307253706584e-01L, + 9.8247331310125525749262290e-01L, + 9.8173768140803577633441562e-01L, + 9.8098706960566919046918752e-01L, + 9.8022148914756809622147657e-01L, + 9.7944095171554836000860772e-01L, + 9.7864546921965086785991095e-01L, + 9.7783505379795979334592304e-01L, + 9.7700971781641738478493484e-01L, + 9.7616947386863527671421389e-01L, + 9.7531433477570232649326437e-01L, + 9.7444431358598898037593275e-01L, + 9.7355942357494817143660423e-01L, + 9.7265967824491275265730642e-01L, + 9.7174509132488946761517512e-01L, + 9.7081567677034946294446077e-01L, + 9.6987144876301534501253018e-01L, + 9.6891242171064478417089050e-01L, + 9.6695002923067782202260975e-01L, + 9.6492861910477100957986285e-01L, + 9.6284831470937969988364152e-01L, + 9.6070924301556190306409372e-01L, + 9.5851153458122862729886421e-01L, + 9.5625532354317529696403552e-01L, + 9.5394074760889473397129298e-01L, + 9.5156794804817220216272555e-01L, + 9.4913706968446302764510006e-01L, + 9.4664826088605332182323443e-01L, + 9.4410167355700434565568893e-01L, + 9.4149746312788106861798448e-01L, + 9.3883578854626548865214275e-01L, + 9.3611681226705529027757452e-01L, + 9.3334070024254843565662820e-01L, + 9.3050762191231429116015580e-01L, + 9.2761775019285190965094914e-01L, + 9.2467126146703609851492875e-01L, + 9.2166833557335191816090730e-01L, + 9.1860915579491826785281383e-01L, + 9.1549390884830122858606058e-01L, + 9.1232278487211784648910212e-01L, + 9.0909597741543105166956915e-01L, + 9.0581368342593642076004609e-01L, + 9.0247610323794150491687888e-01L, + 8.9908344056013845619268129e-01L, + 8.9563590246317069891836618e-01L, + 8.9213369936699440471096142e-01L, + 
8.8857704502803554333020819e-01L, + 8.8496615652614329169001889e-01L, + 8.8130125425134059916022419e-01L, + 8.7758256189037271613028607e-01L, + 8.6998471805841738884335773e-01L, + 8.6217447993488050434493855e-01L, + 8.5415375427738538514389754e-01L, + 8.4592449923106795446874767e-01L, + 8.3748872385052368529220410e-01L, + 8.2884848760932573481351876e-01L, + 8.2000589989723400824016969e-01L, + 8.1096311950521790220310775e-01L, + 8.0172235409841845058843968e-01L, + 7.9228585967717854313466241e-01L, + 7.8265594002627279692635431e-01L, + 7.7283494615247154478458735e-01L, + 7.6282527571057625053081719e-01L, + 7.5262937241806647606931838e-01L, + 7.4224972545850130697074609e-01L, + 7.3168886887382088632511210e-01L, + 7.2094938094569641805946583e-01L, + 7.1003388356607967499180972e-01L, +}; + +const long double _TBL_cosl_lo[] = { + 2.3161701550475222913914987e-20L, + -1.8449479910096732184579231e-20L, + 2.6686158961121436032543157e-20L, + -8.6377467693509323999412576e-21L, + 1.9776110020628332806497627e-20L, + -3.5925805070704800589322274e-21L, + -1.8155190558460064943241466e-20L, + -9.1900782344860461108346151e-21L, + -5.2952188498928572418662889e-21L, + 1.8052490350294447403358175e-22L, + 1.4237809112451219388907461e-22L, + 2.6375298402937478119012648e-20L, + -1.0076765547845230197228052e-20L, + 2.4356732099577389276048253e-20L, + -1.3951467830437376437362152e-20L, + 1.7110854885636746562043992e-20L, + 9.7751412348794551526570426e-21L, + -1.5984515732024779414075399e-20L, + -2.6221693743524256098098490e-20L, + 2.1708281645344702813143892e-20L, + 1.3606643184793342931047312e-20L, + 4.0913737251026449191179388e-21L, + 3.0297735892921952471510043e-21L, + -2.0186136916357220892889611e-20L, + -2.6295048282251297741856903e-20L, + -1.4268128384616571293099177e-20L, + 1.2118148575499258442724515e-21L, + 1.6059569963428104840244296e-20L, + 2.5656322072743666174102425e-21L, + 3.1051993049709377435678279e-21L, + 1.1564422287617245178214769e-20L, + 1.0031811944878086819339264e-20L, 
+ -1.7237335190163247756143591e-20L, + 2.0747363423904458194504323e-20L, + 2.2865077385189808827392339e-20L, + -2.5671240384658541701793951e-20L, + 2.6526752505060021072717663e-20L, + -1.9564443985440576261207264e-20L, + 1.6662891366649668957364366e-20L, + -1.3289734577249155895809888e-21L, + -1.0679012486769670465318810e-20L, + -2.2918344926389240849631303e-20L, + -1.2815734598986502345856155e-20L, + 1.4504064768242345767590746e-20L, + -1.4988853557132440148049946e-20L, + -2.2142847270523120702212966e-20L, + 2.9274200155749021994272015e-21L, + -1.9187410072234352245854903e-20L, + -1.5529430996486684056198058e-20L, + 8.3043961792850937525987774e-21L, + 2.3863634821654097616646090e-20L, + -1.7796180005854437467836689e-20L, + 1.2938828814644961764053094e-20L, + -1.2599167110905505919738134e-20L, + 7.2776486597245992496949283e-21L, + -2.0062284600282808092832087e-20L, + -1.4004485599673539406695080e-20L, + -1.4442131618989703782137918e-20L, + 2.2223959244287650022010583e-20L, + -4.4575975223558432505505015e-22L, + -9.0245930394257121787744934e-21L, + 2.3149253152495269264191463e-20L, + -3.3469699832521350974745777e-21L, + 1.5380944635427999356502468e-20L, + -1.3572945384913555811651506e-20L, + 1.9052929123346841342486920e-20L, + 6.8389097769442269862154625e-21L, + 4.4331336879906155675581769e-21L, + 2.6264491975559389159451170e-20L, + -2.3718434730140290189643472e-20L, + -1.4777051948748214572130603e-20L, + 2.0601161465229389031848878e-20L, + -1.3273342027649427778913402e-20L, + -1.5653047869359238584973515e-20L, + -1.7688078635602856653655125e-20L, +}; diff --git a/usr/src/lib/libm/common/LD/_TBL_ipio2l.c b/usr/src/lib/libm/common/LD/_TBL_ipio2l.c new file mode 100644 index 0000000000..27655109b6 --- /dev/null +++ b/usr/src/lib/libm/common/LD/_TBL_ipio2l.c @@ -0,0 +1,504 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). 
+ * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * Table of constants for 2/pi, used in __rem_pio2l (trigl) function. + * By K.C. Ng, April 25, 1989 + */ + +#include "libm.h" + +const int _TBL_ipio2l_inf[] = { /* by DHBailey MP package */ + 0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62, + 0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A, + 0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129, + 0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41, + 0x3991D6, 0x398353, 0x39F49C, 0x845F8B, 0xBDF928, 0x3B1FF8, + 0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF, + 0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5, + 0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 0x5FB11F, 0x8D5D08, + 0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3, + 0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880, + 0x4D7327, 0x310606, 0x1556CA, 0x73A8C9, 0x60E27B, 0xC08C6B, + 0x47C419, 0xC367CD, 0xDCE809, 0x2A8359, 0xC4768B, 0x961CA6, + 0xDDAF44, 0xD15719, 0x053EA5, 0xFF0705, 0x3F7E33, 0xE832C2, + 0xDE4F98, 0x327DBB, 0xC33D26, 0xEF6B1E, 0x5EF89F, 0x3A1F35, + 0xCAF27F, 0x1D87F1, 0x21907C, 0x7C246A, 0xFA6ED5, 0x772D30, + 
0x433B15, 0xC614B5, 0x9D19C3, 0xC2C4AD, 0x414D2C, 0x5D000C, + 0x467D86, 0x2D71E3, 0x9AC69B, 0x006233, 0x7CD2B4, 0x97A7B4, + 0xD55537, 0xF63ED7, 0x1810A3, 0xFC764D, 0x2A9D64, 0xABD770, + 0xF87C63, 0x57B07A, 0xE71517, 0x5649C0, 0xD9D63B, 0x3884A7, + 0xCB2324, 0x778AD6, 0x23545A, 0xB91F00, 0x1B0AF1, 0xDFCE19, + 0xFF319F, 0x6A1E66, 0x615799, 0x47FBAC, 0xD87F7E, 0xB76522, + 0x89E832, 0x60BFE6, 0xCDC4EF, 0x09366C, 0xD43F5D, 0xD7DE16, + 0xDE3B58, 0x929BDE, 0x2822D2, 0xE88628, 0x4D58E2, 0x32CAC6, + 0x16E308, 0xCB7DE0, 0x50C017, 0xA71DF3, 0x5BE018, 0x34132E, + 0x621283, 0x014883, 0x5B8EF5, 0x7FB0AD, 0xF2E91E, 0x434A48, + 0xD36710, 0xD8DDAA, 0x425FAE, 0xCE616A, 0xA4280A, 0xB499D3, + 0xF2A606, 0x7F775C, 0x83C2A3, 0x883C61, 0x78738A, 0x5A8CAF, + 0xBDD76F, 0x63A62D, 0xCBBFF4, 0xEF818D, 0x67C126, 0x45CA55, + 0x36D9CA, 0xD2A828, 0x8D61C2, 0x77C912, 0x142604, 0x9B4612, + 0xC459C4, 0x44C5C8, 0x91B24D, 0xF31700, 0xAD43D4, 0xE54929, + 0x10D5FD, 0xFCBE00, 0xCC941E, 0xEECE70, 0xF53E13, 0x80F1EC, + 0xC3E7B3, 0x28F8C7, 0x940593, 0x3E71C1, 0xB3092E, 0xF3450B, + 0x9C1288, 0x7B20AB, 0x9FB52E, 0xC29247, 0x2F327B, 0x6D550C, + 0x90A772, 0x1FE76B, 0x96CB31, 0x4A1679, 0xE27941, 0x89DFF4, + 0x9794E8, 0x84E6E2, 0x973199, 0x6BED88, 0x365F5F, 0x0EFDBB, + 0xB49A48, 0x6CA467, 0x427271, 0x325D8D, 0xB8159F, 0x09E5BC, + 0x25318D, 0x3974F7, 0x1C0530, 0x010C0D, 0x68084B, 0x58EE2C, + 0x90AA47, 0x02E774, 0x24D6BD, 0xA67DF7, 0x72486E, 0xEF169F, + 0xA6948E, 0xF691B4, 0x5153D1, 0xF20ACF, 0x339820, 0x7E4BF5, + 0x6863B2, 0x5F3EDD, 0x035D40, 0x7F8985, 0x295255, 0xC06437, + 0x10D86D, 0x324832, 0x754C5B, 0xD4714E, 0x6E5445, 0xC1090B, + 0x69F52A, 0xD56614, 0x9D0727, 0x50045D, 0xDB3BB4, 0xC576EA, + 0x17F987, 0x7D6B49, 0xBA271D, 0x296996, 0xACCCC6, 0x5414AD, + 0x6AE290, 0x89D988, 0x50722C, 0xBEA404, 0x940777, 0x7030F3, + 0x27FC00, 0xA871EA, 0x49C266, 0x3DE064, 0x83DD97, 0x973FA3, + 0xFD9443, 0x8C860D, 0xDE4131, 0x9D3992, 0x8C70DD, 0xE7B717, + 0x3BDF08, 0x2B3715, 0xA0805C, 0x93805A, 0x921110, 0xD8E80F, + 0xAF806C, 
0x4BFFDB, 0x0F9038, 0x761859, 0x15A562, 0xBBCB61, + 0xB989C7, 0xBD4010, 0x04F2D2, 0x277549, 0xF6B6EB, 0xBB22DB, + 0xAA140A, 0x2F2689, 0x768364, 0x333B09, 0x1A940E, 0xAA3A51, + 0xC2A31D, 0xAEEDAF, 0x12265C, 0x4DC26D, 0x9C7A2D, 0x9756C0, + 0x833F03, 0xF6F009, 0x8C402B, 0x99316D, 0x07B439, 0x15200C, + 0x5BC3D8, 0xC492F5, 0x4BADC6, 0xA5CA4E, 0xCD37A7, 0x36A9E6, + 0x9492AB, 0x6842DD, 0xDE6319, 0xEF8C76, 0x528B68, 0x37DBFC, + 0xABA1AE, 0x3115DF, 0xA1AE00, 0xDAFB0C, 0x664D64, 0xB705ED, + 0x306529, 0xBF5657, 0x3AFF47, 0xB9F96A, 0xF3BE75, 0xDF9328, + 0x3080AB, 0xF68C66, 0x15CB04, 0x0622FA, 0x1DE4D9, 0xA4B33D, + 0x8F1B57, 0x09CD36, 0xE9424E, 0xA4BE13, 0xB52333, 0x1AAAF0, + 0xA8654F, 0xA5C1D2, 0x0F3F0B, 0xCD785B, 0x76F923, 0x048B7B, + 0x721789, 0x53A6C6, 0xE26E6F, 0x00EBEF, 0x584A9B, 0xB7DAC4, + 0xBA66AA, 0xCFCF76, 0x1D02D1, 0x2DF1B1, 0xC1998C, 0x77ADC3, + 0xDA4886, 0xA05DF7, 0xF480C6, 0x2FF0AC, 0x9AECDD, 0xBC5C3F, + 0x6DDED0, 0x1FC790, 0xB6DB2A, 0x3A25A3, 0x9AAF00, 0x9353AD, + 0x0457B6, 0xB42D29, 0x7E804B, 0xA707DA, 0x0EAA76, 0xA1597B, + 0x2A1216, 0x2DB7DC, 0xFDE5FA, 0xFEDB89, 0xFDBE89, 0x6C76E4, + 0xFCA906, 0x70803E, 0x156E85, 0xFF87FD, 0x073E28, 0x336761, + 0x86182A, 0xEABD4D, 0xAFE7B3, 0x6E6D8F, 0x396795, 0x5BBF31, + 0x48D784, 0x16DF30, 0x432DC7, 0x356125, 0xCE70C9, 0xB8CB30, + 0xFD6CBF, 0xA200A4, 0xE46C05, 0xA0DD5A, 0x476F21, 0xD21262, + 0x845CB9, 0x496170, 0xE0566B, 0x015299, 0x375550, 0xB7D51E, + 0xC4F133, 0x5F6E13, 0xE4305D, 0xA92E85, 0xC3B21D, 0x3632A1, + 0xA4B708, 0xD4B1EA, 0x21F716, 0xE4698F, 0x77FF27, 0x80030C, + 0x2D408D, 0xA0CD4F, 0x99A520, 0xD3A2B3, 0x0A5D2F, 0x42F9B4, + 0xCBDA11, 0xD0BE7D, 0xC1DB9B, 0xBD17AB, 0x81A2CA, 0x5C6A08, + 0x17552E, 0x550027, 0xF0147F, 0x8607E1, 0x640B14, 0x8D4196, + 0xDEBE87, 0x2AFDDA, 0xB6256B, 0x34897B, 0xFEF305, 0x9EBFB9, + 0x4F6A68, 0xA82A4A, 0x5AC44F, 0xBCF82D, 0x985AD7, 0x95C7F4, + 0x8D4D0D, 0xA63A20, 0x5F57A4, 0xB13F14, 0x953880, 0x0120CC, + 0x86DD71, 0xB6DEC9, 0xF560BF, 0x11654D, 0x6B0701, 0xACB08C, + 0xD0C0B2, 0x485551, 
0x0EFB1E, 0xC37295, 0x3B06A3, 0x3540C0, + 0x7BDC06, 0xCC45E0, 0xFA294E, 0xC8CAD6, 0x41F3E8, 0xDE647C, + 0xD8649B, 0x31BED9, 0xC397A4, 0xD45877, 0xC5E369, 0x13DAF0, + 0x3C3ABA, 0x461846, 0x5F7555, 0xF5BDD2, 0xC6926E, 0x5D2EAC, + 0xED440E, 0x423E1C, 0x87C461, 0xE9FD29, 0xF3D6E7, 0xCA7C22, + 0x35916F, 0xC5E008, 0x8DD7FF, 0xE26A6E, 0xC6FDB0, 0xC10893, + 0x745D7C, 0xB2AD6B, 0x9D6ECD, 0x7B723E, 0x6A11C6, 0xA9CFF7, + 0xDF7329, 0xBAC9B5, 0x5100B7, 0x0DB2E2, 0x24BA74, 0x607DE5, + 0x8AD874, 0x2C150D, 0x0C1881, 0x94667E, 0x162901, 0x767A9F, + 0xBEFDFD, 0xEF4556, 0x367ED9, 0x13D9EC, 0xB9BA8B, 0xFC97C4, + 0x27A831, 0xC36EF1, 0x36C594, 0x56A8D8, 0xB5A8B4, 0x0ECCCF, + 0x2D8912, 0x34576F, 0x89562C, 0xE3CE99, 0xB920D6, 0xAA5E6B, + 0x9C2A3E, 0xCC5F11, 0x4A0BFD, 0xFBF4E1, 0x6D3B8E, 0x2C86E2, + 0x84D4E9, 0xA9B4FC, 0xD1EEEF, 0xC9352E, 0x61392F, 0x442138, + 0xC8D91B, 0x0AFC81, 0x6A4AFB, 0xD81C2F, 0x84B453, 0x8C994E, + 0xCC2254, 0xDC552A, 0xD6C6C0, 0x96190B, 0xB8701A, 0x649569, + 0x605A26, 0xEE523F, 0x0F117F, 0x11B5F4, 0xF5CBFC, 0x2DBC34, + 0xEEBC34, 0xCC5DE8, 0x605EDD, 0x9B8E67, 0xEF3392, 0xB817C9, + 0x9B5861, 0xBC57E1, 0xC68351, 0x103ED8, 0x4871DD, 0xDD1C2D, + 0xA118AF, 0x462C21, 0xD7F359, 0x987AD9, 0xC0549E, 0xFA864F, + 0xFC0656, 0xAE79E5, 0x362289, 0x22AD38, 0xDC9367, 0xAAE855, + 0x382682, 0x9BE7CA, 0xA40D51, 0xB13399, 0x0ED7A9, 0x480569, + 0xF0B265, 0xA7887F, 0x974C88, 0x36D1F9, 0xB39221, 0x4A827B, + 0x21CF98, 0xDC9F40, 0x5547DC, 0x3A74E1, 0x42EB67, 0xDF9DFE, + 0x5FD45E, 0xA4677B, 0x7AACBA, 0xA2F655, 0x23882B, 0x55BA41, + 0x086E59, 0x862A21, 0x834739, 0xE6E389, 0xD49EE5, 0x40FB49, + 0xE956FF, 0xCA0F1C, 0x8A59C5, 0x2BFA94, 0xC5C1D3, 0xCFC50F, + 0xAE5ADB, 0x86C547, 0x624385, 0x3B8621, 0x94792C, 0x876110, + 0x7B4C2A, 0x1A2C80, 0x12BF43, 0x902688, 0x893C78, 0xE4C4A8, + 0x7BDBE5, 0xC23AC4, 0xEAF426, 0x8A67F7, 0xBF920D, 0x2BA365, + 0xB1933D, 0x0B7CBD, 0xDC51A4, 0x63DD27, 0xDDE169, 0x19949A, + 0x9529A8, 0x28CE68, 0xB4ED09, 0x209F44, 0xCA984E, 0x638270, + 0x237C7E, 0x32B90F, 0x8EF5A7, 
0xE75614, 0x08F121, 0x2A9DB5, + 0x4D7E6F, 0x5119A5, 0xABF9B5, 0xD6DF82, 0x61DD96, 0x023616, + 0x9F3AC4, 0xA1A283, 0x6DED72, 0x7A8D39, 0xA9B882, 0x5C326B, + 0x5B2746, 0xED3400, 0x7700D2, 0x55F4FC, 0x4D5901, 0x8071E0, + 0xE13F89, 0xB295F3, 0x64A8F1, 0xAEA74B, 0x38FC4C, 0xEAB2BB, + 0x47270B, 0xABC3A7, 0x34BA60, 0x52DD34, 0xF8563A, 0xEB7E8A, + 0x31BB36, 0x5895B7, 0x47F7A9, 0x94C3AA, 0xD39225, 0x1E7F3E, + 0xD8974E, 0xBBA94F, 0xD8AE01, 0xE661B4, 0x393D8E, 0xA523AA, + 0x33068E, 0x1633B5, 0x3BB188, 0x1D3A9D, 0x4013D0, 0xCC1BE5, + 0xF862E7, 0x3BF28F, 0x39B5BF, 0x0BC235, 0x22747E, 0xA247C0, + 0xD52D1F, 0x19ADD3, 0x9094DF, 0x9311D0, 0xB42B25, 0x496DB2, + 0xE264B2, 0x5EF135, 0x3BC6A4, 0x1A4AD0, 0xAAC92E, 0x64E886, + 0x573091, 0x982CFB, 0x311B1A, 0x08728B, 0xBDCEE1, 0x60E142, + 0xEB641D, 0xD0BBA3, 0xE559D4, 0x597B8C, 0x2A4483, 0xF332BA, + 0xF84867, 0x2C8D1B, 0x2FA9B0, 0x50F3DD, 0xF9F573, 0xDB61B4, + 0xFE233E, 0x6C41A6, 0xEEA318, 0x775A26, 0xBC5E5C, 0xCEA708, + 0x94DC57, 0xE20196, 0xF1E839, 0xBE4851, 0x5D2D2F, 0x4E9555, + 0xD96EC2, 0xE7D755, 0x6304E0, 0xC02E0E, 0xFC40A0, 0xBBF9B3, + 0x7125A7, 0x222DFB, 0xF619D8, 0x838C1C, 0x6619E6, 0xB20D55, + 0xBB5137, 0x79E809, 0xAF9149, 0x0D73DE, 0x0B0DA5, 0xCE7F58, + 0xAC1934, 0x724667, 0x7A1A13, 0x9E26BC, 0x4555E7, 0x585CB5, + 0x711D14, 0x486991, 0x480D60, 0x56ADAB, 0xD62F64, 0x96EE0C, + 0x212FF3, 0x5D6D88, 0xA67684, 0x95651E, 0xAB9E0A, 0x4DDEFE, + 0x571010, 0x836A39, 0xF8EA31, 0x9E381D, 0xEAC8B1, 0xCAC96B, + 0x37F21E, 0xD505E9, 0x984743, 0x9FC56C, 0x0331B7, 0x3B8BF8, + 0x86E56A, 0x8DC343, 0x6230E7, 0x93CFD5, 0x6A8F2D, 0x733005, + 0x1AF021, 0xA09FCB, 0x7415A1, 0xD56B23, 0x6FF725, 0x2F4BC7, + 0xB8A591, 0x7FAC59, 0x5C55DE, 0x212C38, 0xB13296, 0x5CFF50, + 0x366262, 0xFA7B16, 0xF4D9A6, 0x2ACFE7, 0xF07403, 0xD4D604, + 0x6FD916, 0x31B1BF, 0xCBB450, 0x5BD7C8, 0x0CE194, 0x6BD643, + 0x4FD91C, 0xDF4543, 0x5F3453, 0xE2B5AA, 0xC9AEC8, 0x131485, + 0xF9D2BF, 0xBADB9E, 0x76F5B9, 0xAF15CF, 0xCA3182, 0x14B56D, + 0xE9FE4D, 0x50FC35, 0xF5AED5, 0xA2D0C1, 
0xC96057, 0x192EB6, + 0xE91D92, 0x07D144, 0xAEA3C6, 0x343566, 0x26D5B4, 0x3161E2, + 0x37F1A2, 0x209EFF, 0x958E23, 0x493798, 0x35F4A6, 0x4BDC02, + 0xC2BE13, 0xBE80A0, 0x0B72A3, 0x115C5F, 0x1E1BD1, 0x0DB4D3, + 0x869E85, 0x96976B, 0x2AC91F, 0x8A26C2, 0x3070F0, 0x041412, + 0xFC9FA5, 0xF72A38, 0x9C6878, 0xE2AA76, 0x50CFE1, 0x559274, + 0x934E38, 0x0A92F7, 0x5533F0, 0xA63DB4, 0x399971, 0xE2B755, + 0xA98A7C, 0x008F19, 0xAC54D2, 0x2EA0B4, 0xF5F3E0, 0x60C849, + 0xFFD269, 0xAE52CE, 0x7A5FDD, 0xE9CE06, 0xFB0AE8, 0xA50CCE, + 0xEA9D3E, 0x3766DD, 0xB834F5, 0x0DA090, +}; + +#if 0 +const int _TBL_ipio2l_66[] = { + 0xA2F983, 0x6E4E44, 0x152A00, 0x062BC4, 0x0DA276, 0xBED4C1, + 0xFDF905, 0x5CD5BA, 0x767CEC, 0x1F80D6, 0xC26053, 0x3A0070, + 0x107C2A, 0xF68EE9, 0x687B7A, 0xB990AA, 0x38DE4B, 0x96CFF3, + 0x92735E, 0x8B34F6, 0x195BFC, 0x27F88E, 0xA93EC5, 0x3958A5, + 0x3E5D13, 0x1C55A8, 0x5B4A8B, 0xA42E04, 0x12D105, 0x35580D, + 0xF62347, 0x450900, 0xB98BCA, 0xF7E8A4, 0xA2E5D5, 0x69BC52, + 0xF0381D, 0x1A0A88, 0xFE8714, 0x7F6735, 0xBB7D4D, 0xC6F642, + 0xB27E80, 0x6191BF, 0xB6B750, 0x52776E, 0xD60FD0, 0x607DCC, + 0x68BFAF, 0xED69FC, 0x6EB305, 0xD2557D, 0x25BDFB, 0x3E4AA1, + 0x84472D, 0x8B0376, 0xF77740, 0xD290DF, 0x15EC8C, 0x45A5C3, + 0x6181EF, 0xC5E7E8, 0xD8909C, 0xF62144, 0x298428, 0x6E5D9D, + 0xF9A9B4, 0xCDBD2F, 0xC083E7, 0x0D3957, 0xECA3B2, 0x96223C, + 0xC1080D, 0x087D47, 0x7D7576, 0xA614B1, 0x42A4B6, 0xAA173C, + 0xE217E5, 0xFDCD34, 0x279D5F, 0x39AACA, 0x1CA8DF, 0x8B6633, + 0x5C49E4, 0xB56803, 0x1E7938, 0x741FDC, 0x4CB19B, 0xCECC3B, + 0x921EB7, 0x7C0FC3, 0x361F23, 0xF9EE22, 0xBA4235, 0xA5FCA3, + 0xBD4680, 0xFCDF65, 0xFC96AD, 0x31C90C, 0x919EEB, 0xFE0FB7, + 0x75B4B0, 0x693961, 0x75BCAA, 0xEB6F39, 0xA343C0, 0xD16FF2, + 0x33DAD0, 0xC1E095, 0x053182, 0x11E4A1, 0x40F943, 0x32D314, + 0xAF1B98, 0xE1B05A, 0xE5F3AD, 0x6E633F, 0x363D14, 0xA3777C, + 0xC8C6EE, 0x001E18, 0x0D180C, 0xAA1369, 0xEDFBA2, 0x998A9D, + 0x16E799, 0x693B75, 0x90EF50, 0x938DD4, 0xFB7ACD, 0x67CEEB, + 0x249DE3, 0x9B9B52, 0xD8CDAC, 
0xC31A54, 0x855FBF, 0x848591, + 0x0954B0, 0x946B8C, 0xA4C7B4, 0x9A9E51, 0xF20425, 0xAA2637, + 0xFC6657, 0x7D8625, 0x620B74, 0x8B578D, 0xEC9A05, 0xDEF24F, + 0x7F19B0, 0xFC2544, 0x1DA0F1, 0x23790C, 0xC4294D, 0x6D3C32, + 0x66FE56, 0xD45562, 0x66264F, 0xA24162, 0x13E930, 0xB0E7C0, + 0xFA0E97, 0xBFC62C, 0x0E663F, 0x90F33B, 0x55E73C, 0xD791F7, + 0xD3F00D, 0xAB01C7, 0x40CF8F, 0xA593BA, 0xE627D5, 0x4A8308, + 0x32DC06, 0x80C876, 0x1C3DB5, 0xB5489F, 0x632CDF, 0xB02517, + 0xD17EFA, 0x92570F, 0xFAED44, 0x8F8536, 0x27069B, 0xC014DC, + 0x997D48, 0x961D61, 0x7A960B, 0x31B622, 0xD3C425, 0xA69520, + 0x98D29E, 0xF1C973, 0x5483D7, 0x99611E, 0xEAFF5F, 0x7DEFF1, + 0x98475C, 0x91C787, 0x537E17, 0x068C65, 0xF05E52, 0x942F04, + 0x37CF92, 0xEF4223, 0xC4C52F, 0x521DAA, 0xBAAF97, 0x972236, + 0xA2B3D3, 0x62C921, 0x8D3A8B, 0x2B3302, 0x6061B9, 0x0CBE94, + 0x75F451, 0xBD06DE, 0x86042D, 0xFB61ED, 0x4C8869, 0x590232, + 0x479963, 0x23518D, 0xAF5D28, 0x60C9DE, 0x473DB0, 0x9DE009, + 0xD8FC4C, 0xE96991, 0x9CA455, 0x800BC8, 0x977CE0, 0xDCBFA6, + 0x19D249, 0xA0F76D, 0x5F9B2F, 0x452BB3, 0x77E091, 0xB6383A, + 0x7BE9C2, 0x4BF7C1, 0x8A5EBF, 0xEB0D55, 0x9AF4DC, 0x275CA0, + 0xED09D0, 0xE50A7F, 0xBEF42C, 0x4803AF, 0x56139F, 0xD58848, + 0x797D96, 0xB8352E, 0x49D90D, 0x7607E0, 0xC99256, 0x75F530, + 0xB72237, 0x1AF080, 0xC2E813, 0x06CFA9, 0xB9DF8E, 0x919C38, + 0x89D97E, 0x0464D5, 0xB12EEF, 0xD14165, 0x365A72, 0x550D35, + 0x3772D8, 0xF41B58, 0x0378A7, 0x2D5D7D, 0xD6E433, 0xDD2018, + 0x139FD7, 0x1B5621, 0x94E046, 0x97A323, 0x693176, 0x28DF59, + 0xD24273, 0x0E4E26, 0xA9A8F6, 0xF15B41, 0x450EE3, 0x57EA61, + 0x7DADA6, 0xF21086, 0x394BEE, 0x8F4813, 0x3FDEE9, 0xF3A53D, + 0xAB2F40, 0x8B1E2B, 0xA07FD4, 0x992CC4, 0x63532D, 0x9F35A2, + 0x6FA290, 0x0094DE, 0xD2A24D, 0x755B81, 0x79F9E1, 0xFE1D35, + 0xFEE8CC, 0x9224C5, 0x54E2CE, 0x41F31C, 0xF45138, 0xED6D10, + 0x6B439D, 0xD2BE46, 0xC327D4, 0x68BFB0, 0x46D5A5, 0x79B285, + 0x776D7C, 0xE18647, 0x00E32F, 0xEBB7F2, 0x5DE307, 0x5A8EA0, + 0x06CEFE, 0x20923C, 0x354CE1, 0xAD09C5, 
0x56996D, 0xCFB124, + 0xEF7BC1, 0x76BF72, 0xF20753, 0x5BBAFA, 0xB8A2B2, 0x5914F2, + 0x5D834F, 0xE64A08, 0x14C3AB, 0x07796B, 0xF2212D, 0xC74049, + 0xB61C6A, 0x282CFC, 0x25070C, 0x315BF1, 0x6FEAD3, 0x2CD2E5, + 0xD10F9C, 0x1972BB, 0x908073, 0x0F368C, 0x69BE97, 0xA242B0, + 0x722DFE, 0xAFE6A2, 0x143D8B, 0x5C5699, 0x48232B, 0xFF49AC, + 0xB5FA62, 0x6AD778, 0x7A844D, 0x258AA0, 0x8EDE3D, 0x9A9496, + 0x49924E, 0xA33E97, 0x4F43FA, 0xC40741, 0x2F764A, 0x8EB2B1, + 0x8E67D3, 0x9FF324, 0x51B11B, 0x5D6E09, 0xE9AD3E, 0xFFA902, + 0xF48653, 0x0845D3, 0xDED33E, 0x32D30E, 0x6247CA, 0x7C586D, + 0x2EAF9E, 0x323A35, 0xAD11FB, 0x0F420C, 0x0E0685, 0x401B60, + 0xBB3D43, 0xF4D489, 0xBCDC4C, 0x40FFBA, 0x18AB08, 0x7AC72D, + 0x5E76DB, 0xE8344E, 0x3975A2, 0xF9611B, 0x1121F3, 0x3A429C, + 0x9B18EC, 0xF298B1, 0x8AEC78, 0x1C248B, 0x69108F, 0xDB2D37, + 0xA1A613, 0x910359, 0x521451, 0xD4441F, 0x0BB3B6, 0x50D9DB, + 0xBD589F, 0x62A62E, 0xA9B903, 0x935F63, 0x058BEC, 0x78BCB5, + 0x2CB460, 0x3A9037, 0x0291C4, 0x1FABC1, 0xBE7D05, 0xF948E7, + 0x6BA5CD, 0xF62A0A, 0x9AEA19, 0x2257AB, 0x2E0D7D, 0x9EB93F, + 0x5E3F77, 0xD4A13F, 0x08E3DB, 0xDFD689, 0x2B9B4E, 0xB58427, + 0x25424B, 0x1197FD, 0xCF298A, 0x314008, 0xD5687F, 0x0F0EAC, + 0x13C485, 0xF684B2, 0xED7EC7, 0x6E636D, 0x28C933, 0xE19058, + 0x688B6A, 0xC88905, 0xFB2F31, 0x61304C, 0xC19765, 0x60D81A, + 0x57F276, 0xC6EFC4, 0x048954, 0x303470, 0xDA6F6F, 0x93901A, + 0x911439, 0x363D12, 0x59E72B, 0x6F9F1E, 0x57C584, 0xDF0D23, + 0xBB743F, 0xADE99C, 0x546097, 0xFCC820, 0xCBB968, 0xDA9B5F, + 0x0DC271, 0x563337, 0x9ED662, 0xE7C44F, 0x3129F8, 0xF5EAF9, + 0xDAF7F2, 0xCD09FF, 0xA92535, 0x441C29, 0x7DF436, 0xE2B00A, + 0x36746F, 0xF1DC61, 0x9D3C9C, 0x63AB71, 0xB8F3BB, 0x1C80F6, + 0x62FF65, 0x5FFE5F, 0x3B2814, 0xBADE27, 0x1B384B, 0x268AA9, + 0xBD91EF, 0xCA436B, 0xABE107, 0x88DCA6, 0xC3AFC0, 0x85D155, + 0x464A48, 0xBFDAEB, 0xC6F389, 0x907C11, 0x0D3E41, 0xCD2197, + 0x549008, 0x817E4E, 0x8C7154, 0x1DC37F, 0x5E897E, 0xA9A2FE, + 0xEC6060, 0xCC0728, 0x430D3B, 0x62471C, 0xD3A4D3, 
0x2BA57B, + 0xE5D15A, 0xD632F3, 0xF2B76F, 0xEC8498, 0xAE41C2, 0xAAF413, + 0xEAF5C0, 0xDD1B07, 0xB9A2A0, 0x59F230, 0xA3F61B, 0x8F8643, + 0x05DE6B, 0x1B5B8E, 0x63ECC5, 0xBFF01D, 0x8F1440, 0x3F8ADF, + 0x2E6539, 0xF3DB7A, 0x293FE5, 0x7EE714, 0x88E6D8, 0x2B2A6A, + 0xDF6E34, 0x8D4604, 0x4F6594, 0x639063, 0x6B51CC, 0x0D05CD, + 0x009607, 0xE7BF70, 0xC9A0EA, 0x0D80DD, 0xA1A065, 0x0DCB8F, + 0xA48430, 0x715934, 0x6FC8E4, 0x6FFC52, 0xEF8B05, 0xDE506A, + 0xE62BBC, 0x31480F, 0xEA64EA, 0x51E6FB, 0x9AE773, 0x21C54D, + 0xBFA080, 0x273D1E, 0x9FFD4E, 0x0C2CA8, 0x0690A5, 0xF8773B, + 0x4B2680, 0x6E3F56, 0xC8B89F, 0x0B7BD0, 0x71C8BF, 0x5AABD3, + 0x2BA93E, 0x9D2EE1, 0xCDF2FA, 0xEE57BE, 0x84A116, 0xDA756D, + 0x8FD6C0, 0x927153, 0xFF5EF3, 0x9F8331, 0x713411, 0xF945F3, + 0x0382B2, 0x8BAE30, 0xBC45A4, 0x630101, 0x5C9C3A, 0x643CFD, + 0x48115C, 0x17F03E, 0xB5F55E, 0x288DAF, 0x725660, 0xFB58E0, + 0xFC189E, 0x1ECA69, 0xFB19A6, 0xFA7A92, 0x7CC48E, 0x869372, + 0x58089A, 0x16DB5C, 0xADC0CD, 0x09D3D4, 0xD1108E, 0xDC64ED, + 0x3A999C, 0xAA8716, 0x5A3D8E, 0x7037FB, 0x1976AD, 0xE477D7, + 0x23782B, 0xC51F39, 0x4A5E9A, 0xDAD9DA, 0xE5B559, 0x08EF06, + 0x76E24F, 0x7361AD, 0x5F42A3, 0x9B70E5, 0xCE96C4, 0x552E99, + 0x6D7A6F, 0x804474, 0x4FA45B, 0x1D115B, 0x6D109E, 0x0A1A63, + 0x1084A6, 0xE18E5D, 0x2D8589, 0x203345, 0x4851AF, 0xA71EDC, + 0x03B6B1, 0x267970, 0xDEC908, 0x795BED, 0x7099B9, 0x209321, + 0x7FC2E7, 0x0F3E5E, 0xC7A4F4, 0x088129, 0x59AE63, 0x4E3251, + 0x344268, 0x79285D, 0x2B9494, 0xF1E2A2, 0xF7DA20, 0xDF6756, + 0xCA3BA3, 0x422489, 0xA2239C, 0x38724D, 0x2AC767, 0x601E9D, + 0xB47C6C, 0xA22481, 0xBBB655, 0x1EC0C4, 0xD84A97, 0xD449EE, + 0x162C9D, 0x782F29, 0xCEB4FA, 0xE317BC, 0x2FFDBD, 0xB342D2, + 0xB2CB19, 0x323AB9, 0x1AFF93, 0x13A8DF, 0x86B5A5, 0x5741D6, + 0xC54342, 0x3CAC29, 0xF7517C, 0x129A7A, 0xB2B8B4, 0x9B709F, + 0x3923C5, 0xEAFA6E, 0xDB9077, 0x29EEA0, 0x702D8C, 0x4DC14F, + 0xE46933, 0xA764E4, 0x754266, 0xFA4F98, 0x643DA5, 0xCA775C, + 0x7F1632, 0xE671A3, 0x4BF4C6, 0xA82378, 0xEFD317, 0xE62D38, + 
0xD461C9, 0x8EEC80, 0xC89882, 0x4CC73C, 0x830F3F, 0xE4B200, + 0x582615, 0x6CD558, 0xA66727, 0xEF7975, 0xFEA5CE, 0x147A40, + 0x4796E4, 0xC07761, 0xF5D5B3, 0x6B65FB, 0xE4F14D, 0xA837CA, + 0x9A152A, 0x554E94, 0x83EC5F, 0xA62174, 0x85E2ED, 0xCCE71C, + 0x3540FF, 0x088A84, 0xBA2816, 0x293610, 0x4C3EE7, 0x8E55A9, + 0x49E5E5, 0x782178, 0x45D2AA, 0x9BB449, 0x00D282, 0xF61E67, + 0xE2F7DE, 0xCC6AA1, 0xCD1979, 0x52FEDB, 0x9A8776, 0x70A018, + 0x500271, 0x1273BA, 0xDE648E, 0x7AC7F7, 0x767725, 0xD0A457, + 0xF17250, 0xBC578C, 0x2DFD3A, 0x97F988, 0xA576C8, 0x8129BB, + 0x22D9C3, 0x0436ED, 0x650791, 0xA314EC, 0x42A0B3, 0x37A521, + 0x4BFB2B, 0x8C1B7F, 0x115E17, 0xF7C27F, 0xC1D5EB, 0x060487, + 0x8A28D6, 0x41330F, 0xBFAE67, 0x7774E8, 0x4CCC3C, 0x6B2F80, + 0x628BF2, 0x1E41A6, 0x8D0B22, 0xBC85BA, 0xCCF461, 0xBEC69C, + 0xDF8A10, 0x3C5E71, 0x2F8D5F, 0x63D3DA, 0x5934D1, 0x2CA35D, + 0xC687A2, 0x24E9B4, 0x1843D3, 0x5C9B97, 0x9B580C, 0x780B2C, + 0x59943D, 0x0744D0, 0x8DA6E3, 0x07AAF6, 0x2214D0, 0x72E8D7, + 0x54151B, 0x514DE9, 0x8DCC3B, 0x0CEB00, 0x2C4DE3, 0x5012AE, + 0xD7B72E, 0xB7DE9A, 0x641B2F, 0xF9CF17, 0x8BD282, 0x9F31A3, + 0xDED846, 0x467E05, 0x26CCEA, 0xF8E404, 0x65572E, 0x82C594, + 0xE572A9, 0x895653, 0xA1AA94, 0x8DD876, 0x5E9A61, 0x69EB1C, + 0x0385A9, 0x5BC844, 0x95B2DF, 0x6678F6, 0xFA7033, 0xE4F434, + 0x5584A9, 0x32C099, 0x9AD846, 0xB3FFD1, 0xA81C56, 0x4E54EF, + 0x54D173, 0xF191B4, 0x49B2A2, 0xB309D9, 0x546D8D, 0xC0A51E, + 0xCAFFC0, 0x785400, 0x05F69D, 0x894056, 0xC33098, 0xDFF6C2, + 0x908D97, 0x05CC96, 0x46484B, 0xBD7B9D, 0xB152F5, 0x5A7461, + 0x59CA20, 0x8F8EF5, 0xC9FF05, 0xF6F398, 0x856C97, 0x81E07C, + 0xAE5EDA, 0x51BDC9, 0xF26437, 0xBBC8CE, 0x091B52, 0x68B6A5, + 0x90750E, 0x925EF9, 0x3D9CB3, 0x46EA96, 0x97D648, 0x78BCC7, + 0xF4B488, 0x05275E, 0x6619DF, 0x56D4A0, 0x8C5C41, 0xDB345A, + 0x0B79DA, 0x496369, 0x96109B, 0x667664, 0xC40CF9, 0x91D7CA, + 0x119F1A, 0xA99272, 0xCBB529, 0xBB033E, 0x8F91C0, 0x570045, + 0xB845C2, 0x2B8E52, 0x687AFB, 0x0D0AA3, 0x200863, 0x043B83, + 0xF129DE, 
0x49C2D6, 0x9641D2, 0xC4747C, 0x220804, 0x503F05, + 0x7E274F, 0xCA83D9, 0x9D6495, 0x0E5039, +}; +const int _TBL_ipio2l_53[] = { + 0xA2F983, 0x6E4E44, 0x16F3C4, 0xEA69B5, 0xD3E131, 0x60E1D2, + 0xD7982A, 0xC031F5, 0xD67BCC, 0xFD1375, 0x60919B, 0x3FA0BB, + 0x612ABB, 0x714F9B, 0x03DA8A, 0xC05948, 0xD023F4, 0x5AFA37, + 0x51631D, 0xCD7A90, 0xC0474A, 0xF6A6F3, 0x1A52E1, 0x5C3927, + 0x3ADA45, 0x4E2DB5, 0x64E8C4, 0x274A5B, 0xB74ADC, 0x1E6591, + 0x2822BE, 0x4771F5, 0x12A63F, 0x83BD35, 0x2488CA, 0x1FE1BE, + 0x42C21A, 0x682569, 0x2AFB91, 0x68ADE1, 0x4A42E5, 0x9BE357, + 0xB79675, 0xCE998A, 0x83AF8B, 0xE645E6, 0xDF0789, 0x9E9747, + 0xAA15FF, 0x358C3F, 0xAF3141, 0x72A3F7, 0x2BF1D4, 0xF3AD96, + 0x7D759F, 0x257FCE, 0x29FB69, 0xB1B42C, 0xC32DE1, 0x8C0BBD, + 0x31EC2F, 0x942026, 0x85DCE7, 0x653FF3, 0x136FA7, 0x0D7A5F, + 0x93FC61, 0x035287, 0xC77FCA, 0x73530A, 0xC6BC15, 0x0E4B0F, + 0x568FCE, 0x2D3456, 0x4D7FE1, 0xA12CD1, 0xB2CEA2, 0x531C62, + 0x70B4D2, 0x1BCE9A, 0x87704D, 0x6B83D7, 0xAA8121, 0x2530EA, + 0x2074BF, 0x28A071, 0x9D69C3, 0x406DD8, 0xF58783, 0x115D89, + 0x5E85F3, 0xAACDCC, 0x8C0B57, 0xD7DFFE, 0x550D96, 0xC43EB4, + 0x89ABA7, 0x94F595, 0x56F260, 0x06A4CD, 0x7FD2E2, 0x6FDFA8, + 0x3E9C98, 0xBFD682, 0xAD3A12, 0x23A8A6, 0x173A89, 0x5DE9BD, + 0x95A978, 0x28E484, 0x5964F3, 0x496AF0, 0x4B1DA9, 0x989061, + 0xBD2BF2, 0xE01A90, 0x0905B7, 0xAC39AC, 0x52D5B7, 0x109F25, + 0x3AE1DC, 0xF90A7C, 0x33F4E5, 0xF5DFDF, 0x1522D0, 0x562CE6, + 0x392CFF, 0xEB9032, 0x10A08E, 0x0B1D7F, 0x42B80A, 0x366DD2, + 0xC24F89, 0x02222E, 0x21494C, 0x985287, 0x87FD07, 0x2EE361, + 0xAD8D68, 0xE72273, 0x9E8D59, 0xD09999, 0x10F4A1, 0x1079A3, + 0xE9BEAF, 0x9C0887, 0x09C622, 0xEBCF06, 0x974532, 0x086A8F, + 0x6CEA05, 0x388C00, 0x74969E, 0xC85B16, 0x385A38, 0x9A2F35, + 0x670531, 0xABA6D0, 0xEFD3C1, 0x27AD92, 0xF4203E, 0x3D619F, + 0x4D05F4, 0x9AE7CC, 0x03B592, 0x41FF55, 0xCAFCA5, 0x1A0987, + 0x88AB79, 0x3627D4, 0x25B12A, 0x52594A, 0xA2BEB0, 0x25C3F2, + 0x4489DA, 0x7959A7, 0xEAEC89, 0xB34714, 0x960196, 0x1FC33A, + 0x7F0275, 
0x32EF92, 0x0111CE, 0x8E4685, 0x6F5B34, 0xF6123A, + 0x5543B2, 0xE9A02A, 0x74E03F, 0x54D5A8, 0x086A2C, 0x4A9CD3, + 0x921191, 0x229764, 0x0A1A84, 0x9B45AE, 0xC653A5, 0xB15F33, + 0x100FD1, 0x7DD740, 0xB20CD3, 0x0A0786, 0xF506C3, 0x25EBF4, + 0x3AB39E, 0xE3BB24, 0x27646F, 0xEECE57, 0x706BFE, 0xC7A869, + 0x57ED51, 0x118C82, 0x2B0FF5, 0xC8E545, 0xC43D80, 0x2A3183, + 0x4C1BB9, 0xBC108A, 0x099779, 0xF9ECC8, 0x2A1063, 0x5D2F6A, + 0x8F2675, 0x12FF6D, 0x32EED9, 0xE4A245, 0x7392CF, 0x5C240B, + 0xC476FF, 0x97AFC7, 0xB76131, 0x665E05, 0x67BD57, 0x19E998, + 0x3A5863, 0x23B8AA, 0x5B5608, 0x8A66C6, 0x5F2AD3, 0x78BAFA, + 0x3516CE, 0xCBEA16, 0x6E40D4, 0xB463D4, 0xA6C12F, 0xABD3D7, + 0x32650A, 0x579D10, 0x3CB9E2, 0x1A02A7, 0xDF2FFA, 0x28C991, + 0xB2264C, 0x027870, 0x47BDD4, 0xF243B1, 0x39AE2C, 0x282EA4, + 0xAF1D98, 0x2AFD16, 0xABE7AF, 0x17CB67, 0x8FF93E, 0x793167, + 0x435F6B, 0x48058B, 0x417DA0, 0xE01217, 0x085A69, 0xB50E36, + 0x79A4CD, 0xD74907, 0x26C4B5, 0xB90054, 0x06C3AD, 0x5AB38F, + 0x585E91, 0xD04E4F, 0x2938CE, 0xD4EAA7, 0xA06DE5, 0x40BFE5, + 0xDE6849, 0xEF65F0, 0xF1D4BB, 0x94C21E, 0x66E978, 0x1B9B94, + 0x961043, 0x5961B8, 0xBAAA74, 0xD662EE, 0x9DABF6, 0x0AFE28, + 0x9587A4, 0xA632BC, 0x09149F, 0xDEA996, 0x2CAFD7, 0xBDE29B, + 0x7159E6, 0x1F7C49, 0xF2E2ED, 0xBFA992, 0x7C77EF, 0xC245D0, + 0xB2D129, 0x993E75, 0xAB4C0C, 0x5C84B6, 0x17F542, 0x45314E, + 0x1DEF1B, 0xE3BDCC, 0xB3AE86, 0x24522F, 0x918FC6, 0x2138D5, + 0x883646, 0x6858B6, 0x032762, 0x5170F8, 0x4974EA, 0x76BF77, + 0xECDA8A, 0x9EADDD, 0x2404EF, 0xC52A5D, 0xF2E858, 0xC42D60, + 0xD18C08, 0xDE59B2, 0x4CC3A6, 0x94D888, 0x4C4AF0, 0xCF1F8C, + 0xBF2F6F, 0x7B4535, 0x98B0DB, 0x2BE0CF, 0x4616A7, 0xA8D9FB, + 0x88CA7A, 0x5087E1, 0x18DD8A, 0x1A9F4F, 0x1DCECE, 0xF8609E, + 0xE2F0C8, 0x9AD7D4, 0xE3CDFE, 0xC6FDD5, 0x8FF3CD, 0x7D45AA, + 0xD34957, 0x7C1963, 0x6CE098, 0xB70215, 0x326BBF, 0x47B3A6, + 0xF9235D, 0x6F66F5, 0xC6E40C, 0xE7F50B, 0xFF2FDD, 0x5A1251, + 0xE95EF1, 0xDE8E67, 0xECEE9B, 0xC9F98E, 0x722224, 0x6DF750, + 0x81D08F, 0x2BFCF0, 
0xDDC10D, 0x775314, 0xDB1D87, 0x41626B, + 0x9EDF31, 0x7738D9, 0x8D9EB4, 0x4F1C2A, 0xF3E795, 0xB69699, + 0xD9A56D, 0x31BB1B, 0x542975, 0xAB917B, 0x63927C, 0x9BB764, + 0x84A598, 0x0A0C51, 0x5E48C4, 0x7780E3, 0x87E156, 0x155972, + 0xE406F8, 0x48AB9E, 0x3CCDDA, 0x010F87, 0x683B70, 0x400CAD, + 0x5DE5C5, 0x7262FA, 0xFA248D, 0x013AF2, 0xE2E8B5, 0x995F7D, + 0x7F8C4B, 0x0E8B59, 0x1006F1, 0x40B6E9, 0x760654, 0xCBCC8C, + 0x086F40, 0xDC7F6F, 0xFCD0D4, 0xA47ADE, 0x5204FA, 0xF38A9D, + 0xE76C7C, 0x575207, 0x499BF1, 0x0DB01C, 0x09098E, 0x957A71, + 0xD53E0E, 0x61DF1D, 0xE6EF34, 0x5821EC, 0x96BCC0, 0xDC96CE, + 0xA9C0AE, 0x130B2C, 0xCCC589, 0x829BB9, 0x2A75BA, 0x97611C, + 0x0CEAB8, 0x165D9D, 0x35AD41, 0x82A805, 0x975628, 0x5601A6, + 0x074F08, 0x80A27D, 0xEFA64E, 0xD7BB4B, 0x5E6397, 0xC92FFC, + 0x4F3F7A, 0xBEA764, 0x0C9B7D, 0xC5DC74, 0xEAD216, 0x6DBBC0, + 0x913E3E, 0xABF50B, 0x95B24A, 0x3FC9C5, 0xE7BA15, 0x8C7F70, + 0xF81358, 0x774606, 0xCE8C0D, 0xB6B268, 0xB85BA6, 0xAC9B2E, + 0x1AAB05, 0x0C6C82, 0x6EC2AE, 0x606874, 0x8F60BF, 0x1FBC7B, + 0x58C97A, 0x448794, 0xBA48A0, 0x72E882, 0x6D3568, 0xE131FD, + 0x4745D0, 0x0BFA1E, 0x07B01D, 0x474D43, 0x59387E, 0x5B0AD5, + 0xC37A8C, 0x0474E8, 0x13D99D, 0x68A13C, 0xB69118, 0x89228C, + 0x6F7D83, 0x86D665, 0x5C7744, 0xDD183E, 0x1C2E17, 0x712F5E, + 0x4AACCB, 0xB69B68, 0xA1201F, 0x743C2B, 0xF6AD70, 0x92E024, + 0xF34FD8, 0x33712E, 0xFE1D73, 0x4471F0, 0x7D0526, 0x58AF47, + 0x7B11FE, 0x1FCE4F, 0x1356C9, 0x9CE3CA, 0xA843C0, 0x8EEA3C, + 0xABEEE4, 0xA5D495, 0xA407A4, 0x31BB4B, 0x0AA1E3, 0x518E7C, + 0xAA4A66, 0xD82CD8, 0x6EF8D2, 0x6F32E6, 0x1DC26B, 0x17AE59, + 0x4B683B, 0x8D48F7, 0xF4FBD8, 0xD4FE0A, 0xE961DE, 0x87BD37, + 0xE6CCD6, 0xCBD76D, 0x3E99DE, 0xB72E21, 0x54EB90, 0x6AB45D, + 0x600AFB, 0xA17B2F, 0xDA0421, 0xE6CA95, 0x35AAA2, 0x7D8FB1, + 0x3207BB, 0xBF82EE, 0x71F55F, 0xC661CB, 0xBD72A1, 0xBF5A64, + 0x6E39E8, 0x6C6DE2, 0x2BD178, 0xAF62A5, 0xA7D86E, 0xE7D0FE, + 0x84DB03, 0x67FDA2, 0x2D6809, 0x0F8B8F, 0x1B50E3, 0x234EF5, + 0x7325ED, 0x8F8F4C, 0xC1E426, 
0x3066AD, 0x0759A4, 0xE03390, + 0x70CC9A, 0x524F77, 0xCDD489, 0x97DD24, 0xA81858, 0xF24513, + 0xA9C18E, 0x2A2F82, 0xC2C014, 0xB8E7F0, 0x934036, 0xD36E51, + 0xD9A089, 0xDBC587, 0xB30418, 0x969192, 0x0A5213, 0xE21841, + 0x2881EC, 0x9A293F, 0x0DF705, 0x85B497, 0xE430B9, 0xE90ECF, + 0xC15FDC, 0x9E8A7E, 0xC5472D, 0xB54FBD, 0x456AF2, 0xCA80B6, + 0xAE25FE, 0xA03B46, 0x6C6CFD, 0x78382A, 0x0E7877, 0x7F2D31, + 0x03C827, 0x61CF52, 0x339A2F, 0x2286A9, 0xE41DF0, 0x640F5C, + 0xBEF364, 0x010506, 0x6D2C21, 0x841EFF, 0x7F3B5D, 0xD98DC8, + 0x0F9421, 0xA25B0C, 0x4C2C44, 0x922392, 0xB98A8A, 0x6179B9, + 0xF7B419, 0x289AAF, 0xE92F47, 0x5E47A2, 0x82927F, 0xC7290E, + 0x6C925C, 0xBA5A3C, 0x8FB7F6, 0x9C4BEE, 0x02C529, 0x0CFCD7, + 0x5EBD8C, 0x7196E0, 0x4B917E, 0x6B9780, 0x6A1731, 0xA617FF, + 0x27A20D, 0x5A56A3, 0x43C4DB, 0xC62EA4, 0x637A84, 0x1C46F9, + 0x33C780, 0x61A278, 0x4915C9, 0xD6C776, 0x6A7C66, 0xD8DD0C, + 0xF87EB1, 0x124C43, 0x5B87E7, 0x097456, 0x3C2FA7, 0x307C4A, + 0x54267A, 0x30E34E, 0xC0CF98, 0xD75B19, 0xFADEDB, 0x12CBE8, + 0x29F24C, 0x579C7E, 0xBF3682, 0xDCB460, 0xAE08B3, 0xA524BC, + 0xC181C2, 0x5DAB90, 0x466602, 0x55345B, 0xA13941, 0x47D820, + 0x278066, 0x81B089, 0x165EFB, 0x4D27FD, 0x2BF9F4, 0x2E2FFB, + 0x6106B5, 0xE76806, 0x445A84, 0x0BDA0D, 0x49D7A4, 0x72650D, + 0xCDC55B, 0x3E16BC, 0x132F6F, 0x29E8FD, 0xE58428, 0x621E41, + 0x7D2AC4, 0xAB5697, 0xAC61EB, 0xE5DAF0, 0x654ED6, 0x8E77E3, + 0x0B2FBC, 0x2E63A3, 0xC8296A, 0x8B631F, 0x4ECCA6, 0x91859C, + 0x9E3E45, 0x0E3CC7, 0xC12454, 0xCCBCB6, 0x17979E, 0xD0D374, + 0xA489A2, 0xC6258F, 0xE8EF9E, 0x12EE26, 0xC614C2, 0x62E23E, + 0xCA8C5C, 0x409AC9, 0x511D05, 0xA88CE0, 0x195500, 0xF7144F, + 0x913BB7, 0x17D064, 0xF6C9CE, 0xAC5D11, 0xD0C313, 0xBCCCB6, + 0xAAD4FC, 0xE47B2C, 0xFE4362, 0xF2E712, 0x2D5EFF, 0x833822, + 0x58A1D7, 0x68377C, 0xE49B25, 0x22B179, 0x048796, 0x069400, + 0xE670D3, 0xD2CB85, 0x55FBE6, 0x67F281, 0xFE2DE0, 0x8CFAF2, + 0x9865BC, 0x210CD3, 0x86DD70, 0x43D00F, 0x55E279, 0x679252, + 0x8D4F58, 0xE17AC5, 0x6A6127, 0x1B0876, 
0x5D8ED0, 0x701330, + 0xD5BD25, 0xC9A126, 0x57C571, 0xDC5C3F, 0xB6D34E, 0xB72383, + 0x001A9E, 0x7D36C0, 0x8151F6, 0x65D7C1, 0xE1F513, 0xCD372A, + 0xE69B0C, 0xD02685, 0x23C3EB, 0x3544CB, 0xF0BE31, 0x83F399, + 0xCB93F8, 0xFFC693, 0x908EC6, 0x8E5DE1, 0x315B7E, 0x67CE7B, + 0x40AAF7, 0x7FD285, 0x069B36, 0x03C00A, 0x13C7D5, 0x0DA14C, + 0x1EAAD4, 0x2B777F, 0x8E05C1, 0x5AD1AE, 0x60C398, 0xA4EA59, + 0x10BEED, 0x88F2FA, 0x69B941, 0xA54E70, 0xA817C3, 0xB96246, + 0xE8EEDC, 0x56D570, 0xBBEBB5, 0xD8F235, 0x201AB9, 0x9CC747, + 0x5BC2FB, 0xC877F3, 0x428CF6, 0x4EEF84, 0xBF85FD, 0xEE6D34, + 0x84C2DE, 0xC42F4C, 0x1A513B, 0x9AC41F, 0x87FFFA, 0x1CA431, + 0x714252, 0xC73FB9, 0x662D89, 0x3D83BA, 0xBDF046, 0x2E4F62, + 0x76B7C0, 0x81336C, 0xBE80A9, 0x4C9D72, 0x739A15, 0x47972C, + 0xA36A1B, 0xD31731, 0x54BA46, 0x2E8C72, 0xFEA5A5, 0x9A7E5F, + 0xC359ED, 0x8F0FFB, 0x1270DA, 0x5E9B08, 0xB0BFCB, 0x36974C, + 0x6CD8F9, 0xD02E1F, 0x1C3F2F, 0xFCF8F0, 0x4C2C6D, 0x0B2169, + 0x48B9CE, 0x42737D, 0xA8E974, 0x64062D, 0xA86C59, 0xEEC419, + 0x047C83, 0x996A23, 0xF2A4C8, 0x4BE1B8, 0x348286, 0xE84240, + 0x8337CB, 0xE55A2F, 0xC17750, 0xA4DA06, 0x64347F, 0x59A5A1, + 0xDFF53D, 0x62A571, 0xEECF3A, 0x886700, 0xC06DAF, 0x4E161F, + 0x12670E, 0xBDFE1A, 0xA72B38, 0x5BA22C, 0xFED227, 0x3FC814, + 0x150E5A, 0xE99B3A, 0x8EE9FC, 0xBC1845, 0x32373A, 0xBDA476, + 0xCEB88F, 0x7FAED3, 0xDB9116, 0x31CF72, 0x1A5136, 0xC4F362, + 0xDE4799, 0x768043, 0x386207, 0x8E5497, 0xB0EF6D, 0x6C57FB, + 0xF56664, 0xD24F05, 0xE0F702, 0x8A41EF, 0xA2EC53, 0x09731C, + 0x6157FE, 0xC5731C, 0xEF1A2E, 0x60EC10, 0xA67EFE, 0x486A73, + 0x8004F6, 0xC3F482, 0x63BA28, 0x107282, +}; +#endif diff --git a/usr/src/lib/libm/common/LD/_TBL_sinl.c b/usr/src/lib/libm/common/LD/_TBL_sinl.c new file mode 100644 index 0000000000..d93a16c639 --- /dev/null +++ b/usr/src/lib/libm/common/LD/_TBL_sinl.c @@ -0,0 +1,195 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the 
"License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * For i = 0L, ..., 75 let x(i) be the extended precision number + * whose exponent is given by 0x3ffc + ((i + 8) >> 5) and whose + * five most significant fraction bits are given by (i + 8) & 0x1f. + * (The remaining fraction bits are zero and the integer bit is 1.) + * Then _TBL_sinl_hi[i] := sin(x(i)) rounded to extended precisionL, + * and _TBL_sinl_lo[i] ~ sin(x(i)) - _TBL_sinl_hi[i]. 
+ */ + +#include "libm.h" + +const long double _TBL_sinl_hi[] = { + 1.5561499277355604121432509e-01L, + 1.5947245893184341994353297e-01L, + 1.6332749173661285085207024e-01L, + 1.6718003236480673437500555e-01L, + 1.7103002203139501927501524e-01L, + 1.7487740199027218989302670e-01L, + 1.7872211353515365937804412e-01L, + 1.8256409800047155539783929e-01L, + 1.8640329676226988454758749e-01L, + 1.9023965123909906176839606e-01L, + 1.9407310289290979115543571e-01L, + 1.9790359322994628465735775e-01L, + 2.0173106380163880472144652e-01L, + 2.0555545620549551765724079e-01L, + 2.0937671208599364370531084e-01L, + 2.1319477313546989061102989e-01L, + 2.1700958109501015675778940e-01L, + 2.2082107775533849055107655e-01L, + 2.2462920495770529235180901e-01L, + 2.2843390459477474541995223e-01L, + 2.3223511861151146241076006e-01L, + 2.3603278900606633373558587e-01L, + 2.3982685783066156443802536e-01L, + 2.4361726719247488600575847e-01L, + 2.4740395925452292959266856e-01L, + 2.5496596041587846749013231e-01L, + 2.6251239976915328146124702e-01L, + 2.7004281671858503154006088e-01L, + 2.7755675164633632592044860e-01L, + 2.8505374594054742458945975e-01L, + 2.9253334202332754361585744e-01L, + 2.9999508337868305117438275e-01L, + 3.0743851458038085066887951e-01L, + 3.1486318131974525087106269e-01L, + 3.2226863043338662567511427e-01L, + 3.2965440993086017192298214e-01L, + 3.3702006902225307624892253e-01L, + 3.4436515814569840820730424e-01L, + 3.5168922899481405922451731e-01L, + 3.5899183454606505366498749e-01L, + 3.6627252908604756136416898e-01L, + 3.7353086823869294642950362e-01L, + 3.8076640899239019207055991e-01L, + 3.8797870972702504604426484e-01L, + 3.9516733024093423623426119e-01L, + 4.0233183177777311122311904e-01L, + 4.0947177705329506611003562e-01L, + 4.1658673028204111924766885e-01L, + 4.2367625720393801036934428e-01L, + 4.3073992511080319721861361e-01L, + 4.3777730287275513286178799e-01L, + 4.4478796096452721142060563e-01L, + 4.5177147149168377657582618e-01L, + 
4.5872740821673659236961014e-01L, + 4.6565534658516018269211988e-01L, + 4.7255486375130445115036980e-01L, + 4.7942553860420300028150759e-01L, + 4.9307868575392305727079882e-01L, + 5.0661145481425736764773474e-01L, + 5.2002054195372700474845132e-01L, + 5.3330267353602017331871271e-01L, + 5.4645460691920356440616155e-01L, + 5.5947313124736687740433047e-01L, + 5.7235506823450724037203458e-01L, + 5.8509727294046215482874185e-01L, + 5.9769663453870153121657086e-01L, + 6.1015007707579137127265265e-01L, + 6.2245456022234368301943030e-01L, + 6.3460708001526929683284300e-01L, + 6.4660466959115237050095826e-01L, + 6.5844439991056754159573505e-01L, + 6.7012338047316289465094724e-01L, + 6.8163876002333416675559724e-01L, + 6.9298772724631791026551897e-01L, + 7.0416751145453367277888060e-01L, +}; + +const long double _TBL_sinl_lo[] = { + -4.4044420388485708604352042e-21L, + -9.3658505779466794663857779e-22L, + -5.2040678607071393508410817e-21L, + -4.0395267481940078256007650e-21L, + 6.3327332576496468315469778e-21L, + 2.6586707822142093837984364e-21L, + -2.6878787450050744237345282e-21L, + 1.7063635662305595250654237e-21L, + 4.7924921282538555045455343e-21L, + -4.4101691066939302183010470e-21L, + 6.1948600915447822830980496e-22L, + -4.9638413649749502251618971e-21L, + 3.5916271597651546227926473e-21L, + -4.0777150323673712797756569e-22L, + 6.5799136599779898603647660e-21L, + 5.0431441802236271279596547e-21L, + 2.7886967636804383702412094e-21L, + 1.7797941915507094664564119e-21L, + -1.3804554392939635583829251e-21L, + 4.7855981187615466625152631e-21L, + 3.1708211390406997503004900e-21L, + -1.5157834044725652569873263e-22L, + 3.3438946731684019204631903e-21L, + 6.4724798056855877111175401e-21L, + 4.1801428671953314697839700e-21L, + -2.5757365367012227482016023e-21L, + -1.0297394515771810295074032e-20L, + 1.2694179637735656722464528e-20L, + 1.5748512781011179565308673e-21L, + -1.6967184859202905792705521e-21L, + 8.8448858652331336251731737e-21L, + -1.1134468969040340293241825e-20L, 
+ 1.6234471791025321420471804e-21L, + -6.0263738196054484651751291e-21L, + 1.2631652295822646843414172e-20L, + -8.6644101687582762853694906e-21L, + 1.2359222174923859397271358e-20L, + -1.3219821587241831508027981e-22L, + -1.9324110998995296922101291e-21L, + 1.2722808830089214240915385e-20L, + 8.7403704479785940299212466e-21L, + -1.2663863629342751015966219e-20L, + -1.3359206065200525634208487e-20L, + 6.8148547822187652382727319e-21L, + 1.0571450573402892191582257e-20L, + -6.0134413552023063129130024e-21L, + 1.2658405457632407447211937e-20L, + 1.1443598275137284797608912e-20L, + -7.6602922503647693246330011e-21L, + -2.2920876394624080624512678e-21L, + -1.6901640257671788285214336e-22L, + 1.2450383440926973698285013e-20L, + 5.8625687909310643361252782e-21L, + 7.6848891207540014891539434e-21L, + -1.0920363727912466924531705e-20L, + -3.8184802762435242280438906e-21L, + -8.2196498741416868399433703e-21L, + -5.6622687407305065056015130e-21L, + -5.4387357437209102117877930e-21L, + 1.1762381857741709383097597e-20L, + 1.0418391756080576218864700e-20L, + -2.8119958331524728369894697e-21L, + -1.9486464776808433575964276e-20L, + 1.2919131320458122775352322e-20L, + -2.3342533395278737494836457e-20L, + 2.2076763147253802020227787e-20L, + 1.0897468372542621634126622e-21L, + 2.2496400209117994020651730e-20L, + 1.7466909662624346932394383e-20L, + 2.3083902445127091336067492e-20L, + -6.1510978111621596519832919e-21L, + 3.5843424075843715436394953e-21L, + -2.2355288181001597796661994e-20L, + 1.6296521874464521140945741e-20L, + 1.1789113655896899561477559e-21L, +}; diff --git a/usr/src/lib/libm/common/LD/_TBL_tanl.c b/usr/src/lib/libm/common/LD/_TBL_tanl.c new file mode 100644 index 0000000000..0802345019 --- /dev/null +++ b/usr/src/lib/libm/common/LD/_TBL_tanl.c @@ -0,0 +1,195 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). 
+ * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * For i = 0, ..., 74 let x(i) be the extended precision number + * whose exponent is given by 0x3ffc + ((i + 8) >> 5) and whose + * five most significant fraction bits are given by (i + 8) & 0x1f. + * (The remaining fraction bits are zero and the integer bit is 1.) + * Then _TBL_tanl_hi[i] := tan(x(i)) rounded to extended precision, + * and _TBL_tanl_lo[i] ~ tan(x(i)) - _TBL_tanl_hi[i]. 
+ */ + +#include "libm.h" + +const long double _TBL_tanl_hi[] = { + 1.5753410732527161068790289e-01L, + 1.6153978404952147631388516e-01L, + 1.6555051927393397620861225e-01L, + 1.6956644521976651014845677e-01L, + 1.7358769476798152084980487e-01L, + 1.7761440147744672763405801e-01L, + 1.8164669960332142765752766e-01L, + 1.8568472411563441162006289e-01L, + 1.8972861071805913288790962e-01L, + 1.9377849586689186352228293e-01L, + 1.9783451679023866881187727e-01L, + 2.0189681150741713288741981e-01L, + 2.0596551884857887210688535e-01L, + 2.1004077847455898084587031e-01L, + 2.1412273089695866488913964e-01L, + 2.1821151749846743250413339e-01L, + 2.2230728055343133087249762e-01L, + 2.2641016324867383747423879e-01L, + 2.3052030970457614146129199e-01L, + 2.3463786499642367899603687e-01L, + 2.3876297517602592026663300e-01L, + 2.4289578729361654240565243e-01L, + 2.4703644942004126466383960e-01L, + 2.5118511066924076739260464e-01L, + 2.5534192122103626651019939e-01L, + 2.6368059641999679984405817e-01L, + 2.7205369865877088343545168e-01L, + 2.8046247014525140317325012e-01L, + 2.8890817244051472599780488e-01L, + 2.9739208726902458947518627e-01L, + 3.0591551735305926411887835e-01L, + 3.1447978727257151616261872e-01L, + 3.2308624435174552010563084e-01L, + 3.3173625957357276734381764e-01L, + 3.4043122852383038743446717e-01L, + 3.4917257236591035224446307e-01L, + 3.5796173884801699838350761e-01L, + 3.6680020334432342273152904e-01L, + 3.7568946993175484041940608e-01L, + 3.8463107250414922303567364e-01L, + 3.9362657592563275821902387e-01L, + 4.0267757722514021178576021e-01L, + 4.1178570683410847577655099e-01L, + 4.2095262986947582208789413e-01L, + 4.3018004746423004901363651e-01L, + 4.3946969814786624047050871e-01L, + 4.4882335927923970884728319e-01L, + 4.5824284853443236696884759e-01L, + 4.6773002545239179993303603e-01L, + 4.7728679304125226171028919e-01L, + 4.8691509944840632450355038e-01L, + 4.9661693969756562569970761e-01L, + 5.0639435749622981205141092e-01L, + 
5.1624944711717514451250130e-01L, + 5.2618435535777914417981255e-01L, + 5.3620128358121603136601796e-01L, + 5.4630248984379051326943158e-01L, + 5.6676706558058644568054429e-01L, + 5.8759736759144322142123240e-01L, + 6.0881374032438072139072557e-01L, + 6.3043767383588476685765678e-01L, + 6.5249189792880799270563541e-01L, + 6.7500048514424290766085257e-01L, + 6.9798896362359925515245207e-01L, + 7.2148444099090441996918396e-01L, + 7.4551574055939199512374818e-01L, + 7.7011355134420870501661335e-01L, + 7.9531059356867418562312202e-01L, + 8.2114180158989412189243090e-01L, + 8.4764452644655265410892839e-01L, + 8.7485876055448234952464232e-01L, + 9.0282738745267350217570818e-01L, + 9.3159645994407246116005700e-01L, + 9.6121551049437041616208335e-01L, + 9.9173789836326868026407724e-01L, +}; + +const long double _TBL_tanl_lo[] = { + -2.6771159409105731701405510e-21L, + -4.6099226789741262900210606e-21L, + 5.3186644140375322820802458e-21L, + 2.5138405830938633735686839e-21L, + -5.1314617057806432706999694e-21L, + -2.3150818458524320771936317e-21L, + 7.4823150688409589857878346e-22L, + 6.5983384951777057330962451e-21L, + 3.1737465070309238679637904e-21L, + -6.2605330413009742107992404e-21L, + -3.4708968895421512574248288e-21L, + -3.3508177722855547163047103e-21L, + 1.8539761255947162282442845e-21L, + -4.3527863815358994574071238e-21L, + -3.0729582373746958079080308e-21L, + 1.7486583794617176080777995e-21L, + -2.0880427643688559927261666e-22L, + 3.4326156341633317484064051e-21L, + -5.8444712515543005993510667e-21L, + 3.3308393583864583403400180e-21L, + 1.5180609545016167494014088e-21L, + -4.5664864992230118395870971e-21L, + -3.4486635382887607253671356e-22L, + 6.4992471510018586950169590e-21L, + -5.7171552644357921603079772e-21L, + 1.0767820312749142840542796e-20L, + -8.8873094864264944929118678e-21L, + -3.6458345495736833933253427e-21L, + 3.7835691968285101289024150e-21L, + -7.9922577212991920007926665e-21L, + -1.1639426061963512311797196e-20L, + 
1.0819496381458482697046145e-20L, + 1.2669812351932848585361942e-20L, + -4.3879352642165387665557942e-21L, + -6.2397232294970361376981025e-21L, + 1.0249894624181563425318369e-20L, + 4.8883545518509990780582976e-21L, + -1.0924217224719888561366811e-20L, + -1.0160304466598813882209781e-20L, + 5.1826415091471411711448075e-21L, + 1.0389918683332972349077236e-20L, + -7.1664776574714262163862363e-21L, + 1.2298884220333748071625466e-20L, + -1.3099990378137383497651040e-20L, + 6.6930911371536844477108605e-21L, + 1.3154437144468699485999317e-20L, + -6.7276672708135125503950130e-21L, + -9.6583948799780933132703713e-21L, + -1.1693327591353762422287158e-20L, + 1.2115072030396340314945014e-20L, + -4.8328734014430698289025015e-21L, + 1.0852973061445293626693228e-20L, + 1.9411831283588255256712679e-20L, + -2.0725962316575506668083850e-20L, + -9.1991091819589918968351350e-21L, + -1.8439030785497371079388971e-20L, + -1.4252114398617735096821730e-20L, + -3.6634999903039053547935623e-22L, + -2.7073538111310219812185487e-20L, + 2.1768400635771833866020006e-20L, + -5.0453509036808273670769239e-21L, + 1.8262326404957249986102613e-20L, + 2.3253788272891224529527726e-21L, + -2.6863465601726641017825874e-21L, + 2.0333919445169836552474035e-20L, + 1.2381983326738354735338055e-20L, + -1.0629693225258909983165405e-20L, + 2.2479666845586239075466463e-20L, + 1.8993064919061156630226362e-20L, + -1.8140078592138587341953739e-20L, + 1.5029592868184122759494625e-20L, + 2.0466189644006868146496769e-20L, + 5.1457594757697525471406575e-21L, + 2.3217272240793119168128789e-20L, + -7.2198528398134119662230907e-21L, +}; diff --git a/usr/src/lib/libm/common/LD/__cosl.c b/usr/src/lib/libm/common/LD/__cosl.c new file mode 100644 index 0000000000..9c0042ec3a --- /dev/null +++ b/usr/src/lib/libm/common/LD/__cosl.c @@ -0,0 +1,146 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). 
+ * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* INDENT OFF */ +/* + * __k_cosl( long double x; long double y ) + * kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164 + * Input x is assumed to be bounded by ~pi/4 in magnitude. + * Input y is the tail of x. + * + * Table look up algorithm + * 1. by cos(-x) = cos(x), we may replace x by |x| + * 2. if x < 5/32 = [0x3ffc4000, 0] = 0.15625 , then + * if x < 2^-57 (hx < 0x3fc60000 0), return 1.0 with inexact if x != 0 + * z = x*x; + * if x <= 1/128 = 2**-7 = 0.0078125 + * cos(x)=1.0+z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*qq5)))) + * else + * cos(x)=1.0+z*(q1+ ... z*q8) + * 3.
/*
 * long double __k_cosl(long double x, long double y)
 *
 * Kernel cosine.  x is assumed to be bounded by ~pi/4 in magnitude and y is
 * the tail of x.
 *
 * The leading word of x (hx; ix is hx with the sign bit cleared) selects
 * the method:
 *
 *	ix < 0x3fc60000 (|x| < 2^-57)	 return 1, raising inexact if x != 0
 *	ix < 0x3ff80000 (|x| < 1/128)	 1 + z*(qq1 + ... + z*qq5), z = x*x
 *	ix < 0x3ffc4000 (|x| < 0.15625)	 1 + z*(q1 + ... + z*q8)
 *	otherwise			 table lookup around a break point t
 *
 * For the table case, |x| is rounded to a break point t, i indexes the
 * tables, and with r = (|x| - t) + y (tail negated for negative x)
 *
 *	cos(t + r) = cos(t)*cos(r) - sin(t)*sin(r)
 *
 * where cos(t) = _TBL_cosl_hi[i] + _TBL_cosl_lo[i], sin(t) is taken from
 * _TBL_sinl_hi[i], sin(r) comes from the pp polynomial and cos(r) - 1 from
 * the qq polynomial.
 */
extern const long double _TBL_cosl_hi[], _TBL_cosl_lo[], _TBL_sinl_hi[];

static const long double
one = 1.0,
/* |sin(x) - (x + pp1*x^3 + ... + pp5*x^11)| <= 2^-122.32 for |x| < 1/64 */
pp1 = -1.666666666666666666666666666586782940810e-0001L,
pp2 = 8.333333333333333333333003723660929317540e-0003L,
pp3 = -1.984126984126984076045903483778337804470e-0004L,
pp4 = 2.755731922361906641319723106210900949413e-0006L,
pp5 = -2.505198398570947019093998469135012057673e-0008L,
/* |cos(x) - (1 + q1*x^2 + ... + q8*x^16)| <= 2^-117.11 for |x| <= 0.15625 */
q1 = -4.999999999999999999999999999999756416975e-0001L,
q2 = 4.166666666666666666666666664006066577258e-0002L,
q3 = -1.388888888888888888888877700363937169637e-0003L,
q4 = 2.480158730158730158494468463031814083559e-0005L,
q5 = -2.755731922398586276322819250356005542871e-0007L,
q6 = 2.087675698767424261441959760729854017855e-0009L,
q7 = -1.147074481239662089072452129010790774761e-0011L,
q8 = 4.777761647399651599730663422263531034782e-0014L,
/* |cos(x) - (1 + qq1*x^2 + ... + qq5*x^10)| <= 2^-123.84 for |x| <= 1/128 */
qq1 = -4.999999999999999999999999999999378373641e-0001L,
qq2 = 4.166666666666666666666665478399327703130e-0002L,
qq3 = -1.388888888888888888058211230618051613494e-0003L,
qq4 = 2.480158730156105377771585658905303111866e-0005L,
qq5 = -2.755728099762526325736488376695157008736e-0007L;

long double
__k_cosl(long double x, long double y)
{
	long double brk, cos_hi, cosm1_r, r, rsq, sin_r;
	int *brk_w = (int *)&brk, *x_w = (int *)&x;
	int hx, idx, ix, jx;

	/* brk starts as 1.0; its leading word is overwritten further down */
	brk = 1.0;
#if defined(__i386) || defined(__amd64)
	XTOI(x_w, hx);
#else
	hx = x_w[0];
#endif
	ix = hx & 0x7fffffff;

	if (ix < 0x3ffc4000) {
		/* the int conversion raises inexact for tiny nonzero x */
		if (ix < 0x3fc60000 && (int)x == 0)
			return (one);

		rsq = x * x;
		if (ix >= 0x3ff80000) {
			return (one + rsq * (q1 + rsq * (q2 + rsq * (q3 +
			    rsq * (q4 + rsq * (q5 + rsq * (q6 + rsq * (q7 +
			    rsq * q8))))))));
		}
		/* |x| < 0.0078125 */
		return (one + rsq * (qq1 + rsq * (qq2 + rsq * (qq3 + rsq *
		    (qq4 + rsq * qq5)))));
	}

	/* round the leading word to a break point and derive the table slot */
	jx = (ix + 0x400) & 0x7ffff800;
	idx = (jx - 0x3ffc4000) >> 11;
#if defined(__i386) || defined(__amd64)
	ITOX(jx, brk_w);
#else
	brk_w[0] = jx;
#endif

	/* r = (|x| - brk) plus the tail, with the tail's sign following x */
	r = (hx > 0) ? y - (brk - x) : (-y) - (brk + x);

	cos_hi = _TBL_cosl_hi[idx];
	rsq = r * r;
	cosm1_r = rsq * (qq1 + rsq * (qq2 + rsq * (qq3 + rsq * (qq4 +
	    rsq * qq5))));
	sin_r = r * (one + rsq * (pp1 + rsq * (pp2 + rsq * (pp3 + rsq *
	    (pp4 + rsq * pp5)))));
	cosm1_r = _TBL_cosl_lo[idx] - (_TBL_sinl_hi[idx] * sin_r -
	    cos_hi * cosm1_r);
	return (cos_hi + cosm1_r);
}
/*
 * long double __k_lgammal(long double x, int *signgamlp);
 * K.C. Ng, August, 1989.
 *
 * Kernel for the long double log-gamma functions.  For finite x the sign of
 * gamma(x) is stored through signgamlp (+1 or -1).
 *
 * [1.5,2.5] is the primary interval.  The algorithms are mainly derived from
 *
 *                                zeta(2)-1    2    zeta(3)-1    3
 *	lgamma(2+s) = s*(1-euler) + --------- * s  -  --------- * s  + ...
 *                                    2                 3
 *
 * Note 1. Since gamma(1+s) = s*gamma(s), we have
 *		lgamma(1+s) = log(s) + lgamma(s), or
 *		lgamma(s) = lgamma(1+s) - log(s).
 *	   When s is really tiny (like roundoff), lgamma(1+s) ~ s*(1-euler),
 *	   hence lgamma(s) ~ -log(s) for tiny s.
 */
static long double neg(long double, int *);
static long double poly(long double, const long double *, int);
static long double polytail(long double);
static long double primary(long double);

static const long double
c0 = 0.0L,
ch = 0.5L,
c1 = 1.0L,
c2 = 2.0L,
c3 = 3.0L,
c4 = 4.0L,
c5 = 5.0L,
c6 = 6.0L,
pi = 3.1415926535897932384626433832795028841971L,
tiny = 1.0e-40L;

long double
__k_lgammal(long double x, int *signgamlp)
{
	long double t, y;
	int i;

	/* purge off +-inf, NaN and negative arguments */
	if (!finitel(x))
		return (x * x);
	*signgamlp = 1;
	if (signbitl(x))
		return (neg(x, signgamlp));

	/*
	 * For x < 8.0: reduce to the primary interval around 2 using the
	 * nearest integer y, then undo the shift with logs of the skipped
	 * factors.
	 */
	if (x < 8.0L) {
		y = anintl(x);
		i = (int)y;
		switch (i) {
		case 0:
			if (x < tiny)
				return (-logl(x));
			else
				return (primary(x) - log1pl(x)) - logl(x);
		case 1:
			return (primary(x - y) - logl(x));
		case 2:
			return (primary(x - y));
		case 3:
			return (primary(x - y) + logl(x - c1));
		case 4:
			return (primary(x - y) + logl((x - c1) * (x - c2)));
		case 5:
			return (primary(x - y) +
			    logl((x - c1) * (x - c2) * (x - c3)));
		case 6:
			return (primary(x - y) +
			    logl((x - c1) * (x - c2) * (x - c3) * (x - c4)));
		case 7:
			return (primary(x - y) + logl((x - c1) * (x - c2) *
			    (x - c3) * (x - c4) * (x - c5)));
		case 8:
			return (primary(x - y) + logl((x - c1) * (x - c2) *
			    (x - c3) * (x - c4) * (x - c5) * (x - c6)));
		default:
			break;
		}
	}

	/* 8.0 <= x < 1.0e40: asymptotic series in 1/x */
	if (x < 1.0e40L) {
		t = logl(x);
		return (x * (t - c1) - (ch * t - polytail(c1 / x)));
	}

	/* 1.0e40 <= x <= inf */
	return (x * (logl(x) - c1));
}

/*
 * Coefficient tables for primary().  an4..an1 serve the four sub-intervals
 * of [-0.5, 0) and ap1..ap4 the four sub-intervals of [0, 0.5), each of
 * width 1/8, in increasing order of s.
 */
static const long double an1[] = {		/* 20 terms */
	-0.0772156649015328606065120900824024309741L,
	3.224670334241132182362075833230130289059e-0001L,
	-6.735230105319809513324605383668929964120e-0002L,
	2.058080842778454787900092432928910226297e-0002L,
	-7.385551028673985266273054086081102125704e-0003L,
	2.890510330741523285758867304409628648727e-0003L,
	-1.192753911703260976581414338096267498555e-0003L,
	5.096695247430424562831956662855697824035e-0004L,
	-2.231547584535777978926798502084300123638e-0004L,
	9.945751278186384670278268034322157947635e-0005L,
	-4.492623673665547726647838474125147631082e-0005L,
	2.050721280617796810096993154281561168706e-0005L,
	-9.439487785617396552092393234044767313568e-0006L,
	4.374872903516051510689234173139793159340e-0006L,
	-2.039156676413643091040459825776029327487e-0006L,
	9.555777181318621470466563543806211523634e-0007L,
	-4.468344919709630637558538313482398989638e-0007L,
	2.216738086090045781773004477831059444178e-0007L,
	-7.472783403418388455860445842543843485916e-0008L,
	8.777317930927149922056782132706238921648e-0008L,
};

static const long double an2[] = {		/* 20 terms */
	-.0772156649015328606062692723698127607018L,
	3.224670334241132182635552349060279118047e-0001L,
	-6.735230105319809367555642883133994818325e-0002L,
	2.058080842778459676880822202762143671813e-0002L,
	-7.385551028672828216011343150077846918930e-0003L,
	2.890510330762060607399561536905727853178e-0003L,
	-1.192753911419623262328187532759756368041e-0003L,
	5.096695278636456678258091134532258618614e-0004L,
	-2.231547306817535743052975194022893369135e-0004L,
	9.945771461633313282744264853986643877087e-0005L,
	-4.492503279458972037926876061257489481619e-0005L,
	2.051311416812082875492678651369394595613e-0005L,
	-9.415778282365955203915850761537462941165e-0006L,
	4.452428829045147098722932981088650055919e-0006L,
	-1.835024727987632579886951760650722695781e-0006L,
	1.379783080658545009579060714946381462565e-0006L,
	2.282637532109775156769736768748402175238e-0007L,
	1.002577375515900191362119718128149880168e-0006L,
	5.177028794262638311939991106423220002463e-0007L,
	3.127947245174847104122426445937830555755e-0007L,
};

static const long double an3[] = {		/* 20 terms */
	-.0772156649015328227870646417729220690875L,
	3.224670334241156699881788955959915250365e-0001L,
	-6.735230105312273571375431059744975563170e-0002L,
	2.058080842924464587662846071337083809005e-0002L,
	-7.385551008677271654723604653956131791619e-0003L,
	2.890510536479782086197110272583833176602e-0003L,
	-1.192752262076857692740571567808259138697e-0003L,
	5.096800771149805289371135155128380707889e-0004L,
	-2.231000836682831335505058492409860123647e-0004L,
	9.968912171073936803871803966360595275047e-0005L,
	-4.412020779327746243544387946167256187258e-0005L,
	2.281374113541454151067016632998630209049e-0005L,
	-4.028361291428629491824694655287954266830e-0006L,
	1.470694920619518924598956849226530750139e-0005L,
	1.381686137617987197975289545582377713772e-0005L,
	2.012493539265777728944759982054970441601e-0005L,
	1.723917864208965490251560644681933675799e-0005L,
	1.202954035243788300138608765425123713395e-0005L,
	5.079851887558623092776296577030850938146e-0006L,
	1.220657945824153751555138592006604026282e-0006L,
};

static const long double an4[] = {		/* 21 terms */
	-.0772156649015732285350261816697540392371L,
	3.224670334221752060691751340365212226097e-0001L,
	-6.735230109744009693977755991488196368279e-0002L,
	2.058080778913037626909954141611580783216e-0002L,
	-7.385557567931505621170483708950557506819e-0003L,
	2.890459838416254326340844289785254883436e-0003L,
	-1.193059036207136762877351596966718455737e-0003L,
	5.081914708100372836613371356529568937869e-0004L,
	-2.289855016133600313131553005982542045338e-0004L,
	8.053454537980585879620331053833498511491e-0005L,
	-9.574620532104845821243493405855672438998e-0005L,
	-9.269085628207107155601445001196317715686e-0005L,
	-2.183276779859490461716196344776208220180e-0004L,
	-3.134834305597571096452454999737269668868e-0004L,
	-3.973878894951937437018305986901392888619e-0004L,
	-3.953352414899222799161275564386488057119e-0004L,
	-3.136740932204038779362660900621212816511e-0004L,
	-1.884502253819634073946130825196078627664e-0004L,
	-8.192655799958926853585332542123631379301e-0005L,
	-2.292183750010571062891605074281744854436e-0005L,
	-3.223980628729716864927724265781406614294e-0006L,
};

static const long double ap1[] = {		/* 19 terms */
	-0.0772156649015328606065120900824024296961L,
	3.224670334241132182362075833230047956465e-0001L,
	-6.735230105319809513324605382963943777301e-0002L,
	2.058080842778454787900092126606252375465e-0002L,
	-7.385551028673985266272518231365020063941e-0003L,
	2.890510330741523285681704570797770736423e-0003L,
	-1.192753911703260971285304221165990244515e-0003L,
	5.096695247430420878696018188830886972245e-0004L,
	-2.231547584535654004647639737841526025095e-0004L,
	9.945751278137201960636098805852315982919e-0005L,
	-4.492623672777606053587919463929044226280e-0005L,
	2.050721258703289487603702670753053765201e-0005L,
	-9.439485626565616989352750672499008021041e-0006L,
	4.374838162403994645138200419356844574219e-0006L,
	-2.038979492862555348577006944451002161496e-0006L,
	9.536763152382263548086981191378885102802e-0007L,
	-4.426111214332434049863595231916564014913e-0007L,
	1.911148847512947464234633846270287546882e-0007L,
	-5.788673944861923038157839080272303519671e-0008L,
};

static const long double ap2[] = {		/* 19 terms */
	-0.077215664901532860606428624449354836087L,
	3.224670334241132182271948744265855440139e-0001L,
	-6.735230105319809467356126599005051676203e-0002L,
	2.058080842778453315716389815213496002588e-0002L,
	-7.385551028673653323064118422580096222959e-0003L,
	2.890510330735923572088003424849289006039e-0003L,
	-1.192753911629952368606185543945790688144e-0003L,
	5.096695239806718875364547587043220998766e-0004L,
	-2.231547520600616108991867127392089144886e-0004L,
	9.945746913898151120612322833059416008973e-0005L,
	-4.492599307461977003570224943054585729684e-0005L,
	2.050609891889165453592046505651759999090e-0005L,
	-9.435329866734193796540515247917165988579e-0006L,
	4.362267138522223236241016136585565144581e-0006L,
	-2.008556356653246579300491601497510230557e-0006L,
	8.961498103387207161105347118042844354395e-0007L,
	-3.614187228330216282235692806488341157741e-0007L,
	1.136978988247816860500420915014777753153e-0007L,
	-2.000532786387196664019286514899782691776e-0008L,
};

static const long double ap3[] = {		/* 19 terms */
	-0.077215664901532859888521470795348856446L,
	3.224670334241131733364048614484228443077e-0001L,
	-6.735230105319676541660495145259038151576e-0002L,
	2.058080842775975461837768839015444273830e-0002L,
	-7.385551028347615729728618066663566606906e-0003L,
	2.890510327517954083379032008643080256676e-0003L,
	-1.192753886919470728001821137439430882603e-0003L,
	5.096693728898932234814903769146577482912e-0004L,
	-2.231540055048827662528594010961874258037e-0004L,
	9.945446210018649311491619999438833843723e-0005L,
	-4.491608206598064519190236245753867697750e-0005L,
	2.047939071322271016498065052853746466669e-0005L,
	-9.376824046522786006677541036631536790762e-0006L,
	4.259329829498149111582277209189150127347e-0006L,
	-1.866064770421594266702176289764212873428e-0006L,
	7.462066721137579592928128104534957135669e-0007L,
	-2.483546217529077735074007138457678727371e-0007L,
	5.915166576378161473299324673649144297574e-0008L,
	-7.334139641706988966966252333759604701905e-0009L,
};

static const long double ap4[] = {		/* 19 terms */
	-0.0772156649015326785569313252637238673675L,
	3.224670334241051435008842685722468344822e-0001L,
	-6.735230105302832007479431772160948499254e-0002L,
	2.058080842553481183648529360967441889912e-0002L,
	-7.385551007602909242024706804659879199244e-0003L,
	2.890510182473907253939821312248303471206e-0003L,
	-1.192753098427856770847894497586825614450e-0003L,
	5.096659636418811568063339214203693550804e-0004L,
	-2.231421144004355691166194259675004483639e-0004L,
	9.942073842343832132754332881883387625136e-0005L,
	-4.483809261973204531263252655050701205397e-0005L,
	2.033260142610284888319116654931994447173e-0005L,
	-9.153539544026646699870528191410440585796e-0006L,
	3.988460469925482725894144688699584997971e-0006L,
	-1.609692980087029172567957221850825977621e-0006L,
	5.634916377249975825399706694496688803488e-0007L,
	-1.560065465929518563549083208482591437696e-0007L,
	2.961350193868935325526962209019387821584e-0008L,
	-2.834602215195368130104649234505033159842e-0009L,
};

/*
 * primary(s) evaluates the lgamma(2+s) approximation for |s| <= 0.5 as
 * s/2 + s*P(s), where P is the table polynomial for the sub-interval of
 * width 1/8 that contains s.
 *
 * The sub-interval index is clamped to 7: when s is the largest long double
 * below 0.5, s + 0.5 is a rounding tie that resolves up to exactly 1.0, so
 * the raw index is 8.  Without the clamp no case matched and the function
 * returned 0 instead of a value near lgamma(2.5).
 */
static long double
primary(long double s)
{
	int i;

	i = (int)(8.0L * (s + 0.5L));
	if (i > 7)
		i = 7;
	switch (i) {
	case 0:	return ch * s + s * poly(s, an4, 21);
	case 1:	return ch * s + s * poly(s, an3, 20);
	case 2:	return ch * s + s * poly(s, an2, 20);
	case 3:	return ch * s + s * poly(s, an1, 20);
	case 4:	return ch * s + s * poly(s, ap1, 19);
	case 5:	return ch * s + s * poly(s, ap2, 19);
	case 6:	return ch * s + s * poly(s, ap3, 19);
	case 7:	return ch * s + s * poly(s, ap4, 19);
	}
	/* NOTREACHED for |s| <= 0.5 */
	return (0.0L);
}

/*
 * Horner evaluation of p[0] + p[1]*s + ... + p[n-1]*s^(n-1).
 */
static long double
poly(long double s, const long double *p, int n)
{
	long double y;
	int i;

	y = p[n - 1];
	for (i = n - 2; i >= 0; i--)
		y = p[i] + s * y;
	return (y);
}

/* coefficients used by polytail() for the large-argument series */
static const long double pt[] = {
	9.189385332046727417803297364056176804663e-0001L,
	8.333333333333333333333333333331286969123e-0002L,
	-2.777777777777777777777777553194796036402e-0003L,
	7.936507936507936507927283071433584248176e-0004L,
	-5.952380952380952362351042163192634108297e-0004L,
	8.417508417508395661774286645578379460131e-0004L,
	-1.917526917525263651186066417934685675649e-0003L,
	6.410256409395203164659292973142293199083e-0003L,
	-2.955065327248303301763594514012418438188e-0002L,
	1.796442830099067542945998615411893822886e-0001L,
	-1.392413465829723742489974310411118662919e+0000L,
	1.339984238037267658352656597960492029261e+0001L,
	-1.564707657605373662425785904278645727813e+0002L,
	2.156323807499211356127813962223067079300e+0003L,
	-3.330486427626223184647299834137041307569e+0004L,
	5.235535072011889213611369254140123518699e+0005L,
	-7.258160984602220710491988573430212593080e+0006L,
	7.316526934569686459641438882340322673357e+0007L,
	-3.806450279064900548836571789284896711473e+0008L,
};

/*
 * polytail(s) returns pt[0] + s*Q(s*s), where Q is the polynomial with
 * coefficients pt[1..18].  __k_lgammal() calls it with s = 1/x.
 */
static long double
polytail(long double s)
{
	long double t, z;
	int i;

	z = s * s;
	t = pt[18];
	for (i = 17; i >= 1; i--)
		t = pt[i] + z * t;
	return (pt[0] + s * t);
}

/*
 * neg(z, signgamlp) handles arguments with the sign bit set.
 * Written by K.C. Ng, Feb 2, 1989.
 *
 * Since
 *		-z*G(-z)*G(z) = pi/sin(pi*z),
 * we have
 *		G(-z) = -pi/(sin(pi*z)*G(z)*z)
 *		      =  pi/(sin(pi*(-z))*G(z)*z)
 *
 * so with t = sin(pi*z) for the original (negative) z:
 *	t == 0		return 1.0/0.0 = +INF (pole)
 *	|z| <= tiny	return -log(|z|)
 *	otherwise	return log(pi/(|t|*|z|)) - lgamma(|z|)
 * and gamma(z) is negative exactly when t < 0.
 */
static long double
neg(long double z, int *signgamlp)
{
	long double t, p;

	t = sinpil(z);			/* t := sin(pi*z) */
	if (t == c0)			/* return 1.0/0.0 = +INF */
		return (c1 / c0);

	z = -z;
	if (z <= tiny)
		p = -logl(z);
	else
		p = logl(pi / (fabsl(t) * z)) - __k_lgammal(z, signgamlp);
	/* set last: the recursive call above resets *signgamlp to 1 */
	if (t < c0)
		*signgamlp = -1;
	return (p);
}
/*
 * __poly_libmq(x, n, p) evaluates the polynomial
 *
 *	p[0] + p[1]*x + ... + p[n-1]*x^(n-1)
 *
 * by Horner's rule, where n is the number of coefficients in p.
 * An empty coefficient list (n <= 0) evaluates to 0 and p is not read.
 */
long double
__poly_libmq(long double x, int n, long double p[])
{
	long double t;
	int i;

	if (n <= 0)
		return (0.0L);

	t = p[n - 1];
	for (i = n - 2; i >= 0; i--)
		t = p[i] + x * t;
	return (t);
}
/*
 * int __rem_pio2l(long double x, long double *y)
 *
 * Return the remainder of x rem pi/2 in y[0] + y[1] by calling
 * __rem_pio2m(); the function result is the value that call returns,
 * negated together with y[0] and y[1] when x is negative.
 *
 * Arguments with |x| <= pi/4 need no reduction: y[0] = x, y[1] = 0 and the
 * result is 0.
 */
extern const int _TBL_ipio2l_inf[];

static const long double
	two24l = 16777216.0L,
	pio4 = 0.7853981633974483096156608458198757210495L;

int
__rem_pio2l(long double x, long double *y)
{
	long double hi, lo, mag;
	double chunk[3], rem[5];
	int expo, k, nchunk, quad, negative;

	negative = signbitl(x);
	mag = fabsl(x);
	if (mag <= pio4) {
		y[0] = x;
		y[1] = 0;
		return (0);
	}

	/* scale |x| and peel off three integer pieces, 24 bits at a time */
	expo = ilogbl(mag) - 23;
	mag = scalbnl(mag, -expo);
	k = 0;
	while (k < 3) {
		chunk[k] = (double)((int)(mag));
		mag = (mag - (long double)chunk[k]) * two24l;
		k++;
	}

	/* omit trailing zeros */
	for (nchunk = 3; chunk[nchunk - 1] == 0.0; nchunk--)
		;

	quad = __rem_pio2m(chunk, rem, expo, nchunk, 2, _TBL_ipio2l_inf);

	/* recombine the two result pieces into a head/tail pair */
	hi = (long double)rem[1];
	lo = (long double)rem[0];
	y[0] = hi + lo;
	y[1] = hi - (y[0] - lo);

	if (negative != 1)
		return (quad);

	y[0] = -y[0];
	y[1] = -y[1];
	return (-quad);
}
/*
 * long double __k_sincosl(long double x, long double y, long double *c)
 *
 * Kernel sincosl function.  x is assumed to be bounded by ~pi/4 in
 * magnitude and y is the tail of x.  Returns the sine and stores the
 * cosine in *c.
 *
 * The leading word of x (hx; ix is hx with the sign bit cleared) selects
 * the method:
 *
 *	ix < 0x3fc60000		sine is x, cosine is 1 (inexact if x != 0)
 *	ix < 0x3ff80000		cosine from qq1..qq5, sine from p1..p6
 *	ix < 0x3ffc4000		cosine from q1..q8, sine from p1..p8
 *	otherwise		table lookup around a break point t, using
 *				the angle-sum formulas with sin(r) from the
 *				pp polynomial and cos(r) - 1 from the qq
 *				polynomial, r = (|x| - t) + y (tail negated
 *				for negative x)
 */
extern const long double _TBL_sinl_hi[], _TBL_sinl_lo[], _TBL_cosl_hi[],
	_TBL_cosl_lo[];

static const long double
one = 1.0,
/* |sin(x) - (x + pp1*x^3 + ... + pp5*x^11)| <= 2^-122.32 for |x| < 1/64 */
pp1 = -1.666666666666666666666666666586782940810e-0001L,
pp2 = 8.333333333333333333333003723660929317540e-0003L,
pp3 = -1.984126984126984076045903483778337804470e-0004L,
pp4 = 2.755731922361906641319723106210900949413e-0006L,
pp5 = -2.505198398570947019093998469135012057673e-0008L,
/*
 * |(sin(x) - (x + p1*x^3 + ... + p8*x^17)) / x| <= 2^-116.17
 * for |x| < 0.1953125
 */
p1 = -1.666666666666666666666666666666211262297e-0001L,
p2 = 8.333333333333333333333333301497876908541e-0003L,
p3 = -1.984126984126984126984041302881180621922e-0004L,
p4 = 2.755731922398589064100587351307269621093e-0006L,
p5 = -2.505210838544163129378906953765595393873e-0008L,
p6 = 1.605904383643244375050998243778534074273e-0010L,
p7 = -7.647162722800685516901456114270824622699e-0013L,
p8 = 2.810046428661902961725428841068844462603e-0015L,
/* |cos(x) - (1 + qq1*x^2 + ... + qq5*x^10)| <= 2^-123.84 for |x| <= 1/128 */
qq1 = -4.999999999999999999999999999999378373641e-0001L,
qq2 = 4.166666666666666666666665478399327703130e-0002L,
qq3 = -1.388888888888888888058211230618051613494e-0003L,
qq4 = 2.480158730156105377771585658905303111866e-0005L,
qq5 = -2.755728099762526325736488376695157008736e-0007L,
/* |cos(x) - (1 + q1*x^2 + ... + q8*x^16)| <= 2^-117.11 for |x| <= 0.15625 */
q1 = -4.999999999999999999999999999999756416975e-0001L,
q2 = 4.166666666666666666666666664006066577258e-0002L,
q3 = -1.388888888888888888888877700363937169637e-0003L,
q4 = 2.480158730158730158494468463031814083559e-0005L,
q5 = -2.755731922398586276322819250356005542871e-0007L,
q6 = 2.087675698767424261441959760729854017855e-0009L,
q7 = -1.147074481239662089072452129010790774761e-0011L,
q8 = 4.777761647399651599730663422263531034782e-0014L;

long double
__k_sincosl(long double x, long double y, long double *c)
{
	long double brk, cos_hi, cos_lo, cosm1_r, r, rsq, sin_hi, sin_lo;
	long double sin_r, tail;
	int *brk_w = (int *)&brk, *x_w = (int *)&x;
	int hx, idx, ix, jx;

	/* brk starts as 1.0; its leading word is overwritten further down */
	brk = 1.0;
#if defined(__i386) || defined(__amd64)
	XTOI(x_w, hx);
#else
	hx = x_w[0];
#endif
	ix = hx & 0x7fffffff;

	if (ix < 0x3ffc4000) {
		/* the int conversion raises inexact for tiny nonzero x */
		if (ix < 0x3fc60000 && (int)x == 0) {
			*c = one;
			return (x);
		}

		rsq = x * x;
		if (ix >= 0x3ff80000) {
			*c = one + rsq * (q1 + rsq * (q2 + rsq * (q3 + rsq *
			    (q4 + rsq * (q5 + rsq * (q6 + rsq * (q7 +
			    rsq * q8)))))));
			tail = rsq * (p1 + rsq * (p2 + rsq * (p3 + rsq *
			    (p4 + rsq * (p5 + rsq * (p6 + rsq * (p7 +
			    rsq * p8)))))));
		} else {
			*c = one + rsq * (qq1 + rsq * (qq2 + rsq * (qq3 +
			    rsq * (qq4 + rsq * qq5))));
			tail = rsq * (p1 + rsq * (p2 + rsq * (p3 + rsq *
			    (p4 + rsq * (p5 + rsq * p6)))));
		}

		tail = y + x * tail;
		return (x + tail);
	}

	/* round the leading word to a break point and derive the table slot */
	jx = (ix + 0x400) & 0x7ffff800;
	idx = (jx - 0x3ffc4000) >> 11;
#if defined(__i386) || defined(__amd64)
	ITOX(jx, brk_w);
#else
	brk_w[0] = jx;
#endif

	/* r = (|x| - brk) plus the tail, with the tail's sign following x */
	r = (hx > 0) ? y - (brk - x) : (-y) - (brk + x);

	sin_hi = _TBL_sinl_hi[idx];
	rsq = r * r;
	cosm1_r = rsq * (qq1 + rsq * (qq2 + rsq * (qq3 + rsq * (qq4 +
	    rsq * qq5))));
	sin_r = r * (one + rsq * (pp1 + rsq * (pp2 + rsq * (pp3 + rsq *
	    (pp4 + rsq * pp5)))));
	cos_hi = _TBL_cosl_hi[idx];

	/* cosine: cos(t)cos(r) - sin(t)sin(r) */
	cos_lo = _TBL_cosl_lo[idx] - (sin_hi * sin_r - cos_hi * cosm1_r);
	*c = cos_hi + cos_lo;

	/* sine of |x|: sin(t)cos(r) + cos(t)sin(r); sign restored below */
	sin_lo = cos_hi * sin_r + sin_hi * cosm1_r;
	sin_lo += _TBL_sinl_lo[idx];
	if (hx < 0)
		return (-sin_hi - sin_lo);
	return (sin_hi + sin_lo);
}
/*
 * long double __k_sinl(long double x, long double y)
 *
 * Kernel sine.  x is assumed to be bounded by ~pi/4 in magnitude and y is
 * the tail of x.
 *
 * The leading word of x (hx; ix is hx with the sign bit cleared) selects
 * the method:
 *
 *	ix < 0x3fc60000 (|x| < 2^-57)	    return x (inexact if x != 0)
 *	ix < 0x3ffc9000 (|x| < 0.1953125)   x + (y + x*z*(p1 + ... + z*p7*..)),
 *					    z = x*x, using p1..p8
 *	otherwise			    table lookup around a break point t
 *
 * For the table case, |x| is rounded to a break point t, i indexes the
 * tables, and with r = (|x| - t) + y (tail negated for negative x)
 *
 *	sin(t + r) = sin(t)*cos(r) + cos(t)*sin(r)
 *
 * where sin(t) = _TBL_sinl_hi[i] + _TBL_sinl_lo[i], cos(t) is taken from
 * _TBL_cosl_hi[i], sin(r) comes from the pp polynomial and cos(r) - 1 from
 * the qq polynomial.  The sign of x is applied to the result at the end.
 */
extern const long double _TBL_sinl_hi[], _TBL_sinl_lo[], _TBL_cosl_hi[];

static const long double
one = 1.0,
/* |sin(x) - (x + pp1*x^3 + ... + pp5*x^11)| <= 2^-122.32 for |x| < 1/64 */
pp1 = -1.666666666666666666666666666586782940810e-0001L,
pp2 = 8.333333333333333333333003723660929317540e-0003L,
pp3 = -1.984126984126984076045903483778337804470e-0004L,
pp4 = 2.755731922361906641319723106210900949413e-0006L,
pp5 = -2.505198398570947019093998469135012057673e-0008L,
/*
 * |(sin(x) - (x + p1*x^3 + ... + p8*x^17)) / x| <= 2^-116.17
 * for |x| < 0.1953125
 */
p1 = -1.666666666666666666666666666666211262297e-0001L,
p2 = 8.333333333333333333333333301497876908541e-0003L,
p3 = -1.984126984126984126984041302881180621922e-0004L,
p4 = 2.755731922398589064100587351307269621093e-0006L,
p5 = -2.505210838544163129378906953765595393873e-0008L,
p6 = 1.605904383643244375050998243778534074273e-0010L,
p7 = -7.647162722800685516901456114270824622699e-0013L,
p8 = 2.810046428661902961725428841068844462603e-0015L,
/* |cos(x) - (1 + qq1*x^2 + ... + qq5*x^10)| <= 2^-123.84 for |x| <= 1/128 */
qq1 = -4.999999999999999999999999999999378373641e-0001L,
qq2 = 4.166666666666666666666665478399327703130e-0002L,
qq3 = -1.388888888888888888058211230618051613494e-0003L,
qq4 = 2.480158730156105377771585658905303111866e-0005L,
qq5 = -2.755728099762526325736488376695157008736e-0007L;

long double
__k_sinl(long double x, long double y)
{
	long double brk, cosm1_r, r, rsq, sin_hi, sin_lo, sin_r;
	int *brk_w = (int *)&brk, *x_w = (int *)&x;
	int hx, idx, ix, jx;

	/* brk starts as 1.0; its leading word is overwritten further down */
	brk = 1.0L;
#if defined(__i386) || defined(__amd64)
	XTOI(x_w, hx);
#else
	hx = x_w[0];
#endif
	ix = hx & 0x7fffffff;

	if (ix < 0x3ffc9000) {
		/* the int conversion raises inexact for tiny nonzero x */
		if (ix < 0x3fc60000 && (int)x == 0)
			return (x);

		rsq = x * x;
		sin_lo = rsq * (p1 + rsq * (p2 + rsq * (p3 + rsq * (p4 +
		    rsq * (p5 + rsq * (p6 + rsq * (p7 + rsq * p8)))))));
		sin_lo = y + x * sin_lo;
		return (x + sin_lo);
	}

	/* round the leading word to a break point and derive the table slot */
	jx = (ix + 0x400) & 0x7ffff800;
	idx = (jx - 0x3ffc4000) >> 11;
#if defined(__i386) || defined(__amd64)
	ITOX(jx, brk_w);
#else
	brk_w[0] = jx;
#endif

	/* r = (|x| - brk) plus the tail, with the tail's sign following x */
	r = (hx > 0) ? y - (brk - x) : (-y) - (brk + x);

	sin_hi = _TBL_sinl_hi[idx];
	rsq = r * r;
	cosm1_r = rsq * (qq1 + rsq * (qq2 + rsq * (qq3 + rsq * (qq4 +
	    rsq * qq5))));
	sin_r = r * (one + rsq * (pp1 + rsq * (pp2 + rsq * (pp3 + rsq *
	    (pp4 + rsq * pp5)))));
	sin_lo = _TBL_cosl_hi[idx] * sin_r + sin_hi * cosm1_r;
	sin_lo += _TBL_sinl_lo[idx];

	if (hx < 0)
		return (-sin_hi - sin_lo);
	return (sin_hi + sin_lo);
}
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* INDENT OFF */ +/* + * __k_tanl( long double x; long double y; int k ) + * kernel tan/cotan function on [-pi/4, pi/4], pi/4 ~ 0.785398164 + * Input x is assumed to be bounded by ~pi/4 in magnitude. + * Input y is the tail of x. + * Input k indicates the result -- tan if k=0; else -1/tan + * + * Table look up algorithm + * 1. by tan(-x) = -tan(x), need only to consider positive x + * 2. if x < 5/32 = [0x3ffc4000, 0] = 0.15625 , then + * if x < 2^-57 (hx < 0x3fc60000 0), set w=x with inexact if x != 0 + * else + * z = x*x; + * w = x + (y+(x*z)*(t1+z*(t2+z*(t3+ ... +z*t13)))) (stop at t4 if x < 2^-12) + * return (k == 0 ? w : -1/w); + * 3. 
else + * ht = (hx + 0x400)&0x7ffff800 (round x to a break point t) + * lt = 0 + * i = (hy-0x3ffc4000)>>11; (i<=64) + * x' = (x - t)+y (|x'| ~<= 2^-7) + * By + * tan(t+x') + * = (tan(t)+tan(x'))/(1-tan(x')tan(t)) + * We have + * sin(x')+tan(t)*(tan(t)*sin(x')) + * = tan(t) + ------------------------------- for k=0 + * cos(x') - tan(t)*sin(x') + * + * cos(x') - tan(t)*sin(x') + * = - -------------------------------------- for k=1 + * tan(t) + tan(t)*(cos(x')-1) + sin(x') + * + * + * where tan(t) is from the table, + * sin(x') = x + pp1*x^3 + ...+ pp5*x^11 + * cos(x') = 1 + qq1*x^2 + ...+ qq5*x^10 + */ + +#include "libm.h" + +#include <sys/isa_defs.h> + +extern const long double _TBL_tanl_hi[], _TBL_tanl_lo[]; +static const long double +one = 1.0, +/* + * |sin(x) - (x+pp1*x^3+...+ pp5*x^11)| <= 2^-122.32 for |x|<1/64 + */ +pp1 = -1.666666666666666666666666666586782940810e-0001L, +pp2 = 8.333333333333333333333003723660929317540e-0003L, +pp3 = -1.984126984126984076045903483778337804470e-0004L, +pp4 = 2.755731922361906641319723106210900949413e-0006L, +pp5 = -2.505198398570947019093998469135012057673e-0008L, +/* + * 2 10 -123.84 + * |cos(x) - (1+qq1*x +...+ qq5*x )| <= 2 for |x|<=1/128 + */ +qq1 = -4.999999999999999999999999999999378373641e-0001L, +qq2 = 4.166666666666666666666665478399327703130e-0002L, +qq3 = -1.388888888888888888058211230618051613494e-0003L, +qq4 = 2.480158730156105377771585658905303111866e-0005L, +qq5 = -2.755728099762526325736488376695157008736e-0007L, +/* + * |tan(x) - (x+t1*x^3+...+t6*x^13)| + * |------------------------------ | <= 2^-59.73 for |x|<0.15625 + * | x | + */ +t1 = 3.333333333333333333333333333333423342490e-0001L, +t2 = 1.333333333333333333333333333093838744537e-0001L, +t3 = 5.396825396825396825396827906318682662250e-0002L, +t4 = 2.186948853615520282185576976994418486911e-0002L, +t5 = 8.863235529902196573354554519991152936246e-0003L, +t6 = 3.592128036572480064652191427543994878790e-0003L, +t7 = 
1.455834387051455257856833807581901305474e-0003L, +t8 = 5.900274409318599857829983256201725587477e-0004L, +t9 = 2.391291152117265181501116961901122362937e-0004L, +t10 = 9.691533169382729742394024173194981882375e-0005L, +t11 = 3.927994733186415603228178184225780859951e-0005L, +t12 = 1.588300018848323824227640064883334101288e-0005L, +t13 = 6.916271223396808311166202285131722231723e-0006L; +/* INDENT ON */ +long double +__k_tanl(long double x, long double y, int k) { + long double a, t, z, w = 0.0, s, c; + int *pt = (int *) &t, *px = (int *) &x; + int i, j, hx, ix; + + t = 1.0; +#if defined(__i386) || defined(__amd64) + XTOI(px, hx); +#else + hx = px[0]; +#endif + ix = hx & 0x7fffffff; + if (ix < 0x3ffc4000) { + if (ix < 0x3fc60000) { + if ((i = (int) x) == 0) /* generate inexact */ + w = x; + } else { + z = x * x; + if (ix < 0x3ff30000) /* 2**-12 */ + t = z * (t1 + z * (t2 + z * (t3 + z * t4))); + else + t = z * (t1 + z * (t2 + z * (t3 + z * (t4 + + z * (t5 + z * (t6 + z * (t7 + z * + (t8 + z * (t9 + z * (t10 + z * (t11 + + z * (t12 + z * t13)))))))))))); + t = y + x * t; + w = x + t; + } + return (k == 0 ? w : -one / w); + } + j = (ix + 0x400) & 0x7ffff800; + i = (j - 0x3ffc4000) >> 11; +#if defined(__i386) || defined(__amd64) + ITOX(j, pt); +#else + pt[0] = j; +#endif + if (hx > 0) + x = y - (t - x); + else + x = (-y) - (t + x); + a = _TBL_tanl_hi[i]; + z = x * x; + /* cos(x)-1 */ + t = z * (qq1 + z * (qq2 + z * (qq3 + z * (qq4 + z * qq5)))); + /* sin(x) */ + s = x * (one + z * (pp1 + z * (pp2 + z * (pp3 + z * (pp4 + z * + pp5))))); + if (k == 0) { + w = a * s; + t = _TBL_tanl_lo[i] + (s + a * w) / (one - (w - t)); + return (hx < 0 ? -a - t : a + t); + } else { + w = s + a * t; + c = w + _TBL_tanl_lo[i]; + z = (one - (a * s - t)); + return (hx >= 0 ? 
z / (-a - c) : z / (a + c)); + } +} diff --git a/usr/src/lib/libm/common/LD/acoshl.c b/usr/src/lib/libm/common/LD/acoshl.c new file mode 100644 index 0000000000..8f6d155fae --- /dev/null +++ b/usr/src/lib/libm/common/LD/acoshl.c @@ -0,0 +1,57 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#if defined(ELFOBJ) +#pragma weak acoshl = __acoshl +#endif + +#include "libm.h" + +static const long double + zero = 0.0L, + ln2 = 6.931471805599453094172321214581765680755e-0001L, + one = 1.0L, + big = 1.e+20L; + +long double +acoshl(long double x) { + long double t; + + if (isnanl(x)) + return (x + x); + else if (x > big) + return (logl(x) + ln2); + else if (x > one) { + t = sqrtl(x - one); + return (log1pl(t * (t + sqrtl(x + one)))); + } else if (x == one) + return (zero); + else + return ((x - x) / (x - x)); +} diff --git a/usr/src/lib/libm/common/LD/asinhl.c b/usr/src/lib/libm/common/LD/asinhl.c new file mode 100644 index 0000000000..a8c1876ae1 --- /dev/null +++ b/usr/src/lib/libm/common/LD/asinhl.c @@ -0,0 +1,62 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#if defined(ELFOBJ) +#pragma weak asinhl = __asinhl +#endif + +#include "libm.h" + +static const long double + ln2 = 6.931471805599453094172321214581765680755e-0001L, + one = 1.0L, + big = 1.0e+20L, + tiny = 1.0e-20L; + +long double +asinhl(long double x) { + long double t, w; +#ifndef lint + volatile long double dummy; +#endif + + w = fabsl(x); + if (isnanl(x)) + return (x + x); /* x is NaN */ + if (w < tiny) { +#ifndef lint + dummy = x + big; /* inexact if x != 0 */ +#endif + return (x); /* tiny x */ + } else if (w < big) { + t = one / w; + return (copysignl(log1pl(w + w / (t + sqrtl(one + t * t))), x)); + } else + return (copysignl(logl(w) + ln2, x)); +} diff --git a/usr/src/lib/libm/common/LD/atan2pil.c b/usr/src/lib/libm/common/LD/atan2pil.c new file mode 100644 index 0000000000..6b9cce9607 --- /dev/null +++ b/usr/src/lib/libm/common/LD/atan2pil.c @@ -0,0 +1,53 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma weak atan2pil = __atan2pil + +#include "libm.h" +#include "libm_synonyms.h" + +#define GENERIC long double +#define ATAN2PI atan2pil +#define ATAN2 atan2l + +/* ATAN2PI(y,x) + * + * ATAN2PI(y,x) = ATAN2(y,x)/pi + */ + +extern GENERIC ATAN2(); + +static GENERIC +invpi = (GENERIC) 3.183098861837906715377675267450287240689e-0001L; + +GENERIC ATAN2PI(y,x) +GENERIC y,x; +{ + return ATAN2(y,x)*invpi; +} diff --git a/usr/src/lib/libm/common/LD/atanhl.c b/usr/src/lib/libm/common/LD/atanhl.c new file mode 100644 index 0000000000..d7b6d67e26 --- /dev/null +++ b/usr/src/lib/libm/common/LD/atanhl.c @@ -0,0 +1,74 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma weak atanhl = __atanhl + +#include "libm.h" +#include "libm_synonyms.h" + +#define GENERIC long double +#define ATANH atanhl + +/* ATANH(x) + * 1 2x x + * ATANH(x) = --- * LOG(1 + -------) = 0.5 * LOG1P(2 * --------) + * 2 1 - x 1 - x + * Note: to guarantee ATANH(-x) = -ATANH(x), we use + * sign(x) |x| + * ATANH(x) = ------- * LOG1P(2*-------). + * 2 1 - |x| + * + * Special cases: + * ATANH(x) is NaN if |x| > 1 with signal; + * ATANH(NaN) is that NaN with no signal; + * ATANH(+-1) is +-INF with signal. + * + */ + +#define FABS fabsl +#define LOG1P log1pl +#define COPYSIGN copysignl + + +extern GENERIC FABS(),LOG1P(),COPYSIGN(); + +static GENERIC +zero = (GENERIC) 0.0, +half = (GENERIC) 0.5, +one = (GENERIC) 1.0; + +GENERIC ATANH(x) +GENERIC x; +{ + GENERIC t; + t = FABS(x); + if (t == one) return x/zero; + t = t/(one-t); + return COPYSIGN(half,x)*LOG1P(t+t); +} diff --git a/usr/src/lib/libm/common/LD/cbrtl.c b/usr/src/lib/libm/common/LD/cbrtl.c new file mode 100644 index 0000000000..13fe181cc6 --- /dev/null +++ b/usr/src/lib/libm/common/LD/cbrtl.c @@ -0,0 +1,75 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. 
All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#if defined(ELFOBJ) +#pragma weak cbrtl = __cbrtl +#endif + +#include "libm.h" +#include "longdouble.h" + +static const double d_one = 1.0; + +long double +cbrtl(long double x) { + long double s, t, r, w, y; + double dx, dy; + int *py = (int *) &dy; + int n, m, m3, n0, sx; + + if (!finitel(x)) + return (x + x); + if (iszerol(x)) + return (x); + n0 = 0; + if (*((int *) &d_one) == 0) + n0 = 1; + sx = signbitl(x); + x = fabsl(x); + n = ilogbl(x); + m = n / 3; + m3 = m + m + m; + y = scalbnl(x, -m3); + dx = (double) y; + dy = cbrt(dx); + py[1 - n0] += 2; + if (py[1 - n0] == 0) + py[n0] += 1; + + /* one step newton iteration to 113 bits with error < 0.667ulps */ + t = (long double) dy; + t = scalbnl(t, m); + s = t * t; + r = x / s; + w = t + t; + r = (r - t) / (w + r); + t += t * r; + + return (sx == 0 ? t : -t); +} diff --git a/usr/src/lib/libm/common/LD/coshl.c b/usr/src/lib/libm/common/LD/coshl.c new file mode 100644 index 0000000000..5ff0c7d08f --- /dev/null +++ b/usr/src/lib/libm/common/LD/coshl.c @@ -0,0 +1,109 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#if defined(ELFOBJ) +#pragma weak coshl = __coshl +#endif + +#include "libm.h" +#include "longdouble.h" + +/* + * COSH(X) + * RETURN THE HYPERBOLIC COSINE OF X + * + * Method : + * 1. Replace x by |x| (COSH(x) = COSH(-x)). + * 2. + * 0 <= x <= 0.3465 : COSH(x) := 1 + [ EXP(x) - 1 ]^2 / (2*EXP(x)) + * + * 0.3465 <= x <= thresh : COSH(x) := (EXP(x) + 1/EXP(x)) / 2 + * thresh <= x <= lnovft : COSH(x) := EXP(x)/2 + * lnovft <= x < INF : COSH(x) := SCALBN(EXP(x-MEP1*ln2),ME) + * + * + * here + * 0.3465 a number that is near one half of ln2. + * thresh a number such that + * EXP(thresh)+EXP(-thresh)=EXP(thresh) + * lnovft logarithm of the overflow threshold + * = MEP1*ln2 chopped to machine precision. + * ME maximum exponent + * MEP1 maximum exponent plus 1 + * + * Special cases: + * COSH(x) is |x| if x is +INF, -INF, or NaN. + * only COSH(0)=1 is exact for finite x. 
+ */ + +static const long double C[] = { + 0.5L, + 1.0L, + 0.3465L, + 45.0L, + 1.135652340629414394879149e+04L, + 7.004447686242549087858985e-16L, + 2.710505431213761085018632e-20L, /* 2^-65 */ +}; + +#define half C[0] +#define one C[1] +#define thr1 C[2] +#define thr2 C[3] +#define lnovft C[4] +#define lnovlo C[5] +#define tinyl C[6] + +long double +coshl(long double x) { + long double w, t; + + w = fabsl(x); + if (!finitel(w)) + return (w + w); /* x is INF or NaN */ + if (w < thr1) { + if (w < tinyl) + return (one + w); /* inexact+directed rounding */ + t = expm1l(w); + w = one + t; + w = one + (t * t) / (w + w); + return (w); + } + if (w < thr2) { + t = expl(w); + return (half * (t + one / t)); + } + if (w <= lnovft) + return (half * expl(w)); + return (scalbnl(expl((w - lnovft) - lnovlo), 16383)); +} diff --git a/usr/src/lib/libm/common/LD/cosl.c b/usr/src/lib/libm/common/LD/cosl.c new file mode 100644 index 0000000000..c3a5d1b4ba --- /dev/null +++ b/usr/src/lib/libm/common/LD/cosl.c @@ -0,0 +1,109 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. 
All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak cosl = __cosl + +/* INDENT OFF */ +/* cosl(x) + * Table look-up algorithm by K.C. Ng, November, 1989. + * + * kernel function: + * __k_sinl ... sin function on [-pi/4,pi/4] + * __k_cosl ... cos function on [-pi/4,pi/4] + * __rem_pio2l ... argument reduction routine + * + * Method. + * Let S and C denote the sin and cos respectively on [-PI/4, +PI/4]. + * 1. Assume the argument x is reduced to y1+y2 = x-k*pi/2 in + * [-pi/2 , +pi/2], and let n = k mod 4. + * 2. Let S=S(y1+y2), C=C(y1+y2). Depending on n, we have + * + * n sin(x) cos(x) tan(x) + * ---------------------------------------------------------- + * 0 S C S/C + * 1 C -S -C/S + * 2 -S -C S/C + * 3 -C S -C/S + * ---------------------------------------------------------- + * + * Special cases: + * Let trig be any of sin, cos, or tan. + * trig(+-INF) is NaN, with signals; + * trig(NaN) is that NaN; + * + * Accuracy: + * computer TRIG(x) returns trig(x) nearly rounded. + */ +/* INDENT ON */ + +#include "libm.h" +#include "libm_synonyms.h" +#include "longdouble.h" + +#include <sys/isa_defs.h> + +long double +cosl(long double x) { + long double y[2], z = 0.0L; + int n, ix; + int *px = (int *) &x; + + /* trig(Inf or NaN) is NaN */ + if (!finitel(x)) + return x - x; + + /* High word of x. 
*/ +#if defined(__i386) || defined(__amd64) + XTOI(px, ix); +#else + ix = px[0]; +#endif + + /* |x| ~< pi/4 */ + ix &= 0x7fffffff; + if (ix <= 0x3ffe9220) + return __k_cosl(x, z); + + /* argument reduction needed */ + else { + n = __rem_pio2l(x, y); + switch (n & 3) { + case 0: + return __k_cosl(y[0], y[1]); + case 1: + return -__k_sinl(y[0], y[1]); + case 2: + return -__k_cosl(y[0], y[1]); + case 3: + return __k_sinl(y[0], y[1]); + /* NOTREACHED */ + } + } + return 0.0L; +} diff --git a/usr/src/lib/libm/common/LD/erfl.c b/usr/src/lib/libm/common/LD/erfl.c new file mode 100644 index 0000000000..6fdd40a5dc --- /dev/null +++ b/usr/src/lib/libm/common/LD/erfl.c @@ -0,0 +1,348 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* long double function erf,erfc (long double x) + * K.C. Ng, September, 1989. + * x + * 2 |\ + * erf(x) = --------- | exp(-t*t)dt + * sqrt(pi) \| + * 0 + * + * erfc(x) = 1-erf(x) + * + * method: + * Since erf(-x) = -erf(x), we assume x>=0. 
+ * For x near 0, we have the expansion + * + * erf(x) = (2/sqrt(pi))*(x - x^3/3 + x^5/10 - x^7/42 + ....). + * + * Since 2/sqrt(pi) = 1.128379167095512573896158903121545171688, + * we use x + x*P(x^2) to approximate erf(x). This formula will + * guarantee the error less than one ulp where x is not too far + * away from 0. We note that erf(x)=x at x = 0.6174...... After + * some experiment, we choose the following approximation on + * interval [0,0.84375]. + * + * For x in [0,0.84375] + * P = P(x^2) = (p0 + p1 * x^2 + p2 * x^4 + ... + p20 * x^40) + * + * erf(x) = x + x*P + * erfc(x) = 1 - erf(x) if x<=0.25 + * = 0.5 + ((0.5-x)-x*P) if x in [0.25,0.84375] + * precision: |P(x^2)-(erf(x)-x)/x| <= 2**-122.50 + * + * For x in [0.84375,1.25], let s = x - 1, and + * c = 0.84506291151 rounded to single (24 bits) + * erf(x) = c + P1(s)/Q1(s) + * erfc(x) = (1-c) - P1(s)/Q1(s) + * precision: |P1/Q1 - (erf(x)-c)| <= 2**-118.41 + * + * + * For x in [1.25,1.75], let s = x - 1.5, and + * c = 0.95478588343 rounded to single (24 bits) + * erf(x) = c + P2(s)/Q2(s) + * erfc(x) = (1-c) - P2(s)/Q2(s) + * precision: |P2/Q2 - (erf(x)-c)| <= 2**-123.83 + * + * + * For x in [1.75,16/3] + * erfc(x) = exp(-x*x)*(1/x)*R1(1/x)/S1(1/x) + * erf(x) = 1 - erfc(x) + * precision: absolute error of R1/S1 is bounded by 2**-124.03 + * + * For x in [16/3,107] + * erfc(x) = exp(-x*x)*(1/x)*R2(1/x)/S2(1/x) + * erf(x) = 1 - erfc(x) (if x>9 simply return erf(x)=1 with inexact) + * precision: absolute error of R2/S2 is bounded by 2**-120.07 + * + * Else if inf > x >= 107 + * erf(x) = 1 with inexact + * erfc(x) = 0 with underflow + * + * Special case: + * erf(inf) = 1 + * erfc(inf) = 0 + */ + +#pragma weak erfl = __erfl +#pragma weak erfcl = __erfcl + +#include "libm.h" +#include "longdouble.h" + +static long double +tiny = 1e-40L, +nearunfl = 1e-4000L, +half = 0.5L, +one = 1.0L, +onehalf = 1.5L, +L16_3 = 16.0L/3.0L; +/* + * Coefficients for even polynomial P for erf(x)=x+x*P(x^2) on [0,0.84375] + */ 
+static long double P[] = { /* 21 coeffs */ + 1.283791670955125738961589031215451715556e-0001L, + -3.761263890318375246320529677071815594603e-0001L, + 1.128379167095512573896158903121205899135e-0001L, + -2.686617064513125175943235483344625046092e-0002L, + 5.223977625442187842111846652980454568389e-0003L, + -8.548327023450852832546626271083862724358e-0004L, + 1.205533298178966425102164715902231976672e-0004L, + -1.492565035840625097674944905027897838996e-0005L, + 1.646211436588924733604648849172936692024e-0006L, + -1.636584469123491976815834704799733514987e-0007L, + 1.480719281587897445302529007144770739305e-0008L, + -1.229055530170782843046467986464722047175e-0009L, + 9.422759064320307357553954945760654341633e-0011L, + -6.711366846653439036162105104991433380926e-0012L, + 4.463224090341893165100275380693843116240e-0013L, + -2.783513452582658245422635662559779162312e-0014L, + 1.634227412586960195251346878863754661546e-0015L, + -9.060782672889577722765711455623117802795e-0017L, + 4.741341801266246873412159213893613602354e-0018L, + -2.272417596497826188374846636534317381203e-0019L, + 8.069088733716068462496835658928566920933e-0021L, +}; + +/* + * Rational erf(x) = ((float)0.84506291151) + P1(x-1)/Q1(x-1) on [0.84375,1.25] + */ +static long double C1 = (long double)((float)0.84506291151); +static long double P1[] = { /* 12 top coeffs */ + -2.362118560752659955654364917390741930316e-0003L, + 4.129623379624420034078926610650759979146e-0001L, + -3.973857505403547283109417923182669976904e-0002L, + 4.357503184084022439763567513078036755183e-0002L, + 8.015593623388421371247676683754171456950e-0002L, + -1.034459310403352486685467221776778474602e-0002L, + 5.671850295381046679675355719017720821383e-0003L, + 1.219262563232763998351452194968781174318e-0003L, + 5.390833481581033423020320734201065475098e-0004L, + -1.978853912815115495053119023517805528300e-0004L, + 6.184234513953600118335017885706420552487e-0005L, + -5.331802711697810861017518515816271808286e-0006L, +}; +static long 
double Q1[] = { /* 12 bottom coeffs with leading 1.0 hidden */ + 9.081506296064882195280178373107623196655e-0001L, + 6.821049531968204097604392183650687642520e-0001L, + 4.067869178233539502315055970743271822838e-0001L, + 1.702332233546316765818144723063881095577e-0001L, + 7.498098377690553934266423088708614219356e-0002L, + 2.050154396918178697056927234366372760310e-0002L, + 7.012988534031999899054782333851905939379e-0003L, + 1.149904787014400354649843451234570731076e-0003L, + 3.185620255011299476196039491205159718620e-0004L, + 1.273405072153008775426376193374105840517e-0005L, + 4.753866999959432971956781228148402971454e-0006L, + -1.002287602111660026053981728549540200683e-0006L, +}; +/* + * Rational erf(x) = ((float)0.95478588343) + P2(x-1.5)/Q2(x-1.5) + * on [1.25,1.75] + */ +static long double C2 = (long double)((float)0.95478588343); +static long double P2[] = { /* 12 top coeffs */ + 1.131926304864446730135126164594785863512e-0002L, + 1.273617996967754151544330055186210322832e-0001L, + -8.169980734667512519897816907190281143423e-0002L, + 9.512267486090321197833634271787944271746e-0002L, + -2.394251569804872160005274999735914368170e-0002L, + 1.108768660227528667525252333184520222905e-0002L, + 3.527435492933902414662043314373277494221e-0004L, + 4.946116273341953463584319006669474625971e-0004L, + -4.289851942513144714600285769022420962418e-0005L, + 8.304719841341952705874781636002085119978e-0005L, + -1.040460226177309338781902252282849903189e-0005L, + 2.122913331584921470381327583672044434087e-0006L, +}; +static long double Q2[] = { /* 13 bottom coeffs with leading 1.0 hidden */ + 7.448815737306992749168727691042003832150e-0001L, + 7.161813850236008294484744312430122188043e-0001L, + 3.603134756584225766144922727405641236121e-0001L, + 1.955811609133766478080550795194535852653e-0001L, + 7.253059963716225972479693813787810711233e-0002L, + 2.752391253757421424212770221541238324978e-0002L, + 7.677654852085240257439050673446546828005e-0003L, + 
2.141102244555509687346497060326630061069e-0003L, + 4.342123013830957093949563339130674364271e-0004L, + 8.664587895570043348530991997272212150316e-0005L, + 1.109201582511752087060167429397033701988e-0005L, + 1.357834375781831062713347000030984364311e-0006L, + 4.957746280594384997273090385060680016451e-0008L, +}; +/* + * erfc(x) = exp(-x*x)/x * R1(1/x)/S1(1/x) on [1.75, 16/3] + */ +static long double R1[] = { /* 14 top coeffs */ + 4.630195122654315016370705767621550602948e+0006L, + 1.257949521746494830700654204488675713628e+0007L, + 1.704153822720260272814743497376181625707e+0007L, + 1.502600568706061872381577539537315739943e+0007L, + 9.543710793431995284827024445387333922861e+0006L, + 4.589344808584091011652238164935949522427e+0006L, + 1.714660662941745791190907071920671844289e+0006L, + 5.034802147768798894307672256192466283867e+0005L, + 1.162286400443554670553152110447126850725e+0005L, + 2.086643834548901681362757308058660399137e+0004L, + 2.839793161868140305907004392890348777338e+0003L, + 2.786687241658423601778258694498655680778e+0002L, + 1.779177837102695602425897452623985786464e+0001L, + 5.641895835477470769043614623819144434731e-0001L, +}; +static long double S1[] = { /* 15 bottom coeffs with leading 1.0 hidden */ + 4.630195122654331529595606896287596843110e+0006L, + 1.780411093345512024324781084220509055058e+0007L, + 3.250113097051800703707108623715776848283e+0007L, + 3.737857099176755050912193712123489115755e+0007L, + 3.029787497516578821459174055870781168593e+0007L, + 1.833850619965384765005769632103205777227e+0007L, + 8.562719999736915722210391222639186586498e+0006L, + 3.139684562074658971315545539760008136973e+0006L, + 9.106421313731384880027703627454366930945e+0005L, + 2.085108342384266508613267136003194920001e+0005L, + 3.723126272693120340730491416449539290600e+0004L, + 5.049169878567344046145695360784436929802e+0003L, + 4.944274532748010767670150730035392093899e+0002L, + 3.153510608818213929982940249162268971412e+0001L, + 1.0e00L, +}; + +/* + * 
erfc(x) = exp(-x*x)/x * R2(1/x)/S2(1/x) on [16/3, 107] + */ +static long double R2[] = { /* 15 top coeffs in reverse order!!*/ + 2.447288012254302966796326587537136931669e+0005L, + 8.768592567189861896653369912716538739016e+0005L, + 1.552293152581780065761497908005779524953e+0006L, + 1.792075924835942935864231657504259926729e+0006L, + 1.504001463155897344947500222052694835875e+0006L, + 9.699485556326891411801230186016013019935e+0005L, + 4.961449933661807969863435013364796037700e+0005L, + 2.048726544693474028061176764716228273791e+0005L, + 6.891532964330949722479061090551896886635e+0004L, + 1.888014709010307507771964047905823237985e+0004L, + 4.189692064988957745054734809642495644502e+0003L, + 7.362346487427048068212968889642741734621e+0002L, + 9.980359714211411423007641056580813116207e+0001L, + 9.426910895135379181107191962193485174159e+0000L, + 5.641895835477562869480794515623601280429e-0001L, +}; +static long double S2[] = { /* 16 coefficients */ + 2.447282203601902971246004716790604686880e+0005L, + 1.153009852759385309367759460934808489833e+0006L, + 2.608580649612639131548966265078663384849e+0006L, + 3.766673917346623308850202792390569025740e+0006L, + 3.890566255138383910789924920541335370691e+0006L, + 3.052882073900746207613166259994150527732e+0006L, + 1.885574519970380988460241047248519418407e+0006L, + 9.369722034759943185851450846811445012922e+0005L, + 3.792278350536686111444869752624492443659e+0005L, + 1.257750606950115799965366001773094058720e+0005L, + 3.410830600242369370645608634643620355058e+0004L, + 7.513984469742343134851326863175067271240e+0003L, + 1.313296320593190002554779998138695507840e+0003L, + 1.773972700887629157006326333696896516769e+0002L, + 1.670876451822586800422009013880457094162e+0001L, + 1.000L, +}; + +long double erfl(x) +long double x; +{ + long double erfcl(long double),s,y,t; + + if (!finitel(x)) { + if (x != x) return x+x; /* NaN */ + return copysignl(one,x); /* return +-1.0 is x=Inf */ + } + + y = fabsl(x); + if (y <= 0.84375L) { + 
if (y<=tiny) return x+P[0]*x; + s = y*y; + t = __poly_libmq(s,21,P); + return x+x*t; + } + if (y<=1.25L) { + s = y-one; + t = C1+__poly_libmq(s,12,P1)/(one+s*__poly_libmq(s,12,Q1)); + return (signbitl(x))? -t: t; + } else if (y<=1.75L) { + s = y-onehalf; + t = C2+__poly_libmq(s,12,P2)/(one+s*__poly_libmq(s,13,Q2)); + return (signbitl(x))? -t: t; + } + if (y<=9.0L) t = erfcl(y); else t = tiny; + return (signbitl(x))? t-one: one-t; +} + +long double erfcl(x) +long double x; +{ + long double erfl(long double),s,y,t; + + if (!finitel(x)) { + if (x != x) return x+x; /* NaN */ + /* return 2.0 if x= -inf + 0.0 if x= +inf */ + if (x<0.0L) return 2.0L; else return 0.0L; + } + + if (x <= 0.84375L) { + if (x<=0.25) return one-erfl(x); + s = x*x; + t = half-x; + t = t - x*__poly_libmq(s,21,P); + return half+t; + } + if (x<=1.25L) { + s = x-one; + t = one-C1; + return t - __poly_libmq(s,12,P1)/(one+s*__poly_libmq(s,12,Q1)); + } else if (x<=1.75L) { + s = x-onehalf; + t = one-C2; + return t - __poly_libmq(s,12,P2)/(one+s*__poly_libmq(s,13,Q2)); + } + if (x>=107.0L) return nearunfl*nearunfl; /* underflow */ + else if (x >= L16_3) { + y = __poly_libmq(x,15,R2); + t = y/__poly_libmq(x,16,S2); + } else { + y = __poly_libmq(x,14,R1); + t = y/__poly_libmq(x,15,S1); + } + /* see comment in ../Q/erfl.c */ + y = x; + *(int*)&y = 0; + t *= expl(-y*y)*expl(-(x-y)*(x+y)); + return t; +} diff --git a/usr/src/lib/libm/common/LD/finitel.c b/usr/src/lib/libm/common/LD/finitel.c new file mode 100644 index 0000000000..bbe255514f --- /dev/null +++ b/usr/src/lib/libm/common/LD/finitel.c @@ -0,0 +1,52 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#if defined(ELFOBJ) +#pragma weak finitel = __finitel +#endif + +#include "libm.h" +/* + * finitel(x): return nonzero when the exponent field of x is not all + * ones, i.e. x is neither an infinity nor a NaN. The argument is + * inspected through an int pointer into its storage. + * SPARC: the first word, with the sign bit masked off, must be below + * 0x7fff0000. + * x86: px[2] & 0x7fff must differ from 0x7fff; when HANDLE_UNSUPPORTED + * is defined, the top bit of px[1] must also be set unless those 15 + * exponent bits are zero. + */ +#if defined(__sparc) +int +finitel(long double x) { + int *px = (int *) &x; + return ((px[0] & ~0x80000000) < 0x7fff0000); +} +#elif defined(__x86) +int +finitel(long double x) { + int *px = (int *) &x, t = px[2] & 0x7fff; +#if defined(HANDLE_UNSUPPORTED) + return (t != 0x7fff && ((px[1] & 0x80000000) != 0 || t == 0)); +#else + return (t != 0x7fff); +#endif +} +#endif /* defined(__sparc) || defined(__x86) */ diff --git a/usr/src/lib/libm/common/LD/gammal.c b/usr/src/lib/libm/common/LD/gammal.c new file mode 100644 index 0000000000..509c82b89c --- /dev/null +++ b/usr/src/lib/libm/common/LD/gammal.c @@ -0,0 +1,49 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak gammal = __gammal + +/* + * long double gammal(long double x); + * + * Returns __k_lgammal(x, &signgaml) and then copies the sign that call + * stored in signgaml into signgam. + */ + +#include "libm.h" +#include "libm_synonyms.h" +#include "longdouble.h" + +extern int signgam; +extern int signgaml; + +long double +gammal(long double x) { + long double y = __k_lgammal(x, &signgaml); + + signgam = signgaml; /* SUSv3 requires the setting of signgam */ + return y; +} diff --git a/usr/src/lib/libm/common/LD/gammal_r.c b/usr/src/lib/libm/common/LD/gammal_r.c new file mode 100644 index 0000000000..1390b53c63 --- /dev/null +++ b/usr/src/lib/libm/common/LD/gammal_r.c @@ -0,0 +1,42 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 
+ * Use is subject to license terms. + */ + +/* + * long double gammal_r(long double x, int *signgamlp); + * + * Thin wrapper: passes x and signgamlp straight to __k_lgammal and + * returns its result. + */ + +#pragma weak gammal_r = __gammal_r + +#include "libm.h" +#include "longdouble.h" + +long double +gammal_r(long double x, int *signgamlp) { + return __k_lgammal(x, signgamlp); +} diff --git a/usr/src/lib/libm/common/LD/hypotl.c b/usr/src/lib/libm/common/LD/hypotl.c new file mode 100644 index 0000000000..4303eceb8e --- /dev/null +++ b/usr/src/lib/libm/common/LD/hypotl.c @@ -0,0 +1,147 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#if defined(ELFOBJ) +#pragma weak hypotl = __hypotl +#endif + +/* + * hypotl(x,y) + * Method : + * If z=x*x+y*y has error less than sqrt(2)/2 ulp then sqrt(z) has + * error less than 1 ulp. + * So, compute sqrt(x*x+y*y) with some care as follows: + * Assume x>y>0; + * 1. save and set rounding to round-to-nearest + * 2. if x > 2y use + * x1*x1+(y*y+(x2*(x+x1))) for x*x+y*y + * where x1 = x with lower 32 bits cleared, x2 = x-x1; else + * 3. 
if x <= 2y use + * t1*y1+((x-y)*(x-y)+(t1*y2+t2*y)) + * where t1 = 2x with lower 32 bits cleared, t2 = 2x-t1, y1= y with + * lower 32 bits cleared, y2 = y-y1. + * + * NOTE: DO NOT remove parentheses! + * + * Special cases: + * hypot(x,y) is INF if x or y is +INF or -INF; else + * hypot(x,y) is NAN if x or y is NAN. + * + * Accuracy: + * hypot(x,y) returns sqrt(x^2+y^2) with error less than 1 ulp (unit + * in the last place) + */ + +#include "libm.h" + +#if defined(__x86) +extern enum fp_direction_type __swap87RD(enum fp_direction_type); + +#define k 0x7fff + +long double +hypotl(long double x, long double y) { + long double t1, t2, y1, y2, w; + int *px = (int *) &x, *py = (int *) &y; + int *pt1 = (int *) &t1, *py1 = (int *) &y1; + enum fp_direction_type rd; + int j, nx, ny, nz; + + px[2] &= 0x7fff; /* clear sign bit and padding bits of x and y */ + py[2] &= 0x7fff; + nx = px[2]; /* biased exponent of x and y */ + ny = py[2]; + if (ny > nx) { + w = x; + x = y; + y = w; + nz = ny; + ny = nx; + nx = nz; + } /* force nx >= ny */ + if (nx - ny >= 66) + return (x + y); /* x / y >= 2**65 */ + if (nx < 0x5ff3 && ny > 0x205b) { /* medium x,y */ + /* save and set RD to Rounding to nearest */ + rd = __swap87RD(fp_nearest); + w = x - y; + if (w > y) { /* x > 2y: step 2 of the method above */ + pt1[2] = px[2]; + pt1[1] = px[1]; + pt1[0] = 0; /* t1 = x with its lowest 32-bit word cleared */ + t2 = x - t1; + x = sqrtl(t1 * t1 - (y * (-y) - t2 * (x + t1))); + } else { /* x <= 2y: step 3 of the method above */ + x += x; /* x now holds 2x */ + py1[2] = py[2]; + py1[1] = py[1]; + py1[0] = 0; /* y1 = y with its lowest 32-bit word cleared */ + y2 = y - y1; + pt1[2] = px[2]; + pt1[1] = px[1]; + pt1[0] = 0; /* t1 = 2x with its lowest 32-bit word cleared */ + t2 = x - t1; + x = sqrtl(t1 * y1 - (w * (-w) - (t2 * y1 + y2 * x))); + } + if (rd != fp_nearest) + __swap87RD(rd); /* restore rounding mode */ + return (x); + } else { + if (nx == k || ny == k) { /* x or y is INF or NaN */ + /* since nx >= ny; nx is always k within this block */ + if (px[1] == 0x80000000 && px[0] == 0) + return (x); + else if (ny == k && py[1] == 0x80000000 && py[0] == 0) + return (y); + else + return (x + y); + } + if (ny == 0) { /* exponent field of the smaller operand is zero */ + if (y == 0.L || x == 0.L) + 
return (x + y); + pt1[2] = 0x3fff + 16381; + pt1[1] = 0x80000000; + pt1[0] = 0; /* t1 = 2**16381 (exponent bias is 0x3fff) */ + py1[2] = 0x3fff - 16381; + py1[1] = 0x80000000; + py1[0] = 0; /* y1 = 2**-16381, the inverse of t1 */ + x *= t1; + y *= t1; + return (y1 * hypotl(x, y)); /* scale up, recurse, scale back */ + } + j = nx - 0x3fff; + px[2] -= j; /* x now has biased exponent 0x3fff */ + py[2] -= j; /* y is scaled by the same power of two */ + pt1[2] = nx; + pt1[1] = 0x80000000; + pt1[0] = 0; /* t1 = 2**j undoes the scaling */ + return (t1 * hypotl(x, y)); + } +} +#endif diff --git a/usr/src/lib/libm/common/LD/isnanl.c b/usr/src/lib/libm/common/LD/isnanl.c new file mode 100644 index 0000000000..eb94eb32b9 --- /dev/null +++ b/usr/src/lib/libm/common/LD/isnanl.c @@ -0,0 +1,54 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#if defined(ELFOBJ) +#pragma weak isnanl = __isnanl +#endif + +#include "libm.h" +/* + * isnanl(x): return nonzero when x is a NaN. The argument is inspected + * through an int pointer into its storage. + * SPARC: the first word with the sign bit masked off must be at least + * 0x7fff0000 (exponent field all ones) and some bit among the low 16 + * bits of that word or the remaining three words must be set. + * x86: px[2] & 0x7fff must equal 0x7fff and some bit of px[1] below its + * top bit, or of px[0], must be set. When HANDLE_UNSUPPORTED is + * defined, a nonzero exponent field with the top bit of px[1] clear is + * also reported as NaN. + */ +#if defined(__sparc) +int +isnanl(long double x) { + int *px = (int *) &x; + return ((px[0] & ~0x80000000) >= 0x7fff0000 && + ((px[0] & ~0xffff0000) | px[1] | px[2] | px[3]) != 0); +} +#elif defined(__x86) +int +isnanl(long double x) { + int *px = (int *) &x, t = px[2] & 0x7fff; +#if defined(HANDLE_UNSUPPORTED) + return ((t == 0x7fff && ((px[1] & ~0x80000000) | px[0]) != 0) || + (t != 0 && (px[1] & 0x80000000) == 0)); +#else + return (t == 0x7fff && ((px[1] & ~0x80000000) | px[0]) != 0); +#endif +} +#endif /* defined(__sparc) || defined(__x86) */ diff --git a/usr/src/lib/libm/common/LD/j0l.c b/usr/src/lib/libm/common/LD/j0l.c new file mode 100644 index 0000000000..cd1f8d4aae --- /dev/null +++ b/usr/src/lib/libm/common/LD/j0l.c @@ -0,0 +1,749 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +/* + * Floating point Bessel's function of the first and second kinds + * of order zero: j0(x),y0(x); + * + * Special cases: + * y0(0)=y1(0)=yn(n,0) = -inf with division by zero signal; + * y0(-ve)=y1(-ve)=yn(n,-ve) are NaN with invalid signal. + */ + +#pragma weak j0l = __j0l +#pragma weak y0l = __y0l + +#include "libm.h" +#include "libm_synonyms.h" + +#include "longdouble.h" + +#include <math.h> +#if defined(__SUNPRO_C) +#include <sunmath.h> +#endif + +#define GENERIC long double +static GENERIC +zero = 0.0L, +small = 1.0e-9L, +tiny = 1.0e-38L, +one = 1.0L, +five = 5.0L, +eight = 8.0L, +invsqrtpi = 5.641895835477562869480794515607725858441e-0001L, +tpi = 0.636619772367581343075535053490057448L; + +static GENERIC pzero(), qzero(); +static GENERIC r0[7] = { + -2.499999999999999999999999999999998934492e-0001L, + 1.272657927360049786327618451133763714880e-0002L, + -2.694499763712963276900636693400659600898e-0004L, + 2.724877475058977576903234070919616447883e-0006L, + -1.432617103214330236967477495393076320281e-0008L, + 3.823248804080079168706683540513792224471e-0011L, + -4.183174277567983647337568504286313665065e-0014L, +}; +static GENERIC s0[7] = { + 1.0e0L, + 1.159368290559800854689526195462884666395e-0002L, + 6.629397597394973383009743876169946772559e-0005L, + 2.426779981394054406305431142501735094340e-0007L, + 6.097663491248511069094400469635449749883e-0010L, + 1.017019133340929220238747413216052224036e-0012L, + 9.012593179306197579518374581969371278481e-0016L, +}; + +/* + * j0l(x): Bessel function of the first kind of order zero. + * NaN returns x+x; the sign of x is discarded (x = fabsl(x)). + * x > 1.28: zero for an infinity, otherwise the asymptotic form built + * from sinl/cosl and pzero/qzero (pzero/qzero are skipped above 1.0e120). + * x <= small: one - x when x <= tiny, else one - x*x*0.25. + * otherwise: one + z*r0(z)/s0(z) with z = x*x. + */ +GENERIC +j0l(x) GENERIC x; { + GENERIC z, s, c, ss, cc, r, u, v; + int i; + + if (isnanl(x)) + return (x+x); + x = fabsl(x); + if (x > 1.28L) { + if (!finitel(x)) + return (zero); + s = sinl(x); + c = cosl(x); + /* + * j0(x) = sqrt(2/(pi*x))*(p0(x)*cos(x0)-q0(x)*sin(x0)) + * where x0 = x-pi/4 + * Better formula: + * cos(x0) = cos(x)cos(pi/4)+sin(x)sin(pi/4) + * = 1/sqrt(2) * (cos(x) + sin(x)) + * sin(x0) = sin(x)cos(pi/4)-cos(x)sin(pi/4) + * = 1/sqrt(2) * (sin(x) - cos(x)) + * To avoid 
cancellation, use + * sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x)) + * to compute the worse one. + */ + if (x > 1.0e2450L) { /* x+x may overflow */ + ss = s-c; + cc = s+c; + } else if (signbitl(s) != signbitl(c)) { + ss = s - c; + cc = -cosl(x+x)/ss; + } else { + cc = s + c; + ss = -cosl(x+x)/cc; + } + /* + * j0(x) = 1/sqrt(pi) * (P(0,x)*cc - Q(0,x)*ss) / sqrt(x) + * y0(x) = 1/sqrt(pi) * (P(0,x)*ss + Q(0,x)*cc) / sqrt(x) + */ + if (x > 1.0e120L) + return (invsqrtpi*cc)/sqrtl(x); + u = pzero(x); v = qzero(x); + return (invsqrtpi*(u*cc-v*ss)/sqrtl(x)); + } + if (x <= small) { + if (x <= tiny) + return (one-x); + else + return (one-x*x*0.25L); + } + z = x*x; + r = r0[6]; s = s0[6]; + for (i = 5; i >= 0; i--) { + r = r*z + r0[i]; + s = s*z + s0[i]; + } + return (one+z*(r/s)); +} + +static GENERIC u0[8] = { + -7.380429510868722527434392794848301631220e-0002L, + 1.766855559625940791857536949301981816513e-0001L, + -1.386470722701047923235553251240162839408e-0002L, + 3.520149242724811578636970811631224862615e-0004L, + -3.978599663243790049853642275624951870025e-0006L, + 2.228801153263957224547222556806915479763e-0008L, + -6.121246764298785018658597179498837316177e-0011L, + 6.677103629722678833475965810525587396596e-0014L, +}; +static GENERIC v0[8] = { + 1.0e0L, + 1.247164416539111311571676766127767127970e-0002L, + 7.829144749639791500052900281489367443576e-0005L, + 3.247126540422245330511218321013360336606e-0007L, + 9.750516724789499678567062572549568447869e-0010L, + 2.156713223173591212250543390258458098776e-0012L, + 3.322169561597890004231482431236452752624e-0015L, + 2.821213295314000924252226486305726805093e-0018L, +}; + +/* + * y0l(x): Bessel function of the second kind of order zero. + * NaN returns x+x. + * x == 0 returns -Inf (division by zero) and x < 0 returns NaN (invalid), + * as listed under Special cases at the top of this file. + * x > 1.28: zero for +Inf, otherwise the asymptotic form built from + * sinl/cosl and pzero/qzero (pzero/qzero are skipped above 1.0e120). + * x <= tiny: u0[0] + tpi*logl(x). + * otherwise: u0(z)/v0(z) + tpi*j0l(x)*logl(x) with z = x*x. + */ +GENERIC +y0l(x) GENERIC x; { + GENERIC z, d, s, c, ss, cc, u, v; + int i; + + if (isnanl(x)) + return (x+x); + if (x <= zero) { + /* + * Return the special-case value here instead of relying on + * logl() further down to produce it. For x == 0 the divisor is + * fabsl(x) so that it is +0 for either sign of zero. + */ + if (x == zero) + d = -one/fabsl(x); + else + d = zero/(x-x); + return (d); + } + if (x > 1.28L) { + if (!finitel(x)) + return (zero); + s = sinl(x); + c = cosl(x); + /* + * j0(x) = sqrt(2/(pi*x))*(p0(x)*cos(x0)-q0(x)*sin(x0)) + * where x0 
= x-pi/4 + * Better formula: + * cos(x0) = cos(x)cos(pi/4)+sin(x)sin(pi/4) + * = 1/sqrt(2) * (cos(x) + sin(x)) + * sin(x0) = sin(x)cos(pi/4)-cos(x)sin(pi/4) + * = 1/sqrt(2) * (sin(x) - cos(x)) + * To avoid cancellation, use + * sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x)) + * to compute the worse one. + */ + if (x > 1.0e2450L) { /* x+x may overflow */ + ss = s-c; + cc = s+c; + } else if (signbitl(s) != signbitl(c)) { + ss = s - c; + cc = -cosl(x+x)/ss; + } else { + cc = s + c; + ss = -cosl(x+x)/cc; + } + /* + * j0(x) = 1/sqrt(pi*x) * (P(0,x)*cc - Q(0,x)*ss) + * y0(x) = 1/sqrt(pi*x) * (P(0,x)*ss + Q(0,x)*cc) + */ + if (x > 1.0e120L) + return (invsqrtpi*ss)/sqrtl(x); + return (invsqrtpi*(pzero(x)*ss+qzero(x)*cc)/sqrtl(x)); + + } + if (x <= tiny) { + return (u0[0] + tpi*logl(x)); + } + z = x*x; + u = u0[7]; v = v0[7]; + for (i = 6; i >= 0; i--) { + u = u*z + u0[i]; + v = v*z + v0[i]; + } + return (u/v + tpi*(j0l(x)*logl(x))); +} + +static GENERIC pr0[12] = { /* [16 -- inf] */ + 9.999999999999999999999999999999999997515e-0001L, + 1.065981615377273376425365823967550598358e+0003L, + 4.390991200927588978306374718984240719130e+0005L, + 9.072086218607986711847069407339321363103e+0007L, + 1.022552886177375367408408501046461671528e+0010L, + 6.420766912243658241570635854089597269031e+0011L, + 2.206451725126933913591080211081242266908e+0013L, + 3.928369596816895077363705478743346298368e+0014L, + 3.258159928874124597286701119721482876596e+0015L, + 1.025715808134188978860679130140685101348e+0016L, + 7.537170874795721255796001687024031280685e+0015L, + -1.579413901450157332307745586004207687796e+0014L, +}; +static GENERIC ps0[11] = { + 1.0e0L, + 1.066051927877273376425365823967550512687e+0003L, + 4.391739647168381592399173804329266353038e+0005L, + 9.075162261801343671805658294123888867884e+0007L, + 1.023186118519904751819581912075985995058e+0010L, + 6.427861860414223746340515376512730275061e+0011L, + 2.210861503237823589735481303627993406235e+0013L, + 
3.943247335784292905915956840901818177989e+0014L, + 3.283720976777545142150200110647270004481e+0015L, + 1.045346918812754048903645641538728986759e+0016L, + 8.043455468065618900750599584291193680463e+0015L, +}; +static GENERIC pr1[12] = { /* [8 -- 16] */ + 9.999999999999999999999784422701108683618e-0001L, + 6.796098532948334207755488692777907062894e+0002L, + 1.840036112605722168824530758797169836042e+0005L, + 2.598490483191916637264894340635847598122e+0007L, + 2.105774863242707025525730249472054578523e+0009L, + 1.015822044230542426666314997796944979959e+0011L, + 2.931557457008110436764077699944189071875e+0012L, + 4.962885121125457633655259224179322808824e+0013L, + 4.705424055148223269155430598563351566279e+0014L, + 2.294439854910747229152056080910427001110e+0015L, + 4.905531843137486691500950019322475458629e+0015L, + 3.187543169710339218793442542845735994565e+0015L, +}; +static GENERIC ps1[14] = { + 1.0e0L, + 6.796801657948334207754571576066758180288e+0002L, + 1.840512891201300567325421059826676366447e+0005L, + 2.599777028312918975306252167127695075221e+0007L, + 2.107582572771047636846811284634244892537e+0009L, + 1.017275794694156108975782763889979940348e+0011L, + 2.938487645192463845428059755454762316011e+0012L, + 4.982512164735557054521042916182317924466e+0013L, + 4.737639900153703274792677468264564361437e+0014L, + 2.323398719123742743524249528275097100646e+0015L, + 5.033419107069210577868909797896984419391e+0015L, + 3.409036105931068609601317076759804716059e+0015L, + 7.505655364352679737585745147753521662166e+0013L, + -9.976837153983688250780198248297109118313e+0012L, +}; +static GENERIC pr2[12] = { /* [5 -- 8 ] */ + 9.999999999999999937857236789277366320220e-0001L, + 3.692848765268649571651602420376358849214e+0002L, + 5.373022067535476576926715900057760985410e+0004L, + 4.038738891191314969971504035057219430725e+0006L, + 1.728285706306940523397385566659762646999e+0008L, + 4.375400819645889911158688737206054788534e+0009L, + 
6.598950418204912408375591217782088567076e+0010L, + 5.827182039183238492480275401520072793783e+0011L, + 2.884222642913492390887572414999490975844e+0012L, + 7.373278873797767721932837830628688632775e+0012L, + 8.338295457568973761205077964397969230489e+0012L, + 2.911383183467288345772308817209806922143e+0012L, +}; +static GENERIC ps2[14] = { + 1.0e0L, + 3.693551890268649477288896267171993213102e+0002L, + 5.375607880998361502474715133828068514297e+0004L, + 4.042477764024108249744998862572786367328e+0006L, + 1.731069838737016956685839588670132939513e+0008L, + 4.387147674049898778738226585935491417728e+0009L, + 6.628058659620653765349556940567715258165e+0010L, + 5.869659904164177740471685856367322160664e+0011L, + 2.919839445622817017058977559638969436383e+0012L, + 7.535314897696671402628203718612309253907e+0012L, + 8.696355561452933775773309859748610658935e+0012L, + 3.216155103141537221173601557697083216257e+0012L, + 4.756857081068942248246880159213789086363e+0010L, + -3.496356619666608032231074866481472824067e+0009L, +}; +static GENERIC pr3[13] = { /* [3.5 -- 5 ] */ + 9.999999999999916693107285612398196588247e-0001L, + 2.263975921282917721194425320484974336945e+0002L, + 1.994358386744245848889492762781484199966e+0004L, + 8.980067458430542243559962493831661323168e+0005L, + 2.282213787521372663705567756420087553508e+0007L, + 3.409784374889063618250288699908375135923e+0008L, + 3.024380857401448589254343517589811711108e+0009L, + 1.571110368046740246895071721443082286379e+0010L, + 4.603187020243604632153685300463160593768e+0010L, + 7.087196453409712719449549280664058793403e+0010L, + 5.046196021776346356803687409644239065041e+0010L, + 1.287758439080165765709154276618854799932e+0010L, + 5.900679773415023433787846658096813590784e+0008L, +}; +static GENERIC ps3[13] = { + 1.0e0L, + 2.264679046282855061328604619231774747116e+0002L, + 1.995939523988944553755653255389812103448e+0004L, + 8.993853144706348727038389967490183236820e+0005L, + 
2.288326099634588843906989983704795468773e+0007L, + 3.424967100255240885169240956804790118282e+0008L, + 3.046311797972463991368023759640028910016e+0009L, + 1.589614961932826812790222479700797224003e+0010L, + 4.692406624527744816497089139325073939927e+0010L, + 7.320486495902008912866462849073108323948e+0010L, + 5.345945972828978289935309597742981360994e+0010L, + 1.444033091910423754121309915092247171008e+0010L, + 7.987714685115314668378957273824383610525e+0008L, +}; +static GENERIC pr4[13] = { /* [2.5, 3.5] */ + 9.999999999986736677961118722747757712260e-0001L, + 1.453824980703800559037873123568378845663e+0002L, + 8.097327216430682288267610447006508661032e+0003L, + 2.273847252038264370231169686380192662135e+0005L, + 3.561056728046211111354759998976985449622e+0006L, + 3.244933588800096378434627029369680378599e+0007L, + 1.740112392860717950376210038908476792588e+0008L, + 5.426170187455893285197878563881579269524e+0008L, + 9.490107486454362321004377336020526281371e+0008L, + 8.688872439428470049801714121070005313806e+0008L, + 3.673315853166437222811910656900123215515e+0008L, + 5.577770470359303305164877446339693270239e+0007L, + 1.540438642031689641308197880181291865714e+0006L, +}; +static GENERIC ps4[13] = { /* [2.5, 3.5] */ + 1.0e0L, + 1.454528105698159439773035951959131799816e+0002L, + 8.107442215200392397172179900434987859618e+0003L, + 2.279390393778242887574177096606328994140e+0005L, + 3.576251625592252008424781111770934135844e+0006L, + 3.267909499056932631405942058670933813863e+0007L, + 1.760021515330805537499778238099704648805e+0008L, + 5.525553787667353981242060222587465726729e+0008L, + 9.769870295912820457889384082671269328511e+0008L, + 9.110582071004774279226905629624018008454e+0008L, + 3.981857678621955599371967680343918454345e+0008L, + 6.482404686230769399073192961667697036706e+0007L, + 2.210046943095878402443535460329391782298e+0006L, +}; +static GENERIC pr5[13] = { /* [1.777..., 2.5] */ + 9.999999999114986107951817871144655880699e-0001L, + 
9.252583736048588342568344570315435947614e+0001L, + 3.218726757856078715214631502407386264637e+0003L, + 5.554009964621111656479588505862577040831e+0004L, + 5.269993115643664338253196944523510290175e+0005L, + 2.874613773778430691192912190618220544575e+0006L, + 9.133538151103658353874146919613442436035e+0006L, + 1.673067041410338922825193013077354249193e+0007L, + 1.706913873848398011744790289200151840498e+0007L, + 9.067766583853288534551600235576747618679e+0006L, + 2.216746733457884568532695355036338655872e+0006L, + 1.945753880802872541235703812722344514405e+0005L, + 3.132374412921948071539195638885330951749e+0003L, +}; +static GENERIC ps5[13] = { /* [1.777..., 2.5] */ + 1.0e0L, + 9.259614983862181118883831670990340052982e+0001L, + 3.225125275462903384842124075132609290304e+0003L, + 5.575705362829101545292760055941855246492e+0004L, + 5.306049863037087855496170121958448492522e+0005L, + 2.907060758873509564309729903109018597215e+0006L, + 9.298059206584995898298257827131208539289e+0006L, + 1.720391071006963176836108026556547062980e+0007L, + 1.782614812922865190479394509487941920612e+0007L, + 9.708016389605273153536452032839879950155e+0006L, + 2.476495084688170096480215640962175140027e+0006L, + 2.363200660365585759668077790194604917187e+0005L, + 4.803239569848196077121203575704356936731e+0003L, +}; +static GENERIC pr6[13] = { /* [1.28, 1.777...] 
*/ + 9.999999969777095495998606925524322559556e-0001L, + 5.825486719466194430503283824096872219216e+0001L, + 1.248155491637757281915184824965379905380e+0003L, + 1.302093199842358609321338417071710477615e+0004L, + 7.353835804186292782840961999810543016039e+0004L, + 2.356471661113686180549195092555751341757e+0005L, + 4.350553267429009581632987060942780847101e+0005L, + 4.588762661876600638719159826652389418235e+0005L, + 2.675796398548523436544221045225290128611e+0005L, + 8.077649557108971388298292919988449940464e+0004L, + 1.117640459221306873519068741664054573776e+0004L, + 5.544400072396814695175787511557757885585e+0002L, + 5.072550541191480498431289089905822910718e+0000L, +}; +static GENERIC ps6[13] = { /* [1.28, 1.777...] */ + 1.0e0L, + 5.832517925357165050639075848183613063291e+0001L, + 1.252144364743592128171256104364976466898e+0003L, + 1.310300234342216813579118022415585740772e+0004L, + 7.434667697093812197817292154032863632923e+0004L, + 2.398706595587719165726469002404004614711e+0005L, + 4.472737517625103157004869372427480602511e+0005L, + 4.786313523337761975294171429067037723611e+0005L, + 2.851161872872731228472536061865365370192e+0005L, + 8.891648269899148412331918021801385815586e+0004L, + 1.297097489535351517572978123584751042287e+0004L, + 7.096761640545975756202184143400469812618e+0002L, + 8.378049338590233325977702401733340820351e+0000L, +}; +static GENERIC sixteen = 16.0L; +static GENERIC huge = 1.0e30L; + +static GENERIC pzero(x) +GENERIC x; +{ + GENERIC s, r, t, z; + int i; + if (x > huge) + return (one); + t = one/x; z = t*t; + if (x > sixteen) { + r = z*pr0[11]+pr0[10]; s = ps0[10]; + for (i = 9; i >= 0; i--) { + r = z*r + pr0[i]; + s = z*s + ps0[i]; + } + } else if (x > eight) { + r = pr1[11]; s = ps1[11]+z*(ps1[12]+z*ps1[13]); + for (i = 10; i >= 0; i--) { + r = z*r + pr1[i]; + s = z*s + ps1[i]; + } + } else if (x > five) { /* x > 5.0 */ + r = pr2[11]; s = ps2[11]+z*(ps2[12]+z*ps2[13]); + for (i = 10; i >= 0; i--) { + r = z*r + pr2[i]; + s = z*s + 
ps2[i]; + } + } else if (x > 3.5L) { + r = pr3[12]; s = ps3[12]; + for (i = 11; i >= 0; i--) { + r = z*r + pr3[i]; + s = z*s + ps3[i]; + } + } else if (x > 2.5L) { + r = pr4[12]; s = ps4[12]; + for (i = 11; i >= 0; i--) { + r = z*r + pr4[i]; + s = z*s + ps4[i]; + } + } else if (x > (1.0L/0.5625L)) { + r = pr5[12]; s = ps5[12]; + for (i = 11; i >= 0; i--) { + r = z*r + pr5[i]; + s = z*s + ps5[i]; + } + } else { /* assume x > 1.28 */ + r = pr6[12]; s = ps6[12]; + for (i = 11; i >= 0; i--) { + r = z*r + pr6[i]; + s = z*s + ps6[i]; + } + } + return (r/s); +} + + +static GENERIC qr0[12] = { /* [16, inf] */ + -1.249999999999999999999999999999999972972e-0001L, + -1.425179595545670577414395762503991596897e+0002L, + -6.312499645625970845534460257936222407219e+0004L, + -1.411374326457208384315121243698814446848e+0007L, + -1.735034212758873581410984757860787252842e+0009L, + -1.199777647512789489421826342485055280680e+0011L, + -4.596025334081655714499860409699100373644e+0012L, + -9.262525628201284107792924477031653399187e+0013L, + -8.858394728685039245344398842180662867639e+0014L, + -3.267527953687534887623740622709505972113e+0015L, + -2.664222971186311967587129347029450062019e+0015L, + 3.442464060723987869585180095344504100204e+0014L, +}; +static GENERIC qs0[11] = { + 1.0e0L, + 1.140729613936536461931516610003185687881e+0003L, + 5.056665510442299351009198186490085803580e+0005L, + 1.132041763825642787943941650522718199115e+0008L, + 1.394570111872581606392620678214246479767e+0010L, + 9.677945218152264789534431079563744378421e+0011L, + 3.731140327851536828225143058896348502096e+0013L, + 7.612785951064869291722846681020881676410e+0014L, + 7.476077016406764891730191004811863975940e+0015L, + 2.951246482613592035421503427100393831709e+0016L, + 3.108361803691811711136854587074302034901e+0016L, +}; +static GENERIC qr1[12] = { /* [8, 16 ] */ + -1.249999999999999999997949010383433818157e-0001L, + -9.051215166393822640636752244895124126934e+0001L, + 
-2.620782703428148837671179031904208303947e+0004L, + -3.975571261553504457766177974508785790884e+0006L, + -3.479029330759311306270072218074074994090e+0008L, + -1.823955008124268573036216746186239829089e+0010L, + -5.765932697111801375765156029221568664435e+0011L, + -1.079843680798742592954002192417934779114e+0013L, + -1.146893630504592739082205764611581332897e+0014L, + -6.367016059683898464936104447282880704182e+0014L, + -1.583109041961213490464459111903484209098e+0015L, + -1.230149555764242473103128650135795639412e+0015L, +}; +static GENERIC qs1[14] = { + 1.0e0L, + 7.246831508115058112438579847778014458432e+0002L, + 2.100854184439168518399383786306927037611e+0005L, + 3.192636418837951507430188285940994235122e+0007L, + 2.801558443383354674538443461124434216152e+0009L, + 1.475026997664373739293483927250653467487e+0011L, + 4.694486824913954608552363821799927145318e+0012L, + 8.890350100919200250838438709601547334021e+0013L, + 9.626844429082905144874701068760469752067e+0014L, + 5.541110744600460773528263862687521642140e+0015L, + 1.486500494789452556727470329232123096563e+0016L, + 1.415840104845959400365430773732093899210e+0016L, + 1.780866095241517418081312567239682336483e+0015L, + -2.359230917384889357887631544079990129494e+0014L, +}; +static GENERIC qr2[12] = { /* [5, 8] */ + -1.249999999999999531937744362527772181614e-0001L, + -4.944373897356969774839375977239241573966e+0001L, + -7.728449175433465285314261650078450473909e+0003L, + -6.262574329612752346336901434651220705903e+0005L, + -2.900948220220943306027235217424380672732e+0007L, + -7.988719647634192770463917157562874119535e+0008L, + -1.318228171927181389547760026626357012375e+0010L, + -1.282439773983029245309263271945424928196e+0011L, + -7.050925570827818040186149940257918845138e+0011L, + -2.021751882573871990004205616874202684429e+0012L, + -2.592939962400668552384333900573812635658e+0012L, + -1.038267109518891262840601514932972850326e+0012L, +}; +static GENERIC qs2[14] = { + 1.0e0L, + 
3.961358492885570003202784022894248952116e+0002L, + 6.205788738864701882828752634586510926968e+0004L, + 5.045715603932670286550673813011764406749e+0006L, + 2.349248611362658323353343389430968751429e+0008L, + 6.520244524415828635917683553721880063911e+0009L, + 1.089111211223507719337067159886281887722e+0011L, + 1.080406000905359867958779409414903018610e+0012L, + 6.135645280895514703514154680623769562148e+0012L, + 1.862433040246625874245867151368643668215e+0013L, + 2.667780805786648888840777888702193708994e+0013L, + 1.394401107289087774765300711809313112824e+0013L, + 1.093247500616320375562898297156722445484e+0012L, + -7.228875530378928722826604216491493780775e+0010L, +}; +static GENERIC qr3[13] = { /* [3.5 5] */ + -1.249999999999473067748420379578481661075e-0001L, + -3.044549048635289351913574324803250977998e+0001L, + -2.890081140649769078496693003524681440869e+0003L, + -1.404922456817202235879343275330529107684e+0005L, + -3.862746614385573443518177403617349281869e+0006L, + -6.257517309110249049201133708911155047689e+0007L, + -6.031451330920839916987079782727323477520e+0008L, + -3.411542405173830611454025765755854382346e+0009L, + -1.089392478149726672133014498723021526099e+0010L, + -1.824934078420210941290140903415956782726e+0010L, + -1.400780278304358710423481070486939531139e+0010L, + -3.716484136064917363926635716743771092093e+0009L, + -1.397591075296425529970434890954904331580e+0008L, +}; +static GENERIC qs3[13] = { + 1.0e0L, + 2.441498613904962049391000187014945858042e+0002L, + 2.326188882072370711500164222341514337043e+0004L, + 1.137138213121231338494977104659239578165e+0006L, + 3.152918070735662728722998452605364253517e+0007L, + 5.172877993426507259314270488444013595108e+0008L, + 5.083086439731669807455961078856470774115e+0009L, + 2.961842732066434123119325521139476909941e+0010L, + 9.912185866862440735829781856081353151390e+0010L, + 1.793560561251622234430564181567297983598e+0011L, + 1.577090119341228122525265108497940403073e+0011L, + 
5.509910306780166194333889999985463681636e+0010L, + 4.761691134078874491202320181517936758141e+0009L, +}; +static GENERIC qr4[13] = { /* [2.5 3.5] */ + -1.249999999928567734339745043490705340835e-0001L, + -1.967201748731419063051601624435565528481e+0001L, + -1.186329146714562236407099740615528170707e+0003L, + -3.607736959222941810356301491152457934060e+0004L, + -6.119200717978104904932828468575194267125e+0005L, + -6.037847781158358226670305078652205586384e+0006L, + -3.503558153336140359700536720393565984740e+0007L, + -1.180196478268225718757218523746787309773e+0008L, + -2.221860232085134915841426363505169680528e+0008L, + -2.173372505452747585296176761701746236760e+0008L, + -9.649364865061237558517730539506568013963e+0007L, + -1.465429227847933034546039640094862650385e+0007L, + -3.083003197920262085170581866246663380607e+0005L, +}; +static GENERIC qs4[13] = { /* [2.5 3.5] */ + 1.0e0L, + 1.579620773732259142752614142139986854055e+0002L, + 9.581372220329138733203879503753685054968e+0003L, + 2.939598672379108095776114131010825885308e+0005L, + 5.052183049314542218630341818692588448168e+0006L, + 5.083497695595206639433839326338971980149e+0007L, + 3.036385361800553388049719014005099206516e+0008L, + 1.067826481452753409910563785161661492137e+0009L, + 2.145644125557118044720741775125319669272e+0009L, + 2.324115615959719949363946673491552216799e+0009L, + 1.223262962112070757966959855619847011146e+0009L, + 2.569765553318495423738478585947110270709e+0008L, + 1.354744744299227127897905787732636565504e+0007L, +}; +static GENERIC qr5[13] = { /* [1.777.., 2.5] */ + -1.249999995936639697637680428174576069971e-0001L, + -1.260846055371311453485891923426489068315e+0001L, + -4.772398467544467480801174330290141578895e+0002L, + -8.939852599990298486613760833996490599724e+0003L, + -9.184070787149542050979542226446134243197e+0004L, + -5.406038945018274458362637897739280435171e+0005L, + -1.845896544705190261018653728678171084418e+0006L, + -3.613616990680809501878667570653308071547e+0006L, + 
-3.908782978135693252252557720414348623779e+0006L, + -2.173711022517323927109138170588442768176e+0006L, + -5.431253130679918485836408549007856244495e+0005L, + -4.591098546452684510082591587275940765959e+0004L, + -5.244711364168207806835520057792229646578e+0002L, +}; +static GENERIC qs5[13] = { /* [1.777.., 2.5] */ + 1.0e0L, + 1.014536210851290878350892750972474861447e+0002L, + 3.875547510687135314064434160096139681076e+0003L, + 7.361913122670079814955259281995617732580e+0004L, + 7.720288944218771126581086539585529314636e+0005L, + 4.681529554446752496404431433608306558038e+0006L, + 1.667882621940503925455031252308367745820e+0007L, + 3.469403153761399881888272620855305156241e+0007L, + 4.096992047964210711867089384719947863019e+0007L, + 2.596804755829217449311530735959560630554e+0007L, + 7.983933774697889238154465064019410763845e+0006L, + 9.818133816979900819087242425280757938152e+0005L, + 3.061083930868694396013541535670745443560e+0004L, +}; + +static GENERIC qr6[13] = { /* [1.28, 1.777..] */ + -1.249999881577289001807137282824929082771e-0001L, + -7.998273510053110759610810594119533619282e+0000L, + -1.872481955335172543369089617771565632719e+0002L, + -2.122116786726300805079874003303799646812e+0003L, + -1.293850285839529282503178263484773478457e+0004L, + -4.445024742266316181033354192262529356093e+0004L, + -8.730161378334357767668344467356505347070e+0004L, + -9.706222895172078442801444972505315054736e+0004L, + -5.896325518259858270165531513618195321041e+0004L, + -1.823172034368108822276420827074668832233e+0004L, + -2.509304178635055926638833040337472387175e+0003L, + -1.156608965715779237316769828941729964099e+0002L, + -7.028005789650731396887346826397785210442e-0001L, +}; +static GENERIC qs6[13] = { /* [1.28, 1.777..] 
*/ + 1.0e0L, + 6.457211085058064845601261321277721075900e+0001L, + 1.534005216588011210342824555136008682950e+0003L, + 1.777217999176441782593357660462379097171e+0004L, + 1.118372652642469468091084810263231199696e+0005L, + 4.015242433858461813142365748386473605294e+0005L, + 8.377081045517098645448616514388280497673e+0005L, + 1.011495020008010352575398009604164287337e+0006L, + 6.886722075290430568652227875200208955970e+0005L, + 2.504735189948021472047157148613171956537e+0005L, + 4.408138920171044846941001844352009817062e+0004L, + 3.105572178072115145673058722853640854884e+0003L, + 5.588294821118916113437396504573817033678e+0001L, +}; +/* + * qzero(x): returns -0.125L/x when x > huge. Otherwise sets t = one/x and + * z = t*t, selects one qrN/qsN coefficient pair by the range of x + * (x > sixteen, x > eight, x > five, x > 3.5, x > 2.5, x > 1.0/0.5625, else), + * evaluates both polynomials in z by Horner's rule and returns t*(r/s). + * The final else branch has no lower-bound check; it relies on the caller + * passing x > 1.28 (see the comment on that branch). + * NOTE(review): huge, one, sixteen, eight, five and the qr0/qs0 tables are + * defined earlier in this file, outside the visible part -- confirm there. + */ +static GENERIC qzero(x) +GENERIC x; +{ + GENERIC s, r, t, z; + int i; + if (x > huge) + return (-0.125L/x); + t = one/x; z = t*t; + if (x > sixteen) { + r = z*qr0[11]+qr0[10]; s = qs0[10]; + for (i = 9; i >= 0; i--) { + r = z*r + qr0[i]; + s = z*s + qs0[i]; + } + } else if (x > eight) { + r = qr1[11]; s = qs1[11]+z*(qs1[12]+z*qs1[13]); + for (i = 10; i >= 0; i--) { + r = z*r + qr1[i]; + s = z*s + qs1[i]; + } + } else if (x > five) { /* assume x > 5.0 */ + r = qr2[11]; s = qs2[11]+z*(qs2[12]+z*qs2[13]); + for (i = 10; i >= 0; i--) { + r = z*r + qr2[i]; + s = z*s + qs2[i]; + } + } else if (x > 3.5L) { + r = qr3[12]; s = qs3[12]; + for (i = 11; i >= 0; i--) { + r = z*r + qr3[i]; + s = z*s + qs3[i]; + } + } else if (x > 2.5L) { + r = qr4[12]; s = qs4[12]; + for (i = 11; i >= 0; i--) { + r = z*r + qr4[i]; + s = z*s + qs4[i]; + } + } else if (x > (1.0L/0.5625L)) { + r = qr5[12]; s = qs5[12]; + for (i = 11; i >= 0; i--) { + r = z*r + qr5[i]; + s = z*s + qs5[i]; + } + } else { /* assume x > 1.28 */ + r = qr6[12]; s = qs6[12]; + for (i = 11; i >= 0; i--) { + r = z*r + qr6[i]; + s = z*s + qs6[i]; + } + } + return (t*(r/s)); +} diff --git a/usr/src/lib/libm/common/LD/j1l.c b/usr/src/lib/libm/common/LD/j1l.c new file mode 100644 index 0000000000..b12ed96ae3 --- /dev/null +++ b/usr/src/lib/libm/common/LD/j1l.c @@ -0,0 +1,753 @@ +/* + * CDDL HEADER 
START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * floating point Bessel's function of the first and second kinds + * of order one: j1(x),y1(x); + * + * Special cases: + * y0(0)=y1(0)=yn(n,0) = -inf with division by zero signal; + * y0(-ve)=y1(-ve)=yn(n,-ve) are NaN with invalid signal. 
+ */ + +#pragma weak j1l = __j1l +#pragma weak y1l = __y1l + +#include "libm.h" +#include "libm_synonyms.h" +#include "longdouble.h" +#include <math.h> +#if defined(__SUNPRO_C) +#include <sunmath.h> +#endif + +#define GENERIC long double +static GENERIC +zero = 0.0L, +small = 1.0e-9L, +tiny = 1.0e-38L, +one = 1.0L, +five = 5.0L, +invsqrtpi = 5.641895835477562869480794515607725858441e-0001L, +tpi = 0.636619772367581343075535053490057448L; + +static GENERIC pone(), qone(); +/* r0[], s0[]: coefficients of the ratio r0(z)/s0(z), z = x*x, used by j1l for small < |x| <= 1.28 */ +static GENERIC r0[7] = { + -6.249999999999999999999999999999999627320e-0002L, + 1.940606727194041716205384618494641565464e-0003L, + -3.005630423155733701856481469986459043883e-0005L, + 2.345586219403918667468341047369572169358e-0007L, + -9.976809285885253587529010109133336669724e-0010L, + 2.218743258363623946078958783775107473381e-0012L, + -2.071079656218700604767650924103578046280e-0015L, +}; +static GENERIC s0[7] = { + 1.0e0L, + 1.061695903156199920738051277075003059555e-0002L, + 5.521860513111180371566951179398862692060e-0005L, + 1.824214367413754193524107877084979441407e-0007L, + 4.098957778439576834818838198039029353925e-0010L, + 6.047735079699666389853240090925264056197e-0013L, + 4.679044728878836197247923279512047035041e-0016L, +}; + +/* + * j1l(x): Bessel function of the first kind of order one, long double. + * - x not finite: returns one/x (NaN for NaN, signed zero for +-Inf). + * - otherwise the sign of x is saved in sgn, the value is computed for + * |x| and negated on return when sgn is set (j1 is an odd function). + * - |x| > 1.28: invsqrtpi*(pone(x)*cc - qone(x)*ss)/sqrtl(x); above + * 1.0e120 only the cc term is kept. + * - |x| <= small: x/2 when |x| <= tiny, else x*(1/2 - x*x/16). + * - else: x/2 + x*z*r0(z)/s0(z) with z = x*x. + */ +GENERIC +j1l(x) GENERIC x; { + GENERIC z, d, s, c, ss, cc, r; + int i, sgn; + + if (!finitel(x)) + return (one/x); + sgn = signbitl(x); + x = fabsl(x); + if (x > 1.28L) { + s = sinl(x); + c = cosl(x); + /* + * j1(x) = sqrt(2/(pi*x))*(p1(x)*cos(x0)-q1(x)*sin(x0)) + * where x0 = x-3pi/4 + * Better formula: + * cos(x0) = cos(x)cos(3pi/4)+sin(x)sin(3pi/4) + * = 1/sqrt(2) * (sin(x) - cos(x)) + * sin(x0) = sin(x)cos(3pi/4)-cos(x)sin(3pi/4) + * = -1/sqrt(2) * (cos(x) + sin(x)) + * To avoid cancellation, use + * sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x)) + * to compute the worse one. 
+ */ + if (x > 1.0e2450L) { /* x+x may overflow */ + ss = -s-c; + cc = s-c; + } else if (signbitl(s) != signbitl(c)) { + cc = s - c; + ss = cosl(x+x)/cc; + } else { + ss = -s-c; + cc = cosl(x+x)/ss; + } + /* + * j1(x) = 1/sqrt(pi*x) * (P(1,x)*cc - Q(1,x)*ss) + * y1(x) = 1/sqrt(pi*x) * (P(1,x)*ss + Q(1,x)*cc) + */ + /* + * Beyond 1.0e120 only the cc term is kept. Do not return early + * here: j1 is odd, so the saved sign must be applied on this path + * as well. + */ + if (x > 1.0e120L) + d = (invsqrtpi*cc)/sqrtl(x); + else + d = invsqrtpi*(pone(x)*cc-qone(x)*ss)/sqrtl(x); + if (sgn == 0) + return (d); + else + return (-d); + } + if (x <= small) { + /* + * Leading terms of the power series j1(x) = x/2 - x^3/16 + ...; + * the cubic coefficient is 1/16 (compare r0[0]), not 1/8. + */ + if (x <= tiny) d = 0.5L*x; + else d = x*(0.5L-x*x*0.0625L); + if (sgn == 0) + return (d); + else + return (-d); + } + z = x*x; + r = r0[6]; + s = s0[6]; + for (i = 5; i >= 0; i--) { + r = r*z + r0[i]; + s = s*z + s0[i]; + } + d = x*0.5L+x*(z*(r/s)); + if (sgn == 0) + return (d); + else + return (-d); +} + +/* u0[], v0[]: coefficients of the ratio u0(z)/v0(z), z = x*x, used by y1l for tiny < x <= 1.28 */ +static GENERIC u0[7] = { + -1.960570906462389484060557273467558703503e-0001L, + 5.166389353148318460304315890665450006495e-0002L, + -2.229699464105910913337190798743451115604e-0003L, + 3.625437034548863342715657067759078267158e-0005L, + -2.689902826993117212255524537353883987171e-0007L, + 9.304570592456930912969387719010256018466e-0010L, + -1.234878126794286643318321347997500346131e-0012L, +}; +static GENERIC v0[8] = { + 1.0e0L, + 1.369394302535807332517110204820556695644e-0002L, + 9.508438148097659501433367062605935379588e-0005L, + 4.399007309420092056052714797296467565655e-0007L, + 1.488083087443756398305819693177715000787e-0009L, + 3.751609832625793536245746965768587624922e-0012L, + 6.680926434086257291872903276124244131448e-0015L, + 6.676602383908906988160099057991121446058e-0018L, +}; + +/* + * y1l(x): Bessel function of the second kind of order one, long double. + * - NaN: returns x+x. + * - x == 0: returns -one/zero; x < 0: returns zero/zero. + * - x > 1.28: zero for +Inf, else + * invsqrtpi*(pone(x)*ss + qone(x)*cc)/sqrtl(x); above 1.0e91 only the + * ss term is kept. + * - x <= tiny: -tpi/x. + * - else: x*u0(z)/v0(z) + tpi*(j1l(x)*logl(x) - one/x) with z = x*x. + */ +GENERIC +y1l(x) GENERIC x; { + GENERIC z, s, c, ss, cc, u, v; + int i; + + if (isnanl(x)) + return (x+x); + if (x <= zero) { + if (x == zero) + return (-one/zero); + else + return (zero/zero); + } + if (x > 1.28L) { + if (!finitel(x)) + return (zero); + s = sinl(x); + c = cosl(x); + /* + * j1(x) = sqrt(2/(pi*x))*(p1(x)*cos(x0)-q1(x)*sin(x0)) + * where x0 = x-3pi/4 + * Better formula: + * 
cos(x0) = cos(x)cos(3pi/4)+sin(x)sin(3pi/4) + * = 1/sqrt(2) * (sin(x) - cos(x)) + * sin(x0) = sin(x)cos(3pi/4)-cos(x)sin(3pi/4) + * = -1/sqrt(2) * (cos(x) + sin(x)) + * To avoid cancellation, use + * sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x)) + * to compute the worse one. + */ + if (x > 1.0e2450L) { /* x+x may overflow */ + ss = -s-c; + cc = s-c; + } else if (signbitl(s) != signbitl(c)) { + cc = s - c; + ss = cosl(x+x)/cc; + } else { + ss = -s-c; + cc = cosl(x+x)/ss; + } + /* + * j1(x) = 1/sqrt(pi*x) * (P(1,x)*cc - Q(1,x)*ss) + * y1(x) = 1/sqrt(pi*x) * (P(1,x)*ss + Q(1,x)*cc) + */ + if (x > 1.0e91L) + return (invsqrtpi*ss)/sqrtl(x); + return (invsqrtpi*(pone(x)*ss+qone(x)*cc)/sqrtl(x)); + } + if (x <= tiny) { + return (-tpi/x); + } + z = x*x; + u = u0[6]; v = v0[6]+z*v0[7]; + for (i = 5; i >= 0; i--) { + u = u*z + u0[i]; + v = v*z + v0[i]; + } + return (x*(u/v) + tpi*(j1l(x)*logl(x)-one/x)); +} + +static GENERIC pr0[12] = { + 1.000000000000000000000000000000000000267e+0000L, + 1.060717875045891455602180843276758003035e+0003L, + 4.344347542892127024446687712181105852335e+0005L, + 8.915680220724007016377924252717410457094e+0007L, + 9.969502259938406062809873257569171272819e+0009L, + 6.200290193138613035646510338707386316595e+0011L, + 2.105978548788015119851815854422247330118e+0013L, + 3.696635772784601239371730810311998368948e+0014L, + 3.015913097920694682057958412534134515156e+0015L, + 9.370298471339353098123277427328592725921e+0015L, + 7.190349005196335967340799265074029443057e+0015L, + 2.736097786240689996880391074927552517982e+0014L, +}; +static GENERIC ps0[11] = { + 1.0e0L, + 1.060600687545891455602180843276758095107e+0003L, + 4.343106093416975589147153906505338900961e+0005L, + 8.910605869002176566582072242244353399059e+0007L, + 9.959122058635087888690713917622056540190e+0009L, + 6.188744967234948231792482949171041843894e+0011L, + 2.098863976953783506401759873801990304907e+0013L, + 3.672870357018063196746729751479938908450e+0014L, + 
2.975538419246824921049011529574385888420e+0015L, + 9.063657659995043205018686029284479837091e+0015L, + 6.401953344314747916729366441508892711691e+0015L, +}; +static GENERIC pr1[12] = { + 1.000000000000000000000023667524130660984e+0000L, + 6.746154419979618754354803488126452971204e+0002L, + 1.811210781083390154857018330296145970502e+0005L, + 2.533098390379924268038005329095287842244e+0007L, + 2.029683619805342145252338570875424600729e+0009L, + 9.660859662192711465301069401598929980319e+0010L, + 2.743396238644831519934098967716621316316e+0012L, + 4.553097354140854377931023170263455246288e+0013L, + 4.210245069852219757476169864974870720374e+0014L, + 1.987334056229596485076645967176169801727e+0015L, + 4.067120052787096893838970455751338930462e+0015L, + 2.486539606380406398310845264910691398133e+0015L, +}; +static GENERIC ps1[14] = { + 1.0e0L, + 6.744982544979618754355808680196859521782e+0002L, + 1.810421795396966762032155290441364740350e+0005L, + 2.530986460644310651529583759699988435573e+0007L, + 2.026743276048023121360249288818290224145e+0009L, + 9.637461924407405935245269407052641341836e+0010L, + 2.732378628423766417402292797028314160831e+0012L, + 4.522345274960527124354844364012184278488e+0013L, + 4.160650668341743132685335758415469856545e+0014L, + 1.943730242988858208243492424892435901211e+0015L, + 3.880228532692127989901131618598067450001e+0015L, + 2.178020816161154615841000173683302999728e+0015L, + -8.994062666842225551554346698171600634173e+0013L, + 1.368520368508851253495764806934619574990e+0013L, +}; +static GENERIC pr2[12] = { + 1.000000000000000006938651621840396237282e+0000L, + 3.658416291850404981407101077037948144698e+0002L, + 5.267073772170356547709794670602812447537e+0004L, + 3.912012101226837463014925210735894620442e+0006L, + 1.651295648974103957193874928714180765625e+0008L, + 4.114901144480797609972484998142146783499e+0009L, + 6.092524309766036681542980572526335147672e+0010L, + 5.263913178071282616719249969074134570577e+0011L, + 
2.538408581124324223367341020538081330994e+0012L, + 6.288607929360291027895126983015365677648e+0012L, + 6.848330048211148419047055075386525945280e+0012L, + 2.290309646838867941423178163991423244690e+0012L, +}; +static GENERIC ps2[14] = { + 1.0e0L, + 3.657244416850405086459410165762319861856e+0002L, + 5.262802358425023243992387075861237306312e+0004L, + 3.905896813959919648136295861661483848364e+0006L, + 1.646791907791461220742694842108202772763e+0008L, + 4.096132803064256022224954120208201437344e+0009L, + 6.046665195915950447544429445730680236759e+0010L, + 5.198061739781991313414052212328653295168e+0011L, + 2.484233851814333966401527626421254279796e+0012L, + 6.047868806925315879339651539434315255940e+0012L, + 6.333103831254091652501642567294101813354e+0012L, + 1.875143098754284994467609936924685024968e+0012L, + -5.238330920563392692965412762508813601534e+0010L, + 4.656888609439364725427789198383779259957e+0009L, +}; +static GENERIC pr3[13] = { + 1.000000000000009336887318068056137842897e+0000L, + 2.242719942728459588488051572002835729183e+0002L, + 1.955450611382026550266257737331095691092e+0004L, + 8.707143293993619899395400562409175590739e+0005L, + 2.186267894487004565948324289010954505316e+0007L, + 3.224328510541957792360691585667502864688e+0008L, + 2.821057355151380597331792896882741364897e+0009L, + 1.445371387295422404365584793796028979840e+0010L, + 4.181743160669891357783011002656658107864e+0010L, + 6.387371088767993119325536137794535513922e+0010L, + 4.575619999412716078064070587767416436396e+0010L, + 1.228415651211639160620284441690503550842e+0010L, + 7.242170349875563053436050532153112882072e+0008L, +}; +static GENERIC ps3[13] = { + 1.0e0L, + 2.241548067728529551049804610486061401070e+0002L, + 1.952838216795552145132137932931237181307e+0004L, + 8.684574926493185744628127341069974575526e+0005L, + 2.176357771067037962940853412819852189164e+0007L, + 3.199958682356132977319258783167122100567e+0008L, + 2.786218931525334687844675219914201872570e+0009L, + 
1.416283776951741549631417572317916039767e+0010L, + 4.042962659271567948735676834609348842922e+0010L, + 6.028168462646694510083847222968444402161e+0010L, + 4.118410226794641413833887606580085281111e+0010L, + 9.918735736297038430744161253338202230263e+0009L, + 4.092967198238098023219124487437130332038e+0008L, +}; +static GENERIC pr4[13] = { + 1.000000000001509220978157399042059553390e+0000L, + 1.437551868378147851133499996323782607787e+0002L, + 7.911335537418177296041518061404505428004e+0003L, + 2.193710939115317214716518908935756104804e+0005L, + 3.390662495136730962513489796538274984382e+0006L, + 3.048655347929348891006070609293884274789e+0007L, + 1.613781633489496606354045161527450975195e+0008L, + 4.975089835037230277110156150038482159988e+0008L, + 8.636047087015115403880904418339566323264e+0008L, + 7.918202912328366140110671223076949101509e+0008L, + 3.423294665798984733439650311722794853294e+0008L, + 5.621904953441963961040503934782662613621e+0007L, + 2.086303543310240260758670404509484499793e+0006L, +}; +static GENERIC ps4[13] = { + 1.0e0L, + 1.436379993384532371670493319591847362304e+0002L, + 7.894647154785430678061053848847436659499e+0003L, + 2.184659753392097529008981741550878586174e+0005L, + 3.366109083305465176803513738147049499361e+0006L, + 3.011911545968996817697665866587226343186e+0007L, + 1.582262913779689851316760148459414895301e+0008L, + 4.819268809494937919217938589530138201770e+0008L, + 8.201355762990450679702837123432527154830e+0008L, + 7.268232093982510937417446421282341425212e+0008L, + 2.950911909015572933262131323934036480462e+0008L, + 4.242839924305934423010858966540621219396e+0007L, + 1.064387620445090779182117666330405186866e+0006L, +}; +static GENERIC pr5[13] = { + 1.000000000102434805241171427253847353861e+0000L, + 9.129332257083629259060502249025963234821e+0001L, + 3.132238483586953037576119377504557191413e+0003L, + 5.329782528269307971278943122454171107861e+0004L, + 4.988460157184117790692873002103052944145e+0005L, + 
2.686602071615786816147010334256047469378e+0006L, + 8.445418526028961197703799808701268301831e+0006L, + 1.536575358646141157475725889907900827390e+0007L, + 1.568405818236523821796862770586544811945e+0007L, + 8.450876239888770102387618667362302173547e+0006L, + 2.154414900139567328424026827163203446077e+0006L, + 2.105656926565043898888460254808062352205e+0005L, + 4.739165011023396507022134303736862812975e+0003L, +}; +static GENERIC ps5[13] = { + 1.0e0L, + 9.117613509595327476509152673394703847793e+0001L, + 3.121697972484015639301279229281770795147e+0003L, + 5.294447222735893568040911873834576440255e+0004L, + 4.930368882192772335798256684110887882807e+0005L, + 2.634854685641165298302167435798357437768e+0006L, + 8.185462775400326393555896157031818280918e+0006L, + 1.462417423080215192609668642663030667086e+0007L, + 1.450624993985851675982860844153954896015e+0007L, + 7.460467647561995283219086567162006113864e+0006L, + 1.754210981405612478869227142579056338965e+0006L, + 1.463286721155271971526264914524746699596e+0005L, + 2.155894725796702015341211116579827039459e+0003L, +}; +static GENERIC pr6[13] = { + 1.000000003564855546741735920315743157129e+0000L, + 5.734003934862540458119423509909510288366e+0001L, + 1.209572491935850486086559692291796887976e+0003L, + 1.243398391422281247933674779163660286838e+0004L, + 6.930996755181437937258220998601708278787e+0004L, + 2.198067659532757598646722249966767620099e+0005L, + 4.033659432712058633933179115820576858455e+0005L, + 4.257759657219008027016047206574574358678e+0005L, + 2.511917395876004349480721277445763916389e+0005L, + 7.813756153070623654178731651381881953552e+0004L, + 1.152069173381127881385588092905864352891e+0004L, + 6.548580782804088553777816037551523398082e+0002L, + 8.668725370116906132327542766127938496880e+0000L, +}; +static GENERIC ps6[13] = { + 1.0e0L, + 5.722285236357114566499221525736286205184e+0001L, + 1.203010842878317935444582950620339570506e+0003L, + 1.230058335378583550155825502172435371208e+0004L, + 
6.800998550607861288865300438648089894412e+0004L, + 2.130767829599304262987769347536850885921e+0005L, + 3.840483466643916681759936972992155310026e+0005L, + 3.947432373459225542861819148108081160393e+0005L, + 2.237816239393081111481588434457838526738e+0005L, + 6.545820495124419723398946273790921540774e+0004L, + 8.729563630320892741500726213278834737196e+0003L, + 4.130762660291894753450174794196998813709e+0002L, + 3.480368898672684645130335786015075595598e+0000L, +}; +static GENERIC sixteen = 16.0L; +static GENERIC eight = 8.0L; +static GENERIC huge = 1.0e30L; + +/* + * pone(x): the P(1,x) factor used by j1l and y1l for x > 1.28. + * Returns one when x > huge. Otherwise sets t = one/x and z = t*t, selects + * one prN/psN coefficient pair by the range of x (x > sixteen, x > eight, + * x > five, x > 3.5, x > 2.5, x > 1.0/0.5625, else), evaluates both + * polynomials in z by Horner's rule and returns r/s. + * The final else branch has no lower-bound check; both callers only reach + * this function with x > 1.28. + */ +static GENERIC pone(x) +GENERIC x; +{ + GENERIC s, r, t, z; + int i; + if (x > huge) + return (one); + t = one/x; z = t*t; + if (x > sixteen) { + r = z*pr0[11]+pr0[10]; s = ps0[10]; + for (i = 9; i >= 0; i--) { + r = z*r + pr0[i]; + s = z*s + ps0[i]; + } + } else if (x > eight) { + r = pr1[11]; s = ps1[11]+z*(ps1[12]+z*ps1[13]); + for (i = 10; i >= 0; i--) { + r = z*r + pr1[i]; + s = z*s + ps1[i]; + } + } else if (x > five) { + r = pr2[11]; s = ps2[11]+z*(ps2[12]+z*ps2[13]); + for (i = 10; i >= 0; i--) { + r = z*r + pr2[i]; + s = z*s + ps2[i]; + } + } else if (x > 3.5L) { + r = pr3[12]; s = ps3[12]; + for (i = 11; i >= 0; i--) { + r = z*r + pr3[i]; + s = z*s + ps3[i]; + } + } else if (x > 2.5L) { + r = pr4[12]; s = ps4[12]; + for (i = 11; i >= 0; i--) { + r = z*r + pr4[i]; + s = z*s + ps4[i]; + } + } else if (x > (1.0L/0.5625L)) { + r = pr5[12]; s = ps5[12]; + for (i = 11; i >= 0; i--) { + r = z*r + pr5[i]; + s = z*s + ps5[i]; + } + } else { /* assume x > 1.28 */ + r = pr6[12]; s = ps6[12]; + for (i = 11; i >= 0; i--) { + r = z*r + pr6[i]; + s = z*s + ps6[i]; + } + } + return (r/s); +} + + +static GENERIC qr0[12] = { + 3.749999999999999999999999999999999971033e-0001L, + 4.256726035237050601607682277433094262226e+0002L, + 1.875976490812878489192409978945401066066e+0005L, + 4.170314268048041914273603680317745592790e+0007L, + 5.092750132543855817293451118974555746551e+0009L, + 
3.494749676278488654103505795794139483404e+0011L, + 1.327062148257437316997667817096694173709e+0013L, + 2.648993138273427226907503742066551150490e+0014L, + 2.511695665909547412222430494473998127684e+0015L, + 9.274694506662289043224310499164702306096e+0015L, + 8.150904170663663829331320302911792892002e+0015L, + -5.001918733707662355772037829620388765122e+0014L, +}; +static GENERIC qs0[11] = { + 1.0e0L, + 1.135400380229880160428715273982155760093e+0003L, + 5.005701183877126164326765545516590744360e+0005L, + 1.113444200113712167984337603933040102987e+0008L, + 1.361074819925223062778717565699039471124e+0010L, + 9.355750985802849484438933905325982809653e+0011L, + 3.563462786008988825003965543857998084828e+0013L, + 7.155145113900094163648726863803802910454e+0014L, + 6.871266835834472758055559013851843654113e+0015L, + 2.622030899226736712644974988157345234092e+0016L, + 2.602912729172876330650077021706139707746e+0016L, +}; +static GENERIC qr1[12] = { + 3.749999999999999999997762458207284405806e-0001L, + 2.697883998881706839929255517498189980485e+0002L, + 7.755195925781028489386938870473834411019e+0004L, + 1.166777762104017777198211072895528968355e+0007L, + 1.011504772984321168320010084520261069362e+0009L, + 5.246007703574156853577754571720205550010e+0010L, + 1.637692549885592683166116551691266537647e+0012L, + 3.022303623698185669912990310925039382495e+0013L, + 3.154769927290655684846107030265909987946e+0014L, + 1.715819913441554770089730934808123360921e+0015L, + 4.165044355759732622273534445131736188510e+0015L, + 3.151381420874174705643100381708086287596e+0015L, +}; +static GENERIC qs1[14] = { + 1.0e0L, + 7.197091705351218239785633172408276982828e+0002L, + 2.070012799599548685544883041297609861055e+0005L, + 3.117014815317656221871840152778458754516e+0007L, + 2.705719678902554974863325877025902971727e+0009L, + 1.406113614727345726925060648750867264098e+0011L, + 4.403777536067131320363005978631674817359e+0012L, + 8.170725690209322283061499386703167242894e+0013L, + 
8.609458844975495289227794126964431210566e+0014L, + 4.766766367015473481257280600694952920204e+0015L, + 1.202286587943342194863557940888115641650e+0016L, + 1.012474328306200909525063936061756024120e+0016L, + 6.183552022678917858273222879615824070703e+0014L, + -9.756731548558226997573737400988225722740e+0013L, +}; +static GENERIC qr2[12] = { + 3.749999999999999481245647262226994293189e-0001L, + 1.471366807289771354491181140167359026735e+0002L, + 2.279432486768448220142080962843526951250e+0004L, + 1.828943048523771225163804043356958285893e+0006L, + 8.379828388647823135832220596417725010837e+0007L, + 2.279814029335044024585393671278378022053e+0009L, + 3.711653952257118120832817785271466441420e+0010L, + 3.557650914518554549916730572553105048068e+0011L, + 1.924583483146095896259774329498934160650e+0012L, + 5.424386256063736390759567088291887140278e+0012L, + 6.839325621241776786206509704671746841737e+0012L, + 2.702169563144001166291686452305436313971e+0012L, +}; +static GENERIC qs2[14] = { + 1.0e0L, + 3.926379194439388135703211933895203191089e+0002L, + 6.089148804106598297488336063007609312276e+0004L, + 4.893546162973278583711376356041614150645e+0006L, + 2.247571119114497845046388801813832219404e+0008L, + 6.137635663350177751290469334200757872645e+0009L, + 1.005115019784102856424493519524998953678e+0011L, + 9.725664462014503832860151384604677240620e+0011L, + 5.345525100485511116148634192844434636072e+0012L, + 1.549944007398946691720862738173956994779e+0013L, + 2.067148441178952625710302124163264760362e+0013L, + 9.401565402641963611295119487242595462301e+0012L, + 3.548217088622398274748837287769709374385e+0011L, + -2.934470341719047120076509938432417352365e+0010L, +}; +static GENERIC qr3[13] = { + 3.749999999999412724084579833297451472091e-0001L, + 9.058478580291706212422978492938435582527e+0001L, + 8.524056033161038750461083666711724381171e+0003L, + 4.105967158629109427753434569223631014730e+0005L, + 1.118326603378531348259783091972623333657e+0007L, + 
1.794636683403578918528064904714132329343e+0008L, + 1.714314157463635959556133236004368896724e+0009L, + 9.622092032236084846572067257267661456030e+0009L, + 3.057759524485859159957762858780768355020e+0010L, + 5.129306780754798531609621454415938890020e+0010L, + 3.999122002794961070680636194346316041352e+0010L, + 1.122298454643493485989721564358100345388e+0010L, + 5.603981987645989709668830968522362582221e+0008L, +}; +static GENERIC qs3[13] = { + 1.0e0L, + 2.418328663076578169836155170053634419922e+0002L, + 2.279620205900121042587523541281272875520e+0004L, + 1.100984222585729521470129014992217092794e+0006L, + 3.010743223679247091004262516286654516282e+0007L, + 4.860925542827367817289619265215599433996e+0008L, + 4.686668111035348691982715864307839581243e+0009L, + 2.668701788405102017427214705946730894074e+0010L, + 8.677395746106802640390580944836650584903e+0010L, + 1.511936455574951790658498795945106643036e+0011L, + 1.260845604432623478002018696873608353093e+0011L, + 4.052692278419853853911440231600864589805e+0010L, + 2.965516519212226064983267822243329694729e+0009L, +}; +static GENERIC qr4[13] = { + 3.749999999919234164154669754440123072618e-0001L, + 5.844218580776819864791168253485055101858e+0001L, + 3.489273514092912982675669411371435670220e+0003L, + 1.050523637774575684509663430018995479594e+0005L, + 1.764549172059701565500717319792780115289e+0006L, + 1.725532438844133795028063102681497371154e+0007L, + 9.938114847359778539965140247590176334874e+0007L, + 3.331710808184595545396883770200772842314e+0008L, + 6.271970557641881511609560444872797282698e+0008L, + 6.188529798677357075020774923903737913285e+0008L, + 2.821905302742849974509982167877885011629e+0008L, + 4.615467358646911976773290256984329814896e+0007L, + 1.348140608731546467396685802693380693275e+0006L, +}; +static GENERIC qs4[13] = { + 1.0e0L, + 1.561192663112345185261418296389902133372e+0002L, + 9.346678031144098270547225423124213083072e+0003L, + 2.825851246482293547838023847601704751590e+0005L, + 
4.776572711622156091710902891124911556293e+0006L, + 4.715106953717135402977938048006267859302e+0007L, + 2.753962350894311316439652227611209035193e+0008L, + 9.428501434615463207768964787500411575223e+0008L, + 1.832650858775206787088236896454141572617e+0009L, + 1.901697378939743226948920874296595242257e+0009L, + 9.433322226854293780627188599226380812725e+0008L, + 1.808520540608671608680284520798858587370e+0008L, + 7.983342331736662753157217446919462398008e+0006L, +}; +static GENERIC qr5[13] = { + 3.749999995331364437028988850515190446719e-0001L, + 3.739356381766559882677514593041627547911e+0001L, + 1.399562500629413529355265462912819802551e+0003L, + 2.594154053098947925345332218062210111753e+0004L, + 2.640149879297408640394163979394594318371e+0005L, + 1.542471854873199142031889093591449397995e+0006L, + 5.242272868972053374067572098992335425895e+0006L, + 1.025834487769410221329633071426044839935e+0007L, + 1.116553924239448940142230579060124209622e+0007L, + 6.318076065595910176374916303525884653514e+0006L, + 1.641218086168640408527639735915512881785e+0006L, + 1.522369793529178644168813882912134706444e+0005L, + 2.526530541062297200914180060208669584055e+0003L, +}; +static GENERIC qs5[13] = { + 1.0e0L, + 9.998960735935075380397545659016287506660e+0001L, + 3.758767417842043742686475060540416737562e+0003L, + 7.013652806952306520121959742519780781653e+0004L, + 7.208949808818615099246529616211730446850e+0005L, + 4.272753927109614455417836186072202009252e+0006L, + 1.482524411356470699336129814111025434703e+0007L, + 2.988750366665678233425279237627700803473e+0007L, + 3.396957890261080492694709150553619185065e+0007L, + 2.050652487738593004111578091156304540386e+0007L, + 5.900504120811732547616511555946279451316e+0006L, + 6.563391409260160897024498082273183468347e+0005L, + 1.692629845012790205348966731477187041419e+0004L, +}; +static GENERIC qr6[13] = { + 3.749999861516664133157566870858975421296e-0001L, + 2.367863756747764863120797431599473468918e+0001L, + 
5.476715802114976248882067325630793143777e+0002L, + 6.143190357869842894025012945444096170251e+0003L, + 3.716250534677997850513733595140463851730e+0004L, + 1.270883463823876752138326905022875657430e+0005L, + 2.495301449636814481646371665429083801388e+0005L, + 2.789578988212952248340486296254398601942e+0005L, + 1.718247946911109055931819087137397324634e+0005L, + 5.458973214011665714330326732204106364229e+0004L, + 7.912102686687948786048943339759596652813e+0003L, + 4.077961006160866935722030715149087938091e+0002L, + 3.765206972770245085551057237882528510428e+0000L, +}; +static GENERIC qs6[13] = { + 1.0e0L, + 6.341646532940517305641893852673926809601e+0001L, + 1.477058277414040790932597537920671025359e+0003L, + 1.674406564031044491436044253393536487604e+0004L, + 1.028516501369755949895050806908994650768e+0005L, + 3.593620042532885295087463507733285434207e+0005L, + 7.267924991381020915185873399453724799625e+0005L, + 8.462277510768818399961191426205006083088e+0005L, + 5.514399892230892163373611895645500250514e+0005L, + 1.898084241009259353540620272932188102299e+0005L, + 3.102941242117739015721984123081026253068e+0004L, + 1.958971184431466907681440650181421086143e+0003L, + 2.878853357310495087181721613889455121867e+0001L, +}; +/* + * qone(x): the Q(1,x) factor used by j1l and y1l for x > 1.28. + * Returns 0.375L/x when x > huge. Otherwise sets t = one/x and z = t*t, + * selects one qrN/qsN coefficient pair by the range of x (x > sixteen, + * x > eight, x > five, x > 3.5, x > 2.5, x > 1.0/0.5625, else), evaluates + * both polynomials in z by Horner's rule and returns t*(r/s). + * The final else branch has no lower-bound check; both callers only reach + * this function with x > 1.28. + */ +static GENERIC qone(x) +GENERIC x; +{ + GENERIC s, r, t, z; + int i; + if (x > huge) + return (0.375L/x); + t = one/x; z = t*t; + if (x > sixteen) { + r = z*qr0[11]+qr0[10]; s = qs0[10]; + for (i = 9; i >= 0; i--) { + r = z*r + qr0[i]; + s = z*s + qs0[i]; + } + } else if (x > eight) { + r = qr1[11]; s = qs1[11]+z*(qs1[12]+z*qs1[13]); + for (i = 10; i >= 0; i--) { + r = z*r + qr1[i]; + s = z*s + qs1[i]; + } + } else if (x > five) { /* x > 5.0 */ + r = qr2[11]; s = qs2[11]+z*(qs2[12]+z*qs2[13]); + for (i = 10; i >= 0; i--) { + r = z*r + qr2[i]; + s = z*s + qs2[i]; + } + } else if (x > 3.5L) { + r = qr3[12]; s = qs3[12]; + for (i = 11; i >= 0; i--) { + r = z*r + qr3[i]; + s = z*s + qs3[i]; + } + } else if (x > 2.5L) { + r = qr4[12]; s = qs4[12]; + for 
(i = 11; i >= 0; i--) { + r = z*r + qr4[i]; + s = z*s + qs4[i]; + } + } else if (x > (1.0L/0.5625L)) { + r = qr5[12]; s = qs5[12]; + for (i = 11; i >= 0; i--) { + r = z*r + qr5[i]; + s = z*s + qs5[i]; + } + } else { /* assume x > 1.28 */ + r = qr6[12]; s = qs6[12]; + for (i = 11; i >= 0; i--) { + r = z*r + qr6[i]; + s = z*s + qs6[i]; + } + } + return (t*(r/s)); +} diff --git a/usr/src/lib/libm/common/LD/jnl.c b/usr/src/lib/libm/common/LD/jnl.c new file mode 100644 index 0000000000..dd63612e71 --- /dev/null +++ b/usr/src/lib/libm/common/LD/jnl.c @@ -0,0 +1,285 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#if defined(ELFOBJ) +#pragma weak jnl = __jnl +#pragma weak ynl = __ynl +#endif + +/* + * floating point Bessel's function of the 1st and 2nd kind + * of order n: jn(n,x),yn(n,x); + * + * Special cases: + * y0(0)=y1(0)=yn(n,0) = -inf with division by zero signal; + * y0(-ve)=y1(-ve)=yn(n,-ve) are NaN with invalid signal. + * Note 2. 
About jn(n,x), yn(n,x) + * For n=0, j0(x) is called, + * for n=1, j1(x) is called, + * for n<x, forward recursion us used starting + * from values of j0(x) and j1(x). + * for n>x, a continued fraction approximation to + * j(n,x)/j(n-1,x) is evaluated and then backward + * recursion is used starting from a supposed value + * for j(n,x). The resulting value of j(0,x) is + * compared with the actual value to correct the + * supposed value of j(n,x). + * + * yn(n,x) is similar in all respects, except + * that forward recursion is used for all + * values of n>1. + * + */ + +#include "libm.h" +#include "longdouble.h" +#include <float.h> /* LDBL_MAX */ + +#define GENERIC long double + +static const GENERIC +invsqrtpi = 5.641895835477562869480794515607725858441e-0001L, +two = 2.0L, +zero = 0.0L, +one = 1.0L; + +GENERIC +jnl(n, x) int n; GENERIC x; { + int i, sgn; + GENERIC a, b, temp = 0, z, w; + + /* + * J(-n,x) = (-1)^n * J(n, x), J(n, -x) = (-1)^n * J(n, x) + * Thus, J(-n,x) = J(n,-x) + */ + if (n < 0) { + n = -n; + x = -x; + } + if (n == 0) return (j0l(x)); + if (n == 1) return (j1l(x)); + if (x != x) return x+x; /* NaN in, NaN out */ + if ((n&1) == 0) + sgn = 0; /* even n */ + else + sgn = signbitl(x); /* odd n: result takes the sign of x */ + x = fabsl(x); + if (x == zero || !finitel(x)) b = zero; + else if ((GENERIC)n <= x) { + /* + * Safe to use + * J(n+1,x)=2n/x *J(n,x)-J(n-1,x) + */ + if (x > 1.0e91L) { + /* + * x >> n**2 + * Jn(x) = cos(x-(2n+1)*pi/4)*sqrt(2/(x*pi)) + * Yn(x) = sin(x-(2n+1)*pi/4)*sqrt(2/(x*pi)) + * Let s=sin(x), c=cos(x), + * xn=x-(2n+1)*pi/4, sqt2 = sqrt(2),then + * + * n sin(xn)*sqt2 cos(xn)*sqt2 + * ---------------------------------- + * 0 s-c c+s + * 1 -s-c -c+s + * 2 -s+c -c-s + * 3 s+c c-s + */ + switch (n&3) { + case 0: temp = cosl(x)+sinl(x); break; + case 1: temp = -cosl(x)+sinl(x); break; + case 2: temp = -cosl(x)-sinl(x); break; + case 3: temp = cosl(x)-sinl(x); break; + } + b = invsqrtpi*temp/sqrtl(x); + } else { + a = j0l(x); + b = j1l(x); + for (i = 1; i < n; i++) { + temp = b; + b =
b*((GENERIC)(i+i)/x) - a; /* avoid underflow */ + a = temp; + } + } + } else { + if (x < 1e-17L) { /* use J(n,x) = 1/n!*(x/2)^n */ + b = powl(0.5L*x, (GENERIC) n); + if (b != zero) { + for (a = one, i = 1; i <= n; i++) a *= (GENERIC)i; + b = b/a; + } + } else { + /* + * use backward recurrence + * x x^2 x^2 + * J(n,x)/J(n-1,x) = ---- ------ ------ ..... + * 2n - 2(n+1) - 2(n+2) + * + * 1 1 1 + * (for large x) = ---- ------ ------ ..... + * 2n 2(n+1) 2(n+2) + * -- - ------ - ------ - + * x x x + * + * Let w = 2n/x and h=2/x, then the above quotient + * is equal to the continued fraction: + * 1 + * = ----------------------- + * 1 + * w - ----------------- + * 1 + * w+h - --------- + * w+2h - ... + * + * To determine how many terms needed, let + * Q(0) = w, Q(1) = w(w+h) - 1, + * Q(k) = (w+k*h)*Q(k-1) - Q(k-2), + * When Q(k) > 1e4 good for single + * When Q(k) > 1e9 good for double + * When Q(k) > 1e17 good for quadruple + */ + /* determine k (the Q recurrence is run in double, not long double) */ + GENERIC t, v; + double q0, q1, h, tmp; int k, m; + w = (n+n)/(double)x; h = 2.0/(double)x; + q0 = w; z = w+h; q1 = w*z - 1.0; k = 1; + while (q1 < 1.0e17) { + k += 1; z += h; + tmp = z*q1 - q0; + q0 = q1; + q1 = tmp; + } + m = n+n; + for (t = zero, i = 2*(n+k); i >= m; i -= 2) t = one/(i/x-t); + a = t; + b = one; + /* + * Estimate log((2/x)^n*n!) = n*log(2/x)+n*ln(n) + * hence, if n*(log(2n/x)) > ... + * single 8.8722839355e+01 + * double 7.09782712893383973096e+02 + * long double 1.1356523406294143949491931077970765006170e+04 + * then recurrent value may overflow and the result is + * likely underflow to zero.
+ */ + tmp = n; + v = two/x; + tmp = tmp*logl(fabsl(v*tmp)); + if (tmp < 1.1356523406294143949491931077970765e+04L) { + for (i = n-1; i > 0; i--) { + temp = b; + b = ((i+i)/x)*b - a; + a = temp; + } + } else { + for (i = n-1; i > 0; i--) { + temp = b; + b = ((i+i)/x)*b - a; + a = temp; + if (b > 1e1000L) { /* rescale a, t and b together before b overflows */ + a /= b; + t /= b; + b = 1.0; + } + } + } + b = (t*j0l(x)/b); /* normalise the recurrence against the true j0l(x) */ + } + } + if (sgn == 1) + return -b; + else + return b; +} + +GENERIC +ynl(n, x) int n; GENERIC x; { + int i; + int sign; + GENERIC a, b, temp = 0; + + if (x != x) + return x+x; /* NaN in, NaN out */ + if (x <= zero) { + if (x == zero) + return -one/zero; /* yn(n,0) = -inf with division by zero signal */ + else + return zero/zero; /* yn(n,-ve) is NaN with invalid signal */ + } + sign = 1; + if (n < 0) { + n = -n; + if ((n&1) == 1) sign = -1; + } + if (n == 0) return (y0l(x)); + if (n == 1) return (sign*y1l(x)); + if (!finitel(x)) return zero; + + if (x > 1.0e91L) { + /* + * x >> n**2 + * Jn(x) = cos(x-(2n+1)*pi/4)*sqrt(2/(x*pi)) + * Yn(x) = sin(x-(2n+1)*pi/4)*sqrt(2/(x*pi)) + * Let s=sin(x), c=cos(x), + * xn=x-(2n+1)*pi/4, sqt2 = sqrt(2),then + * + * n sin(xn)*sqt2 cos(xn)*sqt2 + * ---------------------------------- + * 0 s-c c+s + * 1 -s-c -c+s + * 2 -s+c -c-s + * 3 s+c c-s + */ + switch (n&3) { + case 0: temp = sinl(x)-cosl(x); break; + case 1: temp = -sinl(x)-cosl(x); break; + case 2: temp = -sinl(x)+cosl(x); break; + case 3: temp = sinl(x)+cosl(x); break; + } + b = invsqrtpi*temp/sqrtl(x); + } else { + a = y0l(x); + b = y1l(x); + /* + * fix 1262058 and take care of non-default rounding + */ + for (i = 1; i < n; i++) { + temp = b; + b *= (GENERIC) (i + i) / x; + if (b <= -LDBL_MAX) /* product already at -LDBL_MAX or -inf: stop recurring */ + break; + b -= a; + a = temp; + } + } + if (sign > 0) + return b; + else + return -b; +} diff --git a/usr/src/lib/libm/common/LD/lgammal.c b/usr/src/lib/libm/common/LD/lgammal.c new file mode 100644 index 0000000000..996fc09697 --- /dev/null +++ b/usr/src/lib/libm/common/LD/lgammal.c @@ -0,0 +1,49 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution
License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak lgammal = __lgammal + +/* + * long double lgammal(long double x); returns the value computed by __k_lgammal() and mirrors signgaml into signgam + */ + +#include "libm.h" +#include "libm_synonyms.h" +#include "longdouble.h" + +extern int signgam; +extern int signgaml; + +long double +lgammal(long double x) { + long double y = __k_lgammal(x, &signgaml); /* kernel is handed &signgaml, presumably to store the sign of gamma(x) */ + + signgam = signgaml; /* SUSv3 requires the setting of signgam */ + return y; +} diff --git a/usr/src/lib/libm/common/LD/lgammal_r.c b/usr/src/lib/libm/common/LD/lgammal_r.c new file mode 100644 index 0000000000..eeeb9d0d28 --- /dev/null +++ b/usr/src/lib/libm/common/LD/lgammal_r.c @@ -0,0 +1,42 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License.
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * long double lgammal_r(long double x, int *signgamlp); thin wrapper that forwards both arguments to __k_lgammal() and writes no global itself + */ + +#pragma weak lgammal_r = __lgammal_r + +#include "libm.h" +#include "longdouble.h" + +long double +lgammal_r(long double x, int *signgamlp) { + return __k_lgammal(x, signgamlp); /* caller-supplied pointer is passed straight through */ +} diff --git a/usr/src/lib/libm/common/LD/log1pl.c b/usr/src/lib/libm/common/LD/log1pl.c new file mode 100644 index 0000000000..97dc4910c4 --- /dev/null +++ b/usr/src/lib/libm/common/LD/log1pl.c @@ -0,0 +1,66 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc.
All rights reserved. + * Use is subject to license terms. + */ + +#if defined(ELFOBJ) +#pragma weak log1pl = __log1pl +#endif + +/* + * log1pl(x) + * Kahan's trick based on log(1+x)/x being a slowly varying function. + */ + +#include "libm.h" + +#if defined(__x86) +#define __swapRD __swap87RD +#endif +extern enum fp_direction_type __swapRD(enum fp_direction_type); + +long double +log1pl(long double x) { + long double y; + enum fp_direction_type rd; + + if (x != x) + return (x + x); /* NaN in, NaN out */ + if (x < -1.L) + return (logl(x)); /* 1+x < 0: defer to logl() on a negative argument */ + rd = __swapRD(fp_nearest); /* presumably installs round-to-nearest and returns the previous mode, which is restored below */ + y = 1.L + x; + if (y != 1.L) { /* when 1+x rounds to 1, x itself is returned */ + if (y == x) + x = logl(x); /* 1+x rounded to x, so logl(x) stands in for log(1+x) */ + else + x *= logl(y) / (y - 1.L); /* x * log(y)/(y-1), with y-1 the x that was actually added */ + } + if (rd != fp_nearest) + (void) __swapRD(rd); /* put the saved rounding direction back */ + return (x); +} diff --git a/usr/src/lib/libm/common/LD/logbl.c b/usr/src/lib/libm/common/LD/logbl.c new file mode 100644 index 0000000000..e8a7809cd5 --- /dev/null +++ b/usr/src/lib/libm/common/LD/logbl.c @@ -0,0 +1,83 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms.
+ */ + +#if defined(ELFOBJ) +#pragma weak logbl = __logbl +#endif + +#include "libm.h" +#include "xpg6.h" /* __xpg6 */ +#define _C99SUSv3_logb _C99SUSv3_logb_subnormal_is_like_ilogb + +#if defined(__sparc) +#define ISNORMALL(k, x) (k != 0x7fff) /* assuming k != 0 */ +#define X86PDNRM(k, x) +#define XSCALE_OFFSET 0x406f /* 0x3fff + 112 */ +static const long double xscale = 5192296858534827628530496329220096.0L; + /* 2^112 */ +#elif defined(__x86) +/* + * if pseudo-denormal, replace by the equivalent normal + */ +#define X86PDNRM(k, x) if (k == 0 && (((int *) &x)[1] & 0x80000000) != 0) \ + ((int *) &x)[2] |= k = 1 +#if defined(HANDLE_UNSUPPORTED) /* assuming k != 0 */ +#define ISNORMALL(k, x) (k != 0x7fff && (((int *) &x)[1] & 0x80000000) != 0) +#else +#define ISNORMALL(k, x) (k != 0x7fff) +#endif +#define XSCALE_OFFSET 0x403e /* 0x3fff + 63 */ +static const long double xscale = 9223372036854775808.0L; /* 2^63 */ +#endif + +static long double +raise_division(long double v) { +#pragma STDC FENV_ACCESS ON + static const long double zero = 0.0L; + return (v / zero); /* divide by a run-time zero: result is +/-Inf with the sign of v */ +} + +long double +logbl(long double x) { + int k = XBIASED_EXP(x); + + X86PDNRM(k, x); + if (k == 0) { /* zero or subnormal */ + if (ISZEROL(x)) + return (raise_division(-1.0L)); /* -1/0 = -Inf */ + else if ((__xpg6 & _C99SUSv3_logb) != 0) { + x *= xscale; /* scale up by 2^112 or 2^63 */ + return (long double) (XBIASED_EXP(x) - XSCALE_OFFSET); /* offset removes the bias and the scaling together */ + } else + return ((long double) (-16382)); /* flag clear: every subnormal reports -16382 */ + } else if (ISNORMALL(k, x)) + return ((long double) (k - 0x3fff)); /* unbiased exponent */ + else + return (x * x); /* k == 0x7fff, or an encoding ISNORMALL rejects under HANDLE_UNSUPPORTED */ +} diff --git a/usr/src/lib/libm/common/LD/longdouble.h b/usr/src/lib/libm/common/LD/longdouble.h new file mode 100644 index 0000000000..10d4c26fc0 --- /dev/null +++ b/usr/src/lib/libm/common/LD/longdouble.h @@ -0,0 +1,160 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License.
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _LD_LONGDOUBLE_H +#define _LD_LONGDOUBLE_H +#include <sys/ieeefp.h> + +extern long double __k_cosl(long double, long double); +extern long double __k_lgammal(long double, int *); +extern long double __k_sincosl(long double, long double, long double *); +extern long double __k_sinl(long double, long double); +extern long double __k_tanl(long double, long double, int); +extern long double __poly_libmq(long double, int, long double *); +extern int __rem_pio2l(long double, long double *); + +extern long double acosdl(long double); +extern long double acoshl(long double); +extern long double acosl(long double); +extern long double acospil(long double); +extern long double acospl(long double); +extern long double aintl(long double); +extern long double anintl(long double); +extern long double annuityl(long double, long double); +extern long double asindl(long double); +extern long double asinhl(long double); +extern long double asinl(long double); +extern long double asinpil(long double); +extern long double asinpl(long double); +extern long double atan2dl(long double, long double); +extern long double atan2l(long double, long double); +extern long double atan2pil(long double, 
long double); +extern long double atandl(long double); +extern long double atanhl(long double); +extern long double atanl(long double); +extern long double atanpil(long double); +extern long double atanpl(long double); +extern long double cbrtl(long double); +extern long double ceill(long double); +extern long double compoundl(long double, long double); +extern long double copysignl(long double, long double); +extern long double cosdl(long double); +extern long double coshl(long double); +extern long double cosl(long double); +extern long double cospil(long double); +extern long double cospl(long double); +extern long double erfcl(long double); +extern long double erfl(long double); +extern long double exp10l(long double); +extern long double exp2l(long double); +extern long double expl(long double); +extern long double expm1l(long double); +extern long double fabsl(long double); +extern int finitel(long double); +extern long double floorl(long double); +extern long double fmodl(long double, long double); +extern enum fp_class_type fp_classl(long double); +extern long double gammal(long double); +extern long double hypotl(long double, long double); +extern int ilogbl(long double); +extern long double infinityl(void); +extern int irintl(long double); +extern int isinfl(long double); +extern int isnanl(long double); +extern int isnormall(long double); +extern int issubnormall(long double); +extern int iszerol(long double); +extern long double j0l(long double); +extern long double j1l(long double); +extern long double jnl(int, long double); +extern long double lgammal(long double); +extern long double log10l(long double); +extern long double log1pl(long double); +extern long double log2l(long double); +extern long double logbl(long double); +extern long double logl(long double); +extern long double max_normall(void); +extern long double max_subnormall(void); +extern long double min_normall(void); +extern long double min_subnormall(void); +extern long double 
nextafterl(long double, long double); +extern int nintl(long double); +extern long double pow_li(long double *, int *); +extern long double powl(long double, long double); +extern long double quiet_nanl(long); +extern long double remainderl(long double, long double); +extern long double rintl(long double); +extern long double scalbl(long double, long double); +extern long double scalbnl(long double, int); +extern long double signaling_nanl(long); +extern int signbitl(long double); +extern long double significandl(long double); +extern void sincosdl(long double, long double *, long double *); +extern void sincosl(long double, long double *, long double *); +extern void sincospil(long double, long double *, long double *); +extern void sincospl(long double, long double *, long double *); +extern long double sindl(long double); +extern long double sinhl(long double); +extern long double sinl(long double); +extern long double sinpil(long double); +extern long double sinpl(long double); +extern long double sqrtl(long double); +extern long double tandl(long double); +extern long double tanhl(long double); +extern long double tanl(long double); +extern long double tanpil(long double); +extern long double tanpl(long double); +extern long double y0l(long double); +extern long double y1l(long double); +extern long double ynl(int, long double); + +extern long double q_copysign_(long double *, long double *); +extern long double q_fabs_(long double *); +extern int iq_finite_(long double *); +extern long double q_fmod_(long double *, long double *); +extern enum fp_class_type iq_fp_class_(long double *); +extern int iq_ilogb_(long double *); +extern long double q_infinity_(void); +extern int iq_isinf_(long double *); +extern int iq_isnan_(long double *); +extern int iq_isnormal_(long double *); +extern int iq_issubnormal_(long double *); +extern int iq_iszero_(long double *); +extern long double q_max_normal_(void); +extern long double q_max_subnormal_(void); +extern long 
double q_min_normal_(void); +extern long double q_min_subnormal_(void); +extern long double q_nextafter_(long double *, long double *); +extern long double q_quiet_nan_(long *); +extern long double q_remainder_(long double *, long double *); +extern long double q_scalbn_(long double *, int *); +extern long double q_signaling_nan_(long *); +extern int iq_signbit_(long double *); + +#endif /* _LD_LONGDOUBLE_H */ diff --git a/usr/src/lib/libm/common/LD/nextafterl.c b/usr/src/lib/libm/common/LD/nextafterl.c new file mode 100644 index 0000000000..1f6e40d398 --- /dev/null +++ b/usr/src/lib/libm/common/LD/nextafterl.c @@ -0,0 +1,119 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#if defined(ELFOBJ) +#pragma weak nextafterl = __nextafterl +#endif + +#include "libm.h" +#include <float.h> /* LDBL_MAX, LDBL_MIN */ + +#if defined(__sparc) +#define n0 0 +#define n1 1 +#define n2 2 +#define n3 3 +#define X86PDNRM1(x) +#define INC(px) { \ + if (++px[n3] == 0) \ + if (++px[n2] == 0) \ + if (++px[n1] == 0) \ + ++px[n0]; \ + } +#define DEC(px) { \ + if (--px[n3] == 0xffffffff) \ + if (--px[n2] == 0xffffffff) \ + if (--px[n1] == 0xffffffff) \ + --px[n0]; \ + } +#elif defined(__x86) +#define n0 2 +#define n1 1 +#define n2 0 +#define n3 0 +/* + * if pseudo-denormal, replace by the equivalent normal + */ +#define X86PDNRM1(x) if (XBIASED_EXP(x) == 0 && (((int *) &x)[1] & \ + 0x80000000) != 0) \ + ((int *) &x)[2] |= 1 +#define INC(px) { \ + if (++px[n2] == 0) \ + if ((++px[n1] & ~0x80000000) == 0) \ + px[n1] = 0x80000000, ++px[n0]; \ + } +#define DEC(px) { \ + if (--px[n2] == 0xffffffff) \ + if (--px[n1] == 0x7fffffff) \ + if ((--px[n0] & 0x7fff) != 0) \ + px[n1] |= 0x80000000; \ + } +#endif + +long double +nextafterl(long double x, long double y) { + int *px = (int *) &x; + int *py = (int *) &y; + + if (x == y) + return (y); /* C99 requirement */ + if (x != x || y != y) + return (x * y); /* at least one NaN: the product propagates it */ + + if (ISZEROL(x)) { /* x == 0.0: build the word pattern with only the lowest bit set, top word masked from y with XSGNMSK (presumably the sign) */ + px[n0] = py[n0] & XSGNMSK; + px[n1] = px[n2] = 0; + px[n3] = 1; + } else { + X86PDNRM1(x); + if ((px[n0] & XSGNMSK) == 0) { /* x > 0.0 */ + if (x > y) /* x > y: step the bit pattern down */ + DEC(px) + else + INC(px) + } else { + if (x < y) /* x < y: x is negative here, so stepping the pattern down moves toward y */ + DEC(px) + else + INC(px) + } + } +#ifndef lint + { + volatile long double dummy; /* written only for the side effect of the products below, presumably to raise underflow or overflow */ + int k = XBIASED_EXP(x); + + if (k == 0) + dummy = LDBL_MIN * copysignl(LDBL_MIN, x); + else if (k == 0x7fff) + dummy = LDBL_MAX * copysignl(LDBL_MAX, x); + } +#endif + return (x); +} diff --git a/usr/src/lib/libm/common/LD/scalbl.c b/usr/src/lib/libm/common/LD/scalbl.c new file mode 100644 index 0000000000..63009154f9 --- /dev/null +++ b/usr/src/lib/libm/common/LD/scalbl.c @@ -0,0 +1,72 @@ +/* + * CDDL HEADER START + *
+ * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak scalbl = __scalbl + +/* + * scalbl(x,n): return x * 2**n by manipulating exponent. 
+ */ + +#include "libm.h" +#include "longdouble.h" + +#include <sys/isa_defs.h> + +long double +scalbl(long double x, long double fn) { + int *py = (int *) &fn, n; + long double z; + + if (isnanl(x) || isnanl(fn)) + return x * fn; /* the product propagates the NaN */ + + /* fn is +/-Inf */ +#if defined(_BIG_ENDIAN) + if ((py[0] & 0x7fff0000) == 0x7fff0000) { + if ((py[0] & 0x80000000) != 0) +#else + if ((py[2] & 0x7fff) == 0x7fff) { + if ((py[2] & 0x8000) != 0) +#endif + return x / (-fn); /* fn == -Inf: x / +Inf */ + else + return x * fn; /* fn == +Inf */ + } + if (rintl(fn) != fn) + return (fn - fn) / (fn - fn); /* non-integral fn: 0/0 yields NaN */ + if (fn > 65000.0L) + z = scalbnl(x, 65000); /* clamp so the (int) conversion below never sees a huge fn */ + else if (-fn > 65000.0L) + z = scalbnl(x, -65000); + else { + n = (int) fn; /* exact: fn is integral and |fn| <= 65000 */ + z = scalbnl(x, n); + } + return z; +} diff --git a/usr/src/lib/libm/common/LD/signgaml.c b/usr/src/lib/libm/common/LD/signgaml.c new file mode 100644 index 0000000000..d072f148e0 --- /dev/null +++ b/usr/src/lib/libm/common/LD/signgaml.c @@ -0,0 +1,36 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms.
+ */ + +#pragma weak signgaml = __signgaml + +#include "libm.h" +#include "libm_synonyms.h" +#include "longdouble.h" + +int signgaml = 0; /* lgammal() passes its address to __k_lgammal() and then copies the value into signgam */ diff --git a/usr/src/lib/libm/common/LD/significandl.c b/usr/src/lib/libm/common/LD/significandl.c new file mode 100644 index 0000000000..8140463a2e --- /dev/null +++ b/usr/src/lib/libm/common/LD/significandl.c @@ -0,0 +1,42 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms.
+ */ + +#if defined(ELFOBJ) +#pragma weak significandl = __significandl +#endif + +#include "libm.h" + +long double +significandl(long double x) { + if (ISZEROL(x) || XBIASED_EXP(x) == 0x7fff) /* 0/+-Inf/NaN */ + return (x + x); + else + return (scalbnl(x, -ilogbl(x))); /* scale x by 2**(-ilogbl(x)) */ +} diff --git a/usr/src/lib/libm/common/LD/sincosl.c b/usr/src/lib/libm/common/LD/sincosl.c new file mode 100644 index 0000000000..959d15e1af --- /dev/null +++ b/usr/src/lib/libm/common/LD/sincosl.c @@ -0,0 +1,115 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak sincosl = __sincosl + +/* INDENT OFF */ +/* sincosl(x, s, c) + * Table look-up algorithm by K.C. Ng, November, 1989. + * + * kernel function: + * __k_sincosl ... sin and cos function on [-pi/4,pi/4] + * __rem_pio2l ... argument reduction routine + * + * Method. + * Let S and C denote the sin and cos respectively on [-PI/4, +PI/4]. + * 1. Assume the argument x is reduced to y1+y2 = x-k*pi/2 in + * [-pi/4 , +pi/4], and let n = k mod 4. + * 2. Let S=S(y1+y2), C=C(y1+y2).
Depending on n, we have + * + * n sin(x) cos(x) tan(x) + * ---------------------------------------------------------- + * 0 S C S/C + * 1 C -S -C/S + * 2 -S -C S/C + * 3 -C S -C/S + * ---------------------------------------------------------- + * + * Special cases: + * Let trig be any of sin, cos, or tan. + * trig(+-INF) is NaN, with signals; + * trig(NaN) is that NaN; + * + * Accuracy: + * computer TRIG(x) returns trig(x) nearly rounded. + */ +/* INDENT ON */ + +#include "libm.h" +#include "libm_synonyms.h" +#include "longdouble.h" + +#include <sys/isa_defs.h> + +void +sincosl(long double x, long double *s, long double *c) { + long double y[2], z = 0.0L; + int n, ix; +#if defined(__i386) || defined(__amd64) + int *px = (int *) &x; +#endif + + /* trig(Inf or NaN) is NaN */ + if (!finitel(x)) { + *s = *c = x - x; /* Inf - Inf and NaN - NaN are both NaN */ + return; + } + + /* High word of x. */ +#if defined(__i386) || defined(__amd64) + XTOI(px, ix); +#else + ix = *(int *) &x; +#endif + + /* |x| ~< pi/4 */ + ix &= 0x7fffffff; + if (ix <= 0x3ffe9220) + *s = __k_sincosl(x, z, c); /* kernel returns S and stores C through its last argument */ + + /* argument reduction needed */ + else { + n = __rem_pio2l(x, y); + switch (n & 3) { + case 0: /* sin = S, cos = C */ + *s = __k_sincosl(y[0], y[1], c); + break; + case 1: /* sin = C, cos = -S */ + *c = -__k_sincosl(y[0], y[1], s); + break; + case 2: /* sin = -S, cos = -C */ + *s = -__k_sincosl(y[0], y[1], c); + *c = -*c; + break; + case 3: /* sin = -C, cos = S; last case, so no break */ + *c = __k_sincosl(y[0], y[1], s); + *s = -*s; + } + } +} diff --git a/usr/src/lib/libm/common/LD/sincospil.c b/usr/src/lib/libm/common/LD/sincospil.c new file mode 100644 index 0000000000..59b18fd3dd --- /dev/null +++ b/usr/src/lib/libm/common/LD/sincospil.c @@ -0,0 +1,208 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak sincospil = __sincospil + +/* + * void sincospil(long double x, long double *s, long double *c) + * *s = sinl(pi*x); *c = cosl(pi*x); + * + * Algorithm, 10/17/2002, K.C. Ng + * ------------------------------ + * Let y = |4x|, z = floor(y), and n = (int)(z mod 8.0) (displayed in binary). + * 1. If y == z, then x is a multiple of pi/4. Return the following values: + * --------------------------------------------------- + * n x mod 2 sin(x*pi) cos(x*pi) tan(x*pi) + * --------------------------------------------------- + * 000 0.00 +0 ___ +1 ___ +0 + * 001 0.25 +\/0.5 +\/0.5 +1 + * 010 0.50 +1 ___ +0 ___ +inf + * 011 0.75 +\/0.5 -\/0.5 -1 + * 100 1.00 -0 ___ -1 ___ +0 + * 101 1.25 -\/0.5 -\/0.5 +1 + * 110 1.50 -1 ___ -0 ___ +inf + * 111 1.75 -\/0.5 +\/0.5 -1 + * --------------------------------------------------- + * 2. 
Otherwise, + * --------------------------------------------------- + * n t sin(x*pi) cos(x*pi) tan(x*pi) + * --------------------------------------------------- + * 000 (y-z)/4 sinpi(t) cospi(t) tanpi(t) + * 001 (z+1-y)/4 cospi(t) sinpi(t) 1/tanpi(t) + * 010 (y-z)/4 cospi(t) -sinpi(t) -1/tanpi(t) + * 011 (z+1-y)/4 sinpi(t) -cospi(t) -tanpi(t) + * 100 (y-z)/4 -sinpi(t) -cospi(t) tanpi(t) + * 101 (z+1-y)/4 -cospi(t) -sinpi(t) 1/tanpi(t) + * 110 (y-z)/4 -cospi(t) sinpi(t) -1/tanpi(t) + * 111 (z+1-y)/4 -sinpi(t) cospi(t) -tanpi(t) + * --------------------------------------------------- + * + * NOTE. This program compute sinpi/cospi(t<0.25) by __k_sin/cos(pi*t, 0.0). + * This will return a result with error slightly more than one ulp (but less + * than 2 ulp). If one wants accurate result, one may break up pi*t in + * high (tpi_h) and low (tpi_l) parts and call __k_sin/cos(tip_h, tip_lo) + * instead. + */ + +#include "libm.h" +#include "libm_synonyms.h" +#include "longdouble.h" + +#include <sys/isa_defs.h> + +#define I(q, m) ((int *) &(q))[m] +#define U(q, m) ((unsigned *) &(q))[m] +#if defined(__i386) || defined(__amd64) +#define LDBL_MOST_SIGNIF_I(ld) ((I(ld, 2) << 16) | (0xffff & (I(ld, 1) >> 15))) +#define LDBL_LEAST_SIGNIF_U(ld) U(ld, 0) +#define PREC 64 +#define PRECM1 63 +#define PRECM2 62 +static const long double twoPRECM2 = 9.223372036854775808000000000000000e+18L; +#else +#define LDBL_MOST_SIGNIF_I(ld) I(ld, 0) +#define LDBL_LEAST_SIGNIF_U(ld) U(ld, sizeof(long double) / sizeof(int) - 1) +#define PREC 113 +#define PRECM1 112 +#define PRECM2 111 +static const long double twoPRECM2 = 5.192296858534827628530496329220096e+33L; +#endif + +static const long double +zero = 0.0L, +quater = 0.25L, +one = 1.0L, +pi = 3.141592653589793238462643383279502884197e+0000L, +sqrth = 0.707106781186547524400844362104849039284835937688474, +tiny = 1.0e-100; + +void +sincospil(long double x, long double *s, long double *c) { + long double y, z, t; + int hx, n, k; + unsigned lx; + 
+ hx = LDBL_MOST_SIGNIF_I(x); + lx = LDBL_LEAST_SIGNIF_U(x); + k = ((hx & 0x7fff0000) >> 16) - 0x3fff; + if (k >= PRECM2) { /* |x| >= 2**(Prec-2) */ + if (k >= 16384) { + *s = *c = x - x; + } + else { + if (k >= PREC) { + *s = zero; + *c = one; + } + else if (k == PRECM1) { + if ((lx & 1) == 0) { + *s = zero; + *c = one; + } + else { + *s = -zero; + *c = -one; + } + } + else { /* k = Prec - 2 */ + if ((lx & 1) == 0) { + *s = zero; + *c = one; + } + else { + *s = one; + *c = zero; + } + if ((lx & 2) != 0) { + *s = -*s; + *c = -*c; + } + } + } + } + else if (k < -2) /* |x| < 0.25 */ + *s = __k_sincosl(pi * fabsl(x), zero, c); + else { + /* y = |4x|, z = floor(y), and n = (int)(z mod 8.0) */ + y = 4.0L * fabsl(x); + if (k < PRECM2) { + z = y + twoPRECM2; + n = LDBL_LEAST_SIGNIF_U(z) & 7; /* 3 LSb of z */ + t = z - twoPRECM2; + k = 0; + if (t == y) + k = 1; + else if (t > y) { + n -= 1; + t = quater + (y - t) * quater; + } + else + t = (y - t) * quater; + } + else { /* k = Prec-3 */ + n = LDBL_LEAST_SIGNIF_U(y) & 7; /* 3 LSb of z */ + k = 1; + } + if (k) { /* x = N/4 */ + if ((n & 1) != 0) + *s = *c = sqrth + tiny; + else + if ((n & 2) == 0) { + *s = zero; + *c = one; + } + else { + *s = one; + *c = zero; + } + if ((n & 4) != 0) + *s = -*s; + if (((n + 1) & 4) != 0) + *c = -*c; + } + else { + if ((n & 1) != 0) + t = quater - t; + if (((n + (n & 1)) & 2) == 0) + *s = __k_sincosl(pi * t, zero, c); + else + *c = __k_sincosl(pi * t, zero, s); + if ((n & 4) != 0) + *s = -*s; + if (((n + 2) & 4) != 0) + *c = -*c; + } + } + if (hx < 0) + *s = -*s; +} +#undef U +#undef LDBL_LEAST_SIGNIF_U +#undef I +#undef LDBL_MOST_SIGNIF_I diff --git a/usr/src/lib/libm/common/LD/sinhl.c b/usr/src/lib/libm/common/LD/sinhl.c new file mode 100644 index 0000000000..242a22908a --- /dev/null +++ b/usr/src/lib/libm/common/LD/sinhl.c @@ -0,0 +1,88 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the 
"License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak sinhl = __sinhl + +#include "libm.h" +#include "longdouble.h" + +/* SINH(X) + * RETURN THE HYPERBOLIC SINE OF X + * + * Method : + * 1. reduce x to non-negative by SINH(-x) = - SINH(x). + * 2. + * + * EXPM1(x) + EXPM1(x)/(EXPM1(x)+1) + * 0 <= x <= lnovft : SINH(x) := -------------------------------- + * 2 + * + * lnovft <= x < INF : SINH(x) := EXP(x-MEP1*ln2)*2**ME + * + * here + * lnovft logarithm of the overflow threshold + * = MEP1*ln2 chopped to machine precision. + * ME maximum exponent + * MEP1 maximum exponent plus 1 + * + * Special cases: + * SINH(x) is x if x is +INF, -INF, or NaN. + * only SINH(0)=0 is exact for finite argument. 
+ * + */ + +static const long double C[] = { + 0.5L, /* half */ + 1.0L, /* one */ + 1.135652340629414394879149e+04L, /* lnovft: 16384*ln2, chopped */ + 7.004447686242549087858985e-16L /* lnovlo: low-order remainder of 16384*ln2 */ +}; + +#define half C[0] +#define one C[1] +#define lnovft C[2] +#define lnovlo C[3] + +long double +sinhl(long double x) +{ + long double r, t; + + if (!finitel(x)) + return (x + x); /* x is INF or NaN */ + r = fabsl(x); + if (r < lnovft) { + t = expm1l(r); + r = copysignl((t + t / (one + t)) * half, x); /* (E + E/(E+1))/2 with E = expm1l(|x|) */ + } else { + r = copysignl(expl((r - lnovft) - lnovlo), x); /* exp(|x| - 16384*ln2), sign of x */ + r = scalbnl(r, 16383); /* times 2**16383, i.e. exp(|x|)/2 */ + } + return (r); +} diff --git a/usr/src/lib/libm/common/LD/sinl.c b/usr/src/lib/libm/common/LD/sinl.c new file mode 100644 index 0000000000..d51f0e75f0 --- /dev/null +++ b/usr/src/lib/libm/common/LD/sinl.c @@ -0,0 +1,110 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak sinl = __sinl + +/* INDENT OFF */ +/* sinl(x) + * Table look-up algorithm by K.C. Ng, November, 1989. + * + * kernel function: + * __k_sinl ... sin function on [-pi/4,pi/4] + * __k_cosl ... 
cos function on [-pi/4,pi/4] + * __rem_pio2l ... argument reduction routine + * + * Method. + * Let S and C denote the sin and cos respectively on [-PI/4, +PI/4]. + * 1. Assume the argument x is reduced to y1+y2 = x-k*pi/2 in + * [-pi/4 , +pi/4], and let n = k mod 4. + * 2. Let S=S(y1+y2), C=C(y1+y2). Depending on n, we have + * + * n sin(x) cos(x) tan(x) + * ---------------------------------------------------------- + * 0 S C S/C + * 1 C -S -C/S + * 2 -S -C S/C + * 3 -C S -C/S + * ---------------------------------------------------------- + * + * Special cases: + * Let trig be any of sin, cos, or tan. + * trig(+-INF) is NaN, with signals; + * trig(NaN) is that NaN; + * + * Accuracy: + * computed TRIG(x) returns trig(x) nearly rounded. + */ +/* INDENT ON */ + +#include "libm.h" +#include "libm_synonyms.h" +#include "longdouble.h" + +#include <sys/isa_defs.h> + +long double +sinl(long double x) { + long double y[2], z = 0.0L; + int n, ix; +#if defined(__i386) || defined(__amd64) + int *px = (int *) &x; +#endif + + /* sin(Inf or NaN) is NaN */ + if (!finitel(x)) + return x - x; + + /* High word of x. */ +#if defined(__i386) || defined(__amd64) + XTOI(px, ix); +#else + ix = *(int *) &x; +#endif + /* |x| ~< pi/4: no reduction needed, the tail z is zero */ + ix &= 0x7fffffff; + if (ix <= 0x3ffe9220) + return __k_sinl(x, z); + + /* argument reduction needed */ + else { + n = __rem_pio2l(x, y); + switch (n & 3) { + case 0: /* sin(x) = S */ + return __k_sinl(y[0], y[1]); + case 1: /* sin(x) = C */ + return __k_cosl(y[0], y[1]); + case 2: /* sin(x) = -S */ + return -__k_sinl(y[0], y[1]); + case 3: /* sin(x) = -C */ + return -__k_cosl(y[0], y[1]); + /* NOTREACHED */ + } + } + return 0.0L; +} diff --git a/usr/src/lib/libm/common/LD/sinpil.c b/usr/src/lib/libm/common/LD/sinpil.c new file mode 100644 index 0000000000..1429c89c1d --- /dev/null +++ b/usr/src/lib/libm/common/LD/sinpil.c @@ -0,0 +1,175 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). 
+ * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak sinpil = __sinpil + +/* long double sinpil(long double x), + * return long double precision sinl(pi*x). + * + * Algorithm, 10/17/2002, K.C. Ng + * ------------------------------ + * Let y = |4x|, z = floor(y), and n = (int)(z mod 8.0) (displayed in binary). + * 1. If y == z, then x is a multiple of pi/4. Return the following values: + * --------------------------------------------------- + * n x mod 2 sin(x*pi) cos(x*pi) tan(x*pi) + * --------------------------------------------------- + * 000 0.00 +0 ___ +1 ___ +0 + * 001 0.25 +\/0.5 +\/0.5 +1 + * 010 0.50 +1 ___ +0 ___ +inf + * 011 0.75 +\/0.5 -\/0.5 -1 + * 100 1.00 -0 ___ -1 ___ +0 + * 101 1.25 -\/0.5 -\/0.5 +1 + * 110 1.50 -1 ___ -0 ___ +inf + * 111 1.75 -\/0.5 +\/0.5 -1 + * --------------------------------------------------- + * 2. 
Otherwise, + * --------------------------------------------------- + * n t sin(x*pi) cos(x*pi) tan(x*pi) + * --------------------------------------------------- + * 000 (y-z)/4 sinpi(t) cospi(t) tanpi(t) + * 001 (z+1-y)/4 cospi(t) sinpi(t) 1/tanpi(t) + * 010 (y-z)/4 cospi(t) -sinpi(t) -1/tanpi(t) + * 011 (z+1-y)/4 sinpi(t) -cospi(t) -tanpi(t) + * 100 (y-z)/4 -sinpi(t) -cospi(t) tanpi(t) + * 101 (z+1-y)/4 -cospi(t) -sinpi(t) 1/tanpi(t) + * 110 (y-z)/4 -cospi(t) sinpi(t) -1/tanpi(t) + * 111 (z+1-y)/4 -sinpi(t) cospi(t) -tanpi(t) + * --------------------------------------------------- + * + * NOTE. This program compute sinpi/cospi(t<0.25) by __k_sin/cos(pi*t, 0.0). + * This will return a result with error slightly more than one ulp (but less + * than 2 ulp). If one wants accurate result, one may break up pi*t in + * high (tpi_h) and low (tpi_l) parts and call __k_sin/cos(tip_h, tip_lo) + * instead. + */ + +#include "libm.h" +#include "libm_synonyms.h" +#include "longdouble.h" + +#include <sys/isa_defs.h> + +#define I(q, m) ((int *) &(q))[m] +#define U(q, m) ((unsigned *) &(q))[m] +#if defined(__i386) || defined(__amd64) +#define LDBL_MOST_SIGNIF_I(ld) ((I(ld, 2) << 16) | (0xffff & (I(ld, 1) >> 15))) +#define LDBL_LEAST_SIGNIF_U(ld) U(ld, 0) +#define PREC 64 +#define PRECM1 63 +#define PRECM2 62 +static const long double twoPRECM2 = 9.223372036854775808000000000000000e+18L; +#else +#define LDBL_MOST_SIGNIF_I(ld) I(ld, 0) +#define LDBL_LEAST_SIGNIF_U(ld) U(ld, sizeof(long double) / sizeof(int) - 1) +#define PREC 113 +#define PRECM1 112 +#define PRECM2 111 +static const long double twoPRECM2 = 5.192296858534827628530496329220096e+33L; +#endif + +static const long double +zero = 0.0L, +quater = 0.25L, +one = 1.0L, +pi = 3.141592653589793238462643383279502884197e+0000L, +sqrth = 0.707106781186547524400844362104849039284835937688474, +tiny = 1.0e-100; + +long double +sinpil(long double x) { + long double y, z, t; + int hx, n, k; + unsigned lx; + + hx = 
LDBL_MOST_SIGNIF_I(x); + lx = LDBL_LEAST_SIGNIF_U(x); + k = ((hx & 0x7fff0000) >> 16) - 0x3fff; + if (k >= PRECM2) { /* |x| >= 2**(Prec-2) */ + if (k >= 16384) + y = x - x; + else { + if (k >= PREC) + y = zero; + else if (k == PRECM1) + y = (lx & 1) == 0 ? zero: -zero; + else { /* k = Prec - 2 */ + y = (lx & 1) == 0 ? zero : one; + if ((lx & 2) != 0) + y = -y; + } + } + } + else if (k < -2) /* |x| < 0.25 */ + y = __k_sinl(pi * fabsl(x), zero); + else { + /* y = |4x|, z = floor(y), and n = (int)(z mod 8.0) */ + y = 4.0L * fabsl(x); + if (k < PRECM2) { + z = y + twoPRECM2; + n = LDBL_LEAST_SIGNIF_U(z) & 7; /* 3 LSb of z */ + t = z - twoPRECM2; + k = 0; + if (t == y) + k = 1; + else if (t > y) { + n -= 1; + t = quater + (y - t) * quater; + } + else + t = (y - t) * quater; + } + else { /* k = Prec-3 */ + n = LDBL_LEAST_SIGNIF_U(y) & 7; /* 3 LSb of z */ + k = 1; + } + if (k) { /* x = N/4 */ + if ((n & 1) != 0) + y = sqrth + tiny; + else + y = (n & 2) == 0 ? zero : one; + if ((n & 4) != 0) + y = -y; + } + else { + if ((n & 1) != 0) + t = quater - t; + if (((n + (n & 1)) & 2) == 0) + y = __k_sinl(pi * t, zero); + else + y = __k_cosl(pi * t, zero); + if ((n & 4) != 0) + y = -y; + } + } + return hx >= 0 ? y : -y; +} +#undef U +#undef LDBL_LEAST_SIGNIF_U +#undef I +#undef LDBL_MOST_SIGNIF_I diff --git a/usr/src/lib/libm/common/LD/tanhl.c b/usr/src/lib/libm/common/LD/tanhl.c new file mode 100644 index 0000000000..87ecfa705b --- /dev/null +++ b/usr/src/lib/libm/common/LD/tanhl.c @@ -0,0 +1,102 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#if defined(ELFOBJ) +#pragma weak tanhl = __tanhl +#endif + +/* + * tanhl(x) returns the hyperbolic tangent of x + * + * Method : + * 1. reduce x to non-negative: tanhl(-x) = - tanhl(x). + * 2. + * 0 < x <= small : tanhl(x) := x + * -expm1l(-2x) + * small < x <= 1 : tanhl(x) := -------------- + * expm1l(-2x) + 2 + * 2 + * 1 <= x <= threshold : tanhl(x) := 1 - --------------- + * expm1l(2x) + 2 + * threshold < x <= INF : tanhl(x) := 1. + * + * where + * single : small = 1.e-5 threshold = 11.0 + * double : small = 1.e-10 threshold = 22.0 + * quad : small = 1.e-20 threshold = 45.0 + * + * Note: threshold was chosen so that + * fl(1.0+2/(expm1(2*threshold)+2)) == 1. + * + * Special cases: + * tanhl(NaN) is NaN; + * only tanhl(0.0)=0.0 is exact for finite argument. 
+ */ + +#include "libm.h" +#include "longdouble.h" + +static const long double small = 1.0e-20L, one = 1.0, two = 2.0, +#ifndef lint + big = 1.0e+20L, +#endif + threshold = 45.0L; + +long double +tanhl(long double x) { + long double t, y, z; + int signx; +#ifndef lint + volatile long double dummy; +#endif + + if (isnanl(x)) + return (x + x); /* x is NaN */ + signx = signbitl(x); + t = fabsl(x); /* work on |x|; the sign is restored on return */ + z = one; /* magnitude used when t > threshold */ + if (t <= threshold) { + if (t > one) + z = one - two / (expm1l(t + t) + two); /* 1 < t <= threshold */ + else if (t > small) { + y = expm1l(-t - t); + z = -y / (y + two); /* small < t <= 1 */ + } else { +#ifndef lint + dummy = t + big; + /* inexact if t != 0 */ +#endif + return (x); /* t <= small: tanhl(x) := x */ + } + } else if (!finitel(t)) + return (copysignl(one, x)); /* tanhl(+-Inf) = +-1 */ + else + return (signx ? -z + small * small : z - small * small); /* finite t > threshold: +-1, presumably perturbed to raise inexact */ + return (signx ? -z : z); +} diff --git a/usr/src/lib/libm/common/LD/tanl.c b/usr/src/lib/libm/common/LD/tanl.c new file mode 100644 index 0000000000..c346c163e8 --- /dev/null +++ b/usr/src/lib/libm/common/LD/tanl.c @@ -0,0 +1,99 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 
+ * Use is subject to license terms. + */ + +#pragma weak tanl = __tanl + +/* INDENT OFF */ +/* tanl(x) + * Table look-up algorithm by K.C. Ng, November, 1989. + * + * kernel function: + * __k_tanl ... tangent function on [-pi/4,pi/4] + * __rem_pio2l ... argument reduction routine + * + * Method. + * Let S and C denote the sin and cos respectively on [-PI/4, +PI/4]. + * 1. Assume the argument x is reduced to y1+y2 = x-k*pi/2 in + * [-pi/4 , +pi/4], and let n = k mod 4. + * 2. Let S=S(y1+y2), C=C(y1+y2). Depending on n, we have + * + * n sin(x) cos(x) tan(x) + * ---------------------------------------------------------- + * 0 S C S/C + * 1 C -S -C/S + * 2 -S -C S/C + * 3 -C S -C/S + * ---------------------------------------------------------- + * + * Special cases: + * Let trig be any of sin, cos, or tan. + * trig(+-INF) is NaN, with signals; + * trig(NaN) is that NaN; + * + * Accuracy: + * computed TRIG(x) returns trig(x) nearly rounded. + */ +/* INDENT ON */ + +#include "libm.h" +#include "libm_synonyms.h" +#include "longdouble.h" + +#include <sys/isa_defs.h> + +long double +tanl(long double x) { + long double y[2], z = 0.0L; + int n, ix; +#if defined(__i386) || defined(__amd64) + int *px = (int *) &x; +#endif + + /* trig(Inf or NaN) is NaN */ + if (!finitel(x)) + return x - x; + + /* High word of x. 
*/ +#if defined(__i386) || defined(__amd64) + XTOI(px, ix); +#else + ix = *(int *) &x; +#endif + + /* |x| ~< pi/4: no reduction needed, the tail z is zero */ + ix &= 0x7fffffff; + if (ix <= 0x3ffe9220) + return __k_tanl(x, z, 0); + + /* argument reduction needed */ + else { + n = __rem_pio2l(x, y); + return __k_tanl(y[0], y[1], n & 1); /* presumably odd n selects the -C/S form from the table above */ + } +} diff --git a/usr/src/lib/libm/common/Q/_TBL_atanl.c b/usr/src/lib/libm/common/Q/_TBL_atanl.c new file mode 100644 index 0000000000..bf152db99f --- /dev/null +++ b/usr/src/lib/libm/common/Q/_TBL_atanl.c @@ -0,0 +1,235 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * Table of constants for atanl. + * By K.C. 
Ng, March 9, 1989 + */ + +#include "libm.h" + +const long double _TBL_atanl_hi[] = { + +1.243549945467614350313548491638710241657e-0001L, + +1.320397616146387492746844065265695322625e-0001L, + +1.397088742891636451833677767390950568161e-0001L, + +1.473614810886516356098027603968455182107e-0001L, + +1.549967419239409823037143749334921913337e-0001L, + +1.626138285979485753736415637615578006202e-0001L, + +1.702119252854744044904966070997617136954e-0001L, + +1.777902289926760707966247992158246889946e-0001L, + +1.853479499956947648860259612285446466726e-0001L, + +1.928843122579746641970587106902273034988e-0001L, + +2.003985538258785146539457850343783844615e-0001L, + +2.078899272022629936053349831029943247563e-0001L, + +2.153576996977380480244596271664896416574e-0001L, + +2.228011537593945157710321221404325552502e-0001L, + +2.302195872768437302401709596798029906555e-0001L, + +2.376123138654712524738836343256377791989e-0001L, + +2.449786631268641541720824812112758064196e-0001L, + +2.596296294082575310299464431839719056011e-0001L, + +2.741674511196587975993718983421757859244e-0001L, + +2.885873618940773956236114199582183450433e-0001L, + +3.028848683749714055605560945055582181228e-0001L, + +3.170557532091470098090155766744673297585e-0001L, + +3.310960767041320949443387877569445542126e-0001L, + +3.450021772071051088676812869000516840829e-0001L, + +3.587706702705722203959200639264605221536e-0001L, + +3.723984466767542219236550382837018264141e-0001L, + +3.858826693980737758976954846072313963819e-0001L, + +3.992207695752525656147166961588647649110e-0001L, + +4.124104415973873068997912896671269426092e-0001L, + +4.254496373700422895422636051807923301382e-0001L, + +4.383365598579578054456160492147713089588e-0001L, + +4.510696559885234763756392572821934407380e-0001L, + +4.636476090008061162142562314612143971334e-0001L, + +4.883339510564055238671649607470648445964e-0001L, + +5.123894603107377066666010205842592380556e-0001L, + +5.358112379604637002690850687076914469847e-0001L, + 
+5.585993153435624359715082164016612287587e-0001L, + +5.807563535676703992032744750015008237512e-0001L, + +6.022873461349641816821226942042329192246e-0001L, + +6.231993299340659309924753490603745936779e-0001L, + +6.435011087932843868028092287173226044727e-0001L, + +6.632029927060932553632543102382758341723e-0001L, + +6.823165548747480782564299817111529878473e-0001L, + +7.008544078844501724579512817867512731862e-0001L, + +7.188299996216245054170141515259046989104e-0001L, + +7.362574289814281317428352710891466247927e-0001L, + +7.531512809621943895247393702690288860057e-0001L, + +7.695264804056582604068200359856540172660e-0001L, + +7.853981633974483096156608458198756993698e-0001L, + +8.156919233162234110214608387456458267228e-0001L, + +8.441539861131710025178441482716474673863e-0001L, + +8.709034570756529531401731125978140729165e-0001L, + +8.960553845713439561748007180299377954660e-0001L, + +9.197196053504168172286034548210894096931e-0001L, + +9.420000403794636647379371705345936211589e-0001L, + +9.629943306809362018151958359970998967730e-0001L, + +9.827937232473290679857106110146660376257e-0001L, + +1.001483135694234732918329595301437489634e+0000L, + +1.019141344266349734638342917023063621235e+0000L, + +1.035841253008800176584694470325444073548e+0000L, + +1.051650212548373667459867312086299902692e+0000L, + +1.066630365315743563079176347420279908601e+0000L, + +1.080839000541168310887156729217199785900e+0000L, + +1.094328907321189919892788314610235276303e+0000L, + +1.107148717794090503017065460178537049754e+0000L, + +1.130953743979160446470933515536327756003e+0000L, + +1.152571997215667518040149862612751467283e+0000L, + +1.172273881128476386600594944133704600686e+0000L, + +1.190289949682531732927733774829318280338e+0000L, + +1.206817370285252530395511580056557662568e+0000L, + +1.222025323210989637041741743922570412029e+0000L, + +1.236059489478081941909451971109078614621e+0000L, + +1.249045772398254425829917077281090048355e+0000L, + 
+1.261093382252440419313940881247335764012e+0000L, + +1.272297395208717341296193749822480574646e+0000L, + +1.282740879744270747362885251136495516407e+0000L, + +1.292496667789785267903091421407081672353e+0000L, + +1.301628834009196143804785850366685502445e+0000L, + +1.310193935047555634256437689171905343754e+0000L, + +1.318242051016837049859330202327136304043e+0000L, + +1.325817663668032465059239210428475688616e+0000L, + +1.339705659598999539328303752589555785024e+0000L, + +1.352127380920954657189147941389812759877e+0000L, + +1.363300100359693954289298527825099156027e+0000L, + +1.373400766945015860861271926444961060484e+0000L, + +1.382574821490125858059967417768568516395e+0000L, + +1.390942827002418348642768694383643239549e+0000L, + +1.398605512271957595012670081611428272786e+0000L, + +1.405647649380269780952193401995808066441e+0000L, + +1.412141064608495215367613671858489085282e+0000L, + +1.418146998399631459403860303970098863261e+0000L, + +1.423717971406494118901819046610729710890e+0000L, + +1.428899272190732696418470074537198400139e+0000L, + +1.433730152484708986640471909669887388026e+0000L, + +1.438244794498222597961404247935481603967e+0000L, + +1.442473099109101820025292059937729181035e+0000L, + +1.446441332248135184199966842475880386611e+0000L, +}; + +const long double _TBL_atanl_lo[] = { + +1.407486919762806380231720282041430859065e-0036L, + -4.959696159473992555573043943799966949987e-0036L, + +8.952774562519464887393121344636183788152e-0036L, + +1.188043742320789571818076584354496443030e-0035L, + -2.781027811204514537842537512823435451463e-0037L, + +1.479722037702380032729553623431514726239e-0036L, + -4.216956140054819873287038480184963406819e-0036L, + +7.243122966691348464993032365631602349468e-0036L, + -2.157343008983917029989567935379065159119e-0036L, + -9.951574540512672355445236729812860518631e-0036L, + -3.906555899232483818161756973039787656743e-0036L, + +5.526029227179372681321198066466113031444e-0036L, + 
+8.841572221591432180768225431803645204369e-0036L, + -8.176772879158617925419332362828558820944e-0036L, + -1.334412303465614224379711382302833876421e-0036L, + -4.492733120781338290893073392468132589219e-0036L, + +4.494551147181249039320182433676250148336e-0036L, + -1.668808150427922355577672445964844056727e-0035L, + +1.562975758610795576946108656893732968411e-0035L, + -2.238983556330807855250797038533151084811e-0035L, + -4.831232174554731155187045067118216295832e-0036L, + -1.433617235290583287695892661098069884431e-0035L, + -8.744018199889993280298917417096058172481e-0036L, + +5.928463600852983744578036078546455593865e-0036L, + -2.237665124843624127606105529504351499363e-0035L, + +6.074583759933610541428031075667744213648e-0036L, + +1.537218711045194967779234476202996702309e-0035L, + +2.097606805675115624165712158247879024716e-0035L, + -5.562395640549543806072686220262281911497e-0036L, + +1.969736670783247184185841193489735190152e-0035L, + +2.107031196447948850903473363942488754370e-0035L, + -2.302735636298200160225651851085422984456e-0035L, + +4.895096422573334926686184352202977056848e-0036L, + -7.238014347779445821387272305082026475766e-0036L, + +1.636564886570361403163744339604956885811e-0035L, + -3.988581195823453079372912991980323419740e-0035L, + +4.158772212091261351041778392322742597344e-0035L, + +3.834742145455647215368468737733713502739e-0035L, + -9.225117893363872172351589646548899090659e-0036L, + +1.409461969045598952617573674185465039654e-0036L, + +3.356885780547223527061285142581080367945e-0035L, + +3.909099105552255239501810680323211880340e-0035L, + +5.295641697965420814052186270729703965359e-0036L, + -5.096084681994551436784706392366250713672e-0036L, + -4.495901442527761585832968039391831520500e-0035L, + +3.803922654455163426656685761596261429034e-0035L, + -4.405652287289551210830864219661168965762e-0036L, + +1.602502419248216107622380775342561907695e-0036L, + +2.167952532530945256199261006510838063526e-0035L, + 
+1.984403801351542212571536292573675410407e-0035L, + +3.913961947179974683450522735356843245724e-0035L, + +2.111344380797545350551845343679956185473e-0035L, + +3.155855727744469275503981694439277018543e-0035L, + +1.629504452035546140826558561950023833561e-0035L, + -3.508724520927030585615123035617120894580e-0035L, + +2.904104186428285567959105527094611730009e-0035L, + -2.312884345381835659093199520980662723328e-0035L, + -7.712492318147157843996797382071597987481e-0035L, + +2.753902782988692242909206359044995381933e-0035L, + -9.450089945318130895108454599083752773445e-0035L, + -7.306175530203209233759494600164318159101e-0035L, + -4.173614481395375219395277015740431906643e-0035L, + +3.436994835625640704534485526286425749647e-0035L, + -6.379024349229809090730208492427563489748e-0035L, + -9.684294381635326129100412786609400488464e-0036L, + +4.874675753913887090927595832669806057728e-0035L, + -8.753388647708419088451160136858547852751e-0035L, + +1.428474399232791889269255113808220484160e-0035L, + +5.726277621107338954256562569347449057228e-0035L, + -3.225488314878041124559482227075035491317e-0035L, + +7.885354819060987732596552525237673513561e-0035L, + +8.408173673903719409751503836536882928318e-0035L, + +7.472287035756368381507824298193454239425e-0035L, + +7.997720282579343528943481360087007043974e-0036L, + -8.057784077336213905484849234629395332153e-0035L, + +1.421774675367058306549004020905308580426e-0035L, + +1.223248691422120500410974356032312699327e-0035L, + +8.969605507083003644736195721794664042146e-0035L, + -3.148039443508188441068606673984936704609e-0035L, + -5.092714604071534501324064251761157116236e-0035L, + -5.743199771592413656813385943270585886166e-0035L, + -4.392045140508377027909976608047950844300e-0035L, + +9.110675398490771556301866677631321964372e-0035L, + -3.703256901427284100951240077306435653503e-0035L, + +8.816741942974671427690982540513176913907e-0035L, + -3.838934169602835250375231286170331051923e-0036L, + 
-3.346295934196089154634089550801425121335e-0035L, + -3.921262677678607438391618849895555508099e-0035L, + -7.834039739637786725586449456859141775022e-0035L, + +7.468101863245698652060064034062436100558e-0035L, + +8.911091861895691845113559487616548179839e-0035L, + +3.941816063227189053043179714566870857491e-0035L, + -4.104811408858010482019343563832718161219e-0035L, + -2.316541945158215332638394475622094450115e-0035L, + -1.842831258152531940939933020370545982007e-0035L, + +7.147731654670948234541171201790940212220e-0035L, + +2.991450157843587466215363770701953452571e-0035L, +}; diff --git a/usr/src/lib/libm/common/Q/_TBL_cosl.c b/usr/src/lib/libm/common/Q/_TBL_cosl.c new file mode 100644 index 0000000000..58f7468f70 --- /dev/null +++ b/usr/src/lib/libm/common/Q/_TBL_cosl.c @@ -0,0 +1,192 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * table of cosl(x) where x is 0.15625 + i*0.03125, i=0,1,...,74. 
+ * {0x3ffc4000,0,0,0} --> (inc 0x800) --> {0x3ffe9000,0,0,0} + * 0.15625 0.03125 0.78125 (pi/4 = 0.785395663...) + */ + +#include "libm.h" + +const long double _TBL_cosl_hi[] = { + +9.878177838164719441005030343632113165093e-0001L, + +9.872023778548304903960885335116224443952e-0001L, + +9.865719083994975887573374074953084086015e-0001L, + +9.859263850706614357470592528694354441508e-0001L, + +9.852658177182138162042947097595789939359e-0001L, + +9.845902164215998060143951077820687364441e-0001L, + +9.838995914896639721783093514164872453367e-0001L, + +9.831939534604930725278757612989684275134e-0001L, + +9.824733131012552574873276832436224950147e-0001L, + +9.817376814080357763345961479047090031971e-0001L, + +9.809870696056691904693298964353096645569e-0001L, + +9.802214891475680962478518674217420182872e-0001L, + +9.794409517155483599985309545029874933258e-0001L, + +9.786454692196508678842676797432752842534e-0001L, + +9.778350537979597933319715729444545493330e-0001L, + +9.770097178164173848020456902145767884624e-0001L, + +9.761694738686352767239890354351355336967e-0001L, + +9.753143347757023264772798556222610938601e-0001L, + +9.744443135859889803497110560454343440501e-0001L, + +9.735594235749481714583125145098981012012e-0001L, + +9.726596782449127526709130582675652597851e-0001L, + +9.717450913248894676192664941325029643211e-0001L, + +9.708156767703494629474905457850460270255e-0001L, + +9.698714487630153449923440459169307761267e-0001L, + +9.689124217106447841445954494941892053405e-0001L, + +9.669500292306778220083416236105315034050e-0001L, + +9.649286191047710095810746653157483714001e-0001L, + +9.628483147093796998997010934802143646862e-0001L, + +9.607092430155619030666593505813134717046e-0001L, + +9.585115345812286273019694081549198217856e-0001L, + +9.562553235431752969755999422630283611690e-0001L, + +9.539407476088947339813247959876116228319e-0001L, + +9.515679480481722021454882173642709621657e-0001L, + +9.491370696844630276658474217621056230077e-0001L, + 
+9.466482608860533218460995072955329761108e-0001L, + +9.441016735570043456300176912531248599600e-0001L, + +9.414974631278810686445112360536708146537e-0001L, + +9.388357885462654886325783059847125541586e-0001L, + +9.361168122670552902942374110195085880318e-0001L, + +9.333407002425484356552992294699955265909e-0001L, + +9.305076219123142911494767922295554806411e-0001L, + +9.276177501928519096280307987999613501918e-0001L, + +9.246712614670360985021130145601387709996e-0001L, + +9.216683355733519181754113682027127142383e-0001L, + +9.186091557949182678378249777185498625801e-0001L, + +9.154939088483012285639177321802218816645e-0001L, + +9.123227848721178464920295420473417337577e-0001L, + +9.090959774154310516503817356844764174905e-0001L, + +9.058136834259364207445166606527002577088e-0001L, + +9.024761032379415049251832726758959994948e-0001L, + +8.990834405601384562165449292093793065380e-0001L, + +8.956359024631706989005700004462563503448e-0001L, + +8.921336993669944047239002537237885750767e-0001L, + +8.885770450280355433176090231160209800973e-0001L, + +8.849661565261432916972965369666479264236e-0001L, + +8.813012542513405991401619082981001728813e-0001L, + +8.775825618903727161162815826038296809401e-0001L, + +8.699847180584173888289155999014662429887e-0001L, + +8.621744799348805043671625102533242741250e-0001L, + +8.541537542773853851434517851051031764412e-0001L, + +8.459244992310679544597230785974932624246e-0001L, + +8.374887238505236853153533489172406171513e-0001L, + +8.288484876093257348101717901191166381510e-0001L, + +8.200058998972340082555506338765560425268e-0001L, + +8.109631195052179021895348039410807243520e-0001L, + +8.017223540984184506074926056529642078277e-0001L, + +7.922858596771785431415013237817093985302e-0001L, + +7.826559400262727969307874474281390259485e-0001L, + +7.728349461524715448108518459134251775639e-0001L, + +7.628252757105762505070987536254297918621e-0001L, + +7.526293724180664760545413248471431159893e-0001L, + 
+7.422497254585013069913472534496105367206e-0001L, + +7.316888688738208863118387530000845290150e-0001L, + +7.209493809456964180438127841484476879092e-0001L, + +7.100338835660796749741216439594902194333e-0001L, +}; + +const long double _TBL_cosl_lo[] = { + +4.742713078367058978924681076205264183648e-0035L, + -3.400922580038153352909034207677181560093e-0035L, + -2.473279499369853624762524012127207246323e-0035L, + -3.902320877004518000716232064546238578734e-0035L, + +2.265680295058180661415174977785279521173e-0035L, + -2.254772246444203259170588302104662991085e-0036L, + +2.734143189480662078104863307237612648780e-0035L, + -3.701912560693446438656202168446355677822e-0035L, + -1.649243588915575846254638680142303422320e-0035L, + +2.725042655698714891044457001868653187367e-0035L, + -1.908992594100964198869963315362783449712e-0036L, + -1.465547554627127716918860559012698704471e-0035L, + +4.428780565915607570668447972900679899952e-0035L, + +1.439313657623768907227720140857454695843e-0035L, + -3.792074229051804169372108537791927020038e-0035L, + -2.610779485320152706286660129045188117210e-0036L, + -2.877279742494815830479448606269854599891e-0035L, + +3.991065835589256680020290949615723238476e-0035L, + +3.099479059550534193045145385925483327991e-0035L, + +1.146611686911982702287167679510021879695e-0035L, + -3.917592318193149049660769585602527582231e-0035L, + -1.951971321999985008371800682574139933978e-0035L, + +2.974588209723938591252776820212028367960e-0035L, + -2.038390756570426530537115267786908745116e-0036L, + -5.536347061134619893988732877493263844943e-0036L, + -4.389722144327924120620880599904805370946e-0035L, + -3.666858326708207750024755456027611364938e-0035L, + +4.889869663833434507994220130518213362272e-0036L, + -5.870115582315839607120133516012219562069e-0036L, + +2.507707793716364811457350893931543805685e-0035L, + +3.216165721908659970511036451358372071749e-0035L, + +2.880756890524786020083959729246571876109e-0035L, + 
+6.368426285981156583087492887998846060579e-0036L, + +6.844339659916371522503091904688601360028e-0037L, + -4.329063396630008909415294204988246215817e-0035L, + +1.038125352401202296098224611721455839121e-0035L, + +3.207093666031656020715902410548849578474e-0036L, + -3.987580687739740313485850727522454807713e-0035L, + +3.404815912367106584354098624390321615909e-0035L, + -4.752557072516798311248008988313821999362e-0035L, + +2.745410885517329825733352856854160918801e-0035L, + +7.585203719163457562812011671268547121453e-0036L, + -4.141871248600318251086493472511758380472e-0035L, + -1.835879954339576229487102635414793218992e-0035L, + +2.976082827782744334600577457984098492775e-0035L, + -3.507755179553069548150909011683056358498e-0035L, + +7.869038865563736742679481321788455681309e-0036L, + +1.208860140284441557337760250856779527931e-0035L, + -3.609503076059411697756765630044671398302e-0035L, + +2.262828995013444190183062956802106020046e-0035L, + -2.067726154909043706666702751547519756391e-0035L, + +3.735937416598668830886204955423117851511e-0035L, + -1.107719376025673147326930792646924920884e-0035L, + +4.123542789546647314438136551770221119198e-0036L, + +4.533705702883256304420378263134621416396e-0035L, + -1.434191923121166877839456190096294453634e-0035L, + -2.894849601813639248551925385406988512004e-0035L, + -4.681686383005756267827413197921838600437e-0035L, + -3.715568183175335822345624718357717998947e-0035L, + -1.687075340130951528732220617225731715663e-0035L, + +1.980549471419898781791643429252740528544e-0035L, + +2.727619978720845330457777186773261559081e-0035L, + +1.430825081004965817190481755062397701422e-0035L, + -1.720088119552308234167243322979912469421e-0035L, + +1.104812928567944364260514024188043464704e-0035L, + +6.094878513052330893256279394589637408556e-0036L, + +2.475195582284731678792488916738076213891e-0035L, + +1.693320456792379194278077712885062541662e-0035L, + +3.949752293412116642372415347411469162440e-0035L, + 
+4.220674118886015050047489393823250795070e-0035L, + +3.713069586576631896654508643111045710544e-0035L, + -3.789252700498009135399234738712875263543e-0035L, + +1.482556375489316971849917102931986196306e-0035L, + +4.786912857336733794995363260508118324272e-0035L, + -4.096232247636924432208967529079024417475e-0035L, +}; diff --git a/usr/src/lib/libm/common/Q/_TBL_expl.c b/usr/src/lib/libm/common/Q/_TBL_expl.c new file mode 100644 index 0000000000..aac2030240 --- /dev/null +++ b/usr/src/lib/libm/common/Q/_TBL_expl.c @@ -0,0 +1,105 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * Table of constants for expl. + * By K.C. 
Ng, March 9, 1989 + */ + +#include "libm.h" + +const long double _TBL_expl_hi[] = { + +1.000000000000000000000000000000000000000e+0000L, + +1.021897148654116678234480134783299439782e+0000L, + +1.044273782427413840321966478739929008785e+0000L, + +1.067140400676823618169521120992809162607e+0000L, + +1.090507732665257659207010655760707978993e+0000L, + +1.114386742595892536308812956919603067800e+0000L, + +1.138788634756691653703830283841511254720e+0000L, + +1.163724858777577513813573599092185312343e+0000L, + +1.189207115002721066717499970560475915293e+0000L, + +1.215247359980468878116520251338798457624e+0000L, + +1.241857812073484048593677468726595605511e+0000L, + +1.269050957191733222554419081032338004715e+0000L, + +1.296839554651009665933754117792451159835e+0000L, + +1.325236643159741294629537095498721674113e+0000L, + +1.354255546936892728298014740140702804343e+0000L, + +1.383909881963831954872659527265192818002e+0000L, + +1.414213562373095048801688724209698078570e+0000L, + +1.445180806977046620037006241471670905678e+0000L, + +1.476826145939499311386907480374049923924e+0000L, + +1.509164427593422739766019551033193531420e+0000L, + +1.542210825407940823612291862090734841307e+0000L, + +1.575980845107886486455270160181905008906e+0000L, + +1.610490331949254308179520667357400583459e+0000L, + +1.645755478153964844518756724725822445667e+0000L, + +1.681792830507429086062250952466429790080e+0000L, + +1.718619298122477915629344376456312504516e+0000L, + +1.756252160373299483112160619375313221294e+0000L, + +1.794709075003107186427703242127781814354e+0000L, + +1.834008086409342463487083189588288856077e+0000L, + +1.874167634110299901329998949954446534439e+0000L, + +1.915206561397147293872611270295830887850e+0000L, + +1.957144124175400269018322251626871491190e+0000L, +}; + +const long double _TBL_expl_lo[] = { + +0.000000000000000000000000000000000000000e+0000L, + +1.805067874203309547455733330545737864651e-0035L, + -9.374520292280427421957567419730832143843e-0035L, + 
-1.596968447292758770712909630231499971233e-0035L, + +9.112493410125022978511686101672486662119e-0035L, + -6.504228206978548287230374775259388710985e-0035L, + -8.148468844525851137325691767488155323605e-0035L, + -5.066214576721800313372330745142903350963e-0035L, + -1.359830974688816973749875638245919118924e-0035L, + +9.497427635563196470307710566433246597109e-0035L, + -3.283170523176998601615065965333915261932e-0036L, + -5.017235709387190410290186530458428950862e-0035L, + -2.391474797689109171622834301602640139258e-0035L, + -8.350571357633908815298890737944083853080e-0036L, + +7.036756889073265042421737190671876440729e-0035L, + -5.182484853064646457536893018566956189817e-0035L, + +9.422242548621832065692116736394064879758e-0035L, + -3.967500825398862309167306130216418281103e-0035L, + +7.143528991563300614523273615092767243521e-0035L, + +1.159871252867985124246517834100444327747e-0035L, + +4.696933478358115495309739213201874466685e-0035L, + -3.386513175995004710799241984999819165197e-0035L, + -8.587318774298247068868655935103874453522e-0035L, + -9.605951548749350503185499362246069088835e-0035L, + +9.609733932128012784507558697141785813655e-0035L, + +6.378397921440028439244761449780848545957e-0035L, + +7.792430785695864249456461125169277701177e-0035L, + +7.361337767588456524131930836633932195088e-0035L, + -6.472995147913347230035214575612170525266e-0035L, + +8.587474417953698694278798062295229624207e-0035L, + +2.371815422825174835691651228302690977951e-0035L, + -3.026891682096118773004597373421900314256e-0037L, +}; diff --git a/usr/src/lib/libm/common/Q/_TBL_expm1l.c b/usr/src/lib/libm/common/Q/_TBL_expm1l.c new file mode 100644 index 0000000000..54044fe87c --- /dev/null +++ b/usr/src/lib/libm/common/Q/_TBL_expm1l.c @@ -0,0 +1,368 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * Table of constants for expm1l. + * By K.C. Ng, June 30, 1995 + */ + +#include "libm.h" + +const long double _TBL_expm1lx[] = { + +7.8124999999999999999999999999995004619e-03L, + +2.3437499999999999999999999999998790275e-02L, + +3.9062499999999999999999999999981095794e-02L, + +5.4687500000000000000000000000007637516e-02L, + +7.0312500000000000000000000000001673152e-02L, + +8.5937500000000000000000000000004285194e-02L, + +1.0156249999999999999999999999997969348e-01L, + +1.1718749999999999999999999999998630182e-01L, + +1.3281249999999999999999999999999246480e-01L, + +1.4843750000000000000000000000000096296e-01L, + +1.6406249999999999999999999999999947037e-01L, + +1.7968750000000000000000000000007190941e-01L, + +1.9531249999999999999999999999999874815e-01L, + +2.1093749999999999999999999999999877222e-01L, + +2.2656250000000000000000000000000019259e-01L, + +2.4218749999999999999999999999999800185e-01L, + +2.5781249999999999999999999999996335918e-01L, + +2.7343749999999999999999999999999903704e-01L, + +2.8906249999999999999999999999998362960e-01L, + +3.0468750000000000000000000000000240741e-01L, + +3.2031249999999999999999999999999542592e-01L, + 
+3.3593749999999999999999999999999619629e-01L, + +3.5156250000000000000000000000001040002e-01L, + +3.6718749999999999999999999999999316295e-01L, + +3.8281250000000000000000000000000871483e-01L, + +3.9843750000000000000000000000000385186e-01L, + +4.1406249999999999999999999999999975926e-01L, + +4.2968750000000000000000000000000385186e-01L, + +4.4531250000000000000000000000032119697e-01L, + +4.6093749999999999999999999999999956667e-01L, + +4.7656250000000000000000000000000134815e-01L, + +4.9218749999999999999999999999999388517e-01L, + +5.0781249999999999999999999999999778518e-01L, + +5.2343749999999999999999999999999325925e-01L, + +5.3906249999999999999999999999990649610e-01L, + +5.5468750000000000000000000000000452594e-01L, + +5.7031250000000000000000000000000645187e-01L, + +5.8593749999999999999999999999999807407e-01L, + +6.0156250000000000000000000000000182963e-01L, + +6.1718750000000000000000000000000048148e-01L, + +6.3281250000000000000000000000000943706e-01L, + +6.4843749999999999999999999999999470369e-01L, + +6.6406250000000000000000000000000192593e-01L, + +6.7968750000000000000000000000000693335e-01L, + +6.9531250000000000000000000000001482966e-01L, + +7.1093750000000000000000000000000154074e-01L, + +7.2656250000000000000000000000000385186e-01L, + +7.4218750000000000000000000000000115556e-01L, + +7.5781250000000000000000000000000048148e-01L, + +7.7343749999999999999999999999999942222e-01L, + +7.8906249999999999999999999999999720740e-01L, + +8.0468749999999999999999999999999634073e-01L, + +8.2031249999999999999999999999999797777e-01L, + +8.3593750000000000000000000000000828150e-01L, + +8.5156249999999999999999999999999865185e-01L, + +8.6718749999999999999999999999999682222e-01L, + +8.8281249999999999999999999999999749629e-01L, + +8.9843749999999999999999999999999836296e-01L, + +9.1406249999999999999999999999999229628e-01L, + +9.2968750000000000000000000000000462223e-01L, + +9.4531249999999999999999999999999499258e-01L, + 
+9.6093749999999999999999999999999894074e-01L, + +9.7656249999999999999999999999999855555e-01L, + +9.9218750000000000000000000000000028889e-01L, + +1.0078124999999999999999999999999870963e+00L, + +1.0234375000000000000000000000000003852e+00L, + +1.0390624999999999999999999999999998074e+00L, + +1.0546874999999999999999999999999801629e+00L, + +1.0703125000000000000000000000000182963e+00L, + +1.0859375000000000000000000000000021185e+00L, + +1.1015624999999999999999999999999978815e+00L, + +1.1171874999999999999999999999999986518e+00L, + +1.1328124999999999999999999999999984593e+00L, + +1.1484374999999999999999999999999980741e+00L, + +1.1640625000000000000000000000000000000e+00L, + +1.1796874999999999999999999999999998074e+00L, + +1.1953125000000000000000000000000196445e+00L, + +1.2109374999999999999999999999999976889e+00L, + +1.2265625000000000000000000000000017333e+00L, + +1.2421874999999999999999999999999976889e+00L, + +1.2578124999999999999999999999999980741e+00L, + +1.2734374999999999999999999999999951852e+00L, + -7.8125000000000000000000000000074012886e-03L, + -2.3437499999999999999999999999997418050e-02L, + -3.9062499999999999999999999999998134255e-02L, + -5.4687500000000000000000000000006361587e-02L, + -7.0312500000000000000000000000006271309e-02L, + -8.5937500000000000000000000000001721300e-02L, + -1.0156250000000000000000000000000020463e-01L, + -1.1718750000000000000000000000000290093e-01L, + -1.3281249999999999999999999999999987963e-01L, + -1.4843749999999999999999999999999942222e-01L, + -1.6406250000000000000000000000000026482e-01L, + -1.7968750000000000000000000000000052963e-01L, + -1.9531249999999999999999999999999867592e-01L, + -2.1093750000000000000000000000000081852e-01L, + -2.2656250000000000000000000000000250371e-01L, + -2.4218749999999999999999999999999718333e-01L, + -2.5781250000000000000000000000001059261e-01L, + -2.7343749999999999999999999999999557036e-01L, + -2.8906250000000000000000000000000520001e-01L, + 
-3.0468749999999999999999999999999792963e-01L, + -3.2031250000000000000000000000000231112e-01L, + -3.3593749999999999999999999999999383702e-01L, + -3.5156249999999999999999999999999903704e-01L, + -3.6718749999999999999999999999999634073e-01L, + -3.8281249999999999999999999999999956667e-01L, + -3.9843750000000000000000000000000004815e-01L, + -4.1406249999999999999999999999999870000e-01L, + -4.2968750000000000000000000000000216667e-01L, + -4.4531250000000000000000000000000072222e-01L, + -4.6093749999999999999999999999999513703e-01L, + -4.7656250000000000000000000000000539260e-01L, + -4.9218749999999999999999999999999759259e-01L, + -5.0781250000000000000000000000000067408e-01L, + -5.2343750000000000000000000000001386670e-01L, + -5.3906249999999999999999999999996427400e-01L, + -5.5468750000000000000000000000000404445e-01L, + -5.7031249999999999999999999999998536293e-01L, + -5.8593749999999999999999999999999634073e-01L, + -6.0156250000000000000000000000000028889e-01L, + -6.1718749999999999999999999999998468886e-01L, + -6.3281249999999999999999999999999335554e-01L, + -6.4843750000000000000000000000000356297e-01L, + -6.6406250000000000000000000000000019259e-01L, + -6.7968750000000000000000000000000067408e-01L, + -6.9531249999999999999999999999998439997e-01L, + -7.1093750000000000000000000000000009630e-01L, + -7.2656250000000000000000000000000414075e-01L, + -7.4218749999999999999999999999998738516e-01L, + -7.5781250000000000000000000000000019259e-01L, + -7.7343750000000000000000000000000808891e-01L, + -7.8906250000000000000000000000000028889e-01L, + -8.0468750000000000000000000000000096296e-01L, + -8.2031249999999999999999999999999634073e-01L, + -8.3593750000000000000000000000000086667e-01L, + -8.5156250000000000000000000000000115556e-01L, + -8.6718750000000000000000000000000028889e-01L, + -8.8281250000000000000000000000000577779e-01L, + -8.9843750000000000000000000000000587409e-01L, + -9.1406250000000000000000000000000654816e-01L, + 
-9.2968749999999999999999999999999210369e-01L, + -9.4531250000000000000000000000000115556e-01L, + -9.6093749999999999999999999999999682222e-01L, + -9.7656250000000000000000000000000298519e-01L, + -9.9218749999999999999999999999999711111e-01L, + -1.0078124999999999999999999999999953778e+00L, + -1.0234375000000000000000000000000279260e+00L, + -1.0390625000000000000000000000000000000e+00L, + -1.0546875000000000000000000000000042370e+00L, + -1.0703124999999999999999999999999969185e+00L, + -1.0859374999999999999999999999999965333e+00L, + -1.1015625000000000000000000000000144445e+00L, + -1.1171875000000000000000000000000007704e+00L, + -1.1328125000000000000000000000000052000e+00L, + -1.1484375000000000000000000000000021185e+00L, + -1.1640625000000000000000000000000188741e+00L, + -1.1796874999999999999999999999999915259e+00L, + -1.1953125000000000000000000000000196445e+00L, + -1.2109374999999999999999999999999965333e+00L, + -1.2265624999999999999999999999999940296e+00L, + -1.2421874999999999999999999999999955704e+00L, + -1.2578125000000000000000000000000036593e+00L, + -1.2734375000000000000000000000000005778e+00L, +}; +const long double _TBL_expm1l[] = { + +7.8430972064479776934535597601230757455e-03L, + +2.3714316602357916968850532165767719684e-02L, + +3.9835471336230000576622009875172135295e-02L, + +5.6210497316931971181336703768137839969e-02L, + +7.2843392434877444411300095010815562014e-02L, + +8.9738217538093231018282025418516874073e-02L, + +1.0689909742365748278760239374175648642e-01L, + +1.2433022184475071745173290544494115241e-01L, + +1.4203584653356558967616143134879189360e-01L, + +1.6002029424032514702921521389409692713e-01L, + +1.7828795578866324266314330819585528511e-01L, + +1.9684329114762477118219685277015441058e-01L, + +2.1569083052054745183001825454039531082e-01L, + +2.3483517545109100468401611087600122614e-01L, + +2.5428099994668375200482125309068587301e-01L, + +2.7403305161966092927649653871633746915e-01L, + +2.9409615284637330982611062681002354822e-01L, 
+ +3.1447520194454913428413492197987169290e-01L, + +3.3517517436919679261104091066359163835e-01L, + +3.5620112392734023305848825938576026618e-01L, + +3.7755818401188367036076223788924855083e-01L, + +3.9925156885490683578620594549518425777e-01L, + +4.2128657480069675555953993770759156716e-01L, + +4.4366858159882686275236684344276542291e-01L, + +4.6640305371759914220725806222619001343e-01L, + +4.8949554167816997960557644740137111180e-01L, + +5.1295168340968543562438375411433268970e-01L, + +5.3677720562575679548630956319776281677e-01L, + +5.6097792522261245434042965895343734385e-01L, + +5.8555975069926749109404773234379454282e-01L, + +6.1052868360005765883685767151390117210e-01L, + +6.3589081997988998017269448860547030419e-01L, + +6.6165235189256768193303746403901387346e-01L, + +6.8781956890255283724467962874956344875e-01L, + +7.1439885962053580513691701731111144172e-01L, + +7.4139671326318637019829227657113109938e-01L, + +7.6881972123746738864840365608134114398e-01L, + +7.9667457874989774401796242919906293119e-01L, + +8.2496808644115750689424843988747369733e-01L, + +8.5370715204643438037652998773947217833e-01L, + +8.8289879208191679750745518776052322714e-01L, + +9.1255013355784542053624989476899678486e-01L, + +9.4266841571854127598629758313676072782e-01L, + +9.7326099180983534572639774176307759762e-01L, + +1.0043353308743311241896996041635484486e+00L, + +1.0358990195749384471803078942778539846e+00L, + +1.0679597640471238000138982954032239149e+00L, + +1.1005253917803293237405302109843095228e+00L, + +1.1336038535290198596069082796475886372e+00L, + +1.1672032252538246054702826653026856058e+00L, + +1.2013317100932473053430319197695098859e+00L, + +1.2359976403664263564195928556324051853e+00L, + +1.2712094796074337986378900692396989892e+00L, + +1.3069758246316098519065681271390426992e+00L, + +1.3433054076344374874670823571053750423e+00L, + +1.3802070983234694643656048039370676504e+00L, + +1.4176899060838283316233663173665695732e+00L, + 
+1.4557629821778080933856445408266366139e+00L, + +1.4944356219791145601003068865880283870e+00L, + +1.5337172672422898656593276319684411587e+00L, + +1.5736175084078752204957296405891018055e+00L, + +1.6141460869438746959611789503655886752e+00L, + +1.6553128977240916980462361132966809382e+00L, + +1.6971279914439187908098398388318058200e+00L, + +1.7396015770741706739548391625061335312e+00L, + +1.7827440243535594070547844776995992896e+00L, + +1.8265658663204204072713762882878497127e+00L, + +1.8710778018843073303053045700866748054e+00L, + +1.9162906984380836781353147778063807290e+00L, + +1.9622155945111488641976761209920359164e+00L, + +2.0088637024644465094546949911966296733e+00L, + +2.0562464112279129437484158431835361592e+00L, + +2.1043752890810342484246997824723919209e+00L, + +2.1532620864771907009732038831535016010e+00L, + +2.2029187389124781729299756967091584890e+00L, + +2.2533573698397068911414662577557965622e+00L, + +2.3045902936282890023428920188543322512e+00L, + +2.3566300185707375845529318204056188573e+00L, + +2.4094892499365111287660263409108200844e+00L, + +2.4631808930739490736114262571853290077e+00L, + +2.5177180565610557168667363422785819682e+00L, + +2.5731140554059017538295927080989585090e+00L, + -7.7820617397564878940627738863895136168e-03L, + -2.3164975049937966141654020345517900132e-02L, + -3.8309398394574704340244721980137502162e-02L, + -5.3219029217871103345945692892173875140e-02L, + -6.7897507640472422098597150880870814431e-02L, + -8.2348417348184187852664478898998721220e-02L, + -9.6575286466913289047103004903893595054e-02L, + -1.1058158842404436382535801893754083366e-01L, + -1.2437074279646178545389116639858446817e-01L, + -1.3794611614542428546897208319214646321e-01L, + -1.5131102283849604551092782942638950286e-01L, + -1.6446872585873492869892658849405193342e-01L, + -1.7742243760133541028616024893906020644e-01L, + -1.9017532065792070445541830028432254804e-01L, + -2.0273048858867556872072433107848955384e-01L, + 
-2.1509100668250829875574108587424583890e-01L, + -2.2725989270542750384925893490015094814e-01L, + -2.3924011763731637587872084997993792064e-01L, + -2.5103460639728433199192216502316397159e-01L, + -2.6264623855777312240316411149867979990e-01L, + -2.7407784904759174916236707216223561007e-01L, + -2.8533222884405183877364930326430409924e-01L, + -2.9641212565437245046372839566093886675e-01L, + -3.0732024458652068208051596680746383072e-01L, + -3.1805924880965185639883349668276001738e-01L, + -3.2863176020431053139357768964876694788e-01L, + -3.3904036000255107819096872094974754682e-01L, + -3.4928758941813410931539692705947339063e-01L, + -3.5937595026695261691197788694876862518e-01L, + -3.6930790557783929525168292907013160482e-01L, + -3.7908588019390417343445352368401927192e-01L, + -3.8871226136454937222268435896035706784e-01L, + -3.9818939932830552279841068785563696096e-01L, + -4.0751960788663214438675145523141552015e-01L, + -4.1670516496882207157854692957232201487e-01L, + -4.2574831318814785027807613334156070311e-01L, + -4.3465126038938588296698375694570623290e-01L, + -4.4341618018785199889104786302733041801e-01L, + -4.5204521250008005232823143909224611574e-01L, + -4.6054046406627311177860364731167719334e-01L, + -4.6890400896465479423105583940253757841e-01L, + -4.7713788911784632111786130180348641941e-01L, + -4.8524411479139292568322765140558720714e-01L, + -4.9322466508456132479074344820321183415e-01L, + -5.0108148841352808120907176750144850100e-01L, + -5.0881650298707682468469397665961648404e-01L, + -5.1643159727492047118140006480748942245e-01L, + -5.2392863046876277909203259884139061144e-01L, + -5.3130943293621180856911747373956708600e-01L, + -5.3857580666765610494466923948125161529e-01L, + -5.4572952571621270908956480845806990458e-01L, + -5.5277233663085440607996537742519552762e-01L, + -5.5970595888282195827936433920573211501e-01L, + -5.6653208528542542950338319090135722295e-01L, + -5.7325238240733709291016556235278458118e-01L, + 
-5.7986849097947682625807457952262425338e-01L, + -5.8638202629558933380643321697780798620e-01L, + -5.9279457860661099402232802681583606392e-01L, + -5.9910771350892261602101192647121362362e-01L, + -6.0532297232658289493900937468812013977e-01L, + -6.1144187248763588685320419660027392238e-01L, + -6.1746590789458437705718363441235796711e-01L, + -6.2339654928911959113469156706514092208e-01L, + -6.2923524461119629598145525663487507890e-01L, + -6.3498341935254095737853086332102468069e-01L, + -6.4064247690467926157560858675744165996e-01L, + -6.4621379890156797026959945044832806294e-01L, + -6.5169874555691476103560779451668832748e-01L, + -6.5709865599626840836245660190447925603e-01L, + -6.6241484858396038364664323096343822726e-01L, + -6.6764862124497769549523628863318702845e-01L, + -6.7280125178184555417288842186006604551e-01L, + -6.7787399818659722569870081614203949644e-01L, + -6.8286809894790724165768340684449330930e-01L, + -6.8778477335346294994617200009875163606e-01L, + -6.9262522178764822913249147051072428209e-01L, + -6.9739062602461204459956440664519817754e-01L, + -7.0208214951679339786562882012396622127e-01L, + -7.0670093767897311117765076057926938575e-01L, + -7.1124811816792179736856679611852620191e-01L, + -7.1572480115771228979753508374143873445e-01L, + -7.2013207961076374868941367244540733250e-01L, +}; diff --git a/usr/src/lib/libm/common/Q/_TBL_ipio2l.c b/usr/src/lib/libm/common/Q/_TBL_ipio2l.c new file mode 100644 index 0000000000..27655109b6 --- /dev/null +++ b/usr/src/lib/libm/common/Q/_TBL_ipio2l.c @@ -0,0 +1,504 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * Table of constants for 2/pi, used in __rem_pio2l (trigl) function. + * By K.C. Ng, April 25, 1989 + */ + +#include "libm.h" + +const int _TBL_ipio2l_inf[] = { /* by DHBailey MP package */ + 0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62, + 0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A, + 0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129, + 0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41, + 0x3991D6, 0x398353, 0x39F49C, 0x845F8B, 0xBDF928, 0x3B1FF8, + 0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF, + 0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5, + 0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 0x5FB11F, 0x8D5D08, + 0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3, + 0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880, + 0x4D7327, 0x310606, 0x1556CA, 0x73A8C9, 0x60E27B, 0xC08C6B, + 0x47C419, 0xC367CD, 0xDCE809, 0x2A8359, 0xC4768B, 0x961CA6, + 0xDDAF44, 0xD15719, 0x053EA5, 0xFF0705, 0x3F7E33, 0xE832C2, + 0xDE4F98, 0x327DBB, 0xC33D26, 0xEF6B1E, 0x5EF89F, 0x3A1F35, + 0xCAF27F, 0x1D87F1, 0x21907C, 0x7C246A, 0xFA6ED5, 0x772D30, + 0x433B15, 0xC614B5, 0x9D19C3, 0xC2C4AD, 0x414D2C, 0x5D000C, + 0x467D86, 0x2D71E3, 0x9AC69B, 0x006233, 0x7CD2B4, 0x97A7B4, + 0xD55537, 0xF63ED7, 0x1810A3, 0xFC764D, 0x2A9D64, 0xABD770, + 0xF87C63, 
0x57B07A, 0xE71517, 0x5649C0, 0xD9D63B, 0x3884A7, + 0xCB2324, 0x778AD6, 0x23545A, 0xB91F00, 0x1B0AF1, 0xDFCE19, + 0xFF319F, 0x6A1E66, 0x615799, 0x47FBAC, 0xD87F7E, 0xB76522, + 0x89E832, 0x60BFE6, 0xCDC4EF, 0x09366C, 0xD43F5D, 0xD7DE16, + 0xDE3B58, 0x929BDE, 0x2822D2, 0xE88628, 0x4D58E2, 0x32CAC6, + 0x16E308, 0xCB7DE0, 0x50C017, 0xA71DF3, 0x5BE018, 0x34132E, + 0x621283, 0x014883, 0x5B8EF5, 0x7FB0AD, 0xF2E91E, 0x434A48, + 0xD36710, 0xD8DDAA, 0x425FAE, 0xCE616A, 0xA4280A, 0xB499D3, + 0xF2A606, 0x7F775C, 0x83C2A3, 0x883C61, 0x78738A, 0x5A8CAF, + 0xBDD76F, 0x63A62D, 0xCBBFF4, 0xEF818D, 0x67C126, 0x45CA55, + 0x36D9CA, 0xD2A828, 0x8D61C2, 0x77C912, 0x142604, 0x9B4612, + 0xC459C4, 0x44C5C8, 0x91B24D, 0xF31700, 0xAD43D4, 0xE54929, + 0x10D5FD, 0xFCBE00, 0xCC941E, 0xEECE70, 0xF53E13, 0x80F1EC, + 0xC3E7B3, 0x28F8C7, 0x940593, 0x3E71C1, 0xB3092E, 0xF3450B, + 0x9C1288, 0x7B20AB, 0x9FB52E, 0xC29247, 0x2F327B, 0x6D550C, + 0x90A772, 0x1FE76B, 0x96CB31, 0x4A1679, 0xE27941, 0x89DFF4, + 0x9794E8, 0x84E6E2, 0x973199, 0x6BED88, 0x365F5F, 0x0EFDBB, + 0xB49A48, 0x6CA467, 0x427271, 0x325D8D, 0xB8159F, 0x09E5BC, + 0x25318D, 0x3974F7, 0x1C0530, 0x010C0D, 0x68084B, 0x58EE2C, + 0x90AA47, 0x02E774, 0x24D6BD, 0xA67DF7, 0x72486E, 0xEF169F, + 0xA6948E, 0xF691B4, 0x5153D1, 0xF20ACF, 0x339820, 0x7E4BF5, + 0x6863B2, 0x5F3EDD, 0x035D40, 0x7F8985, 0x295255, 0xC06437, + 0x10D86D, 0x324832, 0x754C5B, 0xD4714E, 0x6E5445, 0xC1090B, + 0x69F52A, 0xD56614, 0x9D0727, 0x50045D, 0xDB3BB4, 0xC576EA, + 0x17F987, 0x7D6B49, 0xBA271D, 0x296996, 0xACCCC6, 0x5414AD, + 0x6AE290, 0x89D988, 0x50722C, 0xBEA404, 0x940777, 0x7030F3, + 0x27FC00, 0xA871EA, 0x49C266, 0x3DE064, 0x83DD97, 0x973FA3, + 0xFD9443, 0x8C860D, 0xDE4131, 0x9D3992, 0x8C70DD, 0xE7B717, + 0x3BDF08, 0x2B3715, 0xA0805C, 0x93805A, 0x921110, 0xD8E80F, + 0xAF806C, 0x4BFFDB, 0x0F9038, 0x761859, 0x15A562, 0xBBCB61, + 0xB989C7, 0xBD4010, 0x04F2D2, 0x277549, 0xF6B6EB, 0xBB22DB, + 0xAA140A, 0x2F2689, 0x768364, 0x333B09, 0x1A940E, 0xAA3A51, + 0xC2A31D, 0xAEEDAF, 
0x12265C, 0x4DC26D, 0x9C7A2D, 0x9756C0, + 0x833F03, 0xF6F009, 0x8C402B, 0x99316D, 0x07B439, 0x15200C, + 0x5BC3D8, 0xC492F5, 0x4BADC6, 0xA5CA4E, 0xCD37A7, 0x36A9E6, + 0x9492AB, 0x6842DD, 0xDE6319, 0xEF8C76, 0x528B68, 0x37DBFC, + 0xABA1AE, 0x3115DF, 0xA1AE00, 0xDAFB0C, 0x664D64, 0xB705ED, + 0x306529, 0xBF5657, 0x3AFF47, 0xB9F96A, 0xF3BE75, 0xDF9328, + 0x3080AB, 0xF68C66, 0x15CB04, 0x0622FA, 0x1DE4D9, 0xA4B33D, + 0x8F1B57, 0x09CD36, 0xE9424E, 0xA4BE13, 0xB52333, 0x1AAAF0, + 0xA8654F, 0xA5C1D2, 0x0F3F0B, 0xCD785B, 0x76F923, 0x048B7B, + 0x721789, 0x53A6C6, 0xE26E6F, 0x00EBEF, 0x584A9B, 0xB7DAC4, + 0xBA66AA, 0xCFCF76, 0x1D02D1, 0x2DF1B1, 0xC1998C, 0x77ADC3, + 0xDA4886, 0xA05DF7, 0xF480C6, 0x2FF0AC, 0x9AECDD, 0xBC5C3F, + 0x6DDED0, 0x1FC790, 0xB6DB2A, 0x3A25A3, 0x9AAF00, 0x9353AD, + 0x0457B6, 0xB42D29, 0x7E804B, 0xA707DA, 0x0EAA76, 0xA1597B, + 0x2A1216, 0x2DB7DC, 0xFDE5FA, 0xFEDB89, 0xFDBE89, 0x6C76E4, + 0xFCA906, 0x70803E, 0x156E85, 0xFF87FD, 0x073E28, 0x336761, + 0x86182A, 0xEABD4D, 0xAFE7B3, 0x6E6D8F, 0x396795, 0x5BBF31, + 0x48D784, 0x16DF30, 0x432DC7, 0x356125, 0xCE70C9, 0xB8CB30, + 0xFD6CBF, 0xA200A4, 0xE46C05, 0xA0DD5A, 0x476F21, 0xD21262, + 0x845CB9, 0x496170, 0xE0566B, 0x015299, 0x375550, 0xB7D51E, + 0xC4F133, 0x5F6E13, 0xE4305D, 0xA92E85, 0xC3B21D, 0x3632A1, + 0xA4B708, 0xD4B1EA, 0x21F716, 0xE4698F, 0x77FF27, 0x80030C, + 0x2D408D, 0xA0CD4F, 0x99A520, 0xD3A2B3, 0x0A5D2F, 0x42F9B4, + 0xCBDA11, 0xD0BE7D, 0xC1DB9B, 0xBD17AB, 0x81A2CA, 0x5C6A08, + 0x17552E, 0x550027, 0xF0147F, 0x8607E1, 0x640B14, 0x8D4196, + 0xDEBE87, 0x2AFDDA, 0xB6256B, 0x34897B, 0xFEF305, 0x9EBFB9, + 0x4F6A68, 0xA82A4A, 0x5AC44F, 0xBCF82D, 0x985AD7, 0x95C7F4, + 0x8D4D0D, 0xA63A20, 0x5F57A4, 0xB13F14, 0x953880, 0x0120CC, + 0x86DD71, 0xB6DEC9, 0xF560BF, 0x11654D, 0x6B0701, 0xACB08C, + 0xD0C0B2, 0x485551, 0x0EFB1E, 0xC37295, 0x3B06A3, 0x3540C0, + 0x7BDC06, 0xCC45E0, 0xFA294E, 0xC8CAD6, 0x41F3E8, 0xDE647C, + 0xD8649B, 0x31BED9, 0xC397A4, 0xD45877, 0xC5E369, 0x13DAF0, + 0x3C3ABA, 0x461846, 0x5F7555, 
0xF5BDD2, 0xC6926E, 0x5D2EAC, + 0xED440E, 0x423E1C, 0x87C461, 0xE9FD29, 0xF3D6E7, 0xCA7C22, + 0x35916F, 0xC5E008, 0x8DD7FF, 0xE26A6E, 0xC6FDB0, 0xC10893, + 0x745D7C, 0xB2AD6B, 0x9D6ECD, 0x7B723E, 0x6A11C6, 0xA9CFF7, + 0xDF7329, 0xBAC9B5, 0x5100B7, 0x0DB2E2, 0x24BA74, 0x607DE5, + 0x8AD874, 0x2C150D, 0x0C1881, 0x94667E, 0x162901, 0x767A9F, + 0xBEFDFD, 0xEF4556, 0x367ED9, 0x13D9EC, 0xB9BA8B, 0xFC97C4, + 0x27A831, 0xC36EF1, 0x36C594, 0x56A8D8, 0xB5A8B4, 0x0ECCCF, + 0x2D8912, 0x34576F, 0x89562C, 0xE3CE99, 0xB920D6, 0xAA5E6B, + 0x9C2A3E, 0xCC5F11, 0x4A0BFD, 0xFBF4E1, 0x6D3B8E, 0x2C86E2, + 0x84D4E9, 0xA9B4FC, 0xD1EEEF, 0xC9352E, 0x61392F, 0x442138, + 0xC8D91B, 0x0AFC81, 0x6A4AFB, 0xD81C2F, 0x84B453, 0x8C994E, + 0xCC2254, 0xDC552A, 0xD6C6C0, 0x96190B, 0xB8701A, 0x649569, + 0x605A26, 0xEE523F, 0x0F117F, 0x11B5F4, 0xF5CBFC, 0x2DBC34, + 0xEEBC34, 0xCC5DE8, 0x605EDD, 0x9B8E67, 0xEF3392, 0xB817C9, + 0x9B5861, 0xBC57E1, 0xC68351, 0x103ED8, 0x4871DD, 0xDD1C2D, + 0xA118AF, 0x462C21, 0xD7F359, 0x987AD9, 0xC0549E, 0xFA864F, + 0xFC0656, 0xAE79E5, 0x362289, 0x22AD38, 0xDC9367, 0xAAE855, + 0x382682, 0x9BE7CA, 0xA40D51, 0xB13399, 0x0ED7A9, 0x480569, + 0xF0B265, 0xA7887F, 0x974C88, 0x36D1F9, 0xB39221, 0x4A827B, + 0x21CF98, 0xDC9F40, 0x5547DC, 0x3A74E1, 0x42EB67, 0xDF9DFE, + 0x5FD45E, 0xA4677B, 0x7AACBA, 0xA2F655, 0x23882B, 0x55BA41, + 0x086E59, 0x862A21, 0x834739, 0xE6E389, 0xD49EE5, 0x40FB49, + 0xE956FF, 0xCA0F1C, 0x8A59C5, 0x2BFA94, 0xC5C1D3, 0xCFC50F, + 0xAE5ADB, 0x86C547, 0x624385, 0x3B8621, 0x94792C, 0x876110, + 0x7B4C2A, 0x1A2C80, 0x12BF43, 0x902688, 0x893C78, 0xE4C4A8, + 0x7BDBE5, 0xC23AC4, 0xEAF426, 0x8A67F7, 0xBF920D, 0x2BA365, + 0xB1933D, 0x0B7CBD, 0xDC51A4, 0x63DD27, 0xDDE169, 0x19949A, + 0x9529A8, 0x28CE68, 0xB4ED09, 0x209F44, 0xCA984E, 0x638270, + 0x237C7E, 0x32B90F, 0x8EF5A7, 0xE75614, 0x08F121, 0x2A9DB5, + 0x4D7E6F, 0x5119A5, 0xABF9B5, 0xD6DF82, 0x61DD96, 0x023616, + 0x9F3AC4, 0xA1A283, 0x6DED72, 0x7A8D39, 0xA9B882, 0x5C326B, + 0x5B2746, 0xED3400, 0x7700D2, 0x55F4FC, 
0x4D5901, 0x8071E0, + 0xE13F89, 0xB295F3, 0x64A8F1, 0xAEA74B, 0x38FC4C, 0xEAB2BB, + 0x47270B, 0xABC3A7, 0x34BA60, 0x52DD34, 0xF8563A, 0xEB7E8A, + 0x31BB36, 0x5895B7, 0x47F7A9, 0x94C3AA, 0xD39225, 0x1E7F3E, + 0xD8974E, 0xBBA94F, 0xD8AE01, 0xE661B4, 0x393D8E, 0xA523AA, + 0x33068E, 0x1633B5, 0x3BB188, 0x1D3A9D, 0x4013D0, 0xCC1BE5, + 0xF862E7, 0x3BF28F, 0x39B5BF, 0x0BC235, 0x22747E, 0xA247C0, + 0xD52D1F, 0x19ADD3, 0x9094DF, 0x9311D0, 0xB42B25, 0x496DB2, + 0xE264B2, 0x5EF135, 0x3BC6A4, 0x1A4AD0, 0xAAC92E, 0x64E886, + 0x573091, 0x982CFB, 0x311B1A, 0x08728B, 0xBDCEE1, 0x60E142, + 0xEB641D, 0xD0BBA3, 0xE559D4, 0x597B8C, 0x2A4483, 0xF332BA, + 0xF84867, 0x2C8D1B, 0x2FA9B0, 0x50F3DD, 0xF9F573, 0xDB61B4, + 0xFE233E, 0x6C41A6, 0xEEA318, 0x775A26, 0xBC5E5C, 0xCEA708, + 0x94DC57, 0xE20196, 0xF1E839, 0xBE4851, 0x5D2D2F, 0x4E9555, + 0xD96EC2, 0xE7D755, 0x6304E0, 0xC02E0E, 0xFC40A0, 0xBBF9B3, + 0x7125A7, 0x222DFB, 0xF619D8, 0x838C1C, 0x6619E6, 0xB20D55, + 0xBB5137, 0x79E809, 0xAF9149, 0x0D73DE, 0x0B0DA5, 0xCE7F58, + 0xAC1934, 0x724667, 0x7A1A13, 0x9E26BC, 0x4555E7, 0x585CB5, + 0x711D14, 0x486991, 0x480D60, 0x56ADAB, 0xD62F64, 0x96EE0C, + 0x212FF3, 0x5D6D88, 0xA67684, 0x95651E, 0xAB9E0A, 0x4DDEFE, + 0x571010, 0x836A39, 0xF8EA31, 0x9E381D, 0xEAC8B1, 0xCAC96B, + 0x37F21E, 0xD505E9, 0x984743, 0x9FC56C, 0x0331B7, 0x3B8BF8, + 0x86E56A, 0x8DC343, 0x6230E7, 0x93CFD5, 0x6A8F2D, 0x733005, + 0x1AF021, 0xA09FCB, 0x7415A1, 0xD56B23, 0x6FF725, 0x2F4BC7, + 0xB8A591, 0x7FAC59, 0x5C55DE, 0x212C38, 0xB13296, 0x5CFF50, + 0x366262, 0xFA7B16, 0xF4D9A6, 0x2ACFE7, 0xF07403, 0xD4D604, + 0x6FD916, 0x31B1BF, 0xCBB450, 0x5BD7C8, 0x0CE194, 0x6BD643, + 0x4FD91C, 0xDF4543, 0x5F3453, 0xE2B5AA, 0xC9AEC8, 0x131485, + 0xF9D2BF, 0xBADB9E, 0x76F5B9, 0xAF15CF, 0xCA3182, 0x14B56D, + 0xE9FE4D, 0x50FC35, 0xF5AED5, 0xA2D0C1, 0xC96057, 0x192EB6, + 0xE91D92, 0x07D144, 0xAEA3C6, 0x343566, 0x26D5B4, 0x3161E2, + 0x37F1A2, 0x209EFF, 0x958E23, 0x493798, 0x35F4A6, 0x4BDC02, + 0xC2BE13, 0xBE80A0, 0x0B72A3, 0x115C5F, 0x1E1BD1, 
0x0DB4D3, + 0x869E85, 0x96976B, 0x2AC91F, 0x8A26C2, 0x3070F0, 0x041412, + 0xFC9FA5, 0xF72A38, 0x9C6878, 0xE2AA76, 0x50CFE1, 0x559274, + 0x934E38, 0x0A92F7, 0x5533F0, 0xA63DB4, 0x399971, 0xE2B755, + 0xA98A7C, 0x008F19, 0xAC54D2, 0x2EA0B4, 0xF5F3E0, 0x60C849, + 0xFFD269, 0xAE52CE, 0x7A5FDD, 0xE9CE06, 0xFB0AE8, 0xA50CCE, + 0xEA9D3E, 0x3766DD, 0xB834F5, 0x0DA090, +}; + +#if 0 +const int _TBL_ipio2l_66[] = { + 0xA2F983, 0x6E4E44, 0x152A00, 0x062BC4, 0x0DA276, 0xBED4C1, + 0xFDF905, 0x5CD5BA, 0x767CEC, 0x1F80D6, 0xC26053, 0x3A0070, + 0x107C2A, 0xF68EE9, 0x687B7A, 0xB990AA, 0x38DE4B, 0x96CFF3, + 0x92735E, 0x8B34F6, 0x195BFC, 0x27F88E, 0xA93EC5, 0x3958A5, + 0x3E5D13, 0x1C55A8, 0x5B4A8B, 0xA42E04, 0x12D105, 0x35580D, + 0xF62347, 0x450900, 0xB98BCA, 0xF7E8A4, 0xA2E5D5, 0x69BC52, + 0xF0381D, 0x1A0A88, 0xFE8714, 0x7F6735, 0xBB7D4D, 0xC6F642, + 0xB27E80, 0x6191BF, 0xB6B750, 0x52776E, 0xD60FD0, 0x607DCC, + 0x68BFAF, 0xED69FC, 0x6EB305, 0xD2557D, 0x25BDFB, 0x3E4AA1, + 0x84472D, 0x8B0376, 0xF77740, 0xD290DF, 0x15EC8C, 0x45A5C3, + 0x6181EF, 0xC5E7E8, 0xD8909C, 0xF62144, 0x298428, 0x6E5D9D, + 0xF9A9B4, 0xCDBD2F, 0xC083E7, 0x0D3957, 0xECA3B2, 0x96223C, + 0xC1080D, 0x087D47, 0x7D7576, 0xA614B1, 0x42A4B6, 0xAA173C, + 0xE217E5, 0xFDCD34, 0x279D5F, 0x39AACA, 0x1CA8DF, 0x8B6633, + 0x5C49E4, 0xB56803, 0x1E7938, 0x741FDC, 0x4CB19B, 0xCECC3B, + 0x921EB7, 0x7C0FC3, 0x361F23, 0xF9EE22, 0xBA4235, 0xA5FCA3, + 0xBD4680, 0xFCDF65, 0xFC96AD, 0x31C90C, 0x919EEB, 0xFE0FB7, + 0x75B4B0, 0x693961, 0x75BCAA, 0xEB6F39, 0xA343C0, 0xD16FF2, + 0x33DAD0, 0xC1E095, 0x053182, 0x11E4A1, 0x40F943, 0x32D314, + 0xAF1B98, 0xE1B05A, 0xE5F3AD, 0x6E633F, 0x363D14, 0xA3777C, + 0xC8C6EE, 0x001E18, 0x0D180C, 0xAA1369, 0xEDFBA2, 0x998A9D, + 0x16E799, 0x693B75, 0x90EF50, 0x938DD4, 0xFB7ACD, 0x67CEEB, + 0x249DE3, 0x9B9B52, 0xD8CDAC, 0xC31A54, 0x855FBF, 0x848591, + 0x0954B0, 0x946B8C, 0xA4C7B4, 0x9A9E51, 0xF20425, 0xAA2637, + 0xFC6657, 0x7D8625, 0x620B74, 0x8B578D, 0xEC9A05, 0xDEF24F, + 0x7F19B0, 0xFC2544, 0x1DA0F1, 0x23790C, 
0xC4294D, 0x6D3C32, + 0x66FE56, 0xD45562, 0x66264F, 0xA24162, 0x13E930, 0xB0E7C0, + 0xFA0E97, 0xBFC62C, 0x0E663F, 0x90F33B, 0x55E73C, 0xD791F7, + 0xD3F00D, 0xAB01C7, 0x40CF8F, 0xA593BA, 0xE627D5, 0x4A8308, + 0x32DC06, 0x80C876, 0x1C3DB5, 0xB5489F, 0x632CDF, 0xB02517, + 0xD17EFA, 0x92570F, 0xFAED44, 0x8F8536, 0x27069B, 0xC014DC, + 0x997D48, 0x961D61, 0x7A960B, 0x31B622, 0xD3C425, 0xA69520, + 0x98D29E, 0xF1C973, 0x5483D7, 0x99611E, 0xEAFF5F, 0x7DEFF1, + 0x98475C, 0x91C787, 0x537E17, 0x068C65, 0xF05E52, 0x942F04, + 0x37CF92, 0xEF4223, 0xC4C52F, 0x521DAA, 0xBAAF97, 0x972236, + 0xA2B3D3, 0x62C921, 0x8D3A8B, 0x2B3302, 0x6061B9, 0x0CBE94, + 0x75F451, 0xBD06DE, 0x86042D, 0xFB61ED, 0x4C8869, 0x590232, + 0x479963, 0x23518D, 0xAF5D28, 0x60C9DE, 0x473DB0, 0x9DE009, + 0xD8FC4C, 0xE96991, 0x9CA455, 0x800BC8, 0x977CE0, 0xDCBFA6, + 0x19D249, 0xA0F76D, 0x5F9B2F, 0x452BB3, 0x77E091, 0xB6383A, + 0x7BE9C2, 0x4BF7C1, 0x8A5EBF, 0xEB0D55, 0x9AF4DC, 0x275CA0, + 0xED09D0, 0xE50A7F, 0xBEF42C, 0x4803AF, 0x56139F, 0xD58848, + 0x797D96, 0xB8352E, 0x49D90D, 0x7607E0, 0xC99256, 0x75F530, + 0xB72237, 0x1AF080, 0xC2E813, 0x06CFA9, 0xB9DF8E, 0x919C38, + 0x89D97E, 0x0464D5, 0xB12EEF, 0xD14165, 0x365A72, 0x550D35, + 0x3772D8, 0xF41B58, 0x0378A7, 0x2D5D7D, 0xD6E433, 0xDD2018, + 0x139FD7, 0x1B5621, 0x94E046, 0x97A323, 0x693176, 0x28DF59, + 0xD24273, 0x0E4E26, 0xA9A8F6, 0xF15B41, 0x450EE3, 0x57EA61, + 0x7DADA6, 0xF21086, 0x394BEE, 0x8F4813, 0x3FDEE9, 0xF3A53D, + 0xAB2F40, 0x8B1E2B, 0xA07FD4, 0x992CC4, 0x63532D, 0x9F35A2, + 0x6FA290, 0x0094DE, 0xD2A24D, 0x755B81, 0x79F9E1, 0xFE1D35, + 0xFEE8CC, 0x9224C5, 0x54E2CE, 0x41F31C, 0xF45138, 0xED6D10, + 0x6B439D, 0xD2BE46, 0xC327D4, 0x68BFB0, 0x46D5A5, 0x79B285, + 0x776D7C, 0xE18647, 0x00E32F, 0xEBB7F2, 0x5DE307, 0x5A8EA0, + 0x06CEFE, 0x20923C, 0x354CE1, 0xAD09C5, 0x56996D, 0xCFB124, + 0xEF7BC1, 0x76BF72, 0xF20753, 0x5BBAFA, 0xB8A2B2, 0x5914F2, + 0x5D834F, 0xE64A08, 0x14C3AB, 0x07796B, 0xF2212D, 0xC74049, + 0xB61C6A, 0x282CFC, 0x25070C, 0x315BF1, 0x6FEAD3, 
0x2CD2E5, + 0xD10F9C, 0x1972BB, 0x908073, 0x0F368C, 0x69BE97, 0xA242B0, + 0x722DFE, 0xAFE6A2, 0x143D8B, 0x5C5699, 0x48232B, 0xFF49AC, + 0xB5FA62, 0x6AD778, 0x7A844D, 0x258AA0, 0x8EDE3D, 0x9A9496, + 0x49924E, 0xA33E97, 0x4F43FA, 0xC40741, 0x2F764A, 0x8EB2B1, + 0x8E67D3, 0x9FF324, 0x51B11B, 0x5D6E09, 0xE9AD3E, 0xFFA902, + 0xF48653, 0x0845D3, 0xDED33E, 0x32D30E, 0x6247CA, 0x7C586D, + 0x2EAF9E, 0x323A35, 0xAD11FB, 0x0F420C, 0x0E0685, 0x401B60, + 0xBB3D43, 0xF4D489, 0xBCDC4C, 0x40FFBA, 0x18AB08, 0x7AC72D, + 0x5E76DB, 0xE8344E, 0x3975A2, 0xF9611B, 0x1121F3, 0x3A429C, + 0x9B18EC, 0xF298B1, 0x8AEC78, 0x1C248B, 0x69108F, 0xDB2D37, + 0xA1A613, 0x910359, 0x521451, 0xD4441F, 0x0BB3B6, 0x50D9DB, + 0xBD589F, 0x62A62E, 0xA9B903, 0x935F63, 0x058BEC, 0x78BCB5, + 0x2CB460, 0x3A9037, 0x0291C4, 0x1FABC1, 0xBE7D05, 0xF948E7, + 0x6BA5CD, 0xF62A0A, 0x9AEA19, 0x2257AB, 0x2E0D7D, 0x9EB93F, + 0x5E3F77, 0xD4A13F, 0x08E3DB, 0xDFD689, 0x2B9B4E, 0xB58427, + 0x25424B, 0x1197FD, 0xCF298A, 0x314008, 0xD5687F, 0x0F0EAC, + 0x13C485, 0xF684B2, 0xED7EC7, 0x6E636D, 0x28C933, 0xE19058, + 0x688B6A, 0xC88905, 0xFB2F31, 0x61304C, 0xC19765, 0x60D81A, + 0x57F276, 0xC6EFC4, 0x048954, 0x303470, 0xDA6F6F, 0x93901A, + 0x911439, 0x363D12, 0x59E72B, 0x6F9F1E, 0x57C584, 0xDF0D23, + 0xBB743F, 0xADE99C, 0x546097, 0xFCC820, 0xCBB968, 0xDA9B5F, + 0x0DC271, 0x563337, 0x9ED662, 0xE7C44F, 0x3129F8, 0xF5EAF9, + 0xDAF7F2, 0xCD09FF, 0xA92535, 0x441C29, 0x7DF436, 0xE2B00A, + 0x36746F, 0xF1DC61, 0x9D3C9C, 0x63AB71, 0xB8F3BB, 0x1C80F6, + 0x62FF65, 0x5FFE5F, 0x3B2814, 0xBADE27, 0x1B384B, 0x268AA9, + 0xBD91EF, 0xCA436B, 0xABE107, 0x88DCA6, 0xC3AFC0, 0x85D155, + 0x464A48, 0xBFDAEB, 0xC6F389, 0x907C11, 0x0D3E41, 0xCD2197, + 0x549008, 0x817E4E, 0x8C7154, 0x1DC37F, 0x5E897E, 0xA9A2FE, + 0xEC6060, 0xCC0728, 0x430D3B, 0x62471C, 0xD3A4D3, 0x2BA57B, + 0xE5D15A, 0xD632F3, 0xF2B76F, 0xEC8498, 0xAE41C2, 0xAAF413, + 0xEAF5C0, 0xDD1B07, 0xB9A2A0, 0x59F230, 0xA3F61B, 0x8F8643, + 0x05DE6B, 0x1B5B8E, 0x63ECC5, 0xBFF01D, 0x8F1440, 0x3F8ADF, + 
0x2E6539, 0xF3DB7A, 0x293FE5, 0x7EE714, 0x88E6D8, 0x2B2A6A, + 0xDF6E34, 0x8D4604, 0x4F6594, 0x639063, 0x6B51CC, 0x0D05CD, + 0x009607, 0xE7BF70, 0xC9A0EA, 0x0D80DD, 0xA1A065, 0x0DCB8F, + 0xA48430, 0x715934, 0x6FC8E4, 0x6FFC52, 0xEF8B05, 0xDE506A, + 0xE62BBC, 0x31480F, 0xEA64EA, 0x51E6FB, 0x9AE773, 0x21C54D, + 0xBFA080, 0x273D1E, 0x9FFD4E, 0x0C2CA8, 0x0690A5, 0xF8773B, + 0x4B2680, 0x6E3F56, 0xC8B89F, 0x0B7BD0, 0x71C8BF, 0x5AABD3, + 0x2BA93E, 0x9D2EE1, 0xCDF2FA, 0xEE57BE, 0x84A116, 0xDA756D, + 0x8FD6C0, 0x927153, 0xFF5EF3, 0x9F8331, 0x713411, 0xF945F3, + 0x0382B2, 0x8BAE30, 0xBC45A4, 0x630101, 0x5C9C3A, 0x643CFD, + 0x48115C, 0x17F03E, 0xB5F55E, 0x288DAF, 0x725660, 0xFB58E0, + 0xFC189E, 0x1ECA69, 0xFB19A6, 0xFA7A92, 0x7CC48E, 0x869372, + 0x58089A, 0x16DB5C, 0xADC0CD, 0x09D3D4, 0xD1108E, 0xDC64ED, + 0x3A999C, 0xAA8716, 0x5A3D8E, 0x7037FB, 0x1976AD, 0xE477D7, + 0x23782B, 0xC51F39, 0x4A5E9A, 0xDAD9DA, 0xE5B559, 0x08EF06, + 0x76E24F, 0x7361AD, 0x5F42A3, 0x9B70E5, 0xCE96C4, 0x552E99, + 0x6D7A6F, 0x804474, 0x4FA45B, 0x1D115B, 0x6D109E, 0x0A1A63, + 0x1084A6, 0xE18E5D, 0x2D8589, 0x203345, 0x4851AF, 0xA71EDC, + 0x03B6B1, 0x267970, 0xDEC908, 0x795BED, 0x7099B9, 0x209321, + 0x7FC2E7, 0x0F3E5E, 0xC7A4F4, 0x088129, 0x59AE63, 0x4E3251, + 0x344268, 0x79285D, 0x2B9494, 0xF1E2A2, 0xF7DA20, 0xDF6756, + 0xCA3BA3, 0x422489, 0xA2239C, 0x38724D, 0x2AC767, 0x601E9D, + 0xB47C6C, 0xA22481, 0xBBB655, 0x1EC0C4, 0xD84A97, 0xD449EE, + 0x162C9D, 0x782F29, 0xCEB4FA, 0xE317BC, 0x2FFDBD, 0xB342D2, + 0xB2CB19, 0x323AB9, 0x1AFF93, 0x13A8DF, 0x86B5A5, 0x5741D6, + 0xC54342, 0x3CAC29, 0xF7517C, 0x129A7A, 0xB2B8B4, 0x9B709F, + 0x3923C5, 0xEAFA6E, 0xDB9077, 0x29EEA0, 0x702D8C, 0x4DC14F, + 0xE46933, 0xA764E4, 0x754266, 0xFA4F98, 0x643DA5, 0xCA775C, + 0x7F1632, 0xE671A3, 0x4BF4C6, 0xA82378, 0xEFD317, 0xE62D38, + 0xD461C9, 0x8EEC80, 0xC89882, 0x4CC73C, 0x830F3F, 0xE4B200, + 0x582615, 0x6CD558, 0xA66727, 0xEF7975, 0xFEA5CE, 0x147A40, + 0x4796E4, 0xC07761, 0xF5D5B3, 0x6B65FB, 0xE4F14D, 0xA837CA, + 0x9A152A, 
0x554E94, 0x83EC5F, 0xA62174, 0x85E2ED, 0xCCE71C, + 0x3540FF, 0x088A84, 0xBA2816, 0x293610, 0x4C3EE7, 0x8E55A9, + 0x49E5E5, 0x782178, 0x45D2AA, 0x9BB449, 0x00D282, 0xF61E67, + 0xE2F7DE, 0xCC6AA1, 0xCD1979, 0x52FEDB, 0x9A8776, 0x70A018, + 0x500271, 0x1273BA, 0xDE648E, 0x7AC7F7, 0x767725, 0xD0A457, + 0xF17250, 0xBC578C, 0x2DFD3A, 0x97F988, 0xA576C8, 0x8129BB, + 0x22D9C3, 0x0436ED, 0x650791, 0xA314EC, 0x42A0B3, 0x37A521, + 0x4BFB2B, 0x8C1B7F, 0x115E17, 0xF7C27F, 0xC1D5EB, 0x060487, + 0x8A28D6, 0x41330F, 0xBFAE67, 0x7774E8, 0x4CCC3C, 0x6B2F80, + 0x628BF2, 0x1E41A6, 0x8D0B22, 0xBC85BA, 0xCCF461, 0xBEC69C, + 0xDF8A10, 0x3C5E71, 0x2F8D5F, 0x63D3DA, 0x5934D1, 0x2CA35D, + 0xC687A2, 0x24E9B4, 0x1843D3, 0x5C9B97, 0x9B580C, 0x780B2C, + 0x59943D, 0x0744D0, 0x8DA6E3, 0x07AAF6, 0x2214D0, 0x72E8D7, + 0x54151B, 0x514DE9, 0x8DCC3B, 0x0CEB00, 0x2C4DE3, 0x5012AE, + 0xD7B72E, 0xB7DE9A, 0x641B2F, 0xF9CF17, 0x8BD282, 0x9F31A3, + 0xDED846, 0x467E05, 0x26CCEA, 0xF8E404, 0x65572E, 0x82C594, + 0xE572A9, 0x895653, 0xA1AA94, 0x8DD876, 0x5E9A61, 0x69EB1C, + 0x0385A9, 0x5BC844, 0x95B2DF, 0x6678F6, 0xFA7033, 0xE4F434, + 0x5584A9, 0x32C099, 0x9AD846, 0xB3FFD1, 0xA81C56, 0x4E54EF, + 0x54D173, 0xF191B4, 0x49B2A2, 0xB309D9, 0x546D8D, 0xC0A51E, + 0xCAFFC0, 0x785400, 0x05F69D, 0x894056, 0xC33098, 0xDFF6C2, + 0x908D97, 0x05CC96, 0x46484B, 0xBD7B9D, 0xB152F5, 0x5A7461, + 0x59CA20, 0x8F8EF5, 0xC9FF05, 0xF6F398, 0x856C97, 0x81E07C, + 0xAE5EDA, 0x51BDC9, 0xF26437, 0xBBC8CE, 0x091B52, 0x68B6A5, + 0x90750E, 0x925EF9, 0x3D9CB3, 0x46EA96, 0x97D648, 0x78BCC7, + 0xF4B488, 0x05275E, 0x6619DF, 0x56D4A0, 0x8C5C41, 0xDB345A, + 0x0B79DA, 0x496369, 0x96109B, 0x667664, 0xC40CF9, 0x91D7CA, + 0x119F1A, 0xA99272, 0xCBB529, 0xBB033E, 0x8F91C0, 0x570045, + 0xB845C2, 0x2B8E52, 0x687AFB, 0x0D0AA3, 0x200863, 0x043B83, + 0xF129DE, 0x49C2D6, 0x9641D2, 0xC4747C, 0x220804, 0x503F05, + 0x7E274F, 0xCA83D9, 0x9D6495, 0x0E5039, +}; +const int _TBL_ipio2l_53[] = { + 0xA2F983, 0x6E4E44, 0x16F3C4, 0xEA69B5, 0xD3E131, 0x60E1D2, + 0xD7982A, 
0xC031F5, 0xD67BCC, 0xFD1375, 0x60919B, 0x3FA0BB, + 0x612ABB, 0x714F9B, 0x03DA8A, 0xC05948, 0xD023F4, 0x5AFA37, + 0x51631D, 0xCD7A90, 0xC0474A, 0xF6A6F3, 0x1A52E1, 0x5C3927, + 0x3ADA45, 0x4E2DB5, 0x64E8C4, 0x274A5B, 0xB74ADC, 0x1E6591, + 0x2822BE, 0x4771F5, 0x12A63F, 0x83BD35, 0x2488CA, 0x1FE1BE, + 0x42C21A, 0x682569, 0x2AFB91, 0x68ADE1, 0x4A42E5, 0x9BE357, + 0xB79675, 0xCE998A, 0x83AF8B, 0xE645E6, 0xDF0789, 0x9E9747, + 0xAA15FF, 0x358C3F, 0xAF3141, 0x72A3F7, 0x2BF1D4, 0xF3AD96, + 0x7D759F, 0x257FCE, 0x29FB69, 0xB1B42C, 0xC32DE1, 0x8C0BBD, + 0x31EC2F, 0x942026, 0x85DCE7, 0x653FF3, 0x136FA7, 0x0D7A5F, + 0x93FC61, 0x035287, 0xC77FCA, 0x73530A, 0xC6BC15, 0x0E4B0F, + 0x568FCE, 0x2D3456, 0x4D7FE1, 0xA12CD1, 0xB2CEA2, 0x531C62, + 0x70B4D2, 0x1BCE9A, 0x87704D, 0x6B83D7, 0xAA8121, 0x2530EA, + 0x2074BF, 0x28A071, 0x9D69C3, 0x406DD8, 0xF58783, 0x115D89, + 0x5E85F3, 0xAACDCC, 0x8C0B57, 0xD7DFFE, 0x550D96, 0xC43EB4, + 0x89ABA7, 0x94F595, 0x56F260, 0x06A4CD, 0x7FD2E2, 0x6FDFA8, + 0x3E9C98, 0xBFD682, 0xAD3A12, 0x23A8A6, 0x173A89, 0x5DE9BD, + 0x95A978, 0x28E484, 0x5964F3, 0x496AF0, 0x4B1DA9, 0x989061, + 0xBD2BF2, 0xE01A90, 0x0905B7, 0xAC39AC, 0x52D5B7, 0x109F25, + 0x3AE1DC, 0xF90A7C, 0x33F4E5, 0xF5DFDF, 0x1522D0, 0x562CE6, + 0x392CFF, 0xEB9032, 0x10A08E, 0x0B1D7F, 0x42B80A, 0x366DD2, + 0xC24F89, 0x02222E, 0x21494C, 0x985287, 0x87FD07, 0x2EE361, + 0xAD8D68, 0xE72273, 0x9E8D59, 0xD09999, 0x10F4A1, 0x1079A3, + 0xE9BEAF, 0x9C0887, 0x09C622, 0xEBCF06, 0x974532, 0x086A8F, + 0x6CEA05, 0x388C00, 0x74969E, 0xC85B16, 0x385A38, 0x9A2F35, + 0x670531, 0xABA6D0, 0xEFD3C1, 0x27AD92, 0xF4203E, 0x3D619F, + 0x4D05F4, 0x9AE7CC, 0x03B592, 0x41FF55, 0xCAFCA5, 0x1A0987, + 0x88AB79, 0x3627D4, 0x25B12A, 0x52594A, 0xA2BEB0, 0x25C3F2, + 0x4489DA, 0x7959A7, 0xEAEC89, 0xB34714, 0x960196, 0x1FC33A, + 0x7F0275, 0x32EF92, 0x0111CE, 0x8E4685, 0x6F5B34, 0xF6123A, + 0x5543B2, 0xE9A02A, 0x74E03F, 0x54D5A8, 0x086A2C, 0x4A9CD3, + 0x921191, 0x229764, 0x0A1A84, 0x9B45AE, 0xC653A5, 0xB15F33, + 0x100FD1, 0x7DD740, 
0xB20CD3, 0x0A0786, 0xF506C3, 0x25EBF4, + 0x3AB39E, 0xE3BB24, 0x27646F, 0xEECE57, 0x706BFE, 0xC7A869, + 0x57ED51, 0x118C82, 0x2B0FF5, 0xC8E545, 0xC43D80, 0x2A3183, + 0x4C1BB9, 0xBC108A, 0x099779, 0xF9ECC8, 0x2A1063, 0x5D2F6A, + 0x8F2675, 0x12FF6D, 0x32EED9, 0xE4A245, 0x7392CF, 0x5C240B, + 0xC476FF, 0x97AFC7, 0xB76131, 0x665E05, 0x67BD57, 0x19E998, + 0x3A5863, 0x23B8AA, 0x5B5608, 0x8A66C6, 0x5F2AD3, 0x78BAFA, + 0x3516CE, 0xCBEA16, 0x6E40D4, 0xB463D4, 0xA6C12F, 0xABD3D7, + 0x32650A, 0x579D10, 0x3CB9E2, 0x1A02A7, 0xDF2FFA, 0x28C991, + 0xB2264C, 0x027870, 0x47BDD4, 0xF243B1, 0x39AE2C, 0x282EA4, + 0xAF1D98, 0x2AFD16, 0xABE7AF, 0x17CB67, 0x8FF93E, 0x793167, + 0x435F6B, 0x48058B, 0x417DA0, 0xE01217, 0x085A69, 0xB50E36, + 0x79A4CD, 0xD74907, 0x26C4B5, 0xB90054, 0x06C3AD, 0x5AB38F, + 0x585E91, 0xD04E4F, 0x2938CE, 0xD4EAA7, 0xA06DE5, 0x40BFE5, + 0xDE6849, 0xEF65F0, 0xF1D4BB, 0x94C21E, 0x66E978, 0x1B9B94, + 0x961043, 0x5961B8, 0xBAAA74, 0xD662EE, 0x9DABF6, 0x0AFE28, + 0x9587A4, 0xA632BC, 0x09149F, 0xDEA996, 0x2CAFD7, 0xBDE29B, + 0x7159E6, 0x1F7C49, 0xF2E2ED, 0xBFA992, 0x7C77EF, 0xC245D0, + 0xB2D129, 0x993E75, 0xAB4C0C, 0x5C84B6, 0x17F542, 0x45314E, + 0x1DEF1B, 0xE3BDCC, 0xB3AE86, 0x24522F, 0x918FC6, 0x2138D5, + 0x883646, 0x6858B6, 0x032762, 0x5170F8, 0x4974EA, 0x76BF77, + 0xECDA8A, 0x9EADDD, 0x2404EF, 0xC52A5D, 0xF2E858, 0xC42D60, + 0xD18C08, 0xDE59B2, 0x4CC3A6, 0x94D888, 0x4C4AF0, 0xCF1F8C, + 0xBF2F6F, 0x7B4535, 0x98B0DB, 0x2BE0CF, 0x4616A7, 0xA8D9FB, + 0x88CA7A, 0x5087E1, 0x18DD8A, 0x1A9F4F, 0x1DCECE, 0xF8609E, + 0xE2F0C8, 0x9AD7D4, 0xE3CDFE, 0xC6FDD5, 0x8FF3CD, 0x7D45AA, + 0xD34957, 0x7C1963, 0x6CE098, 0xB70215, 0x326BBF, 0x47B3A6, + 0xF9235D, 0x6F66F5, 0xC6E40C, 0xE7F50B, 0xFF2FDD, 0x5A1251, + 0xE95EF1, 0xDE8E67, 0xECEE9B, 0xC9F98E, 0x722224, 0x6DF750, + 0x81D08F, 0x2BFCF0, 0xDDC10D, 0x775314, 0xDB1D87, 0x41626B, + 0x9EDF31, 0x7738D9, 0x8D9EB4, 0x4F1C2A, 0xF3E795, 0xB69699, + 0xD9A56D, 0x31BB1B, 0x542975, 0xAB917B, 0x63927C, 0x9BB764, + 0x84A598, 0x0A0C51, 0x5E48C4, 
0x7780E3, 0x87E156, 0x155972, + 0xE406F8, 0x48AB9E, 0x3CCDDA, 0x010F87, 0x683B70, 0x400CAD, + 0x5DE5C5, 0x7262FA, 0xFA248D, 0x013AF2, 0xE2E8B5, 0x995F7D, + 0x7F8C4B, 0x0E8B59, 0x1006F1, 0x40B6E9, 0x760654, 0xCBCC8C, + 0x086F40, 0xDC7F6F, 0xFCD0D4, 0xA47ADE, 0x5204FA, 0xF38A9D, + 0xE76C7C, 0x575207, 0x499BF1, 0x0DB01C, 0x09098E, 0x957A71, + 0xD53E0E, 0x61DF1D, 0xE6EF34, 0x5821EC, 0x96BCC0, 0xDC96CE, + 0xA9C0AE, 0x130B2C, 0xCCC589, 0x829BB9, 0x2A75BA, 0x97611C, + 0x0CEAB8, 0x165D9D, 0x35AD41, 0x82A805, 0x975628, 0x5601A6, + 0x074F08, 0x80A27D, 0xEFA64E, 0xD7BB4B, 0x5E6397, 0xC92FFC, + 0x4F3F7A, 0xBEA764, 0x0C9B7D, 0xC5DC74, 0xEAD216, 0x6DBBC0, + 0x913E3E, 0xABF50B, 0x95B24A, 0x3FC9C5, 0xE7BA15, 0x8C7F70, + 0xF81358, 0x774606, 0xCE8C0D, 0xB6B268, 0xB85BA6, 0xAC9B2E, + 0x1AAB05, 0x0C6C82, 0x6EC2AE, 0x606874, 0x8F60BF, 0x1FBC7B, + 0x58C97A, 0x448794, 0xBA48A0, 0x72E882, 0x6D3568, 0xE131FD, + 0x4745D0, 0x0BFA1E, 0x07B01D, 0x474D43, 0x59387E, 0x5B0AD5, + 0xC37A8C, 0x0474E8, 0x13D99D, 0x68A13C, 0xB69118, 0x89228C, + 0x6F7D83, 0x86D665, 0x5C7744, 0xDD183E, 0x1C2E17, 0x712F5E, + 0x4AACCB, 0xB69B68, 0xA1201F, 0x743C2B, 0xF6AD70, 0x92E024, + 0xF34FD8, 0x33712E, 0xFE1D73, 0x4471F0, 0x7D0526, 0x58AF47, + 0x7B11FE, 0x1FCE4F, 0x1356C9, 0x9CE3CA, 0xA843C0, 0x8EEA3C, + 0xABEEE4, 0xA5D495, 0xA407A4, 0x31BB4B, 0x0AA1E3, 0x518E7C, + 0xAA4A66, 0xD82CD8, 0x6EF8D2, 0x6F32E6, 0x1DC26B, 0x17AE59, + 0x4B683B, 0x8D48F7, 0xF4FBD8, 0xD4FE0A, 0xE961DE, 0x87BD37, + 0xE6CCD6, 0xCBD76D, 0x3E99DE, 0xB72E21, 0x54EB90, 0x6AB45D, + 0x600AFB, 0xA17B2F, 0xDA0421, 0xE6CA95, 0x35AAA2, 0x7D8FB1, + 0x3207BB, 0xBF82EE, 0x71F55F, 0xC661CB, 0xBD72A1, 0xBF5A64, + 0x6E39E8, 0x6C6DE2, 0x2BD178, 0xAF62A5, 0xA7D86E, 0xE7D0FE, + 0x84DB03, 0x67FDA2, 0x2D6809, 0x0F8B8F, 0x1B50E3, 0x234EF5, + 0x7325ED, 0x8F8F4C, 0xC1E426, 0x3066AD, 0x0759A4, 0xE03390, + 0x70CC9A, 0x524F77, 0xCDD489, 0x97DD24, 0xA81858, 0xF24513, + 0xA9C18E, 0x2A2F82, 0xC2C014, 0xB8E7F0, 0x934036, 0xD36E51, + 0xD9A089, 0xDBC587, 0xB30418, 0x969192, 
0x0A5213, 0xE21841, + 0x2881EC, 0x9A293F, 0x0DF705, 0x85B497, 0xE430B9, 0xE90ECF, + 0xC15FDC, 0x9E8A7E, 0xC5472D, 0xB54FBD, 0x456AF2, 0xCA80B6, + 0xAE25FE, 0xA03B46, 0x6C6CFD, 0x78382A, 0x0E7877, 0x7F2D31, + 0x03C827, 0x61CF52, 0x339A2F, 0x2286A9, 0xE41DF0, 0x640F5C, + 0xBEF364, 0x010506, 0x6D2C21, 0x841EFF, 0x7F3B5D, 0xD98DC8, + 0x0F9421, 0xA25B0C, 0x4C2C44, 0x922392, 0xB98A8A, 0x6179B9, + 0xF7B419, 0x289AAF, 0xE92F47, 0x5E47A2, 0x82927F, 0xC7290E, + 0x6C925C, 0xBA5A3C, 0x8FB7F6, 0x9C4BEE, 0x02C529, 0x0CFCD7, + 0x5EBD8C, 0x7196E0, 0x4B917E, 0x6B9780, 0x6A1731, 0xA617FF, + 0x27A20D, 0x5A56A3, 0x43C4DB, 0xC62EA4, 0x637A84, 0x1C46F9, + 0x33C780, 0x61A278, 0x4915C9, 0xD6C776, 0x6A7C66, 0xD8DD0C, + 0xF87EB1, 0x124C43, 0x5B87E7, 0x097456, 0x3C2FA7, 0x307C4A, + 0x54267A, 0x30E34E, 0xC0CF98, 0xD75B19, 0xFADEDB, 0x12CBE8, + 0x29F24C, 0x579C7E, 0xBF3682, 0xDCB460, 0xAE08B3, 0xA524BC, + 0xC181C2, 0x5DAB90, 0x466602, 0x55345B, 0xA13941, 0x47D820, + 0x278066, 0x81B089, 0x165EFB, 0x4D27FD, 0x2BF9F4, 0x2E2FFB, + 0x6106B5, 0xE76806, 0x445A84, 0x0BDA0D, 0x49D7A4, 0x72650D, + 0xCDC55B, 0x3E16BC, 0x132F6F, 0x29E8FD, 0xE58428, 0x621E41, + 0x7D2AC4, 0xAB5697, 0xAC61EB, 0xE5DAF0, 0x654ED6, 0x8E77E3, + 0x0B2FBC, 0x2E63A3, 0xC8296A, 0x8B631F, 0x4ECCA6, 0x91859C, + 0x9E3E45, 0x0E3CC7, 0xC12454, 0xCCBCB6, 0x17979E, 0xD0D374, + 0xA489A2, 0xC6258F, 0xE8EF9E, 0x12EE26, 0xC614C2, 0x62E23E, + 0xCA8C5C, 0x409AC9, 0x511D05, 0xA88CE0, 0x195500, 0xF7144F, + 0x913BB7, 0x17D064, 0xF6C9CE, 0xAC5D11, 0xD0C313, 0xBCCCB6, + 0xAAD4FC, 0xE47B2C, 0xFE4362, 0xF2E712, 0x2D5EFF, 0x833822, + 0x58A1D7, 0x68377C, 0xE49B25, 0x22B179, 0x048796, 0x069400, + 0xE670D3, 0xD2CB85, 0x55FBE6, 0x67F281, 0xFE2DE0, 0x8CFAF2, + 0x9865BC, 0x210CD3, 0x86DD70, 0x43D00F, 0x55E279, 0x679252, + 0x8D4F58, 0xE17AC5, 0x6A6127, 0x1B0876, 0x5D8ED0, 0x701330, + 0xD5BD25, 0xC9A126, 0x57C571, 0xDC5C3F, 0xB6D34E, 0xB72383, + 0x001A9E, 0x7D36C0, 0x8151F6, 0x65D7C1, 0xE1F513, 0xCD372A, + 0xE69B0C, 0xD02685, 0x23C3EB, 0x3544CB, 0xF0BE31, 
0x83F399, + 0xCB93F8, 0xFFC693, 0x908EC6, 0x8E5DE1, 0x315B7E, 0x67CE7B, + 0x40AAF7, 0x7FD285, 0x069B36, 0x03C00A, 0x13C7D5, 0x0DA14C, + 0x1EAAD4, 0x2B777F, 0x8E05C1, 0x5AD1AE, 0x60C398, 0xA4EA59, + 0x10BEED, 0x88F2FA, 0x69B941, 0xA54E70, 0xA817C3, 0xB96246, + 0xE8EEDC, 0x56D570, 0xBBEBB5, 0xD8F235, 0x201AB9, 0x9CC747, + 0x5BC2FB, 0xC877F3, 0x428CF6, 0x4EEF84, 0xBF85FD, 0xEE6D34, + 0x84C2DE, 0xC42F4C, 0x1A513B, 0x9AC41F, 0x87FFFA, 0x1CA431, + 0x714252, 0xC73FB9, 0x662D89, 0x3D83BA, 0xBDF046, 0x2E4F62, + 0x76B7C0, 0x81336C, 0xBE80A9, 0x4C9D72, 0x739A15, 0x47972C, + 0xA36A1B, 0xD31731, 0x54BA46, 0x2E8C72, 0xFEA5A5, 0x9A7E5F, + 0xC359ED, 0x8F0FFB, 0x1270DA, 0x5E9B08, 0xB0BFCB, 0x36974C, + 0x6CD8F9, 0xD02E1F, 0x1C3F2F, 0xFCF8F0, 0x4C2C6D, 0x0B2169, + 0x48B9CE, 0x42737D, 0xA8E974, 0x64062D, 0xA86C59, 0xEEC419, + 0x047C83, 0x996A23, 0xF2A4C8, 0x4BE1B8, 0x348286, 0xE84240, + 0x8337CB, 0xE55A2F, 0xC17750, 0xA4DA06, 0x64347F, 0x59A5A1, + 0xDFF53D, 0x62A571, 0xEECF3A, 0x886700, 0xC06DAF, 0x4E161F, + 0x12670E, 0xBDFE1A, 0xA72B38, 0x5BA22C, 0xFED227, 0x3FC814, + 0x150E5A, 0xE99B3A, 0x8EE9FC, 0xBC1845, 0x32373A, 0xBDA476, + 0xCEB88F, 0x7FAED3, 0xDB9116, 0x31CF72, 0x1A5136, 0xC4F362, + 0xDE4799, 0x768043, 0x386207, 0x8E5497, 0xB0EF6D, 0x6C57FB, + 0xF56664, 0xD24F05, 0xE0F702, 0x8A41EF, 0xA2EC53, 0x09731C, + 0x6157FE, 0xC5731C, 0xEF1A2E, 0x60EC10, 0xA67EFE, 0x486A73, + 0x8004F6, 0xC3F482, 0x63BA28, 0x107282, +}; +#endif diff --git a/usr/src/lib/libm/common/Q/_TBL_logl.c b/usr/src/lib/libm/common/Q/_TBL_logl.c new file mode 100644 index 0000000000..b4b5b44ea5 --- /dev/null +++ b/usr/src/lib/libm/common/Q/_TBL_logl.c @@ -0,0 +1,169 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * Table of constants for logl. + * By K.C. Ng, March 9, 1989 + */ + +#include "libm.h" + +const long double _TBL_logl_hi[] = { + +0.000000000000000000000000000000000000000e+0000L, + +1.550418653596525415085404409320395875274e-0002L, + +3.077165866675368837102820454313423549427e-0002L, + +4.580953603129420316667926449525231301634e-0002L, + +6.062462181643484258060612972483742997442e-0002L, + +7.522342123758752569860532039086827578824e-0002L, + +8.961215868968713261995143730528787027578e-0002L, + +1.037967936816435648260617694803438348244e-0001L, + +1.177830356563834545387940581504548168563e-0001L, + +1.315763577887192725887160624312135596698e-0001L, + +1.451820098444978972819350572286183123887e-0001L, + +1.586050301766385840933711683530052981349e-0001L, + +1.718502569266592223400988812516892523599e-0001L, + +1.849223384940119926639035059723077314615e-0001L, + +1.978257433299198803625720374533782925763e-0001L, + +2.105647691073496376695527531626088702497e-0001L, + +2.231435513142097557662949937027997104032e-0001L, + +2.355660713127669090775881163009096337126e-0001L, + +2.478361639045812567806026867952720081699e-0001L, + +2.599575244369260669720794933085231739065e-0001L, + +2.719337154836417588316694242031136685288e-0001L, + +2.837681731306445983469010458794547135312e-0001L, + 
+2.954642128938358763866817700916107535002e-0001L, + +3.070250352949118620751243666552419834311e-0001L, + +3.184537311185346158102471140686078561531e-0001L, + +3.297532863724679818144228014362478784488e-0001L, + +3.409265869705932103050890001544662426952e-0001L, + +3.519764231571781846554474552048254288130e-0001L, + +3.629054936893684531378242945398272521523e-0001L, + +3.737164097935840808210167331226757525499e-0001L, + +3.844116989103320397347900487369508110320e-0001L, + +3.949938082408689781063939783520655318915e-0001L, + +4.054651081081643819780130322994137932204e-0001L, + +4.158278951437109656133288259511744826605e-0001L, + +4.260843953109000631245447385569842356371e-0001L, + +4.362367667749180703490412239178509575736e-0001L, + +4.462871026284195115325899874055994208063e-0001L, + +4.562374334815875943808053840818705719756e-0001L, + +4.660897299245992245586191878736453654769e-0001L, + +4.758459048699639142652093893677655824436e-0001L, + +4.855078157817008078017910633011255535046e-0001L, + +4.950772667978515145979645213034899480604e-0001L, + +5.045560107523952870583081828817948816463e-0001L, + +5.139457511022343168010058668287669524912e-0001L, + +5.232481437645478365168069353535037563840e-0001L, + +5.324647988694718438739234379583263151144e-0001L, + +5.415972824327443715765422111689841356847e-0001L, + +5.506471179526622792599479861304555364807e-0001L, + +5.596157879354226862708883466532843603287e-0001L, + +5.685047353526687120787385804082945993734e-0001L, + +5.773153650348236043181117067559499073234e-0001L, + +5.860490450035782089041193916402035316840e-0001L, + +5.947071077466927895143434959005658134879e-0001L, + +6.032908514380842623405849663552155166682e-0001L, + +6.118015411059929035298897608882125523626e-0001L, + +6.202404097518575288514942954323627943283e-0001L, + +6.286086594223741377443081293997900727520e-0001L, + +6.369074622370692316204942281372157123062e-0001L, + +6.451379613735847016652282983340864160916e-0001L, + 
+6.533012720127456387586157190946858013903e-0001L, + +6.613984822453650082602354487776933060928e-0001L, + +6.694306539426292672988850845059757003379e-0001L, + +6.773988235918061408096824565025274617492e-0001L, + +6.853040030989194165440476699956951850629e-0001L, +}; + +const long double _TBL_logl_lo[] = { + +0.000000000000000000000000000000000000000e+0000L, + +1.949242877125126389030374148355277037360e-0027L, + +3.053637928597425156289800058830629052349e-0027L, + +3.119411029097592549724599219796579698355e-0027L, + +2.315582833311779694729302029874044004747e-0027L, + +1.959279413884862919696230642481001644914e-0026L, + +3.207319665850940689112590931321584585232e-0026L, + +3.428363085348215886901240200560690191423e-0026L, + +5.132006688821218644279793035639158591104e-0026L, + +6.625826960278191623061313902987136675670e-0026L, + +6.511946011133829904478213998927380817716e-0027L, + +6.272836277110805877048126233548710095828e-0027L, + +6.480345801257546326311423010001184801374e-0026L, + +8.668694188954430256018491855337005516255e-0026L, + +3.374358317649896753533582921130800923337e-0026L, + +5.956974264347082186429247944518667757530e-0026L, + +9.660703479297144864941461785565180191497e-0026L, + +1.026401337764243728855958607127831718221e-0025L, + +7.895125273982903351541822547625351974082e-0026L, + +1.233787870669833985274611329531652753861e-0027L, + +7.032988549345377674736424478296516377526e-0026L, + +1.764708000531295728633384847670848625081e-0025L, + +1.359633534416813878749988462341486606257e-0025L, + +1.738801359182578816100029030519562527565e-0025L, + +9.952199173944211463901058384143333287734e-0026L, + +1.048454193250289008158931842356333139861e-0026L, + +1.996258899657478647716755914664160562170e-0025L, + +1.054613497176328160439100383508915283893e-0027L, + +5.143766259398803158035428613944687700657e-0026L, + +9.959314775409457843445608446369116918733e-0026L, + +1.374434005748650164937032849496159512584e-0026L, + 
+5.801291623641845255360276144691829323298e-0026L, + +8.316493534335158882618991007102844149005e-0026L, + +6.700372782269538472749252935215499311080e-0026L, + +1.410384923832595967313936024639114199873e-0025L, + +9.914327034309007140234547094345224044051e-0026L, + +1.932140695859428972988292357113036038299e-0025L, + +1.540820591764623257530922632028001952748e-0025L, + +5.963112403438125368118769047371614538741e-0026L, + +1.969366158297316138140115855981754471320e-0025L, + +1.388966334707414023926476567157219393213e-0026L, + +6.353934371729676603785277612987160899450e-0026L, + +3.488563800483361999633395030516586696799e-0025L, + +2.214454506406188993139159148705861598088e-0025L, + +2.895813670852564643073769701905380524878e-0025L, + +2.855018159274929532107406110765900047355e-0025L, + +9.273144996328510392949911518833977809658e-0026L, + +1.930744579236138780895942105787011752697e-0025L, + +1.538735422331574088102192677519746877453e-0025L, + +1.844586676642028985383989272409206538468e-0025L, + +3.547635464941839708071563131885310128521e-0025L, + +4.464712081783102087084281748635332222581e-0026L, + +5.062863951970459495500575300347508324877e-0026L, + +2.203060950889790157204518257910818074191e-0025L, + +5.540602231323196163388428517126435254723e-0027L, + +3.371348840624439923830692211721531149909e-0025L, + +7.637439356719457811667844141793488670929e-0026L, + +1.990439834788842292780211676828666657547e-0025L, + +1.978006454898465493718923085569873769719e-0025L, + +1.621161880831806223416081355472819612309e-0025L, + +3.899319576320551292151632804501913965920e-0025L, + +1.864235278097858865893177670582100390924e-0025L, + +1.534948208368053655735541548539936152221e-0025L, + +4.089715378013580174759550633443176148182e-0025L, +}; diff --git a/usr/src/lib/libm/common/Q/_TBL_sinl.c b/usr/src/lib/libm/common/Q/_TBL_sinl.c new file mode 100644 index 0000000000..766d15014c --- /dev/null +++ b/usr/src/lib/libm/common/Q/_TBL_sinl.c @@ -0,0 +1,192 @@ +/* + * CDDL HEADER START + * + 
* The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * table of sinl(x) at the break points x_i, i=0,1,...,74, whose high word + * is 0x3ffc4000 + i*0x800 (each step is 1/32 of the leading power of two). + * {0x3ffc4000,0,0,0} --> (inc 0x800) --> {0x3ffe9000,0,0,0} + * 0.15625 0.03125 0.78125 (pi/4 = 0.785398163...) 
+ */ + +#include "libm.h" + +const long double _TBL_sinl_hi[] = { + +1.556149927735560412099206432035162581492e-0001L, + +1.594724589318434199425963881130908091043e-0001L, + +1.633274917366128508468661724543543700180e-0001L, + +1.671800323648067343709660282007512722777e-0001L, + +1.710300220313950192813479692398343312832e-0001L, + +1.748774019902721898956853691085201901772e-0001L, + +1.787221135351536593753562418641807235164e-0001L, + +1.825640980004715553995456513594130154574e-0001L, + +1.864032967622698845523799831032052611919e-0001L, + +1.902396512390990617639858876307573287214e-0001L, + +1.940731028929097911560552002141454036336e-0001L, + +1.979035932299462846523939109918127853182e-0001L, + +2.017310638016388047250381511640009707423e-0001L, + +2.055554562054955176568330206054936963632e-0001L, + +2.093767120859936437118907527248816522107e-0001L, + +2.131947731354698906160730331184784624987e-0001L, + +2.170095810950101567605780958260553963420e-0001L, + +2.208210777553384905528563479277490523429e-0001L, + +2.246292049577052923504285497964248198189e-0001L, + +2.284339045947747454247378313461956799859e-0001L, + +2.322351186115114624139308777462358722636e-0001L, + +2.360327890060663337354342917945180835158e-0001L, + +2.398268578306615644413692518108865737937e-0001L, + +2.436172671924748860122309477052146367777e-0001L, + +2.474039592545229295968487048493892032583e-0001L, + +2.549659604158784674875565748648726276685e-0001L, + +2.625123997691532814509496263956929310415e-0001L, + +2.700428167185850315527550636188270542366e-0001L, + +2.775567516463363259220234468281285678680e-0001L, + +2.850537459405474245877630333232525606110e-0001L, + +2.925333420233275436247023264939134225079e-0001L, + +2.999950833786830511632482820116999437532e-0001L, + +3.074385145803808506705029582019820907725e-0001L, + +3.148631813197452508650363151269390156066e-0001L, + +3.222686304333866256877459198931880313050e-0001L, + +3.296544099308601719143177251264631756945e-0001L, + 
+3.370200690222530762612817541738100244419e-0001L, + +3.443651581456984082071720464722237468910e-0001L, + +3.516892289948140592225848969555470155541e-0001L, + +3.589918345460650536777102991528689411936e-0001L, + +3.662725290860475613729093517162641768533e-0001L, + +3.735308682386929464168397526608481120900e-0001L, + +3.807664089923901920572007033888966750813e-0001L, + +3.879787097270250460510796908137419597834e-0001L, + +3.951673302409342362448326404196536570776e-0001L, + +4.023318317777731112171055988809823868862e-0001L, + +4.094717770532950661226940270114522362676e-0001L, + +4.165867302820411192591124488310696565000e-0001L, + +4.236762572039380103616839880311024798208e-0001L, + +4.307399251108031972163215178508491897943e-0001L, + +4.377773028727551328616189747027966801523e-0001L, + +4.447879609645272114330560125295252111499e-0001L, + +4.517714714916837765816887501340628695303e-0001L, + +4.587274082167365923772950289728747732442e-0001L, + +4.656553465851601826811995125075467791328e-0001L, + +4.725548637513044511465513178085169418350e-0001L, + +4.794255386042030002732879352155714019245e-0001L, + +4.930786857539230572651365527534871205832e-0001L, + +5.066114548142573676422960008938671919466e-0001L, + +5.200205419537270047602136998746747297451e-0001L, + +5.333026735360201733291311033081615288994e-0001L, + +5.464546069192035644033495537494110008818e-0001L, + +5.594731312473668773848440060031166884132e-0001L, + +5.723550682345072403849537068245036075406e-0001L, + +5.850972729404621548053993141500804585059e-0001L, + +5.976966345387015312386476189673343370299e-0001L, + +6.101500770757913712737423935661832200218e-0001L, + +6.224545602223436830419267050904433302049e-0001L, + +6.346070800152692968503099142036714364826e-0001L, + +6.466046695911523705240421598828007629792e-0001L, + +6.584443999105675415895839548840419894459e-0001L, + +6.701233804731628946545315835006484946172e-0001L, + +6.816387600233341667332419527798939078545e-0001L, + 
+6.929877272463179102818154908230482095679e-0001L, + +7.041675114545336727800595099739428438828e-0001L, +}; + +const long double _TBL_sinl_lo[] = { + -7.839895634192879801217180506294972695887e-0036L, + -7.579278167533093253112813720340914585189e-0036L, + +1.813803443011554857703679023007542917336e-0036L, + -5.685040200337201343842157163322014327778e-0036L, + +7.013958751874876088754160302032414326691e-0036L, + +9.101164084055805006113433827277389417722e-0036L, + -1.529069265172651032025475612605940319203e-0036L, + -5.873100812266872079952884219254900231461e-0036L, + +1.764603048068267800105867159753318395454e-0036L, + +1.747799267790272859521729635868399475234e-0036L, + -9.673047410519982672089452429449289994858e-0036L, + -7.666827750837122707923169727244402427704e-0036L, + -4.275134347549669784351512906173841196088e-0036L, + -1.826904072780322152815985026139121969706e-0036L, + -1.594702873443294499653146384825158092559e-0036L, + -7.180615084240582786256765419723871383233e-0036L, + +1.073564887942168318128295491982011935257e-0035L, + +6.166267602604185314123111207543917974633e-0036L, + +2.420615108492974698446957518700585915995e-0036L, + +1.864291640707538541155008952901532832506e-0036L, + -4.969304833641910200750246243329289676583e-0036L, + +7.191910920600591837788283739445222790835e-0036L, + +2.398670365698962872409384444450714480056e-0036L, + +2.625717623049256499265563616201152710192e-0036L, + -7.364870011085995329435971152758116180239e-0036L, + +2.202803779185347210050716883280741537850e-0035L, + +3.249236770720310646731771785718217268891e-0036L, + +2.438735936561976529428558055804286674772e-0035L, + -1.358485954689981282143446687700830546868e-0036L, + +2.042693258859029188027001236804037487674e-0035L, + +1.935394086687044503080036879506851279569e-0035L, + +1.351742655356978501392833614755710504356e-0035L, + +1.065151724232046458392410994534171402266e-0035L, + +1.924312402124329269930577050628341603064e-0035L, + 
-1.495058978047592634838539083355002279867e-0035L, + -1.226069967847432149730821922942328537678e-0035L, + -2.214357561488394736777775450498906642993e-0035L, + -3.197918850054809249377584675940519273161e-0036L, + +1.752934334182702105675254128020832940341e-0035L, + -2.067723892627233681394169702571120887364e-0035L, + -1.967684335349365926758978182531089889151e-0035L, + -1.480234947789865560488791134115171284680e-0035L, + -2.020095411752086363369245333724961071903e-0035L, + +8.019047838709350758444432786175864173856e-0036L, + +7.575600313883125509400401940426278198665e-0036L, + -1.956787228828481747235699165048715626458e-0035L, + +2.239452414684575979216557857298213538383e-0035L, + -2.004881068319988136754382697969636119420e-0035L, + +1.404844563886544703294730965793125947043e-0035L, + +1.540967800016293988508912183967615475673e-0035L, + +9.627943645034426124771174260339225827341e-0036L, + -1.671879365114936780075083716139548989818e-0035L, + -1.193872230164722958937943872752845047434e-0035L, + -4.709469941941829089292517195754317215227e-0036L, + -1.562825989789718724786197721553059612264e-0035L, + +9.313247749577680188502242676253713195205e-0036L, + -1.384269776167183189501758486393819264119e-0035L, + +7.064986931125350563523011010886249504328e-0036L, + -3.109636998242741557027060430659670849804e-0035L, + -3.324150213308849248337118428668967104680e-0035L, + -3.427152913195516159969937952267551337396e-0035L, + -2.118702307301603154209365237718648576399e-0035L, + -1.289226205241639223068869521009917813361e-0037L, + +2.125722734799331239445801994645145285587e-0035L, + -1.781645762780561951365253354033804640300e-0035L, + -2.495276089408737145274279413504615537138e-0035L, + +1.338422379299389637809694183691505317685e-0035L, + +1.919747869211470727176212361922698586017e-0035L, + +4.330259169399683693260601564559270596091e-0036L, + -3.417429818162194124156743659460798263758e-0035L, + -4.544129440843003305237213918657872189520e-0035L, + 
-1.331658529527437298976340693936847286647e-0036L, + +2.748387759350275490242241143386673942983e-0035L, + +4.500898710776635571808492195291899181897e-0035L, + -2.872593727403933486766336102755986165044e-0035L, +}; diff --git a/usr/src/lib/libm/common/Q/_TBL_tanl.c b/usr/src/lib/libm/common/Q/_TBL_tanl.c new file mode 100644 index 0000000000..b207c0cbfa --- /dev/null +++ b/usr/src/lib/libm/common/Q/_TBL_tanl.c @@ -0,0 +1,192 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * table of tanl(x) at the break points x_i, i=0,1,...,74, whose high word + * is 0x3ffc4000 + i*0x800 (each step is 1/32 of the leading power of two). + * {0x3ffc4000,0,0,0} --> (inc 0x800) --> {0x3ffe9000,0,0,0} + * 0.15625 0.03125 0.78125 (pi/4 = 0.785398163...) 
+ */ + +#include "libm.h" + +const long double _TBL_tanl_hi[] = { + +1.575341073252716106852257741724104864870e-0001L, + +1.615397840495214763092752400110463977418e-0001L, + +1.655505192739339762139309125850523900470e-0001L, + +1.695664452197665101509706065437500194264e-0001L, + +1.735876947679815208446734114353616329985e-0001L, + +1.776144014774467276317429269586882243819e-0001L, + +1.816466996033214276582758961743535864882e-0001L, + +1.856847241156344116266612278649865067149e-0001L, + +1.897286107180591328910833700730338069829e-0001L, + +1.937784958668918635160223977682694780440e-0001L, + +1.978345167902386688084063751239797409303e-0001L, + +2.018968115074171328840689933657666757769e-0001L, + +2.059655188485788721087393288030358608878e-0001L, + +2.100407784745589808415175232245911862545e-0001L, + +2.141227308969586648860666814158624683863e-0001L, + +2.182115174984674325058820481495796084382e-0001L, + +2.223072805534313308722888175879995829692e-0001L, + +2.264101632486738374776714045035595099974e-0001L, + +2.305203097045761414554475155379181753938e-0001L, + +2.346378649964236789993677105610770268268e-0001L, + +2.387629751760259202681510637409399276566e-0001L, + +2.428957872936165424010859430156609881174e-0001L, + +2.470364494200412646634947325158035272170e-0001L, + +2.511851106692407673991038906774344215246e-0001L, + +2.553419212210362665044822364904736907938e-0001L, + +2.636805964199967998548259948794679989658e-0001L, + +2.720536986587708834265643667712727220498e-0001L, + +2.804624701452514031696042891852650256007e-0001L, + +2.889081724405147260015884454642448163630e-0001L, + +2.973920872690245894671940160246554900716e-0001L, + +3.059155173530592641072389231969929579942e-0001L, + +3.144797872725715161734382202256272257022e-0001L, + +3.230862443517455201183006557179619867007e-0001L, + +3.317362595735727673394297030105334375685e-0001L, + +3.404312285238303874282274418902587687499e-0001L, + +3.491725723659103522547129636843912210518e-0001L, + 
+3.579617388480169983883959631794471179752e-0001L, + +3.668002033443234227206048185537661359712e-0001L, + +3.756894699317548404092457756875977254806e-0001L, + +3.846310725041492230408562796582816506283e-0001L, + +3.936265759256327582294137871012180779893e-0001L, + +4.026775772251402117785937359900067250949e-0001L, + +4.117857068341084757888498763848712415895e-0001L, + +4.209526298694758220747941414506739471850e-0001L, + +4.301800474642300490203296054472752443302e-0001L, + +4.394696981478662404836631484799309510327e-0001L, + +4.488233592792397088405555239245740331672e-0001L, + +4.582428485344323669591891965241567184790e-0001L, + +4.677300254523917999213427069619926229670e-0001L, + +4.772867930412522617224042590104237355391e-0001L, + +4.869150994484063244987175035683195875449e-0001L, + +4.966169396975656257105605790725693200164e-0001L, + +5.063943574962298120708227547071771601970e-0001L, + +5.162494471171751444917753379369286911420e-0001L, + +5.261843553577791441706134379510093677744e-0001L, + +5.362012835812160313475789292393083126826e-0001L, + +5.463024898437905132551794657802853544147e-0001L, + +5.667670655805864456801779441354759990792e-0001L, + +5.875973675914432213941588631578976895206e-0001L, + +6.088137403243807214124939743963768234939e-0001L, + +6.304376738358847668526114292997751740101e-0001L, + +6.524918979288079927238977365516267472227e-0001L, + +6.750004851442429076631779494777228720541e-0001L, + +6.979889636235992551497657233900136516119e-0001L, + +7.214844409909044199895178832795946639042e-0001L, + +7.455157405593919951361301646778137804617e-0001L, + +7.701135513442087050059836600527731975210e-0001L, + +7.953105935686741856456016917474183548089e-0001L, + +8.211418015898941219114239653747117425236e-0001L, + +8.476445264465526540907883088076187235513e-0001L, + +8.748587605544823495396719079321555572147e-0001L, + +9.028273874526735021961743652539763208464e-0001L, + +9.315964599440724611652027565739364074620e-0001L, + 
+9.612155104943704161853006259468735267385e-0001L, + +9.917378983632686802568573899299023560595e-0001L, +}; + +const long double _TBL_tanl_lo[] = { + +4.179214385976688849250979202972663542033e-0036L, + +1.201528446191025246839024650298397902579e-0035L, + +1.129323489449537738080901788756231977300e-0035L, + +2.140135278964936125815581758267649033136e-0037L, + +4.432205749300185001040819456988862684951e-0036L, + +6.136100978120132271332684207100740679906e-0036L, + -1.032553059579180849987395832156976613765e-0035L, + -3.160024259922437001215851404196652376871e-0037L, + +9.288062528988428190963791818336024913881e-0036L, + -7.446971695790644707546943119354167721612e-0036L, + -3.194115406765633171232961214385101074252e-0036L, + +8.636824101000271295925487212833770093090e-0036L, + +3.102272236726159152985822088441358430350e-0036L, + -5.851906473589368694487202441718008909753e-0036L, + +4.010022070137306925338504597897336002613e-0036L, + +1.037727706884673933875970874373462194321e-0035L, + -7.373234860421060505099033319601658081963e-0037L, + +1.012564187615243178899324943342662908733e-0035L, + -1.409372712725876553601555574139438939044e-0036L, + +8.378827024922767151362882309834645448153e-0036L, + +2.973824478467770877677465646013477493211e-0037L, + +5.400099398783906370270919848839276575083e-0036L, + -6.462512242458415498262723324973388658384e-0036L, + -2.322762023061318925750503642571013465985e-0035L, + -1.258955887171193954556521579215259847692e-0035L, + -2.320447955805179154521333495999564905899e-0035L, + -1.149012552345329193834437558081484346041e-0035L, + +1.452751817871169833623945031311944393871e-0035L, + +1.233520419884672519188849688498814953115e-0035L, + -2.801716058919562991500189219464456618491e-0036L, + -8.652310551710608096633992612270187537921e-0036L, + +1.247172716748407772564831128401880847054e-0035L, + -1.239704249638930213583929247314024560861e-0035L, + +5.184462988068616168233816296529150644737e-0036L, + 
-6.856476723415391305857531095744442523549e-0039L, + -9.739553531295433673398454344315039002245e-0036L, + +2.266233016492660661638292126777401538348e-0035L, + +2.301502770052376628347923621704562121797e-0035L, + +1.948845747336057051538318007442114995744e-0035L, + -1.940750389335608259363326370556914475278e-0035L, + +2.019644660873458215118483163076314703163e-0035L, + +1.602015812156905914821208807083062984550e-0035L, + -3.292416392515743374743236507806546284438e-0036L, + +8.663813942351672490328381271391704283086e-0036L, + +2.366609581506599084093910217277994736871e-0035L, + -1.408950063101056644039900854057776596620e-0035L, + -1.514769920962849077013113923603803573445e-0035L, + -2.261973795598615105449462443044330073903e-0035L, + -2.553211882172402068866429390071980923144e-0036L, + +1.416627029437126089675998257335033382140e-0035L, + +2.342724931714249133589230079809850869266e-0035L, + +1.710557978782419482731492281155256146438e-0036L, + -2.148837714938406737587489024152464642738e-0035L, + -4.273007541330408144086077695573950943351e-0035L, + -1.187512317830147119742251549090183099823e-0035L, + +4.828271743385077560204615670566277021463e-0036L, + +2.888285131340709127656514948635349311805e-0035L, + -4.505233085580329558684272075904471228864e-0035L, + +2.931630895327838681946984510160883959332e-0036L, + +2.647698485118630114484469975939947791390e-0035L, + +3.589320320845381187254017736531618320153e-0035L, + +3.109409548262590459351847474032415851843e-0035L, + +4.083234910839125531016836269706248922707e-0035L, + +2.617081426185972174278972738311427223003e-0035L, + +1.685638883876736468625598871602567025329e-0035L, + +3.340709007044122362174996515517070074049e-0035L, + +4.272448967674769643335827331513513914893e-0035L, + -1.016337077502454982949287784426587554312e-0035L, + -4.164820472415940877265629374001265181061e-0035L, + -1.266702907529482683855413412028523879313e-0035L, + -2.498295523749676738976415773050309926889e-0035L, + 
-2.240244888035701528565322302010524216607e-0035L, + +2.072673676633052237512344957839713494538e-0035L, + -5.635620575073849011607547314084511148918e-0036L, + +1.289773398786324444403985925780591709915e-0035L, +}; diff --git a/usr/src/lib/libm/common/Q/__cosl.c b/usr/src/lib/libm/common/Q/__cosl.c new file mode 100644 index 0000000000..58a268f7dc --- /dev/null +++ b/usr/src/lib/libm/common/Q/__cosl.c @@ -0,0 +1,138 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * __k_cosl(long double x, long double y) + * kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164 + * Input x is assumed to be bounded by ~pi/4 in magnitude. + * Input y is the tail of x. + * + * Table look up algorithm + * 1. by cos(-x) = cos(x), we may replace x by |x| + * 2. if x < 5/32 = [0x3ffc4000, 0] = 0.15625 , then + * if x < 2^-57 (hx < 0x3fc60000 0), return 1.0 with inexact if x != 0 + * z = x*x; + * if x <= 1/128 = 2**-7 = 0.0078125 + * cos(x)=1.0+z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*qq5)))) + * else + * cos(x)=1.0+z*(q1+ ... 
z*q8) + * 3. else + * ht = (hx + 0x400)&0x7ffff800 (round x to a break point t) + * lt = 0 + * i = (ht-0x3ffc4000)>>11; (i<=74) + * x' = (x - t)+y (|x'| ~<= 2^-7) + * By + * cos(t+x') + * = cos(t)cos(x')-sin(t)sin(x') + * = cos(t)(1+z*(qq1+z*qq2))-[sin(t)]*x*(1+z*(pp1+z*pp2)) + * = cos(t) + [cos(t)]*(z*(qq1+z*qq2))- + * [sin(t)]*x*(1+z*(pp1+z*pp2)) + * + * Thus, + * let a= _TBL_cosl_hi[i], b = _TBL_cosl_lo[i], c= _TBL_sinl_hi[i], + * x = (x-t)+y + * z = x*x; + * cos(t+x) = a+(b+ (-c*x*(1+z*(pp1+z*pp2))+a*(z*(qq1+z*qq2)))) + */ + +#include "libm.h" + +extern const long double _TBL_cosl_hi[], _TBL_cosl_lo[], _TBL_sinl_hi[]; +static const long double + one = 1.0L, +/* + * |sin(x) - (x + pp1*x^3 + ... + pp5*x^11)| <= 2^-122.32 for |x| < 1/64 + */ + pp1 = -1.666666666666666666666666666586782940810e-0001L, + pp2 = +8.333333333333333333333003723660929317540e-0003L, + pp3 = -1.984126984126984076045903483778337804470e-0004L, + pp4 = +2.755731922361906641319723106210900949413e-0006L, + pp5 = -2.505198398570947019093998469135012057673e-0008L,
+/* + * |cos(x) - (1 + q1*x^2 + ... + q8*x^16)| <= 2^-117.11 for |x| <= 0.15625 + */ + q1 = -4.999999999999999999999999999999756416975e-0001L, + q2 = +4.166666666666666666666666664006066577258e-0002L, + q3 = -1.388888888888888888888877700363937169637e-0003L, + q4 = +2.480158730158730158494468463031814083559e-0005L, + q5 = -2.755731922398586276322819250356005542871e-0007L, + q6 = +2.087675698767424261441959760729854017855e-0009L, + q7 = -1.147074481239662089072452129010790774761e-0011L, + q8 = +4.777761647399651599730663422263531034782e-0014L, +/* + * |cos(x) - (1 + qq1*x^2 + ... + qq5*x^10)| <= 2^-123.84 for |x| <= 1/128 + */ + qq1 = -4.999999999999999999999999999999378373641e-0001L, + qq2 = +4.166666666666666666666665478399327703130e-0002L, + qq3 = -1.388888888888888888058211230618051613494e-0003L, + qq4 = +2.480158730156105377771585658905303111866e-0005L, + qq5 = -2.755728099762526325736488376695157008736e-0007L; + +#define i0 0 +/* + * Kernel cosine: x is the reduced argument and y its tail (see the algorithm + * notes at the top of this file). The int at index i0 of x is read through + * an int pointer to classify |x|; the header comment treats that int as the + * high word. The same view of t is written to build the table break point. + * NOTE(review): this relies on the long double layout placing the sign and + * exponent in int 0 - confirm for any new target. + */ +long double +__k_cosl(long double x, long double y) { + long double a, t, z, w; + int *pt = (int *) &t, *px = (int *) &x; /* int views of t and x */ + int i, j, hx, ix; + + t = 1.0L; /* only int i0 of t is overwritten below; the rest stays as in 1.0L */ + hx = px[i0]; /* leading int of x, top bit included */ + ix = hx & 0x7fffffff; /* same int with the top bit cleared (|x|) */ + if (ix < 0x3ffc4000) { /* |x| < 0.15625: polynomial only, no table */ + if (ix < 0x3fc60000) /* |x| < 2^-57 */ + if ((i = (int) x) == 0) + return (one); /* generate inexact */ + z = x * x; + + if (ix < 0x3ff80000) /* 0.0078125 */ + return one + z * (qq1 + z * (qq2 + z * (qq3 + + z * (qq4 + z * qq5)))); + else + return one + z * (q1 + z * (q2 + z * (q3 + + z * (q4 + z * (q5 + z * (q6 + z * (q7 + + z * q8))))))); + } + j = (ix + 0x400) & 0x7ffff800; /* ht: round to a break point (multiple of 0x800) */ + i = (j - 0x3ffc4000) >> 11; /* table index of that break point */ + pt[i0] = j; /* t becomes the break point */ + if (hx > 0) + x = y - (t - x); /* x' = (x - t) + y */ + else + x = (-y) - (t + x); /* x < 0: x' = (-x - t) + (-y), by cos(-x) = cos(x) */ + a = _TBL_cosl_hi[i]; + z = x * x; + t = z * (qq1 + z * (qq2 + z * (qq3 + z * (qq4 + z * qq5)))); /* cos(x') - 1 */ + w = x * (one + z * (pp1 + z * (pp2 + z * (pp3 + z * (pp4 + z * pp5))))); /* sin(x') */ + t = _TBL_cosl_lo[i] - (_TBL_sinl_hi[i] * w - a * t); /* b - (c*sin(x') - a*(cos(x') - 1)) */ + return (a + t); +} diff --git a/usr/src/lib/libm/common/Q/__lgammal.c b/usr/src/lib/libm/common/Q/__lgammal.c new file mode 100644 index 0000000000..3b1f8eff75 --- /dev/null +++ 
b/usr/src/lib/libm/common/Q/__lgammal.c @@ -0,0 +1,397 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * long double __k_lgammal(long double x, int *signgamlp); + * K.C. Ng, August, 1989. + * + * We choose [1.5,2.5] to be the primary interval. Our algorithms + * are mainly derived from + * + * lgamma(2+s) = s*(1-euler) + (zeta(2)-1)/2 * s^2 - (zeta(3)-1)/3 * s^3 + ... + * + * + * Note 1. Since gamma(1+s)=s*gamma(s), hence + * lgamma(1+s) = log(s) + lgamma(s), or + * lgamma(s) = lgamma(1+s) - log(s). 
+ * When s is really tiny (like roundoff), lgamma(1+s) ~ s(1-euler) + * Hence lgamma(s) ~ -log(s) for tiny s + * + */ + +#include "libm.h" +#include "longdouble.h" + +static long double neg(long double, int *); +static long double poly(long double, const long double *, int); +static long double polytail(long double); +static long double primary(long double); + +static const long double +c0 = 0.0L, +ch = 0.5L, +c1 = 1.0L, +c2 = 2.0L, +c3 = 3.0L, +c4 = 4.0L, +c5 = 5.0L, +c6 = 6.0L, +pi = 3.1415926535897932384626433832795028841971L, +tiny = 1.0e-40L; +/* + * Kernel log-gamma. Non-finite x returns x*x before *signgamlp is written; + * otherwise *signgamlp is set to 1 and x with the sign bit set is handed to + * neg() together with the pointer. For the remaining x < 8 the argument is + * shifted by y = anintl(x): primary() receives x-y (x itself when y is 0) + * and a log term makes up the difference, following the recurrence in + * Note 1 above. Larger x uses x*(log(x)-1), with a polytail(1/x) correction + * while x < 1.0e40. + * NOTE(review): primary(s) presumably evaluates lgamma(2+s); its definition + * is elsewhere in this file - confirm there. + */ +long double +__k_lgammal(long double x, int *signgamlp) { + long double t,y; + int i; + + /* purge off +-inf, NaN and negative arguments */ + if (!finitel(x)) return x*x; + *signgamlp = 1; + if (signbitl(x)) return (neg(x,signgamlp)); + + /* for x < 8.0 */ + if (x<8.0L) { + y = anintl(x); + i = (int) y; /* rounded x selects the shift back toward the primary interval */ + switch(i) { + case 0: /* x rounds to 0; tiny x uses lgamma(x) ~ -log(x) (Note 1) */ + if (x<1.0e-40L) return -logl(x); else + return (primary(x)-log1pl(x))-logl(x); + case 1: /* lgamma(x) = lgamma(1+x) - log(x) */ + return primary(x-y)-logl(x); + case 2: /* x rounds to 2: no log correction */ + return primary(x-y); + case 3: /* lgamma(x) = lgamma(x-1) + log(x-1); later cases repeat this step */ + return primary(x-y)+logl(x-c1); + case 4: + return primary(x-y)+logl((x-c1)*(x-c2)); + case 5: + return primary(x-y)+logl((x-c1)*(x-c2)*(x-c3)); + case 6: + return primary(x-y)+logl((x-c1)*(x-c2)*(x-c3)*(x-c4)); + case 7: + return primary(x-y)+logl((x-c1)*(x-c2)*(x-c3)*(x-c4)*(x-c5)); + case 8: + return primary(x-y)+ + logl((x-c1)*(x-c2)*(x-c3)*(x-c4)*(x-c5)*(x-c6)); + } + } + + /* 8.0 <= x < 1.0e40 */ + if (x < 1.0e40L) { + t = logl(x); /* computed once, used twice below */ + return x*(t-c1)-(ch*t-polytail(c1/x)); + } + + /* 1.0e40 <= x <= inf */ + return x*(logl(x)-c1); +} + +static const long double an1[] = { /* 20 terms */ + -0.0772156649015328606065120900824024309741L, + 3.224670334241132182362075833230130289059e-0001L, + -6.735230105319809513324605383668929964120e-0002L, + 2.058080842778454787900092432928910226297e-0002L, + -7.385551028673985266273054086081102125704e-0003L, + 2.890510330741523285758867304409628648727e-0003L, 
-1.192753911703260976581414338096267498555e-0003L, + 5.096695247430424562831956662855697824035e-0004L, + -2.231547584535777978926798502084300123638e-0004L, + 9.945751278186384670278268034322157947635e-0005L, + -4.492623673665547726647838474125147631082e-0005L, + 2.050721280617796810096993154281561168706e-0005L, + -9.439487785617396552092393234044767313568e-0006L, + 4.374872903516051510689234173139793159340e-0006L, + -2.039156676413643091040459825776029327487e-0006L, + 9.555777181318621470466563543806211523634e-0007L, + -4.468344919709630637558538313482398989638e-0007L, + 2.216738086090045781773004477831059444178e-0007L, + -7.472783403418388455860445842543843485916e-0008L, + 8.777317930927149922056782132706238921648e-0008L, +}; + +static const long double an2[] = { /* 20 terms */ + -.0772156649015328606062692723698127607018L, + 3.224670334241132182635552349060279118047e-0001L, + -6.735230105319809367555642883133994818325e-0002L, + 2.058080842778459676880822202762143671813e-0002L, + -7.385551028672828216011343150077846918930e-0003L, + 2.890510330762060607399561536905727853178e-0003L, + -1.192753911419623262328187532759756368041e-0003L, + 5.096695278636456678258091134532258618614e-0004L, + -2.231547306817535743052975194022893369135e-0004L, + 9.945771461633313282744264853986643877087e-0005L, + -4.492503279458972037926876061257489481619e-0005L, + 2.051311416812082875492678651369394595613e-0005L, + -9.415778282365955203915850761537462941165e-0006L, + 4.452428829045147098722932981088650055919e-0006L, + -1.835024727987632579886951760650722695781e-0006L, + 1.379783080658545009579060714946381462565e-0006L, + 2.282637532109775156769736768748402175238e-0007L, + 1.002577375515900191362119718128149880168e-0006L, + 5.177028794262638311939991106423220002463e-0007L, + 3.127947245174847104122426445937830555755e-0007L, +}; + +static const long double an3[] = { /* 20 terms */ + -.0772156649015328227870646417729220690875L, + 3.224670334241156699881788955959915250365e-0001L, + 
-6.735230105312273571375431059744975563170e-0002L, + 2.058080842924464587662846071337083809005e-0002L, + -7.385551008677271654723604653956131791619e-0003L, + 2.890510536479782086197110272583833176602e-0003L, + -1.192752262076857692740571567808259138697e-0003L, + 5.096800771149805289371135155128380707889e-0004L, + -2.231000836682831335505058492409860123647e-0004L, + 9.968912171073936803871803966360595275047e-0005L, + -4.412020779327746243544387946167256187258e-0005L, + 2.281374113541454151067016632998630209049e-0005L, + -4.028361291428629491824694655287954266830e-0006L, + 1.470694920619518924598956849226530750139e-0005L, + 1.381686137617987197975289545582377713772e-0005L, + 2.012493539265777728944759982054970441601e-0005L, + 1.723917864208965490251560644681933675799e-0005L, + 1.202954035243788300138608765425123713395e-0005L, + 5.079851887558623092776296577030850938146e-0006L, + 1.220657945824153751555138592006604026282e-0006L, +}; + +static const long double an4[] = { /* 21 terms */ + -.0772156649015732285350261816697540392371L, + 3.224670334221752060691751340365212226097e-0001L, + -6.735230109744009693977755991488196368279e-0002L, + 2.058080778913037626909954141611580783216e-0002L, + -7.385557567931505621170483708950557506819e-0003L, + 2.890459838416254326340844289785254883436e-0003L, + -1.193059036207136762877351596966718455737e-0003L, + 5.081914708100372836613371356529568937869e-0004L, + -2.289855016133600313131553005982542045338e-0004L, + 8.053454537980585879620331053833498511491e-0005L, + -9.574620532104845821243493405855672438998e-0005L, + -9.269085628207107155601445001196317715686e-0005L, + -2.183276779859490461716196344776208220180e-0004L, + -3.134834305597571096452454999737269668868e-0004L, + -3.973878894951937437018305986901392888619e-0004L, + -3.953352414899222799161275564386488057119e-0004L, + -3.136740932204038779362660900621212816511e-0004L, + -1.884502253819634073946130825196078627664e-0004L, + -8.192655799958926853585332542123631379301e-0005L, + 
-2.292183750010571062891605074281744854436e-0005L, + -3.223980628729716864927724265781406614294e-0006L, +}; + +static const long double ap1[] = { /* 19 terms */ + -0.0772156649015328606065120900824024296961L, + 3.224670334241132182362075833230047956465e-0001L, + -6.735230105319809513324605382963943777301e-0002L, + 2.058080842778454787900092126606252375465e-0002L, + -7.385551028673985266272518231365020063941e-0003L, + 2.890510330741523285681704570797770736423e-0003L, + -1.192753911703260971285304221165990244515e-0003L, + 5.096695247430420878696018188830886972245e-0004L, + -2.231547584535654004647639737841526025095e-0004L, + 9.945751278137201960636098805852315982919e-0005L, + -4.492623672777606053587919463929044226280e-0005L, + 2.050721258703289487603702670753053765201e-0005L, + -9.439485626565616989352750672499008021041e-0006L, + 4.374838162403994645138200419356844574219e-0006L, + -2.038979492862555348577006944451002161496e-0006L, + 9.536763152382263548086981191378885102802e-0007L, + -4.426111214332434049863595231916564014913e-0007L, + 1.911148847512947464234633846270287546882e-0007L, + -5.788673944861923038157839080272303519671e-0008L, +}; + +static const long double ap2[] = { /* 19 terms */ + -0.077215664901532860606428624449354836087L, + 3.224670334241132182271948744265855440139e-0001L, + -6.735230105319809467356126599005051676203e-0002L, + 2.058080842778453315716389815213496002588e-0002L, + -7.385551028673653323064118422580096222959e-0003L, + 2.890510330735923572088003424849289006039e-0003L, + -1.192753911629952368606185543945790688144e-0003L, + 5.096695239806718875364547587043220998766e-0004L, + -2.231547520600616108991867127392089144886e-0004L, + 9.945746913898151120612322833059416008973e-0005L, + -4.492599307461977003570224943054585729684e-0005L, + 2.050609891889165453592046505651759999090e-0005L, + -9.435329866734193796540515247917165988579e-0006L, + 4.362267138522223236241016136585565144581e-0006L, + -2.008556356653246579300491601497510230557e-0006L, + 
8.961498103387207161105347118042844354395e-0007L, + -3.614187228330216282235692806488341157741e-0007L, + 1.136978988247816860500420915014777753153e-0007L, + -2.000532786387196664019286514899782691776e-0008L, +}; + +static const long double ap3[] = { /* 19 terms */ + -0.077215664901532859888521470795348856446L, + 3.224670334241131733364048614484228443077e-0001L, + -6.735230105319676541660495145259038151576e-0002L, + 2.058080842775975461837768839015444273830e-0002L, + -7.385551028347615729728618066663566606906e-0003L, + 2.890510327517954083379032008643080256676e-0003L, + -1.192753886919470728001821137439430882603e-0003L, + 5.096693728898932234814903769146577482912e-0004L, + -2.231540055048827662528594010961874258037e-0004L, + 9.945446210018649311491619999438833843723e-0005L, + -4.491608206598064519190236245753867697750e-0005L, + 2.047939071322271016498065052853746466669e-0005L, + -9.376824046522786006677541036631536790762e-0006L, + 4.259329829498149111582277209189150127347e-0006L, + -1.866064770421594266702176289764212873428e-0006L, + 7.462066721137579592928128104534957135669e-0007L, + -2.483546217529077735074007138457678727371e-0007L, + 5.915166576378161473299324673649144297574e-0008L, + -7.334139641706988966966252333759604701905e-0009L, +}; + +static const long double ap4[] = { /* 19 terms */ + -0.0772156649015326785569313252637238673675L, + 3.224670334241051435008842685722468344822e-0001L, + -6.735230105302832007479431772160948499254e-0002L, + 2.058080842553481183648529360967441889912e-0002L, + -7.385551007602909242024706804659879199244e-0003L, + 2.890510182473907253939821312248303471206e-0003L, + -1.192753098427856770847894497586825614450e-0003L, + 5.096659636418811568063339214203693550804e-0004L, + -2.231421144004355691166194259675004483639e-0004L, + 9.942073842343832132754332881883387625136e-0005L, + -4.483809261973204531263252655050701205397e-0005L, + 2.033260142610284888319116654931994447173e-0005L, + -9.153539544026646699870528191410440585796e-0006L, + 
3.988460469925482725894144688699584997971e-0006L, + -1.609692980087029172567957221850825977621e-0006L, + 5.634916377249975825399706694496688803488e-0007L, + -1.560065465929518563549083208482591437696e-0007L, + 2.961350193868935325526962209019387821584e-0008L, + -2.834602215195368130104649234505033159842e-0009L, +}; + +static long double +primary(long double s) { /* assume |s|<=0.5 */ + int i; + + i = (int) (8.0L * (s + 0.5L)); + switch(i) { + case 0: return ch*s+s*poly(s,an4,21); + case 1: return ch*s+s*poly(s,an3,20); + case 2: return ch*s+s*poly(s,an2,20); + case 3: return ch*s+s*poly(s,an1,20); + case 4: return ch*s+s*poly(s,ap1,19); + case 5: return ch*s+s*poly(s,ap2,19); + case 6: return ch*s+s*poly(s,ap3,19); + case 7: return ch*s+s*poly(s,ap4,19); + } + /* NOTREACHED */ + return 0.0L; +} + +static long double +poly(long double s, const long double *p, int n) { + long double y; + int i; + y = p[n-1]; + for (i=n-2;i>=0;i--) y = p[i]+s*y; + return y; +} + +static const long double pt[] = { + 9.189385332046727417803297364056176804663e-0001L, + 8.333333333333333333333333333331286969123e-0002L, + -2.777777777777777777777777553194796036402e-0003L, + 7.936507936507936507927283071433584248176e-0004L, + -5.952380952380952362351042163192634108297e-0004L, + 8.417508417508395661774286645578379460131e-0004L, + -1.917526917525263651186066417934685675649e-0003L, + 6.410256409395203164659292973142293199083e-0003L, + -2.955065327248303301763594514012418438188e-0002L, + 1.796442830099067542945998615411893822886e-0001L, + -1.392413465829723742489974310411118662919e+0000L, + 1.339984238037267658352656597960492029261e+0001L, + -1.564707657605373662425785904278645727813e+0002L, + 2.156323807499211356127813962223067079300e+0003L, + -3.330486427626223184647299834137041307569e+0004L, + 5.235535072011889213611369254140123518699e+0005L, + -7.258160984602220710491988573430212593080e+0006L, + 7.316526934569686459641438882340322673357e+0007L, + 
-3.806450279064900548836571789284896711473e+0008L, +}; + +static long double +polytail(long double s) { + long double t,z; + int i; + z = s*s; + t = pt[18]; + for (i=17;i>=1;i--) t = pt[i]+z*t; + return pt[0]+s*t; +} + +static long double +neg(long double z, int *signgamlp) { + long double t,p; + + /* + * written by K.C. Ng, Feb 2, 1989. + * + * Since + * -z*G(-z)*G(z) = pi/sin(pi*z), + * we have + * G(-z) = -pi/(sin(pi*z)*G(z)*z) + * = pi/(sin(pi*(-z))*G(z)*z) + * Algorithm + * z = |z| + * t = sinpi(z); ...note that when z>2**112, z is an int + * and hence t=0. + * + * if (t == 0.0) return 1.0/0.0; + * if (t< 0.0) *signgamlp = -1; else t= -t; + * if (z<1.0e-40) ...tiny z + * return -log(z); + * else + * return log(pi/(t*z))-lgamma(z); + * + */ + + t = sinpil(z); /* t := sin(pi*z) */ + if (t == c0) /* return 1.0/0.0 = +INF */ + return c1/c0; + + z = -z; + if (z<=tiny) + p = -logl(z); + else + p = logl(pi/(fabsl(t)*z))-__k_lgammal(z,signgamlp); + if (t<c0) *signgamlp = -1; + return p; +} diff --git a/usr/src/lib/libm/common/Q/__poly_libmq.c b/usr/src/lib/libm/common/Q/__poly_libmq.c new file mode 100644 index 0000000000..a2492754e8 --- /dev/null +++ b/usr/src/lib/libm/common/Q/__poly_libmq.c @@ -0,0 +1,41 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
/*
 * __poly_libmq(x, n, p) -- evaluate the degree n-1 polynomial
 *	p[0] + p[1]*x + ... + p[n-1]*x^(n-1)
 * by Horner's rule.
 *
 * An empty coefficient list (n <= 0) evaluates to 0 instead of reading
 * before the start of p[].
 */
long double
__poly_libmq(long double x, int n, const long double p[]) {
	long double t;
	int i;

	if (n <= 0)
		return (0.0L);

	t = p[n - 1];
	for (i = n - 2; i >= 0; i--)
		t = p[i] + x * t;
	return (t);
}
+ */ + +/* + * __rem_pio2l(x,y) + * + * return the remainder of x rem pi/2 in y[0]+y[1] by calling __rem_pio2m + */ + +#ifndef FDLIBM_BASED +#include "libm.h" +extern int __rem_pio2m(double *, double *, int, int, int, const int *); +#else /* FDLIBM_BASED */ +#include "fdlibm.h" +#define __rem_pio2m __kernel_rem_pio2 +#endif /* FDLIBM_BASED */ + +#include "longdouble.h" + +extern const int _TBL_ipio2l_inf[]; + +static const long double + two24l = 16777216.0L, + pio4 = 0.7853981633974483096156608458198757210495L; + +int +__rem_pio2l(long double x, long double *y) { + long double z, w; + double t[5], v[5]; + int e0, i, nx, n, sign; + const int *ipio2; + + sign = signbitl(x); + z = fabsl(x); + if (z <= pio4) { + y[0] = x; + y[1] = 0; + return (0); + } + e0 = ilogbl(z) - 23; + z = scalbnl(z, -e0); + for (i = 0; i < 5; i++) { + t[i] = (double) ((int) (z)); + z = (z - (long double) t[i]) * two24l; + } + nx = 5; + while (t[nx - 1] == 0.0) + nx--; /* skip zero term */ + ipio2 = _TBL_ipio2l_inf; + n = __rem_pio2m(t, v, e0, nx, 3, (const int *) ipio2); + z = (long double) v[2] + (long double) v[1]; + w = (long double) v[0]; + y[0] = z + w; + y[1] = z - (y[0] - w); + if (sign == 1) { + y[0] = -y[0]; + y[1] = -y[1]; + return (-n); + } + return (n); +} diff --git a/usr/src/lib/libm/common/Q/__sincosl.c b/usr/src/lib/libm/common/Q/__sincosl.c new file mode 100644 index 0000000000..f1c72af2db --- /dev/null +++ b/usr/src/lib/libm/common/Q/__sincosl.c @@ -0,0 +1,145 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * long double __k_sincos(long double x, long double y, long double *c); + * kernel sincosl function on [-pi/4, pi/4], pi/4 ~ 0.785398164 + * Input x is assumed to be bounded by ~pi/4 in magnitude. + * Input y is the tail of x. + * return sinl(x) with *c = cosl(x) + * + * Table look up algorithm + * see __k_sinl and __k_cosl + */ + +#include "libm.h" + +extern const long double _TBL_sinl_hi[], _TBL_sinl_lo[], + _TBL_cosl_hi[], _TBL_cosl_lo[]; +static const long double + one = 1.0L, +/* + * 3 11 -122.32 + * |sin(x) - (x+pp1*x +...+ pp5*x )| <= 2 for |x|<1/64 + */ + pp1 = -1.666666666666666666666666666586782940810e-0001L, + pp2 = +8.333333333333333333333003723660929317540e-0003L, + pp3 = -1.984126984126984076045903483778337804470e-0004L, + pp4 = +2.755731922361906641319723106210900949413e-0006L, + pp5 = -2.505198398570947019093998469135012057673e-0008L, +/* + * |(sin(x) - (x+p1*x^3+...+p8*x^17)| + * |------------------------------- | <= 2^-116.17 for |x|<0.1953125 + * | x | + */ + p1 = -1.666666666666666666666666666666211262297e-0001L, + p2 = +8.333333333333333333333333301497876908541e-0003L, + p3 = -1.984126984126984126984041302881180621922e-0004L, + p4 = +2.755731922398589064100587351307269621093e-0006L, + p5 = -2.505210838544163129378906953765595393873e-0008L, + p6 = +1.605904383643244375050998243778534074273e-0010L, + p7 = -7.647162722800685516901456114270824622699e-0013L, + p8 = 
+2.810046428661902961725428841068844462603e-0015L, +/* + * 2 10 -123.84 + * |cos(x) - (1+qq1*x +...+ qq5*x )| <= 2 for |x|<=1/128 + */ + qq1 = -4.999999999999999999999999999999378373641e-0001L, + qq2 = +4.166666666666666666666665478399327703130e-0002L, + qq3 = -1.388888888888888888058211230618051613494e-0003L, + qq4 = +2.480158730156105377771585658905303111866e-0005L, + qq5 = -2.755728099762526325736488376695157008736e-0007L, +/* + * 2 16 -117.11 + * |cos(x) - (1+q1*x + ... + q8*x )| <= 2 for |x|<= 0.15625 + */ + q1 = -4.999999999999999999999999999999756416975e-0001L, + q2 = +4.166666666666666666666666664006066577258e-0002L, + q3 = -1.388888888888888888888877700363937169637e-0003L, + q4 = +2.480158730158730158494468463031814083559e-0005L, + q5 = -2.755731922398586276322819250356005542871e-0007L, + q6 = +2.087675698767424261441959760729854017855e-0009L, + q7 = -1.147074481239662089072452129010790774761e-0011L, + q8 = +4.777761647399651599730663422263531034782e-0014L; + +#define i0 0 + +long double +__k_sincosl(long double x, long double y, long double *c) { + long double a1, a2, t, t1, t2, z, w; + int *pt = (int *) &t, *px = (int *) &x; + int i, j, hx, ix; + + t = 1.0L; + hx = px[i0]; + ix = hx & 0x7fffffff; + if (ix < 0x3ffc4000) { + if (ix < 0x3fc60000) + if (((int) x) == 0) { + *c = one; + return (x); + } /* generate inexact */ + z = x * x; + + if (ix < 0x3ff80000) { + *c = one + z * (qq1 + z * (qq2 + z * (qq3 + + z * (qq4 + z * qq5)))); + t = z * (p1 + z * (p2 + z * (p3 + z * (p4 + + z * (p5 + z * p6))))); + } else { + *c = one + z * (q1 + z * (q2 + z * (q3 + z * (q4 + + z * (q5 + z * (q6 + z * (q7 + z * q8))))))); + t = z * (p1 + z * (p2 + z * (p3 + z * (p4 + z * (p5 + + z * (p6 + z * (p7 + z * p8))))))); + } + + t = y + x * t; + return (x + t); + } + j = (ix + 0x400) & 0x7ffff800; + i = (j - 0x3ffc4000) >> 11; + pt[i0] = j; + if (hx > 0) + x = y - (t - x); + else + x = (-y) - (t + x); + a1 = _TBL_sinl_hi[i]; + z = x * x; + t = z * (qq1 + z * (qq2 + z * (qq3 + 
z * (qq4 + z * qq5)))); + w = x * (one + z * (pp1 + z * (pp2 + z * (pp3 + z * (pp4 + z * pp5))))); + a2 = _TBL_cosl_hi[i]; + t2 = _TBL_cosl_lo[i] - (a1 * w - a2 * t); + *c = a2 + t2; + t1 = a2 * w + a1 * t; + t1 += _TBL_sinl_lo[i]; + if (hx < 0) + return (-a1 - t1); + else + return (a1 + t1); +} diff --git a/usr/src/lib/libm/common/Q/__sinl.c b/usr/src/lib/libm/common/Q/__sinl.c new file mode 100644 index 0000000000..de6df8c79c --- /dev/null +++ b/usr/src/lib/libm/common/Q/__sinl.c @@ -0,0 +1,143 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * long double __k_sinl(long double x, long double y); + * kernel sin function on [-pi/4, pi/4], pi/4 ~ 0.785398164 + * Input x is assumed to be bounded by ~pi/4 in magnitude. + * Input y is the tail of x. + * + * Table look up algorithm + * 1. by sin(-x) = -sin(x), need only to consider positive x + * 2. 
if x < 25/128 = [0x3ffc9000,0,0,0] = 0.1953125 , then + * if x < 2^-57 (hx < 0x3fc60000,0,0,0), return x (inexact if x != 0) + * z = x*x; + * if x <= 1/64 = 2**-6 + * sin(x) = x + (y+(x*z)*(p1 + z*p2)) + * else + * sin(x) = x + (y+(x*z)*(p1 + z*(p2 + z*(p3 + z*p4)))) + * 3. else + * ht = (hx + 0x400)&0x7ffff800 (round x to a break point t) + * lt = 0 + * i = (hy-0x3ffc4000)>>11; (i<=64) + * x' = (x - t)+y (|x'| ~<= 2^-7 + * By + * sin(t+x') + * = sin(t)cos(x')+cos(t)sin(x') + * = sin(t)(1+z*(qq1+z*qq2))+[cos(t)]*x*(1+z*(pp1+z*pp2)) + * = sin(t) + [sin(t)]*(z*(qq1+z*qq2))+ + * [cos(t)]*x*(1+z*(pp1+z*pp2)) + * + * Thus, + * let a= _TBL_sin_hi[i], b = _TBL_sin_lo[i], c= _TBL_cos_hi[i], + * x = (x-t)+y + * z = x*x; + * sin(t+x) = a+(b+ ((c*x)*(1+z*(pp1+z*pp2))+a*(z*(qq1+z*qq2))) + */ + +#include "libm.h" + +extern const long double _TBL_sinl_hi[], _TBL_sinl_lo[], _TBL_cosl_hi[]; +static const long double +one = 1.0L, +/* + * 3 11 -122.32 + * |sin(x) - (x+pp1*x +...+ pp5*x )| <= 2 for |x|<1/64 + */ + pp1 = -1.666666666666666666666666666586782940810e-0001L, + pp2 = +8.333333333333333333333003723660929317540e-0003L, + pp3 = -1.984126984126984076045903483778337804470e-0004L, + pp4 = +2.755731922361906641319723106210900949413e-0006L, + pp5 = -2.505198398570947019093998469135012057673e-0008L, +/* + * |(sin(x) - (x+p1*x^3+...+p8*x^17)| + * |------------------------------- | <= 2^-116.17 for |x|<0.1953125 + * | x | + */ + p1 = -1.666666666666666666666666666666211262297e-0001L, + p2 = +8.333333333333333333333333301497876908541e-0003L, + p3 = -1.984126984126984126984041302881180621922e-0004L, + p4 = +2.755731922398589064100587351307269621093e-0006L, + p5 = -2.505210838544163129378906953765595393873e-0008L, + p6 = +1.605904383643244375050998243778534074273e-0010L, + p7 = -7.647162722800685516901456114270824622699e-0013L, + p8 = +2.810046428661902961725428841068844462603e-0015L, +/* + * 2 10 -123.84 + * |cos(x) - (1+qq1*x +...+ qq5*x )| <= 2 for |x|<=1/128 + */ + qq1 = 
-4.999999999999999999999999999999378373641e-0001L, + qq2 = +4.166666666666666666666665478399327703130e-0002L, + qq3 = -1.388888888888888888058211230618051613494e-0003L, + qq4 = +2.480158730156105377771585658905303111866e-0005L, + qq5 = -2.755728099762526325736488376695157008736e-0007L; + +#define i0 0 + +long double +__k_sinl(long double x, long double y) { + long double a, t, z, w; + int *pt = (int *) &t, *px = (int *) &x; + int i, j, hx, ix; + + t = 1.0L; + hx = px[i0]; + ix = hx & 0x7fffffff; + if (ix < 0x3ffc9000) { + *(3 - i0 + (int *) &t) = -1; /* one-ulp */ + *(2 + (int *) &t) = -1; /* one-ulp */ + *(1 + (int *) &t) = -1; /* one-ulp */ + *(i0 + (int *) &t) -= 1; /* one-ulp */ + if (ix < 0x3fc60000) + if (((int) (x * t)) < 1) + return (x); /* inexact and underflow */ + z = x * x; + t = z * (p1 + z * (p2 + z * (p3 + z * (p4 + z * (p5 + + z * (p6 + z * (p7 + z * p8))))))); + t = y + x * t; + return (x + t); + } + j = (ix + 0x400) & 0x7ffff800; + i = (j - 0x3ffc4000) >> 11; + pt[i0] = j; + if (hx > 0) + x = y - (t - x); + else + x = (-y) - (t + x); + a = _TBL_sinl_hi[i]; + z = x * x; + t = z * (qq1 + z * (qq2 + z * (qq3 + z * (qq4 + z * qq5)))); + w = x * (one + z * (pp1 + z * (pp2 + z * (pp3 + z * (pp4 + z * pp5))))); + t = _TBL_cosl_hi[i] * w + a * t; + t += _TBL_sinl_lo[i]; + if (hx < 0) + return (-a - t); + else + return (a + t); +} diff --git a/usr/src/lib/libm/common/Q/__tanl.c b/usr/src/lib/libm/common/Q/__tanl.c new file mode 100644 index 0000000000..7c80162e2e --- /dev/null +++ b/usr/src/lib/libm/common/Q/__tanl.c @@ -0,0 +1,163 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * long double __k_tanl(long double x, long double y, int k); + * kernel tan/cotan function on [-pi/4, pi/4], pi/4 ~ 0.785398164 + * Input x is assumed to be bounded by ~pi/4 in magnitude. + * Input y is the tail of x. + * Input k selects the result -- tan if k=0; else -1/tan + * + * Table look up algorithm + * 1. by tan(-x) = -tan(x), need only to consider positive x + * 2. if x < 5/32 = [0x3ffc4000, 0] = 0.15625 , then + * if x < 2^-57 (hx < 0x3fc60000, 0), set w=x with inexact if x != 0 + * else + * z = x*x; + * w = x + (y+x*(z*(t1+z*(t2+...)))) (through t4 if x < 2^-12, else through t13) + * return (k == 0)? w: -1/w; + * 3. 
else + * ht = (hx + 0x400)&0x7ffff800 (round x to a break point t) + * lt = 0 + * i = (hy-0x3ffc4000)>>11; (i<=64) + * x' = (x - t)+y (|x'| ~<= 2^-7) + * By + * tan(t+x') + * = (tan(t)+tan(x'))/(1-tan(x')tan(t)) + * We have + * sin(x')+tan(t)*(tan(t)*sin(x')) + * = tan(t) + ------------------------------- for k=0 + * cos(x') - tan(t)*sin(x') + * + * cos(x') - tan(t)*sin(x') + * = - -------------------------------------- for k=1 + * tan(t) + tan(t)*(cos(x')-1) + sin(x') + * + * + * where tan(t) is from the table, + * sin(x') = x + pp1*x^3 + ...+ pp5*x^11 + * cos(x') = 1 + qq1*x^2 + ...+ qq5*x^10 + */ + +#include "libm.h" + +extern const long double _TBL_tanl_hi[], _TBL_tanl_lo[]; +static const long double + one = 1.0L, +/* + * 3 11 -122.32 + * |sin(x) - (x+pp1*x +...+ pp5*x )| <= 2 for |x|<1/64 + */ + pp1 = -1.666666666666666666666666666586782940810e-0001L, + pp2 = +8.333333333333333333333003723660929317540e-0003L, + pp3 = -1.984126984126984076045903483778337804470e-0004L, + pp4 = +2.755731922361906641319723106210900949413e-0006L, + pp5 = -2.505198398570947019093998469135012057673e-0008L, +/* + * 2 10 -123.84 + * |cos(x) - (1+qq1*x +...+ qq5*x )| <= 2 for |x|<=1/128 + */ + qq1 = -4.999999999999999999999999999999378373641e-0001L, + qq2 = +4.166666666666666666666665478399327703130e-0002L, + qq3 = -1.388888888888888888058211230618051613494e-0003L, + qq4 = +2.480158730156105377771585658905303111866e-0005L, + qq5 = -2.755728099762526325736488376695157008736e-0007L, +/* + * |tan(x) - (x+t1*x^3+...+t6*x^13)| + * |------------------------------ | <= 2^-59.73 for |x|<0.15625 + * | x | + */ + t1 = +3.333333333333333333333333333333423342490e-0001L, + t2 = +1.333333333333333333333333333093838744537e-0001L, + t3 = +5.396825396825396825396827906318682662250e-0002L, + t4 = +2.186948853615520282185576976994418486911e-0002L, + t5 = +8.863235529902196573354554519991152936246e-0003L, + t6 = +3.592128036572480064652191427543994878790e-0003L, + t7 = 
+1.455834387051455257856833807581901305474e-0003L, + t8 = +5.900274409318599857829983256201725587477e-0004L, + t9 = +2.391291152117265181501116961901122362937e-0004L, + t10 = +9.691533169382729742394024173194981882375e-0005L, + t11 = +3.927994733186415603228178184225780859951e-0005L, + t12 = +1.588300018848323824227640064883334101288e-0005L, + t13 = +6.916271223396808311166202285131722231723e-0006L; + +#define i0 0 + +long double +__k_tanl(long double x, long double y, int k) { + long double a, t, z, w = 0, s, c; + int *pt = (int *) &t, *px = (int *) &x; + int i, j, hx, ix; + + t = 1.0L; + hx = px[i0]; + ix = hx & 0x7fffffff; + if (ix < 0x3ffc4000) { + *(3 - i0 + (int *) &t) = 1; /* make t = one+ulp */ + if (ix < 0x3fc60000) { + if (((int) (x * t)) < 1) /* generate inexact */ + w = x; /* generate underflow if subnormal */ + } else { + z = x * x; + if (ix < 0x3ff30000) /* 2**-12 */ + t = z * (t1 + z * (t2 + z * (t3 + z * t4))); + else + t = z * (t1 + z * (t2 + z * (t3 + z * (t4 + + z * (t5 + z * (t6 + z * (t7 + z * (t8 + + z * (t9 + z * (t10 + z * (t11 + + z * (t12 + z * t13)))))))))))); + t = y + x * t; + w = x + t; + } + return (k == 0 ? w : -one / w); + } + j = (ix + 0x400) & 0x7ffff800; + i = (j - 0x3ffc4000) >> 11; + pt[i0] = j; + if (hx > 0) + x = y - (t - x); + else + x = (-y) - (t + x); + a = _TBL_tanl_hi[i]; + z = x * x; + /* cos(x)-1 */ + t = z * (qq1 + z * (qq2 + z * (qq3 + z * (qq4 + z * qq5)))); + /* sin(x) */ + s = x * (one + z * (pp1 + z * (pp2 + z * (pp3 + z * (pp4 + z * pp5))))); + if (k == 0) { + w = a * s; + t = _TBL_tanl_lo[i] + (s + a * w) / (one - (w - t)); + return (hx < 0 ? -a - t : a + t); + } else { + w = s + a * t; + c = w + _TBL_tanl_lo[i]; + z = one - (a * s - t); + return (hx >= 0 ? 
z / (-a - c) : z / (a + c)); + } +} diff --git a/usr/src/lib/libm/common/Q/acoshl.c b/usr/src/lib/libm/common/Q/acoshl.c new file mode 100644 index 0000000000..8f6d155fae --- /dev/null +++ b/usr/src/lib/libm/common/Q/acoshl.c @@ -0,0 +1,57 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#if defined(ELFOBJ) +#pragma weak acoshl = __acoshl +#endif + +#include "libm.h" + +static const long double + zero = 0.0L, + ln2 = 6.931471805599453094172321214581765680755e-0001L, + one = 1.0L, + big = 1.e+20L; + +long double +acoshl(long double x) { + long double t; + + if (isnanl(x)) + return (x + x); + else if (x > big) + return (logl(x) + ln2); + else if (x > one) { + t = sqrtl(x - one); + return (log1pl(t * (t + sqrtl(x + one)))); + } else if (x == one) + return (zero); + else + return ((x - x) / (x - x)); +} diff --git a/usr/src/lib/libm/common/Q/acosl.c b/usr/src/lib/libm/common/Q/acosl.c new file mode 100644 index 0000000000..b4d77ff4e3 --- /dev/null +++ b/usr/src/lib/libm/common/Q/acosl.c @@ -0,0 +1,67 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * arccosin function + * ________ + * / 1 - x + * acos(x) = 2*atan2( / -------- , 1 ) + * \/ 1 + x + * + * ________ + * / 1 - x + * = 2*atan ( / -------- ) for non-exceptional x. 
+ * \/ 1 + x + * + * Special cases: + * if x is NaN, return x itself; + * if |x|>1, return NaN with invalid signal. + */ + +#pragma weak acosl = __acosl + +#include "libm.h" + +static const long double zero = 0.0L, one = 1.0L; + +long double +acosl(long double x) { + if (isnanl(x)) + return (x + x); + else if (fabsl(x) < one) + x = atanl(sqrtl((one - x) / (one + x))); + else if (x == -one) + x = atan2l(one, zero); /* x <- PI */ + else if (x == one) + x = zero; + else { /* |x| > 1 create invalid signal */ + return (zero / zero); + } + return (x + x); +} diff --git a/usr/src/lib/libm/common/Q/asinhl.c b/usr/src/lib/libm/common/Q/asinhl.c new file mode 100644 index 0000000000..32f9d4b086 --- /dev/null +++ b/usr/src/lib/libm/common/Q/asinhl.c @@ -0,0 +1,60 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#if defined(ELFOBJ) +#pragma weak asinhl = __asinhl +#endif + +#include "libm.h" + +static const long double + ln2 = 6.931471805599453094172321214581765680755e-0001L, + one = 1.0L, + big = 1.0e+20L, + tiny = 1.0e-20L; + +long double +asinhl(long double x) { + long double t, w; + volatile long double dummy; + + w = fabsl(x); + if (isnanl(x)) + return (x + x); /* x is NaN */ + if (w < tiny) { +#ifndef lint + dummy = x + big; /* inexact if x != 0 */ +#endif + return (x); /* tiny x */ + } else if (w < big) { + t = one / w; + return (copysignl(log1pl(w + w / (t + sqrtl(one + t * t))), x)); + } else + return (copysignl(logl(w) + ln2, x)); +} diff --git a/usr/src/lib/libm/common/Q/asinl.c b/usr/src/lib/libm/common/Q/asinl.c new file mode 100644 index 0000000000..8594a2195b --- /dev/null +++ b/usr/src/lib/libm/common/Q/asinl.c @@ -0,0 +1,78 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#if defined(ELFOBJ) +#pragma weak asinl = __asinl +#endif + +/* + * asinl(x) = atan2l(x,sqrt(1-x*x)); + * + * For better accuracy, 1-x*x is computed as follows + * 1-x*x if x < 0.5, + * 2*(1-|x|)-(1-|x|)*(1-|x|) if x >= 0.5. + * + * Special cases: + * if x is NaN, return x itself; + * if |x|>1, return NaN with invalid signal. + */ + +#include "libm.h" + +static const long double zero = 0.0L, small = 1.0e-20L, half = 0.5L, one = 1.0L; +#ifndef lint +static const long double big = 1.0e+20L; +#endif + +long double +asinl(long double x) { + long double t, w; + volatile long double dummy; + + w = fabsl(x); + if (isnanl(x)) + return (x + x); + else if (w <= half) { + if (w < small) { +#ifndef lint + dummy = w + big; + /* inexact if w != 0 */ +#endif + return (x); + } else + return (atanl(x / sqrtl(one - x * x))); + } else if (w < one) { + t = one - w; + w = t + t; + return (atanl(x / sqrtl(w - t * t))); + } else if (w == one) + return (atan2l(x, zero)); /* asin(+-1) = +- PI/2 */ + else + return (zero / zero); /* |x| > 1: invalid */ +} diff --git a/usr/src/lib/libm/common/Q/atan2l.c b/usr/src/lib/libm/common/Q/atan2l.c new file mode 100644 index 0000000000..d7a538cd2b --- /dev/null +++ b/usr/src/lib/libm/common/Q/atan2l.c @@ -0,0 +1,160 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * atan2l(y,x) + * + * Method : + * 1. Reduce y to positive by atan2(y,x)=-atan2(-y,x). + * 2. Reduce x to positive by (if x and y are unexceptional): + * ARG (x+iy) = arctan(y/x) ... if x > 0, + * ARG (x+iy) = pi - arctan[y/(-x)] ... if x < 0, + * + * Special cases: + * + * ATAN2((anything), NaN ) is NaN; + * ATAN2(NAN , (anything) ) is NaN; + * ATAN2(+-0, +(anything but NaN)) is +-0 ; + * ATAN2(+-0, -(anything but NaN)) is +-PI ; + * ATAN2(+-(anything but 0 and NaN), 0) is +-PI/2; + * ATAN2(+-(anything but INF and NaN), +INF) is +-0 ; + * ATAN2(+-(anything but INF and NaN), -INF) is +-PI; + * ATAN2(+-INF,+INF ) is +-PI/4 ; + * ATAN2(+-INF,-INF ) is +-3PI/4; + * ATAN2(+-INF, (anything but,0,NaN, and INF)) is +-PI/2; + * + * Constants: + * The hexadecimal values are the intended ones for the following constants. + * The decimal values may be used, provided that the compiler will convert + * from decimal to binary accurately enough to produce the hexadecimal values + * shown. 
+ */ + +#pragma weak atan2l = __atan2l + +#include "libm.h" +#include "longdouble.h" + +static const long double + zero = 0.0L, + tiny = 1.0e-40L, + one = 1.0L, + half = 0.5L, + PI3o4 = 2.356194490192344928846982537459627163148L, + PIo4 = 0.785398163397448309615660845819875721049L, + PIo2 = 1.570796326794896619231321691639751442099L, + PI = 3.141592653589793238462643383279502884197L, + PI_lo = 8.671810130123781024797044026043351968762e-35L; + +long double +atan2l(long double y, long double x) { + long double t, z; + int k, m, signy, signx; + + if (x != x || y != y) + return (x + y); /* return NaN if x or y is NAN */ + signy = signbitl(y); + signx = signbitl(x); + if (x == one) + return (atanl(y)); + m = signy + signx + signx; + + /* when y = 0 */ + if (y == zero) + switch (m) { + case 0: + return (y); /* atan(+0,+anything) */ + case 1: + return (y); /* atan(-0,+anything) */ + case 2: + return (PI + tiny); /* atan(+0,-anything) */ + case 3: + return (-PI - tiny); /* atan(-0,-anything) */ + } + + /* when x = 0 */ + if (x == zero) + return (signy == 1 ? -PIo2 - tiny : PIo2 + tiny); + + /* when x is INF */ + if (!finitel(x)) { + if (!finitel(y)) { + switch (m) { + case 0: + return (PIo4 + tiny); /* atan(+INF,+INF) */ + case 1: + return (-PIo4 - tiny); /* atan(-INF,+INF) */ + case 2: + return (PI3o4 + tiny); /* atan(+INF,-INF) */ + case 3: + return (-PI3o4 - tiny); /* atan(-INF,-INF) */ + } + } else { + switch (m) { + case 0: + return (zero); /* atan(+...,+INF) */ + case 1: + return (-zero); /* atan(-...,+INF) */ + case 2: + return (PI + tiny); /* atan(+...,-INF) */ + case 3: + return (-PI - tiny); /* atan(-...,-INF) */ + } + } + } + /* when y is INF */ + if (!finitel(y)) + return (signy == 1 ? 
-PIo2 - tiny : PIo2 + tiny); + + /* compute y/x */ + x = fabsl(x); + y = fabsl(y); + t = PI_lo; + k = (ilogbl(y) - ilogbl(x)); + + if (k > 120) + z = PIo2 + half * t; + else if (m > 1 && k < -120) + z = zero; + else + z = atanl(y / x); + + switch (m) { + case 0: + return (z); /* atan(+,+) */ + case 1: + return (-z); /* atan(-,+) */ + case 2: + return (PI - (z - t)); /* atan(+,-) */ + case 3: + return ((z - t) - PI); /* atan(-,-) */ + } + /* NOTREACHED */ + return 0.0L; +} diff --git a/usr/src/lib/libm/common/Q/atan2pil.c b/usr/src/lib/libm/common/Q/atan2pil.c new file mode 100644 index 0000000000..28b31e2ed9 --- /dev/null +++ b/usr/src/lib/libm/common/Q/atan2pil.c @@ -0,0 +1,43 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma weak atan2pil = __atan2pil + +#include "libm.h" + +/* + * atan2pil(y,x) = atan2l(y, x) / pi + */ + +static const long double invpi = 3.183098861837906715377675267450287240689e-1L; + +long double +atan2pil(long double y, long double x) { + return (atan2l(y, x) * invpi); +} diff --git a/usr/src/lib/libm/common/Q/atanhl.c b/usr/src/lib/libm/common/Q/atanhl.c new file mode 100644 index 0000000000..f7462ca1ef --- /dev/null +++ b/usr/src/lib/libm/common/Q/atanhl.c @@ -0,0 +1,61 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak atanhl = __atanhl + +#include "libm.h" + +/* + * 1 2x x + * atanhl(x) = --- * LOG(1 + -------) = 0.5 * log1pl(2 * --------) + * 2 1 - x 1 - x + * Note: to guarantee atanhl(-x) = -atanhl(x), we use + * sign(x) |x| + * atanhl(x) = ------- * log1pl(2*-------). + * 2 1 - |x| + * + * Special cases: + * atanhl(x) is NaN if |x| > 1 with signal; + * atanhl(NaN) is that NaN with no signal; + * atanhl(+-1) is +-INF with signal. 
+ * + */ + +static const long double zero = 0.0L, half = 0.5L, one = 1.0L; + +long double +atanhl(long double x) { + long double t; + + t = fabsl(x); + if (t == one) + return (x / zero); + t = t / (one - t); + return (copysignl(half, x) * log1pl(t + t)); +} diff --git a/usr/src/lib/libm/common/Q/atanl.c b/usr/src/lib/libm/common/Q/atanl.c new file mode 100644 index 0000000000..10b6b71daf --- /dev/null +++ b/usr/src/lib/libm/common/Q/atanl.c @@ -0,0 +1,209 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak atanl = __atanl + +/* + * atanl(x) + * Table look-up algorithm + * By K.C. Ng, March 9, 1989 + * + * Algorithm. + * + * The algorithm is based on atan(x)=atan(y)+atan((x-y)/(1+x*y)). 
+ * We use poly1(x) to approximate atan(x) for x in [0,1/8] with + * error (relative) + * |(atan(x)-poly1(x))/x|<= 2^-115.94 long double + * |(atan(x)-poly1(x))/x|<= 2^-58.85 double + * |(atan(x)-poly1(x))/x|<= 2^-25.53 float + * and use poly2(x) to approximate atan(x) for x in [0,1/65] with + * error (absolute) + * |atan(x)-poly2(x)|<= 2^-122.15 long double + * |atan(x)-poly2(x)|<= 2^-64.79 double + * |atan(x)-poly2(x)|<= 2^-35.36 float + * Here poly1 and poly2 are odd polynomials with the following form: + * x + x^3*(a1+x^2*(a2+...)) + * + * (0). Purge off Inf and NaN and 0 + * (1). Reduce x to positive by atan(x) = -atan(-x). + * (2). For x <= 1/8, use + * (2.1) if x < 2^(-prec/2-2), atan(x) = x with inexact + * (2.2) Otherwise + * atan(x) = poly1(x) + * (3). For x >= 8 then + * (3.1) if x >= 2^(prec+2), atan(x) = atan(inf) - pio2lo + * (3.2) if x >= 2^(prec/3+2), atan(x) = atan(inf) - 1/x + * (3.3) if x > 65, atan(x) = atan(inf) - poly2(1/x) + * (3.4) Otherwise, atan(x) = atan(inf) - poly1(1/x) + * + * (4). Now x is in (0.125, 8) + * Find y that matches x to 4.5 bits after the binary point (easy). + * If iy is the high word of y, then + * single : j = (iy - 0x3e000000) >> 19 + * double : j = (iy - 0x3fc00000) >> 16 + * quad : j = (iy - 0x3ffc0000) >> 12 + * + * Let s = (x-y)/(1+x*y). Then + * atan(x) = atan(y) + poly2(s) + * = _TBL_atanl_hi[j] + (_TBL_atanl_lo[j] + poly2(s) ) + * + * Note. |s| <= 1.5384615385e-02 = 1/65. 
Maximum occurs at x = 1.03125
 *
 */

#include "libm.h"

/* atan(y) split into high and low parts, indexed by j (see step (4) above) */
extern const long double _TBL_atanl_hi[], _TBL_atanl_lo[];
/*
 * p1..p13 are the coefficients used for poly1 and q1..q7 those used for
 * poly2 in the code below; pio2hi + pio2lo is the two-part value the
 * large-|x| branch subtracts from.
 */
static const long double
	one = 1.0L,
	p1 = -3.333333333333333333333333333331344526118e-0001L,
	p2 = 1.999999999999999999999999989931277668570e-0001L,
	p3 = -1.428571428571428571428553606221309530901e-0001L,
	p4 = 1.111111111111111111095219842737139747418e-0001L,
	p5 = -9.090909090909090825503603835248061123323e-0002L,
	p6 = 7.692307692307664052130743214708925258904e-0002L,
	p7 = -6.666666666660213835187713228363717388266e-0002L,
	p8 = 5.882352940152439399097283359608661949504e-0002L,
	p9 = -5.263157780447533993046614040509529668487e-0002L,
	p10 = 4.761895816878184933175855990886788439447e-0002L,
	p11 = -4.347345005832274022681019724553538135922e-0002L,
	p12 = 3.983031914579635037502589204647752042736e-0002L,
	p13 = -3.348206704469830575196657749413894897554e-0002L,
	q1 = -3.333333333333333333333333333195273650186e-0001L,
	q2 = 1.999999999999999999999988146114392615808e-0001L,
	q3 = -1.428571428571428571057630319435467111253e-0001L,
	q4 = 1.111111111111105373263048208994541544098e-0001L,
	q5 = -9.090909090421834209167373258681021816441e-0002L,
	q6 = 7.692305377813692706850171767150701644539e-0002L,
	q7 = -6.660896644393861499914731734305717901330e-0002L,
	pio2hi = 1.570796326794896619231321691639751398740e+0000L,
	pio2lo = 4.335905065061890512398522013021675984381e-0035L;

/*
 * Indices of the 32-bit words of a long double as accessed through an
 * int pointer: word i0 is the one whose top bit is tested as the sign
 * and whose value is compared against exponent thresholds; i1 is the
 * word at the opposite end.
 * NOTE(review): this presumes a 128-bit long double stored most
 * significant word first -- confirm for the target platform.
 */
#define i0 0
#define i1 3

/*
 * atanl(x): arc tangent of a long double, following the algorithm
 * described in the block comment above.
 *
 * The sign is stripped from the high word up front; each branch then
 * works on |x| (or on the sign-adjusted difference) and re-applies the
 * sign when forming the result.
 */
long double
atanl(long double x) {
	long double y, z, r, p, s;
	int *px = (int *) &x, *py = (int *) &y;
	int ix, iy, sign, j;

	ix = px[i0];
	sign = ix & 0x80000000;
	ix ^= sign;		/* ix = high word of |x| */

	/* for |x| < 1/8 */
	if (ix < 0x3ffc0000) {
		if (ix < 0x3feb0000) {	/* when |x| < 2**(-prec/6-2) */
			if (ix < 0x3fc50000) {	/* if |x| < 2**(-prec/2-2) */
				s = one;
				*(3 - i0 + (int *) &s) = -1;	/* s = 1-ulp */
				*(1 + (int *) &s) = -1;
				*(2 + (int *) &s) = -1;
				*(i0 + (int *) &s) -= 1;
				if ((int) (s * x) < 1)
					return (x);	/* raise inexact */
			}
			z = x * x;
			if (ix < 0x3fe20000) {	/* if |x| < 2**(-prec/4-1) */
				return (x + (x * z) * p1);
			} else {	/* if |x| < 2**(-prec/6-2) */
				return (x + (x * z) * (p1 + z * p2));
			}
		}
		/* full poly1 on the signed argument */
		z = x * x;
		return (x + (x * z) * (p1 + z * (p2 + z * (p3 + z * (p4 +
		    z * (p5 + z * (p6 + z * (p7 + z * (p8 + z * (p9 +
		    z * (p10 + z * (p11 + z * (p12 + z * p13)))))))))))));
	}

	/* for |x| >= 8.0 */
	if (ix >= 0x40020000) {
		px[i0] = ix;	/* x = |x| */
		if (ix < 0x40050400) {	/* x < 65 */
			r = one / x;
			z = r * r;
			/*
			 * poly1
			 */
			y = r * (one + z * (p1 + z * (p2 + z * (p3 +
			    z * (p4 + z * (p5 + z * (p6 + z * (p7 +
			    z * (p8 + z * (p9 + z * (p10 + z * (p11 +
			    z * (p12 + z * p13)))))))))))));
			y -= pio2lo;
		} else if (ix < 0x40260000) {	/* x < 2**(prec/3+2) */
			r = one / x;
			z = r * r;
			/*
			 * poly2
			 */
			y = r * (one + z * (q1 + z * (q2 + z * (q3 + z * (q4 +
			    z * (q5 + z * (q6 + z * q7)))))));
			y -= pio2lo;
		} else if (ix < 0x40720000) {	/* x < 2**(prec+2) */
			y = one / x - pio2lo;
		} else if (ix < 0x7fff0000) {	/* x < inf */
			y = -pio2lo;
		} else {	/* x is inf or NaN */
			/* any bit set besides the exponent field: NaN */
			if (((ix - 0x7fff0000) | px[1] | px[2] | px[i1]) != 0)
				return (x - x);
			y = -pio2lo;
		}

		/* result is +-(pio2hi - y), sign taken from the input */
		if (sign == 0)
			return (pio2hi - y);
		else
			return (y - pio2hi);
	}

	/* now x is between 1/8 and 8 */
	px[i0] = ix;	/* x = |x| */
	/* y = |x| rounded in its high word, all lower words cleared */
	iy = (ix + 0x00000800) & 0x7ffff000;
	py[i0] = iy;
	py[1] = py[2] = py[i1] = 0;
	j = (iy - 0x3ffc0000) >> 12;	/* table index for y */

	/* s carries the sign of the original argument */
	if (sign == 0)
		s = (x - y) / (one + x * y);
	else
		s = (y - x) / (one + x * y);
	z = s * s;
	/* a shorter polynomial is used when the high words of x and y agree */
	if (ix == iy)
		p = s * (one + z * (q1 + z * (q2 + z * (q3 + z * q4))));
	else
		p = s * (one + z * (q1 + z * (q2 + z * (q3 + z * (q4 +
		    z * (q5 + z * (q6 + z * q7)))))));
	/* add the low table part first, then the high part */
	if (sign == 0) {
		r = p + _TBL_atanl_lo[j];
		return (r + _TBL_atanl_hi[j]);
	} else {
		r = p - _TBL_atanl_lo[j];
		return (r - _TBL_atanl_hi[j]);
	}
}
diff --git a/usr/src/lib/libm/common/Q/cbrtl.c b/usr/src/lib/libm/common/Q/cbrtl.c new file mode 100644 index 
0000000000..274fa24430 --- /dev/null +++ b/usr/src/lib/libm/common/Q/cbrtl.c @@ -0,0 +1,70 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak cbrtl = __cbrtl + +#include "libm.h" +#include "longdouble.h" + +#define n0 0 + +long double +cbrtl(long double x) { + long double s, t, r, w, y; + double dx, dy; + int *py = (int *) &dy; + int n, m, m3, sx; + + if (!finitel(x)) + return (x + x); + if (iszerol(x)) + return (x); + sx = signbitl(x); + x = fabsl(x); + n = ilogbl(x); + m = n / 3; + m3 = m + m + m; + y = scalbnl(x, -m3); + dx = (double) y; + dy = cbrt(dx); + py[1 - n0] += 2; + if (py[1 - n0] == 0) + py[n0] += 1; + + /* one step newton iteration to 113 bits with error < 0.667ulps */ + t = (long double) dy; + t = scalbnl(t, m); + s = t * t; + r = x / s; + w = t + t; + r = (r - t) / (w + r); + t += t * r; + + return (sx == 0 ? 
t : -t); +} diff --git a/usr/src/lib/libm/common/Q/copysignl.c b/usr/src/lib/libm/common/Q/copysignl.c new file mode 100644 index 0000000000..97c758d303 --- /dev/null +++ b/usr/src/lib/libm/common/Q/copysignl.c @@ -0,0 +1,43 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#if defined(ELFOBJ) +#pragma weak copysignl = __copysignl +#endif + +#include "libm.h" + +long double +copysignl(long double x, long double y) { + int *px = (int *) &x; + int *py = (int *) &y; + + px[HIXWORD] = (px[HIXWORD] & ~XSGNMSK) | (py[HIXWORD] & XSGNMSK); + return (x); +} diff --git a/usr/src/lib/libm/common/Q/coshl.c b/usr/src/lib/libm/common/Q/coshl.c new file mode 100644 index 0000000000..5187f9dbcf --- /dev/null +++ b/usr/src/lib/libm/common/Q/coshl.c @@ -0,0 +1,107 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak coshl = __coshl + +#include "libm.h" +#include "longdouble.h" + + +/* + * coshl(X) + * RETURN THE HYPERBOLIC COSINE OF X + * + * Method : + * 1. Replace x by |x| (coshl(x) = coshl(-x)). + * 2. + * [ expl(x) - 1 ]^2 + * 0 <= x <= 0.3465 : coshl(x) := 1 + ------------------- + * 2*expl(x) + * + * expl(x) + 1/expl(x) + * 0.3465 <= x <= thr2 : coshl(x) := ------------------- + * 2 + * thr2 <= x <= lnovft : coshl(x) := expl(x)/2 + * lnovft <= x < INF : coshl(x) := scalbnl(expl(x-MEP1*ln2),ME) + * + * here + * thr1 a number that is near one half of ln2. + * thr2 a number such that + * expl(thr2)+expl(-thr2)=expl(thr2) + * lnovft: logarithm of the overflow threshold + * = MEP1*ln2 chopped to machine precision. + * ME maximum exponent + * MEP1 maximum exponent plus 1 + * + * Special cases: + * coshl(x) is |x| if x is +INF, -INF, or NaN. + * only coshl(0)=1 is exact for finite x. 
+ */ + +#define ME 16383 +#define MEP1 16384 +#define LNOVFT 1.135652340629414394949193107797076342845e+4L + /* last 32 bits of LN2HI is zero */ +#define LN2HI 6.931471805599453094172319547495844850203e-0001L +#define LN2LO 1.667085920830552208890449330400379754169e-0025L +#define THR1 0.3465L +#define THR2 45.L + +static const long double + half = 0.5L, + tinyl = 7.5e-37L, + one = 1.0L, + ln2hi = LN2HI, + ln2lo = LN2LO, + lnovftL = LNOVFT, + thr1 = THR1, + thr2 = THR2; + +long double +coshl(long double x) { + long double t, w; + + w = fabsl(x); + if (!finitel(w)) + return (w + w); /* x is INF or NaN */ + if (w < thr1) { + t = w < tinyl ? w : expm1l(w); + w = one + t; + if (w != one) + w = one + (t * t) / (w + w); + return (w); + } else if (w < thr2) { + t = expl(w); + return (half * (t + one / t)); + } else if (w <= lnovftL) + return (half * expl(w)); + else { + return (scalbnl(expl((w - MEP1 * ln2hi) - MEP1 * ln2lo), ME)); + } +} diff --git a/usr/src/lib/libm/common/Q/cosl.c b/usr/src/lib/libm/common/Q/cosl.c new file mode 100644 index 0000000000..bea6b35b37 --- /dev/null +++ b/usr/src/lib/libm/common/Q/cosl.c @@ -0,0 +1,94 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * cosl(x) + * Table look-up algorithm by K.C. Ng, November, 1989. + * + * kernel function: + * __k_sinl ... sin function on [-pi/4,pi/4] + * __k_cosl ... cos function on [-pi/4,pi/4] + * __rem_pio2l ... argument reduction routine + * + * Method. + * Let S and C denote the sin and cos respectively on [-PI/4, +PI/4]. + * 1. Assume the argument x is reduced to y1+y2 = x-k*pi/2 in + * [-pi/2 , +pi/2], and let n = k mod 4. + * 2. Let S=S(y1+y2), C=C(y1+y2). Depending on n, we have + * + * n sin(x) cos(x) tan(x) + * ---------------------------------------------------------- + * 0 S C S/C + * 1 C -S -C/S + * 2 -S -C S/C + * 3 -C S -C/S + * ---------------------------------------------------------- + * + * Special cases: + * Let trig be any of sin, cos, or tan. + * trig(+-INF) is NaN, with signals; + * trig(NaN) is that NaN; + * + * Accuracy: + * computer TRIG(x) returns trig(x) nearly rounded. 
+ */ + +#pragma weak cosl = __cosl + +#include "libm.h" +#include "longdouble.h" + +long double +cosl(long double x) { + long double y[2], z = 0.0L; + int n, ix; + + ix = *(int *) &x; /* High word of x */ + + ix &= 0x7fffffff; + if (ix <= 0x3ffe9220) /* |x| ~< pi/4 */ + return (__k_cosl(x, z)); + else if (ix >= 0x7fff0000) /* trig(Inf or NaN) is NaN */ + return (x - x); + else { /* argument reduction needed */ + n = __rem_pio2l(x, y); + switch (n & 3) { + case 0: + return (__k_cosl(y[0], y[1])); + case 1: + return (-__k_sinl(y[0], y[1])); + case 2: + return (-__k_cosl(y[0], y[1])); + case 3: + return (__k_sinl(y[0], y[1])); + } + } + /* NOTREACHED */ + return 0.0L; +} diff --git a/usr/src/lib/libm/common/Q/erfl.c b/usr/src/lib/libm/common/Q/erfl.c new file mode 100644 index 0000000000..4ec7b83eb5 --- /dev/null +++ b/usr/src/lib/libm/common/Q/erfl.c @@ -0,0 +1,366 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * long double function erf,erfc (long double x) + * K.C. Ng, September, 1989. 
+ * x + * 2 |\ + * erf(x) = --------- | exp(-t*t)dt + * sqrt(pi) \| + * 0 + * + * erfc(x) = 1-erf(x) + * + * method: + * Since erf(-x) = -erf(x), we assume x>=0. + * For x near 0, we have the expansion + * + * erf(x) = (2/sqrt(pi))*(x - x^3/3 + x^5/10 - x^7/42 + ....). + * + * Since 2/sqrt(pi) = 1.128379167095512573896158903121545171688, + * we use x + x*P(x^2) to approximate erf(x). This formula will + * guarantee the error less than one ulp where x is not too far + * away from 0. We note that erf(x)=x at x = 0.6174...... After + * some experiment, we choose the following approximation on + * interval [0,0.84375]. + * + * For x in [0,0.84375] + * 2 2 4 40 + * P = P(x ) = (p0 + p1 * x + p2 * x + ... + p20 * x ) + * + * erf(x) = x + x*P + * erfc(x) = 1 - erf(x) if x<=0.25 + * = 0.5 + ((0.5-x)-x*P) if x in [0.25,0.84375] + * precision: |P(x^2)-(erf(x)-x)/x| <= 2**-122.50 + * + * For x in [0.84375,1.25], let s = x - 1, and + * c = 0.84506291151 rounded to single (24 bits) + * erf(x) = c + P1(s)/Q1(s) + * erfc(x) = (1-c) - P1(s)/Q1(s) + * precision: |P1/Q1 - (erf(x)-c)| <= 2**-118.41 + * + * + * For x in [1.25,1.75], let s = x - 1.5, and + * c = 0.95478588343 rounded to single (24 bits) + * erf(x) = c + P2(s)/Q2(s) + * erfc(x) = (1-c) - P2(s)/Q2(s) + * precision: |P2/Q2 - (erf(x)-c)| <= 2**-123.83 + * + * + * For x in [1.75,16/3] + * erfc(x) = exp(-x*x)*(1/x)*R1(1/x)/S1(1/x) + * erf(x) = 1 - erfc(x) + * precision: absolute error of R1/S1 is bounded by 2**-124.03 + * + * For x in [16/3,107] + * erfc(x) = exp(-x*x)*(1/x)*R2(1/x)/S2(1/x) + * erf(x) = 1 - erfc(x) (if x>=9 simply return erf(x)=1 with inexact) + * precision: absolute error of R2/S2 is bounded by 2**-120.07 + * + * Else if inf > x >= 107 + * erf(x) = 1 with inexact + * erfc(x) = 0 with underflow + * + * Special case: + * erf(inf) = 1 + * erfc(inf) = 0 + */ + +#pragma weak erfl = __erfl +#pragma weak erfcl = __erfcl + +#include "libm.h" +#include "longdouble.h" + +static const long double + tiny = 1e-40L, + 
nearunfl = 1e-4000L, + half = 0.5L, + one = 1.0L, + onehalf = 1.5L, + L16_3 = 16.0L/3.0L; +/* + * Coefficients for even polynomial P for erf(x)=x+x*P(x^2) on [0,0.84375] + */ +static const long double P[] = { /* 21 coeffs */ + 1.283791670955125738961589031215451715556e-0001L, + -3.761263890318375246320529677071815594603e-0001L, + 1.128379167095512573896158903121205899135e-0001L, + -2.686617064513125175943235483344625046092e-0002L, + 5.223977625442187842111846652980454568389e-0003L, + -8.548327023450852832546626271083862724358e-0004L, + 1.205533298178966425102164715902231976672e-0004L, + -1.492565035840625097674944905027897838996e-0005L, + 1.646211436588924733604648849172936692024e-0006L, + -1.636584469123491976815834704799733514987e-0007L, + 1.480719281587897445302529007144770739305e-0008L, + -1.229055530170782843046467986464722047175e-0009L, + 9.422759064320307357553954945760654341633e-0011L, + -6.711366846653439036162105104991433380926e-0012L, + 4.463224090341893165100275380693843116240e-0013L, + -2.783513452582658245422635662559779162312e-0014L, + 1.634227412586960195251346878863754661546e-0015L, + -9.060782672889577722765711455623117802795e-0017L, + 4.741341801266246873412159213893613602354e-0018L, + -2.272417596497826188374846636534317381203e-0019L, + 8.069088733716068462496835658928566920933e-0021L, +}; + +/* + * Rational erf(x) = ((float)0.84506291151) + P1(x-1)/Q1(x-1) on [0.84375,1.25] + */ +static const long double C1 = (long double)((float)0.84506291151); +static const long double P1[] = { /* 12 top coeffs */ + -2.362118560752659955654364917390741930316e-0003L, + 4.129623379624420034078926610650759979146e-0001L, + -3.973857505403547283109417923182669976904e-0002L, + 4.357503184084022439763567513078036755183e-0002L, + 8.015593623388421371247676683754171456950e-0002L, + -1.034459310403352486685467221776778474602e-0002L, + 5.671850295381046679675355719017720821383e-0003L, + 1.219262563232763998351452194968781174318e-0003L, + 
5.390833481581033423020320734201065475098e-0004L, + -1.978853912815115495053119023517805528300e-0004L, + 6.184234513953600118335017885706420552487e-0005L, + -5.331802711697810861017518515816271808286e-0006L, +}; +static const long double Q1[] = { /* 12 bottom coeffs with leading 1.0 hidden */ + 9.081506296064882195280178373107623196655e-0001L, + 6.821049531968204097604392183650687642520e-0001L, + 4.067869178233539502315055970743271822838e-0001L, + 1.702332233546316765818144723063881095577e-0001L, + 7.498098377690553934266423088708614219356e-0002L, + 2.050154396918178697056927234366372760310e-0002L, + 7.012988534031999899054782333851905939379e-0003L, + 1.149904787014400354649843451234570731076e-0003L, + 3.185620255011299476196039491205159718620e-0004L, + 1.273405072153008775426376193374105840517e-0005L, + 4.753866999959432971956781228148402971454e-0006L, + -1.002287602111660026053981728549540200683e-0006L, +}; +/* + * Rational erf(x) = ((float)0.95478588343) + P2(x-1.5)/Q2(x-1.5) + * on [1.25,1.75] + */ +static const long double C2 = (long double)((float)0.95478588343); +static const long double P2[] = { /* 12 top coeffs */ + 1.131926304864446730135126164594785863512e-0002L, + 1.273617996967754151544330055186210322832e-0001L, + -8.169980734667512519897816907190281143423e-0002L, + 9.512267486090321197833634271787944271746e-0002L, + -2.394251569804872160005274999735914368170e-0002L, + 1.108768660227528667525252333184520222905e-0002L, + 3.527435492933902414662043314373277494221e-0004L, + 4.946116273341953463584319006669474625971e-0004L, + -4.289851942513144714600285769022420962418e-0005L, + 8.304719841341952705874781636002085119978e-0005L, + -1.040460226177309338781902252282849903189e-0005L, + 2.122913331584921470381327583672044434087e-0006L, +}; +static const long double Q2[] = { /* 13 bottom coeffs with leading 1.0 hidden */ + 7.448815737306992749168727691042003832150e-0001L, + 7.161813850236008294484744312430122188043e-0001L, + 
3.603134756584225766144922727405641236121e-0001L, + 1.955811609133766478080550795194535852653e-0001L, + 7.253059963716225972479693813787810711233e-0002L, + 2.752391253757421424212770221541238324978e-0002L, + 7.677654852085240257439050673446546828005e-0003L, + 2.141102244555509687346497060326630061069e-0003L, + 4.342123013830957093949563339130674364271e-0004L, + 8.664587895570043348530991997272212150316e-0005L, + 1.109201582511752087060167429397033701988e-0005L, + 1.357834375781831062713347000030984364311e-0006L, + 4.957746280594384997273090385060680016451e-0008L, +}; +/* + * erfc(x) = exp(-x*x)/x * R1(1/x)/S1(1/x) on [1.75, 16/3] + */ +static const long double R1[] = { /* 14 top coeffs */ + 4.630195122654315016370705767621550602948e+0006L, + 1.257949521746494830700654204488675713628e+0007L, + 1.704153822720260272814743497376181625707e+0007L, + 1.502600568706061872381577539537315739943e+0007L, + 9.543710793431995284827024445387333922861e+0006L, + 4.589344808584091011652238164935949522427e+0006L, + 1.714660662941745791190907071920671844289e+0006L, + 5.034802147768798894307672256192466283867e+0005L, + 1.162286400443554670553152110447126850725e+0005L, + 2.086643834548901681362757308058660399137e+0004L, + 2.839793161868140305907004392890348777338e+0003L, + 2.786687241658423601778258694498655680778e+0002L, + 1.779177837102695602425897452623985786464e+0001L, + 5.641895835477470769043614623819144434731e-0001L, +}; +static const long double S1[] = { /* 15 bottom coeffs with leading 1.0 hidden */ + 4.630195122654331529595606896287596843110e+0006L, + 1.780411093345512024324781084220509055058e+0007L, + 3.250113097051800703707108623715776848283e+0007L, + 3.737857099176755050912193712123489115755e+0007L, + 3.029787497516578821459174055870781168593e+0007L, + 1.833850619965384765005769632103205777227e+0007L, + 8.562719999736915722210391222639186586498e+0006L, + 3.139684562074658971315545539760008136973e+0006L, + 9.106421313731384880027703627454366930945e+0005L, + 
2.085108342384266508613267136003194920001e+0005L, + 3.723126272693120340730491416449539290600e+0004L, + 5.049169878567344046145695360784436929802e+0003L, + 4.944274532748010767670150730035392093899e+0002L, + 3.153510608818213929982940249162268971412e+0001L, + 1.0e00L, +}; + +/* + * erfc(x) = exp(-x*x)/x * R2(1/x)/S2(1/x) on [16/3, 107] + */ +static const long double R2[] = { /* 15 top coeffs in reverse order!!*/ + 2.447288012254302966796326587537136931669e+0005L, + 8.768592567189861896653369912716538739016e+0005L, + 1.552293152581780065761497908005779524953e+0006L, + 1.792075924835942935864231657504259926729e+0006L, + 1.504001463155897344947500222052694835875e+0006L, + 9.699485556326891411801230186016013019935e+0005L, + 4.961449933661807969863435013364796037700e+0005L, + 2.048726544693474028061176764716228273791e+0005L, + 6.891532964330949722479061090551896886635e+0004L, + 1.888014709010307507771964047905823237985e+0004L, + 4.189692064988957745054734809642495644502e+0003L, + 7.362346487427048068212968889642741734621e+0002L, + 9.980359714211411423007641056580813116207e+0001L, + 9.426910895135379181107191962193485174159e+0000L, + 5.641895835477562869480794515623601280429e-0001L, +}; +static const long double S2[] = { /* 16 coefficients */ + 2.447282203601902971246004716790604686880e+0005L, + 1.153009852759385309367759460934808489833e+0006L, + 2.608580649612639131548966265078663384849e+0006L, + 3.766673917346623308850202792390569025740e+0006L, + 3.890566255138383910789924920541335370691e+0006L, + 3.052882073900746207613166259994150527732e+0006L, + 1.885574519970380988460241047248519418407e+0006L, + 9.369722034759943185851450846811445012922e+0005L, + 3.792278350536686111444869752624492443659e+0005L, + 1.257750606950115799965366001773094058720e+0005L, + 3.410830600242369370645608634643620355058e+0004L, + 7.513984469742343134851326863175067271240e+0003L, + 1.313296320593190002554779998138695507840e+0003L, + 1.773972700887629157006326333696896516769e+0002L, + 
1.670876451822586800422009013880457094162e+0001L, + 1.000L, +}; + +long double erfl(x) +long double x; +{ + long double s,y,t; + + if (!finitel(x)) { + if (x != x) return x+x; /* NaN */ + return copysignl(one,x); /* return +-1.0 is x=Inf */ + } + + y = fabsl(x); + if (y <= 0.84375L) { + if (y<=tiny) return x+P[0]*x; + s = y*y; + t = __poly_libmq(s,21,P); + return x+x*t; + } + if (y<=1.25L) { + s = y-one; + t = C1+__poly_libmq(s,12,P1)/(one+s*__poly_libmq(s,12,Q1)); + return (signbitl(x))? -t: t; + } else if (y<=1.75L) { + s = y-onehalf; + t = C2+__poly_libmq(s,12,P2)/(one+s*__poly_libmq(s,13,Q2)); + return (signbitl(x))? -t: t; + } + if (y<=9.0L) t = erfcl(y); else t = tiny; + return (signbitl(x))? t-one: one-t; +} + +long double erfcl(x) +long double x; +{ + long double s,y,t; + + if (!finitel(x)) { + if (x != x) return x+x; /* NaN */ + /* return 2.0 if x= -inf; 0.0 if x= +inf */ + if (x < 0.0L) return 2.0L; else return 0.0L; + } + + if (x <= 0.84375L) { + if (x<=0.25) return one-erfl(x); + s = x*x; + t = half-x; + t = t - x*__poly_libmq(s,21,P); + return half+t; + } + if (x<=1.25L) { + s = x-one; + t = one-C1; + return t - __poly_libmq(s,12,P1)/(one+s*__poly_libmq(s,12,Q1)); + } else if (x<=1.75L) { + s = x-onehalf; + t = one-C2; + return t - __poly_libmq(s,12,P2)/(one+s*__poly_libmq(s,13,Q2)); + } + if (x>=107.0L) return nearunfl*nearunfl; /* underflow */ + else if (x >= L16_3) { + y = __poly_libmq(x,15,R2); + t = y/__poly_libmq(x,16,S2); + } else { + y = __poly_libmq(x,14,R1); + t = y/__poly_libmq(x,15,S1); + } + /* + * Note that exp(-x*x+d) = exp(-x*x)*exp(d), so to compute + * exp(-x*x) with a small relative error, we need to compute + * -x*x with a small absolute error. To this end, we set y + * equal to the leading part of x but with enough trailing + * zeros that y*y can be computed exactly and we rewrite x*x + * as y*y + (x-y)*(x+y), distributing the latter expression + * across the exponential. 
+ * + * We could construct y in a portable way by setting + * + * int i = (int)(x * ptwo); + * y = (long double)i * 1/ptwo; + * + * where ptwo is some power of two large enough to make x-y + * small but not so large that the conversion to int overflows. + * When long double arithmetic is slow, however, the following + * non-portable code is preferable. + */ + y = x; + *(2+(int*)&y) = *(3+(int*)&y) = 0; + t *= expl(-y*y)*expl(-(x-y)*(x+y)); + return t; +} diff --git a/usr/src/lib/libm/common/Q/exp10l.c b/usr/src/lib/libm/common/Q/exp10l.c new file mode 100644 index 0000000000..34fce2721a --- /dev/null +++ b/usr/src/lib/libm/common/Q/exp10l.c @@ -0,0 +1,104 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak exp10l = __exp10l + +#include "libm.h" +#include "longdouble.h" + +/* + * exp10l(x) + * n = nint(x*(log10/log2)) ; + * exp10(x) = 10**x = exp(x*ln(10)) = exp(n*ln2+(x*ln10-n*ln2)) + * = 2**n*exp(ln10*(x-n*log2/log10))) + * If x is an integer <= M then use repeat multiplication. 
For + * 10**M is the largest representable integer, where + * M = 10 single precision (24 bits) + * M = 22 double precision (53 bits) + * M = 48 quadruple precision (113 bits) + */ + +#define TINY 1.0e-20L /* single: 1e-5, double: 1e-10, quad: 1e-20 */ +#define LG10OVT 4933.L /* single: 39, double: 309, quad: 4933 */ +#define LG10UFT -4966.L /* single: -45, double: -323, quad: -4966 */ +#define M 48 + /* logt2hi : last 32 bits is zero for quad prec */ +#define LOGT2HI 0.30102999566398119521373889472420986034688L +#define LOGT2LO 2.831664213089468167896664371953e-31L + +static const long double + zero = 0.0L, + tiny = TINY * TINY, + one = 1.0L, + lg10 = 3.321928094887362347870319429489390175865e+0000L, + ln10 = 2.302585092994045684017991454684364207601e+0000L, + logt2hi = LOGT2HI, + logt2lo = LOGT2LO, + lg10ovt = LG10OVT, + lg10uft = LG10UFT; + +long double +exp10l(long double x) { + long double t, tenp; + int k; + + if (!finitel(x)) { + if (isnanl(x) || x > zero) + return (x + x); + else + return (zero); + } + if (fabsl(x) < tiny) + return (one + x); + if (x <= lg10ovt) + if (x >= lg10uft) { + k = (int) x; + tenp = 10.0L; + /* x is a small +integer */ + if (0 <= k && k <= M && (long double) k == x) { + t = one; + if (k & 1) + t *= tenp; + k >>= 1; + while (k) { + tenp *= tenp; + if (k & 1) + t *= tenp; + k >>= 1; + } + return (t); + } + t = anintl(x * lg10); + return (scalbnl(expl(ln10 * ((x - t * logt2hi) - + t * logt2lo)), (int) t)); + } else + return (scalbnl(one, -50000)); /* underflow */ + else + return (scalbnl(one, 50000)); /* overflow */ +} diff --git a/usr/src/lib/libm/common/Q/exp2l.c b/usr/src/lib/libm/common/Q/exp2l.c new file mode 100644 index 0000000000..7b0814d90a --- /dev/null +++ b/usr/src/lib/libm/common/Q/exp2l.c @@ -0,0 +1,81 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). 
+ * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak exp2l = __exp2l + +#include "libm.h" +#include "longdouble.h" + +/* + * exp2l(x) = 2**x = 2**((x-anint(x))+anint(x)) + * = 2**anint(x)*2**(x-anint(x)) + * = 2**anint(x)*exp((x-anint(x))*ln2) + */ + +#define TINY 1.0e-20L /* single: 1e-5, double: 1e-10, quad: 1e-20 */ +#define OVFLEXP 16400 /* single: 130, double 1030, quad: 16400 */ +#define UNFLEXP -16520 /* single:-155, double -1080, quad:-16520 */ + +static const long double + zero = 0.0L, + tiny = TINY * TINY, + half = 0.5L, + ln2 = 6.931471805599453094172321214581765680755e-0001L, + one = 1.0L; + +static const int + ovflexp = OVFLEXP, + unflexp = UNFLEXP; + +long double +exp2l(long double x) { + long double t; + + if (!finitel(x)) { + if (isnanl(x) || x > zero) + return (x + x); + else + return (zero); + } + t = fabsl(x); + if (t < half) { + if (t < tiny) + return (one + x); + else + return (expl(ln2 * x)); + } + t = anintl(x); + if (t < ovflexp) { + if (t >= unflexp) + return (scalbnl(expl(ln2 * (x - t)), (int) t)); + else + return (scalbnl(one, unflexp)); /* underflow */ + } else + return (scalbnl(one, ovflexp)); /* overflow */ +} diff --git 
a/usr/src/lib/libm/common/Q/expl.c b/usr/src/lib/libm/common/Q/expl.c new file mode 100644 index 0000000000..92ace61d75 --- /dev/null +++ b/usr/src/lib/libm/common/Q/expl.c @@ -0,0 +1,126 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * expl(x) + * Table driven method + * Written by K.C. Ng, November 1988. + * Algorithm : + * 1. Argument Reduction: given the input x, find r and integer k + * and j such that + * x = (32k+j)*ln2 + r, |r| <= (1/64)*ln2 . + * + * 2. expl(x) = 2^k * (2^(j/32) + 2^(j/32)*expm1(r)) + * Note: + * a. expm1(r) = (2r)/(2-R), R = r - r^2*(t1 + t2*r^2) + * b. 2^(j/32) is represented as + * _TBL_expl_hi[j]+_TBL_expl_lo[j] + * where + * _TBL_expl_hi[j] = 2^(j/32) rounded + * _TBL_expl_lo[j] = 2^(j/32) - _TBL_expl_hi[j]. + * + * Special cases: + * expl(INF) is INF, expl(NaN) is NaN; + * expl(-INF)= 0; + * for finite argument, only expl(0)=1 is exact. + * + * Accuracy: + * according to an error analysis, the error is always less than + * an ulp (unit in the last place). 
+ * + * Misc. info. + * For 113 bit long double + * if x > 1.135652340629414394949193107797076342845e+4 + * then expl(x) overflow; + * if x < -1.143346274333629787883724384345262150341e+4 + * then expl(x) underflow + * + * Constants: + * Only decimal values are given. We assume that the compiler will convert + * from decimal to binary accurately enough to produce the correct + * hexadecimal values. + */ + +#pragma weak expl = __expl + +#include "libm.h" + +extern const long double _TBL_expl_hi[], _TBL_expl_lo[]; + +static const long double +one = 1.0L, +two = 2.0L, +ln2_64 = 1.083042469624914545964425189778400898568e-2L, +ovflthreshold = 1.135652340629414394949193107797076342845e+4L, +unflthreshold = -1.143346274333629787883724384345262150341e+4L, +invln2_32 = 4.616624130844682903551758979206054839765e+1L, +ln2_32hi = 2.166084939249829091928849858592451515688e-2L, +ln2_32lo = 5.209643502595475652782654157501186731779e-27L; + +/* rational approximation coeffs for [-(ln2)/64,(ln2)/64] */ +static const long double +t1 = 1.666666666666666666666666666660876387437e-1L, +t2 = -2.777777777777777777777707812093173478756e-3L, +t3 = 6.613756613756613482074280932874221202424e-5L, +t4 = -1.653439153392139954169609822742235851120e-6L, +t5 = 4.175314851769539751387852116610973796053e-8L; + +long double +expl(long double x) { + int *px = (int *) &x, ix, j, k, m; + long double t, r; + + ix = px[0]; /* high word of x */ + if (ix >= 0x7fff0000) + return (x + x); /* NaN or +inf */ + if (((unsigned) ix) >= 0xffff0000) + return (-one / x); /* NaN or -inf */ + if ((ix & 0x7fffffff) < 0x3fc30000) { + if ((int) x < 1) + return (one + x); /* |x|<2^-60 */ + } + if (ix > 0) { + if (x > ovflthreshold) + return (scalbnl(x, 20000)); + k = (int) (invln2_32 * (x + ln2_64)); + } else { + if (x < unflthreshold) + return (scalbnl(-x, -40000)); + k = (int) (invln2_32 * (x - ln2_64)); + } + j = k&0x1f; + m = k>>5; + t = (long double) k; + x = (x - t * ln2_32hi) - t * ln2_32lo; + t = x * x; + r = (x - t
* (t1 + t * (t2 + t * (t3 + t * (t4 + t * t5))))) - two; + x = _TBL_expl_hi[j] - ((_TBL_expl_hi[j] * (x + x)) / r - + _TBL_expl_lo[j]); + return (scalbnl(x, m)); +} diff --git a/usr/src/lib/libm/common/Q/expm1l.c b/usr/src/lib/libm/common/Q/expm1l.c new file mode 100644 index 0000000000..155acbdc54 --- /dev/null +++ b/usr/src/lib/libm/common/Q/expm1l.c @@ -0,0 +1,185 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#if defined(ELFOBJ) +#pragma weak expm1l = __expm1l +#endif +#if !defined(__sparc) +#error Unsupported architecture +#endif + +/* + * expm1l(x) + * + * Table driven method + * Written by K.C. Ng, June 1995. + * Algorithm : + * 1. expm1(x) = x if |x|<2**-114 + * 2. if |x| <= 0.0625 = 1/16, use approximation + * expm1(x) = x + x*P/(2-P) + * where + * P = x - z*(P1+z*(P2+z*(P3+z*(P4+z*(P5+z*(P6+z*P7)))))), z = x*x; + * (this formula is derived from + * 2-P+x = R = x*(exp(x)+1)/(exp(x)-1) ~ 2 + x*x/6 - x^4/360 + ...)
+ * + * P1 = 1.66666666666666666666666666666638500528074603030e-0001 + * P2 = -2.77777777777777777777777759668391122822266551158e-0003 + * P3 = 6.61375661375661375657437408890138814721051293054e-0005 + * P4 = -1.65343915343915303310185228411892601606669528828e-0006 + * P5 = 4.17535139755122945763580609663414647067443411178e-0008 + * P6 = -1.05683795988668526689182102605260986731620026832e-0009 + * P7 = 2.67544168821852702827123344217198187229611470514e-0011 + * + * Accuracy: |R-x*(exp(x)+1)/(exp(x)-1)|<=2**-119.13 + * + * 3. For 1/16 < |x| < 1.125, choose x(+-i) ~ +-(i+4.5)/64, i=0,..,67 + * since + * exp(x) = exp(xi+(x-xi))= exp(xi)*exp((x-xi)) + * we have + * expm1(x) = expm1(xi)+(exp(xi))*(expm1(x-xi)) + * where + * |s=x-xi| <= 1/128 + * and + * expm1(s)=2s/(2-R), R= s-s^2*(T1+s^2*(T2+s^2*(T3+s^2*(T4+s^2*T5)))) + * + * T1 = 1.666666666666666666666666666660876387437e-1L, + * T2 = -2.777777777777777777777707812093173478756e-3L, + * T3 = 6.613756613756613482074280932874221202424e-5L, + * T4 = -1.653439153392139954169609822742235851120e-6L, + * T5 = 4.175314851769539751387852116610973796053e-8L; + * + * 4. For |x| >= 1.125, return exp(x)-1. + * (see algorithm for exp) + * + * Special cases: + * expm1l(INF) is INF, expm1l(NaN) is NaN; + * expm1l(-INF)= -1; + * for finite argument, only expm1l(0)=0 is exact. + * + * Accuracy: + * according to an error analysis, the error is always less than + * 2 ulp (unit in the last place). + * + * Misc. info. + * For 113 bit long double + * if x > 1.135652340629414394949193107797076342845e+4 + * then expm1l(x) overflow; + * + * Constants: + * Only decimal values are given. We assume that the compiler will convert + * from decimal to binary accurately enough to produce the correct + * hexadecimal values. 
+ */ + +#include "libm.h" + +extern const long double _TBL_expl_hi[], _TBL_expl_lo[]; +extern const long double _TBL_expm1lx[], _TBL_expm1l[]; + +static const long double + zero = +0.0L, + one = +1.0L, + two = +2.0L, + ln2_64 = +1.083042469624914545964425189778400898568e-2L, + ovflthreshold = +1.135652340629414394949193107797076342845e+4L, + invln2_32 = +4.616624130844682903551758979206054839765e+1L, + ln2_32hi = +2.166084939249829091928849858592451515688e-2L, + ln2_32lo = +5.209643502595475652782654157501186731779e-27L, + huge = +1.0e4000L, + tiny = +1.0e-4000L, + P1 = +1.66666666666666666666666666666638500528074603030e-0001L, + P2 = -2.77777777777777777777777759668391122822266551158e-0003L, + P3 = +6.61375661375661375657437408890138814721051293054e-0005L, + P4 = -1.65343915343915303310185228411892601606669528828e-0006L, + P5 = +4.17535139755122945763580609663414647067443411178e-0008L, + P6 = -1.05683795988668526689182102605260986731620026832e-0009L, + P7 = +2.67544168821852702827123344217198187229611470514e-0011L, +/* rational approximation coeffs for [-(ln2)/64,(ln2)/64] */ + T1 = +1.666666666666666666666666666660876387437e-1L, + T2 = -2.777777777777777777777707812093173478756e-3L, + T3 = +6.613756613756613482074280932874221202424e-5L, + T4 = -1.653439153392139954169609822742235851120e-6L, + T5 = +4.175314851769539751387852116610973796053e-8L; + +long double +expm1l(long double x) { + int hx, ix, j, k, m; + long double t, r, s, w; + + hx = ((int *) &x)[HIXWORD]; + ix = hx & ~0x80000000; + if (ix >= 0x7fff0000) { + if (x != x) + return (x + x); /* NaN */ + if (x < zero) + return (-one); /* -inf */ + return (x); /* +inf */ + } + if (ix < 0x3fff4000) { /* |x| < 1.25 */ + if (ix < 0x3ffb0000) { /* |x| < 0.0625 */ + if (ix < 0x3f8d0000) { + if ((int) x == 0) + return (x); /* |x|<2^-114 */ + } + t = x * x; + r = (x - t * (P1 + t * (P2 + t * (P3 + t * (P4 + t * + (P5 + t * (P6 + t * P7))))))); + return (x + (x * r) / (two - r)); + } + /* compute i = [64*x] */ + m = 
0x4009 - (ix >> 16); + j = ((ix & 0x0000ffff) | 0x10000) >> m; /* j=4,...,67 */ + if (hx < 0) + j += 82; /* negative */ + s = x - _TBL_expm1lx[j]; + t = s * s; + r = s - t * (T1 + t * (T2 + t * (T3 + t * (T4 + t * T5)))); + r = (s + s) / (two - r); + w = _TBL_expm1l[j]; + return (w + (w + one) * r); + } + if (hx > 0) { + if (x > ovflthreshold) + return (huge * huge); + k = (int) (invln2_32 * (x + ln2_64)); + } else { + if (x < -80.0) + return (tiny - x / x); + k = (int) (invln2_32 * (x - ln2_64)); + } + j = k & 0x1f; + m = k >> 5; + t = (long double) k; + x = (x - t * ln2_32hi) - t * ln2_32lo; + t = x * x; + r = (x - t * (T1 + t * (T2 + t * (T3 + t * (T4 + t * T5))))) - two; + x = _TBL_expl_hi[j] - ((_TBL_expl_hi[j] * (x + x)) / r - + _TBL_expl_lo[j]); + return (scalbnl(x, m) - one); +} diff --git a/usr/src/lib/libm/common/Q/fabsl.c b/usr/src/lib/libm/common/Q/fabsl.c new file mode 100644 index 0000000000..0b0ddf2527 --- /dev/null +++ b/usr/src/lib/libm/common/Q/fabsl.c @@ -0,0 +1,40 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 
+ * Use is subject to license terms. + */ + +#pragma weak fabsl = __fabsl + +#include "libm.h" + +long double +fabsl(long double x) { + int *px = (int *) &x; + + px[0] &= 0x7fffffff; + return (x); +} diff --git a/usr/src/lib/libm/common/Q/finitel.c b/usr/src/lib/libm/common/Q/finitel.c new file mode 100644 index 0000000000..bbe255514f --- /dev/null +++ b/usr/src/lib/libm/common/Q/finitel.c @@ -0,0 +1,52 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#if defined(ELFOBJ) +#pragma weak finitel = __finitel +#endif + +#include "libm.h" + +#if defined(__sparc) +int +finitel(long double x) { + int *px = (int *) &x; + return ((px[0] & ~0x80000000) < 0x7fff0000); +} +#elif defined(__x86) +int +finitel(long double x) { + int *px = (int *) &x, t = px[2] & 0x7fff; +#if defined(HANDLE_UNSUPPORTED) + return (t != 0x7fff && ((px[1] & 0x80000000) != 0 || t == 0)); +#else + return (t != 0x7fff); +#endif +} +#endif /* defined(__sparc) || defined(__x86) */ diff --git a/usr/src/lib/libm/common/Q/floorl.c b/usr/src/lib/libm/common/Q/floorl.c new file mode 100644 index 0000000000..d899d3fac8 --- /dev/null +++ b/usr/src/lib/libm/common/Q/floorl.c @@ -0,0 +1,70 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * ceill(x) return the least integral value not less than x + * floorl(x) return the biggest integral value not greater than x + * + * NOTE: aintl(x), anintl(x), ceill(x), floorl(x), and rintl(x) return result + * with the same sign as x's, including 0.0.
+ */ + +#pragma weak ceill = __ceill +#pragma weak floorl = __floorl + +#include "libm.h" +#include "longdouble.h" + +static const long double qone = 1.0L; + +long double +ceill(long double x) { + long double t; + + if (!finitel(x)) + return (x + x); + t = rintl(x); + if (t >= x) /* already ceil(x) */ + return (t); + else /* t < x case: return t+1 */ + return (copysignl(t + qone, x)); +} + +long double +floorl(long double x) { + long double t; + + if (!finitel(x)) + return (x + x); + t = rintl(x); + if (t <= x) + return (t); /* already floor(x) */ + else /* x < t case: return t-1 */ + return (copysignl(t - qone, x)); +} diff --git a/usr/src/lib/libm/common/Q/fmodl.c b/usr/src/lib/libm/common/Q/fmodl.c new file mode 100644 index 0000000000..e5dc93dee5 --- /dev/null +++ b/usr/src/lib/libm/common/Q/fmodl.c @@ -0,0 +1,275 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */
+
+#pragma weak fmodl = __fmodl
+
+#include "libm.h"
+
+static const int
+	is = -0x7fffffff - 1,	/* 0x80000000: top bit of a 32-bit word */
+	im = 0x0000ffff,	/* significand bits kept in the high word */
+	iu = 0x00010000;	/* implicit leading 1, just above the im bits */
+
+static const long double
+	zero = 0.0L,
+	one = 1.0L;
+/*
+ * __H0(x) .. __H3(x) access the four 32-bit words of the long double x.
+ * __H0 is the word fmodl() takes the sign and exponent from; __H1..__H3
+ * are the remaining significand words in decreasing significance.  The
+ * word indices are reversed when __LITTLE_ENDIAN is defined.
+ * The macro argument is not parenthesized: pass a plain variable name.
+ */
+#ifdef __LITTLE_ENDIAN
+#define __H0(x) *(3 + (int *) &x)
+#define __H1(x) *(2 + (int *) &x)
+#define __H2(x) *(1 + (int *) &x)
+#define __H3(x) *(0 + (int *) &x)
+#else
+#define __H0(x) *(0 + (int *) &x)
+#define __H1(x) *(1 + (int *) &x)
+#define __H2(x) *(2 + (int *) &x)
+#define __H3(x) *(3 + (int *) &x)
+#endif
+/*
+ * fmodl(x, y): fixed-point fmod computed on the raw words of x and y.
+ *
+ *  - If x is not finite, or y is NaN or zero, returns (x * y) / (x * y).
+ *  - If |x| < |y| returns x unchanged; if |x| == |y| returns zero * x.
+ *  - Otherwise the significands are reduced as four-word integers by
+ *    repeated shift and subtract; the result is rebuilt starting from
+ *    y's exponent (renormalized as needed) with the sign of x.
+ *
+ * The bit manipulation assumes a 128-bit format: a sign bit, a 15-bit
+ * exponent biased by 16383 and 16 significand bits in the high word.
+ */
+long double
+fmodl(long double x, long double y) {
+	long double a, b;
+	int n, ix, iy, k, sx;
+	int hx;
+	int x0, y0, z0, carry;
+	unsigned x1, x2, x3, y1, y2, y3, z1, z2, z3;
+
+	hx = __H0(x);
+	x1 = __H1(x);
+	x2 = __H2(x);
+	x3 = __H3(x);
+	y0 = __H0(y);
+	y1 = __H1(y);
+	y2 = __H2(y);
+	y3 = __H3(y);
+
+	sx = hx & 0x80000000;	/* sign bit of x, reapplied to the result */
+	x0 = hx ^ sx;		/* high word of |x| */
+	y0 &= 0x7fffffff;	/* high word of |y| */
+
+	/* purge off exception values */
+	if (x0 >= 0x7fff0000 ||	/* !finitel(x) */
+	    (y0 > 0x7fff0000) || (y0 == 0x7fff0000 && ((y1 | y2 | y3) != 0)) ||
+	    (y0 | y1 | y2 | y3) == 0)	/* isnanl(y) || y = 0 */
+		return ((x * y) / (x * y));
+	a = fabsl(x);
+	b = fabsl(y);
+	if (a <= b) {
+		if (a < b)
+			return (x);
+		else
+			return (zero * x);
+	}
+	/*
+	 * determine ix = ilogbl(x); on exit x0..x3 hold the significand
+	 * with its leading 1 at the iu position
+	 */
+	if (x0 < iu) {	/* subnormal x */
+		ix = -16382;
+		while (x0 == 0) {	/* normalize 16 bits at a time */
+			ix -= 16;
+			x0 = x1 >> 16;
+			x1 = (x1 << 16) | (x2 >> 16);
+			x2 = (x2 << 16) | (x3 >> 16);
+			x3 = (x3 << 16);
+		}
+		while (x0 < iu) {	/* then one bit at a time */
+			ix -= 1;
+			x0 = (x0 << 1) | (x1 >> 31);
+			x1 = (x1 << 1) | (x2 >> 31);
+			x2 = (x2 << 1) | (x3 >> 31);
+			x3 <<= 1;
+		}
+	} else {
+		ix = (x0 >> 16) - 16383;
+		x0 = iu | (x0 & im);
+	}
+
+	/* determine iy = ilogbl(y); same normalization as for x */
+	if (y0 < iu) {	/* subnormal y */
+		iy = -16382;
+		while (y0 == 0) {
+			iy -= 16;
+			y0 = y1 >> 16;
+			y1 = (y1 << 16) | (y2 >> 16);
+			y2 = (y2 << 16) | (y3 >> 16);
+			y3 = (y3 << 16);
+		}
+		while (y0 < iu) {
+			iy -= 1;
+			y0 = (y0 << 1) | (y1 >> 31);
+			y1 = (y1 << 1) | (y2 >> 31);
+			y2 = (y2 << 1) | (y3 >> 31);
+			y3 <<= 1;
+		}
+	} else {
+		iy = (y0 >> 16) - 16383;
+		y0 = iu | (y0 & im);
+	}
+
+	/*
+	 * fix point fmod: n = ix - iy steps, each one a trial subtraction
+	 * of y followed by a left shift of whatever remains
+	 */
+	n = ix - iy;
+	while (n--) {
+		/* x already below y: consume shifts without subtracting */
+		while (x0 == 0 && n >= 16) {
+			n -= 16;
+			x0 = x1 >> 16;
+			x1 = (x1 << 16) | (x2 >> 16);
+			x2 = (x2 << 16) | (x3 >> 16);
+			x3 = (x3 << 16);
+		}
+		while (x0 < iu && n >= 1) {
+			n -= 1;
+			x0 = (x0 << 1) | (x1 >> 31);
+			x1 = (x1 << 1) | (x2 >> 31);
+			x2 = (x2 << 1) | (x3 >> 31);
+			x3 = (x3 << 1);
+		}
+		carry = 0;	/* z = x - y, low word first; carry is the borrow */
+		z3 = x3 - y3;
+		carry = (z3 > x3);
+		if (carry == 0) {
+			z2 = x2 - y2;
+			carry = (z2 > x2);
+		} else {
+			z2 = x2 - y2 - 1;
+			carry = (z2 >= x2);
+		}
+		if (carry == 0) {
+			z1 = x1 - y1;
+			carry = (z1 > x1);
+		} else {
+			z1 = x1 - y1 - 1;
+			carry = (z1 >= x1);
+		}
+		z0 = x0 - y0 - carry;
+		if (z0 < 0) {	/* double x */
+			x0 = x0 + x0 + ((x1 & is) != 0);
+			x1 = x1 + x1 + ((x2 & is) != 0);
+			x2 = x2 + x2 + ((x3 & is) != 0);
+			x3 = x3 + x3;
+		} else {
+			if (z0 == 0) {
+				if ((z1 | z2 | z3) == 0) {	/* 0: done */
+					__H0(a) = hx & is;	/* zero with x's sign */
+					__H1(a) = __H2(a) = __H3(a) = 0;
+					return (a);
+				}
+			}
+			/* x = z << 1 */
+			z0 = z0 + z0 + ((z1 & is) != 0);
+			z1 = z1 + z1 + ((z2 & is) != 0);
+			z2 = z2 + z2 + ((z3 & is) != 0);
+			z3 = z3 + z3;
+			x0 = z0;
+			x1 = z1;
+			x2 = z2;
+			x3 = z3;
+		}
+	}
+
+	carry = 0;	/* final trial subtraction, no shift afterwards */
+	z3 = x3 - y3;
+	carry = (z3 > x3);
+	if (carry == 0) {
+		z2 = x2 - y2;
+		carry = (z2 > x2);
+	} else {
+		z2 = x2 - y2 - 1;
+		carry = (z2 >= x2);
+	}
+	if (carry == 0) {
+		z1 = x1 - y1;
+		carry = (z1 > x1);
+	} else {
+		z1 = x1 - y1 - 1;
+		carry = (z1 >= x1);
+	}
+	z0 = x0 - y0 - carry;
+	if (z0 >= 0) {	/* keep the difference only if it did not go negative */
+		x0 = z0;
+		x1 = z1;
+		x2 = z2;
+		x3 = z3;
+	}
+	/* convert back to floating value and restore the sign */
+	if ((x0 | x1 | x2 | x3) == 0) {
+		__H0(a) = hx & is;
+		__H1(a) = __H2(a) = __H3(a) = 0;
+		return (a);
+	}
+	while (x0 < iu) {	/* renormalize, lowering iy by the shift count */
+		if (x0 == 0) {
+			iy -= 16;
+			x0 = x1 >> 16;
+			x1 = (x1 << 16) | (x2 >> 16);
+			x2 = (x2 << 16) | (x3 >> 16);
+			x3 = (x3 << 16);
+		} else {
+			x0 = x0 + x0 + ((x1 & is) != 0);
+			x1 = x1 + x1 + ((x2 & is) != 0);
+			x2 = x2 + x2 + ((x3 & is) != 0);
+			x3 = x3 + x3;
+			iy -= 1;
+		}
+	}
+
+	/* normalize output */
+	if (iy >= -16382) {
+		__H0(a) = sx | (x0 - iu) | ((iy + 16383) << 16);
+		__H1(a) = x1;
+		__H2(a) = x2;
+		__H3(a) = x3;
+	} else {	/* subnormal output */
+		n = -16382 - iy;	/* total right shift required */
+		k = n & 31;	/* sub-word part; whole words are moved below */
+		if (k != 0) {
+			if (k <= 16) {
+				x3 = (x2 << (32 - k)) | (x3 >> k);
+				x2 = (x1 << (32 - k)) | (x2 >> k);
+				x1 = (x0 << (32 - k)) | (x1 >> k);
+				x0 >>= k;
+			} else {
+				x3 = (x2 << (32 - k)) | (x3 >> k);
+				x2 = (x1 << (32 - k)) | (x2 >> k);
+				x1 = (x0 << (32 - k)) | (x1 >> k);
+				x0 = 0;
+			}
+		}
+		while (n >= 32) {
+			n -= 32;
+			x3 = x2;
+			x2 = x1;
+			x1 = x0;
+			x0 = 0;
+		}
+		__H0(a) = x0 | sx;
+		__H1(a) = x1;
+		__H2(a) = x2;
+		__H3(a) = x3;
+		a *= one;	/* NOTE(review): presumably forces an FP operation on the subnormal result -- confirm intent */
+	}
+	return (a);
+}
diff --git a/usr/src/lib/libm/common/Q/gammal.c b/usr/src/lib/libm/common/Q/gammal.c
new file mode 100644
index 0000000000..961c2d08d0
--- /dev/null
+++ b/usr/src/lib/libm/common/Q/gammal.c
@@ -0,0 +1,48 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */ + +#pragma weak gammal = __gammal + +/* + * long double gammal(long double x); + */ + +#include "libm.h" +#include "longdouble.h" + +extern int signgam; +extern int signgaml; + +long double +gammal(long double x) { + long double y = __k_lgammal(x, &signgaml); + + signgam = signgaml; /* SUSv3 requires the setting of signgam */ + return (y); +} diff --git a/usr/src/lib/libm/common/Q/gammal_r.c b/usr/src/lib/libm/common/Q/gammal_r.c new file mode 100644 index 0000000000..26d9745c6d --- /dev/null +++ b/usr/src/lib/libm/common/Q/gammal_r.c @@ -0,0 +1,42 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +/* + * long double gammal_r(long double x, int *signgamlp); + */ + +#pragma weak gammal_r = __gammal_r + +#include "libm.h" +#include "longdouble.h" + +long double +gammal_r(long double x, int *signgamlp) { + return (__k_lgammal(x, signgamlp)); +} diff --git a/usr/src/lib/libm/common/Q/hypotl.c b/usr/src/lib/libm/common/Q/hypotl.c new file mode 100644 index 0000000000..def794358d --- /dev/null +++ b/usr/src/lib/libm/common/Q/hypotl.c @@ -0,0 +1,152 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak hypotl = __hypotl + +/* + * long double hypotl(long double x, long double y); + * Method : + * If z=x*x+y*y has error less than sqrt(2)/2 ulp than sqrt(z) has + * error less than 1 ulp. + * So, compute sqrt(x*x+y*y) with some care as follows: + * Assume x>y>0; + * 1. save and set rounding to round-to-nearest + * 2. if x > 2y use + * x1*x1+(y*y+(x2*(x+x2))) for x*x+y*y + * where x1 = x with lower 64 bits cleared, x2 = x-x1; else + * 3. 
if x <= 2y use
+ *	    t1*y1+((x-y)*(x-y)+(t1*y2+t2*y))
+ *	where t1 = 2x with lower 64 bits cleared, t2 = 2x-t1, y1= y with
+ *	lower 64 bits chopped, y2 = y-y1.
+ *
+ *	NOTE: DO NOT remove parenthsis!
+ *
+ * Special cases:
+ *	hypot(x,y) is INF if x or y is +INF or -INF; else
+ *	hypot(x,y) is NAN if x or y is NAN.
+ *
+ * Accuracy:
+ *	hypot(x,y) returns sqrt(x^2+y^2) with error less than 1 ulps (units
+ *	in the last place)
+ */
+
+#include "libm.h"
+#include "longdouble.h"
+
+extern enum fp_direction_type __swapRD(enum fp_direction_type);
+
+static const long double zero = 0.0L, one = 1.0L;
+/*
+ * hypotl(x, y): evaluates sqrt(x*x + y*y) on the magnitudes of x and y.
+ *
+ * Both sign bits are cleared and the operands are swapped if necessary
+ * so that x has the larger exponent field.  Then:
+ *  - exponent fields differing by 0x73 or more: returns x + y;
+ *  - both exponents inside the "medium" window: the sum of squares is
+ *    built from split operands under round-to-nearest and handed to
+ *    sqrtl(); the previous rounding direction is restored afterwards;
+ *  - an all-ones exponent field (Inf or NaN): returns the infinite
+ *    operand if there is one, otherwise x + y;
+ *  - y with a zero exponent field: x + y if either operand is zero, else
+ *    both are scaled by 2^16381 and the recursive result is scaled back;
+ *  - anything else: both operands are rescaled by the same power of two
+ *    so that x's exponent field becomes 0x3fff, and the recursive result
+ *    is multiplied by t1, which carries x's original exponent field.
+ */
+long double
+hypotl(long double x, long double y) {
+	int n0, n1, n2, n3;
+	long double t1, t2, y1, y2, w;
+	int *px = (int *) &x, *py = (int *) &y;
+	int *pt1 = (int *) &t1, *py1 = (int *) &y1;
+	enum fp_direction_type rd;
+	int j, k, nx, ny, nz;
+
+	if ((*(int *) &one) != 0) {	/* determine word ordering */
+		n0 = 0;
+		n1 = 1;
+		n2 = 2;
+		n3 = 3;
+	} else {
+		n0 = 3;
+		n1 = 2;
+		n2 = 1;
+		n3 = 0;
+	}
+
+	px[n0] &= 0x7fffffff;	/* clear sign bit of x and y */
+	py[n0] &= 0x7fffffff;
+	k = 0x7fff0000;		/* exponent-field mask for word n0 */
+	nx = px[n0] & k;	/* exponent of x and y */
+	ny = py[n0] & k;
+	if (ny > nx) {
+		w = x;
+		x = y;
+		y = w;
+		nz = ny;
+		ny = nx;
+		nx = nz;
+	}	/* force x > y */
+	if ((nx - ny) >= 0x00730000)
+		return (x + y);	/* x/y >= 2**116 */
+	if (nx < 0x5ff30000 && ny > 0x205b0000) {	/* medium x,y */
+		/* save and set RD to Rounding to nearest */
+		rd = __swapRD(fp_nearest);
+		w = x - y;
+		if (w > y) {	/* i.e. x > 2y */
+			pt1[n0] = px[n0];	/* t1 = x with words n2, n3 cleared */
+			pt1[n1] = px[n1];
+			pt1[n2] = pt1[n3] = 0;
+			t2 = x - t1;
+			x = sqrtl(t1 * t1 - (y * (-y) - t2 * (x + t1)));
+		} else {
+			x = x + x;	/* from here on x holds 2x */
+			py1[n0] = py[n0];	/* y1 = y with words n2, n3 cleared */
+			py1[n1] = py[n1];
+			py1[n2] = py1[n3] = 0;
+			y2 = y - y1;
+			pt1[n0] = px[n0];	/* t1 = 2x with words n2, n3 cleared */
+			pt1[n1] = px[n1];
+			pt1[n2] = pt1[n3] = 0;
+			t2 = x - t1;
+			x = sqrtl(t1 * y1 - (w * (-w) - (t2 * y1 + y2 * x)));
+		}
+		if (rd != fp_nearest)
+			(void) __swapRD(rd);	/* restore rounding mode */
+		return (x);
+	} else {
+		if (nx == k || ny == k) {	/* x or y is INF or NaN */
+			if (isinfl(x))
+				t2 = x;
+			else if (isinfl(y))
+				t2 = y;
+			else
+				t2 = x + y;	/* invalid if x or y is sNaN */
+			return (t2);
+		}
+		if (ny == 0) {	/* y has a zero exponent field */
+			if (y == zero || x == zero)
+				return (x + y);
+			t1 = scalbnl(one, 16381);	/* scale up, undone on return */
+			x *= t1;
+			y *= t1;
+			return (scalbnl(one, -16381) * hypotl(x, y));
+		}
+		j = nx - 0x3fff0000;	/* shift that brings x's exponent field to 0x3fff */
+		px[n0] -= j;
+		py[n0] -= j;
+		pt1[n0] = nx;	/* t1: x's original exponent field, zero fraction */
+		pt1[n1] = pt1[n2] = pt1[n3] = 0;
+		return (t1 * hypotl(x, y));
+	}
+}
diff --git a/usr/src/lib/libm/common/Q/ieee_funcl.c b/usr/src/lib/libm/common/Q/ieee_funcl.c
new file mode 100644
index 0000000000..74e6cacfe8
--- /dev/null
+++ b/usr/src/lib/libm/common/Q/ieee_funcl.c
@@ -0,0 +1,113 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */ + +#if defined(ELFOBJ) +#pragma weak isinfl = __isinfl +#pragma weak isnormall = __isnormall +#pragma weak issubnormall = __issubnormall +#pragma weak iszerol = __iszerol +#pragma weak signbitl = __signbitl +#endif + +#include "libm.h" + +#if defined(__sparc) +int +isinfl(long double x) { + int *px = (int *) &x; + return ((px[0] & ~0x80000000) == 0x7fff0000 && px[1] == 0 && + px[2] == 0 && px[3] == 0); +} + +int +isnormall(long double x) { + int *px = (int *) &x; + return ((unsigned) ((px[0] & 0x7fff0000) - 0x10000) < 0x7ffe0000); +} + +int +issubnormall(long double x) { + int *px = (int *) &x; + px[0] &= ~0x80000000; + return (px[0] < 0x00010000 && (px[0] | px[1] | px[2] | px[3]) != 0); +} + +int +iszerol(long double x) { + int *px = (int *) &x; + return (((px[0] & ~0x80000000) | px[1] | px[2] | px[3]) == 0); +} + +int +signbitl(long double x) { + unsigned *px = (unsigned *) &x; + return (px[0] >> 31); +} +#elif defined(__x86) +int +isinfl(long double x) { + int *px = (int *) &x; +#if defined(HANDLE_UNSUPPORTED) + return ((px[2] & 0x7fff) == 0x7fff && + ((px[1] ^ 0x80000000) | px[0]) == 0); +#else + return ((px[2] & 0x7fff) == 0x7fff && + ((px[1] & ~0x80000000) | px[0]) == 0); +#endif +} + +int +isnormall(long double x) { + int *px = (int *) &x; +#if defined(HANDLE_UNSUPPORTED) + return ((unsigned) ((px[2] & 0x7fff) - 1) < 0x7ffe && + (px[1] & 0x80000000) != 0); +#else + return ((unsigned) ((px[2] & 0x7fff) - 1) < 0x7ffe); +#endif +} + +int +issubnormall(long double x) { + int *px = (int *) &x; + return ((px[2] & 0x7fff) == 0 && (px[0] | px[1]) != 0); +} + +int +iszerol(long double x) { + int *px = (int *) &x; + return (((px[2] & 0x7fff) | px[0] | px[1]) == 0); +} + +int +signbitl(long double x) { + unsigned *px = (unsigned *) &x; + return ((px[2] >> 15) & 1); +} +#endif /* defined(__sparc) || defined(__x86) */ diff --git a/usr/src/lib/libm/common/Q/ilogbl.c b/usr/src/lib/libm/common/Q/ilogbl.c new file mode 100644 index 0000000000..bdbe2dbd0f --- /dev/null 
+++ b/usr/src/lib/libm/common/Q/ilogbl.c @@ -0,0 +1,87 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#if defined(ELFOBJ) +#pragma weak ilogbl = __ilogbl +#endif + +#include "libm.h" +#include "xpg6.h" /* __xpg6 */ + +#if defined(__sparc) +#define ISNORMALL(k, x) (k != 0x7fff) /* assuming k != 0 */ +#define X86PDNRM(k, x) +#define XSCALE_OFFSET 0x406f /* 0x3fff + 112 */ +static const long double xscale = 5192296858534827628530496329220096.0L; + /* 2^112 */ +#elif defined(__x86) +/* + * if pseudo-denormal, replace by the equivalent normal + */ +#define X86PDNRM(k, x) if (k == 0 && (((int *) &x)[1] & 0x80000000) != 0) \ + ((int *) &x)[2] |= k = 1 +#if defined(HANDLE_UNSUPPORTED) /* assuming k != 0 */ +#define ISNORMALL(k, x) (k != 0x7fff && (((int *) &x)[1] & 0x80000000) != 0) +#else +#define ISNORMALL(k, x) (k != 0x7fff) +#endif +#define XSCALE_OFFSET 0x403e /* 0x3fff + 63 */ +static const long double xscale = 9223372036854775808.0L; /* 2^63 */ +#endif + +static int +raise_invalid(int v) { /* SUSv3 requires ilogbl(0,+/-Inf,NaN) raise invalid */ +#ifndef lint + if ((__xpg6 & _C99SUSv3_ilogb_0InfNaN_raises_invalid) != 0) { + static const double zero = 0.0; + volatile double dummy; + + dummy = zero / zero; + } +#endif + return (v); +} + +int +ilogbl(long double x) { + int k = XBIASED_EXP(x); + + X86PDNRM(k, x); + if (k == 0) { + if (ISZEROL(x)) + return (raise_invalid(0x80000001)); + else { + x *= xscale; /* scale up by 2^112 or 2^63 */ + return (XBIASED_EXP(x) - XSCALE_OFFSET); + } + } else if (ISNORMALL(k, x)) + return (k - 0x3fff); + else + return (raise_invalid(0x7fffffff)); +} diff --git a/usr/src/lib/libm/common/Q/isnanl.c b/usr/src/lib/libm/common/Q/isnanl.c new file mode 100644 index 0000000000..0d3a853c8e --- /dev/null +++ b/usr/src/lib/libm/common/Q/isnanl.c @@ -0,0 +1,54 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#if defined(ELFOBJ) +#pragma weak isnanl = __isnanl +#endif + +#include "libm.h" + +#if defined(__sparc) +int +isnanl(long double x) { + int *px = (int *) &x; + return ((px[0] & ~0x80000000) >= 0x7fff0000 && + ((px[0] & ~0xffff0000) | px[1] | px[2] | px[3]) != 0); +} +#elif defined(__x86) +int +isnanl(long double x) { + int *px = (int *) &x, t = px[2] & 0x7fff; +#if defined(HANDLE_UNSUPPORTED) + return (t == 0x7fff && ((px[1] & ~0x80000000) | px[0]) != 0 || + t != 0 && (px[1] & 0x80000000) == 0); +#else + return (t == 0x7fff && ((px[1] & ~0x80000000) | px[0]) != 0); +#endif +} +#endif /* defined(__sparc) || defined(__x86) */ diff --git a/usr/src/lib/libm/common/Q/j0l.c b/usr/src/lib/libm/common/Q/j0l.c new file mode 100644 index 0000000000..5d030e74d0 --- /dev/null +++ b/usr/src/lib/libm/common/Q/j0l.c @@ -0,0 +1,738 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * Floating point Bessel's function of the first and second kinds
+ * of order zero: j0(x),y0(x);
+ *
+ * Special cases:
+ *	y0(0)=y1(0)=yn(n,0) = -inf with division by zero signal;
+ *	y0(-ve)=y1(-ve)=yn(n,-ve) are NaN with invalid signal.
+ */
+
+#pragma weak j0l = __j0l
+#pragma weak y0l = __y0l
+
+#include "libm.h"
+#include "longdouble.h"
+
+#define GENERIC long double
+static const GENERIC
+zero	= 0.0L,
+small	= 1.0e-9L,
+tiny	= 1.0e-38L,
+one	= 1.0L,
+five	= 5.0L,
+eight	= 8.0L,
+invsqrtpi= 5.641895835477562869480794515607725858441e-0001L,
+tpi	= 0.636619772367581343075535053490057448L;
+
+static GENERIC pzero(GENERIC);
+static GENERIC qzero(GENERIC);
+
+/*
+ * Numerator (r0) and denominator (s0) coefficients, lowest order first,
+ * of the rational function in z = x*x that j0l() uses for
+ * small < x <= 1.28.  They are only ever read, hence const.
+ */
+static const GENERIC r0[7] = {
+	-2.499999999999999999999999999999998934492e-0001L,
+	1.272657927360049786327618451133763714880e-0002L,
+	-2.694499763712963276900636693400659600898e-0004L,
+	2.724877475058977576903234070919616447883e-0006L,
+	-1.432617103214330236967477495393076320281e-0008L,
+	3.823248804080079168706683540513792224471e-0011L,
+	-4.183174277567983647337568504286313665065e-0014L,
+};
+static const GENERIC s0[7] = {
+	1.0e0L,
+	1.159368290559800854689526195462884666395e-0002L,
+	6.629397597394973383009743876169946772559e-0005L,
+	2.426779981394054406305431142501735094340e-0007L,
+	6.097663491248511069094400469635449749883e-0010L,
+	1.017019133340929220238747413216052224036e-0012L,
+	9.012593179306197579518374581969371278481e-0016L,
+};
+
+/*
+ * j0l(x): Bessel function of the first kind, order zero.
+ *
+ *  - NaN: returns x + x.
+ *  - The sign of x is discarded (x = fabsl(x)).
+ *  - x > 1.28: infinite x gives zero; otherwise the asymptotic form
+ *    invsqrtpi * (pzero(x)*cc - qzero(x)*ss) / sqrt(x) is used, and for
+ *    x > 1.0e120 only the invsqrtpi * cc / sqrt(x) term.
+ *  - x <= tiny: 1 - x;  tiny < x <= small: 1 - x*x/4.
+ *  - otherwise: 1 + z * (r/s), z = x*x, with r and s evaluated from the
+ *    r0[] and s0[] tables by Horner's rule.
+ */
+GENERIC
+j0l(GENERIC x)
+{
+	GENERIC z, s, c, ss, cc, r, u, v;
+	int i;
+
+	if (isnanl(x))
+		return (x + x);
+	x = fabsl(x);
+	if (x > 1.28L) {
+		if (!finitel(x))
+			return (zero);
+		s = sinl(x);
+		c = cosl(x);
+		/*
+		 * j0(x) = sqrt(2/(pi*x))*(p0(x)*cos(x0)-q0(x)*sin(x0))
+		 * where x0 = x-pi/4
+		 *	Better formula:
+		 *		cos(x0) = cos(x)cos(pi/4)+sin(x)sin(pi/4)
+		 *			= 1/sqrt(2) * (cos(x) + sin(x))
+		 *		sin(x0) = sin(x)cos(pi/4)-cos(x)sin(pi/4)
+		 *			= 1/sqrt(2) * (sin(x) - cos(x))
+		 * To avoid cancellation, use
+		 *		sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x))
+		 * to compute the worse one.
+		 */
+		if (x > 1.0e2450L) {	/* x+x may overflow */
+			ss = s - c;
+			cc = s + c;
+		} else if (signbitl(s) != signbitl(c)) {
+			ss = s - c;
+			cc = -cosl(x + x) / ss;
+		} else {
+			cc = s + c;
+			ss = -cosl(x + x) / cc;
+		}
+		/*
+		 * j0(x) = 1/sqrt(pi) * (P(0,x)*cc - Q(0,x)*ss) / sqrt(x)
+		 * y0(x) = 1/sqrt(pi) * (P(0,x)*ss + Q(0,x)*cc) / sqrt(x)
+		 */
+		if (x > 1.0e120L)
+			return ((invsqrtpi * cc) / sqrtl(x));
+		u = pzero(x);
+		v = qzero(x);
+		return (invsqrtpi * (u * cc - v * ss) / sqrtl(x));
+	}
+	if (x <= small) {
+		if (x <= tiny)
+			return (one - x);
+		else
+			return (one - x * x * 0.25L);
+	}
+	z = x * x;
+	r = r0[6];
+	s = s0[6];
+	for (i = 5; i >= 0; i--) {
+		r = r * z + r0[i];
+		s = s * z + s0[i];
+	}
+	return (one + z * (r / s));
+}
+
+static const GENERIC u0[8] = {
+	-7.380429510868722527434392794848301631220e-0002L,
+	1.766855559625940791857536949301981816513e-0001L,
+	-1.386470722701047923235553251240162839408e-0002L,
+	3.520149242724811578636970811631224862615e-0004L,
+	-3.978599663243790049853642275624951870025e-0006L,
+	2.228801153263957224547222556806915479763e-0008L,
+	-6.121246764298785018658597179498837316177e-0011L,
+	6.677103629722678833475965810525587396596e-0014L,
+};
+static const GENERIC v0[8] = {
+	1.0e0L,
+	1.247164416539111311571676766127767127970e-0002L,
+	7.829144749639791500052900281489367443576e-0005L,
+	3.247126540422245330511218321013360336606e-0007L,
+	9.750516724789499678567062572549568447869e-0010L,
+	2.156713223173591212250543390258458098776e-0012L,
+	3.322169561597890004231482431236452752624e-0015L,
+	2.821213295314000924252226486305726805093e-0018L,
+};
+
+/*
+ * y0l(x): Bessel function of the second kind, order zero.
+ *
+ *  - NaN: returns x + x.
+ *  - x <= 0: a division is evaluated into the volatile d (-1/0 for
+ *    x == 0, 0/0 for x < 0).  d is not returned: control falls through
+ *    to the x <= tiny branch, whose result comes from logl(x).
+ *  - x > 1.28: infinite x gives zero; otherwise the asymptotic form
+ *    invsqrtpi * (pzero(x)*ss + qzero(x)*cc) / sqrt(x) is used, and for
+ *    x > 1.0e120 only the invsqrtpi * ss / sqrt(x) term.
+ *  - x <= tiny: u0[0] + tpi * log(x).
+ *  - otherwise: u/v + tpi * (j0l(x) * log(x)), with u and v evaluated
+ *    in z = x*x from the u0[] and v0[] tables by Horner's rule.
+ */
+GENERIC
+y0l(GENERIC x)
+{
+	GENERIC z, s, c, ss, cc, u, v;
+	int i;
+	volatile GENERIC d;
+
+	if (isnanl(x))
+		return (x + x);
+	if (x <= zero) {
+		if (x == zero)
+			d = -one / (x - x);
+		else
+			d = zero / (x - x);
+	}
+#ifdef lint
+	d = d;
+#endif
+	if (x > 1.28L) {
+		if (!finitel(x))
+			return (zero);
+		s = sinl(x);
+		c = cosl(x);
+		/*
+		 * j0(x) = sqrt(2/(pi*x))*(p0(x)*cos(x0)-q0(x)*sin(x0))
+		 * where x0 = x-pi/4
+		 *	Better formula:
+		 *		cos(x0) = cos(x)cos(pi/4)+sin(x)sin(pi/4)
+		 *			= 1/sqrt(2) * (cos(x) + sin(x))
+		 *		sin(x0) = sin(x)cos(pi/4)-cos(x)sin(pi/4)
+		 *			= 1/sqrt(2) * (sin(x) - cos(x))
+		 * To avoid cancellation, use
+		 *		sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x))
+		 * to compute the worse one.
+		 */
+		if (x > 1.0e2450L) {	/* x+x may overflow */
+			ss = s - c;
+			cc = s + c;
+		} else if (signbitl(s) != signbitl(c)) {
+			ss = s - c;
+			cc = -cosl(x + x) / ss;
+		} else {
+			cc = s + c;
+			ss = -cosl(x + x) / cc;
+		}
+		/*
+		 * j0(x) = 1/sqrt(pi*x) * (P(0,x)*cc - Q(0,x)*ss)
+		 * y0(x) = 1/sqrt(pi*x) * (P(0,x)*ss + Q(0,x)*cc)
+		 */
+		if (x > 1.0e120L)
+			return ((invsqrtpi * ss) / sqrtl(x));
+		return (invsqrtpi * (pzero(x) * ss + qzero(x) * cc) / sqrtl(x));
+
+	}
+	if (x <= tiny) {
+		return (u0[0] + tpi * logl(x));
+	}
+	z = x * x;
+	u = u0[7];
+	v = v0[7];
+	for (i = 6; i >= 0; i--) {
+		u = u * z + u0[i];
+		v = v * z + v0[i];
+	}
+	return (u / v + tpi * (j0l(x) * logl(x)));
+}
+
+static const GENERIC pr0[12] = {	/* [16 -- inf] */
+	9.999999999999999999999999999999999997515e-0001L,
+	1.065981615377273376425365823967550598358e+0003L,
+	4.390991200927588978306374718984240719130e+0005L,
+	9.072086218607986711847069407339321363103e+0007L,
+	1.022552886177375367408408501046461671528e+0010L,
+	6.420766912243658241570635854089597269031e+0011L,
+	2.206451725126933913591080211081242266908e+0013L,
3.928369596816895077363705478743346298368e+0014L, + 3.258159928874124597286701119721482876596e+0015L, + 1.025715808134188978860679130140685101348e+0016L, + 7.537170874795721255796001687024031280685e+0015L, + -1.579413901450157332307745586004207687796e+0014L, +}; +static const GENERIC ps0[11] = { + 1.0e0L, + 1.066051927877273376425365823967550512687e+0003L, + 4.391739647168381592399173804329266353038e+0005L, + 9.075162261801343671805658294123888867884e+0007L, + 1.023186118519904751819581912075985995058e+0010L, + 6.427861860414223746340515376512730275061e+0011L, + 2.210861503237823589735481303627993406235e+0013L, + 3.943247335784292905915956840901818177989e+0014L, + 3.283720976777545142150200110647270004481e+0015L, + 1.045346918812754048903645641538728986759e+0016L, + 8.043455468065618900750599584291193680463e+0015L, +}; +static const GENERIC pr1[12] = { /* [8 -- 16] */ + 9.999999999999999999999784422701108683618e-0001L, + 6.796098532948334207755488692777907062894e+0002L, + 1.840036112605722168824530758797169836042e+0005L, + 2.598490483191916637264894340635847598122e+0007L, + 2.105774863242707025525730249472054578523e+0009L, + 1.015822044230542426666314997796944979959e+0011L, + 2.931557457008110436764077699944189071875e+0012L, + 4.962885121125457633655259224179322808824e+0013L, + 4.705424055148223269155430598563351566279e+0014L, + 2.294439854910747229152056080910427001110e+0015L, + 4.905531843137486691500950019322475458629e+0015L, + 3.187543169710339218793442542845735994565e+0015L, +}; +static const GENERIC ps1[14] = { + 1.0e0L, + 6.796801657948334207754571576066758180288e+0002L, + 1.840512891201300567325421059826676366447e+0005L, + 2.599777028312918975306252167127695075221e+0007L, + 2.107582572771047636846811284634244892537e+0009L, + 1.017275794694156108975782763889979940348e+0011L, + 2.938487645192463845428059755454762316011e+0012L, + 4.982512164735557054521042916182317924466e+0013L, + 4.737639900153703274792677468264564361437e+0014L, + 
2.323398719123742743524249528275097100646e+0015L, + 5.033419107069210577868909797896984419391e+0015L, + 3.409036105931068609601317076759804716059e+0015L, + 7.505655364352679737585745147753521662166e+0013L, + -9.976837153983688250780198248297109118313e+0012L, +}; +static const GENERIC pr2[12] = { /* [5 -- 8 ] */ + 9.999999999999999937857236789277366320220e-0001L, + 3.692848765268649571651602420376358849214e+0002L, + 5.373022067535476576926715900057760985410e+0004L, + 4.038738891191314969971504035057219430725e+0006L, + 1.728285706306940523397385566659762646999e+0008L, + 4.375400819645889911158688737206054788534e+0009L, + 6.598950418204912408375591217782088567076e+0010L, + 5.827182039183238492480275401520072793783e+0011L, + 2.884222642913492390887572414999490975844e+0012L, + 7.373278873797767721932837830628688632775e+0012L, + 8.338295457568973761205077964397969230489e+0012L, + 2.911383183467288345772308817209806922143e+0012L, +}; +static const GENERIC ps2[14] = { + 1.0e0L, + 3.693551890268649477288896267171993213102e+0002L, + 5.375607880998361502474715133828068514297e+0004L, + 4.042477764024108249744998862572786367328e+0006L, + 1.731069838737016956685839588670132939513e+0008L, + 4.387147674049898778738226585935491417728e+0009L, + 6.628058659620653765349556940567715258165e+0010L, + 5.869659904164177740471685856367322160664e+0011L, + 2.919839445622817017058977559638969436383e+0012L, + 7.535314897696671402628203718612309253907e+0012L, + 8.696355561452933775773309859748610658935e+0012L, + 3.216155103141537221173601557697083216257e+0012L, + 4.756857081068942248246880159213789086363e+0010L, + -3.496356619666608032231074866481472824067e+0009L, +}; +static const GENERIC pr3[13] = { /* [3.5 -- 5 ] */ + 9.999999999999916693107285612398196588247e-0001L, + 2.263975921282917721194425320484974336945e+0002L, + 1.994358386744245848889492762781484199966e+0004L, + 8.980067458430542243559962493831661323168e+0005L, + 2.282213787521372663705567756420087553508e+0007L, + 
3.409784374889063618250288699908375135923e+0008L, + 3.024380857401448589254343517589811711108e+0009L, + 1.571110368046740246895071721443082286379e+0010L, + 4.603187020243604632153685300463160593768e+0010L, + 7.087196453409712719449549280664058793403e+0010L, + 5.046196021776346356803687409644239065041e+0010L, + 1.287758439080165765709154276618854799932e+0010L, + 5.900679773415023433787846658096813590784e+0008L, +}; +static const GENERIC ps3[13] = { + 1.0e0L, + 2.264679046282855061328604619231774747116e+0002L, + 1.995939523988944553755653255389812103448e+0004L, + 8.993853144706348727038389967490183236820e+0005L, + 2.288326099634588843906989983704795468773e+0007L, + 3.424967100255240885169240956804790118282e+0008L, + 3.046311797972463991368023759640028910016e+0009L, + 1.589614961932826812790222479700797224003e+0010L, + 4.692406624527744816497089139325073939927e+0010L, + 7.320486495902008912866462849073108323948e+0010L, + 5.345945972828978289935309597742981360994e+0010L, + 1.444033091910423754121309915092247171008e+0010L, + 7.987714685115314668378957273824383610525e+0008L, +}; +static const GENERIC pr4[13] = { /* [2.5, 3.5] */ + 9.999999999986736677961118722747757712260e-0001L, + 1.453824980703800559037873123568378845663e+0002L, + 8.097327216430682288267610447006508661032e+0003L, + 2.273847252038264370231169686380192662135e+0005L, + 3.561056728046211111354759998976985449622e+0006L, + 3.244933588800096378434627029369680378599e+0007L, + 1.740112392860717950376210038908476792588e+0008L, + 5.426170187455893285197878563881579269524e+0008L, + 9.490107486454362321004377336020526281371e+0008L, + 8.688872439428470049801714121070005313806e+0008L, + 3.673315853166437222811910656900123215515e+0008L, + 5.577770470359303305164877446339693270239e+0007L, + 1.540438642031689641308197880181291865714e+0006L, +}; +static const GENERIC ps4[13] = { /* [2.5, 3.5] */ + 1.0e0L, + 1.454528105698159439773035951959131799816e+0002L, + 8.107442215200392397172179900434987859618e+0003L, + 
2.279390393778242887574177096606328994140e+0005L, + 3.576251625592252008424781111770934135844e+0006L, + 3.267909499056932631405942058670933813863e+0007L, + 1.760021515330805537499778238099704648805e+0008L, + 5.525553787667353981242060222587465726729e+0008L, + 9.769870295912820457889384082671269328511e+0008L, + 9.110582071004774279226905629624018008454e+0008L, + 3.981857678621955599371967680343918454345e+0008L, + 6.482404686230769399073192961667697036706e+0007L, + 2.210046943095878402443535460329391782298e+0006L, +}; +static const GENERIC pr5[13] = { /* [1.777..., 2.5] */ + 9.999999999114986107951817871144655880699e-0001L, + 9.252583736048588342568344570315435947614e+0001L, + 3.218726757856078715214631502407386264637e+0003L, + 5.554009964621111656479588505862577040831e+0004L, + 5.269993115643664338253196944523510290175e+0005L, + 2.874613773778430691192912190618220544575e+0006L, + 9.133538151103658353874146919613442436035e+0006L, + 1.673067041410338922825193013077354249193e+0007L, + 1.706913873848398011744790289200151840498e+0007L, + 9.067766583853288534551600235576747618679e+0006L, + 2.216746733457884568532695355036338655872e+0006L, + 1.945753880802872541235703812722344514405e+0005L, + 3.132374412921948071539195638885330951749e+0003L, +}; +static const GENERIC ps5[13] = { /* [1.777..., 2.5] */ + 1.0e0L, + 9.259614983862181118883831670990340052982e+0001L, + 3.225125275462903384842124075132609290304e+0003L, + 5.575705362829101545292760055941855246492e+0004L, + 5.306049863037087855496170121958448492522e+0005L, + 2.907060758873509564309729903109018597215e+0006L, + 9.298059206584995898298257827131208539289e+0006L, + 1.720391071006963176836108026556547062980e+0007L, + 1.782614812922865190479394509487941920612e+0007L, + 9.708016389605273153536452032839879950155e+0006L, + 2.476495084688170096480215640962175140027e+0006L, + 2.363200660365585759668077790194604917187e+0005L, + 4.803239569848196077121203575704356936731e+0003L, +}; +static const GENERIC pr6[13] = { /* [1.28, 
1.777...] */ + 9.999999969777095495998606925524322559556e-0001L, + 5.825486719466194430503283824096872219216e+0001L, + 1.248155491637757281915184824965379905380e+0003L, + 1.302093199842358609321338417071710477615e+0004L, + 7.353835804186292782840961999810543016039e+0004L, + 2.356471661113686180549195092555751341757e+0005L, + 4.350553267429009581632987060942780847101e+0005L, + 4.588762661876600638719159826652389418235e+0005L, + 2.675796398548523436544221045225290128611e+0005L, + 8.077649557108971388298292919988449940464e+0004L, + 1.117640459221306873519068741664054573776e+0004L, + 5.544400072396814695175787511557757885585e+0002L, + 5.072550541191480498431289089905822910718e+0000L, +}; +static const GENERIC ps6[13] = { /* [1.28, 1.777...] */ + 1.0e0L, + 5.832517925357165050639075848183613063291e+0001L, + 1.252144364743592128171256104364976466898e+0003L, + 1.310300234342216813579118022415585740772e+0004L, + 7.434667697093812197817292154032863632923e+0004L, + 2.398706595587719165726469002404004614711e+0005L, + 4.472737517625103157004869372427480602511e+0005L, + 4.786313523337761975294171429067037723611e+0005L, + 2.851161872872731228472536061865365370192e+0005L, + 8.891648269899148412331918021801385815586e+0004L, + 1.297097489535351517572978123584751042287e+0004L, + 7.096761640545975756202184143400469812618e+0002L, + 8.378049338590233325977702401733340820351e+0000L, +}; +static const GENERIC sixteen = 16.0L; +static const GENERIC huge = 1.0e30L; +/* + * pzero(x): evaluate the rational approximation r/s in z = 1/x^2, + * picking the prN/psN coefficient pair from the interval that contains x. + * For x > huge (1.0e30) it returns one without evaluating a polynomial. + * The last branch assumes x > 1.28; smaller arguments are not rejected here. + * one, five and eight are constants defined earlier in the file. + * NOTE(review): presumably the P(0,x) factor of the large-argument expansion + * used by j0l/y0l -- the callers are outside this chunk, confirm there. + */ +static GENERIC pzero(x) +GENERIC x; +{ + GENERIC s,r,t,z; + int i; + if (x>huge) return one; + t = one/x; z = t*t; /* z = 1/x^2 is the polynomial variable */ + if (x>sixteen) { /* numerator pr0[0..11], denominator ps0[0..10] */ + r = z*pr0[11]+pr0[10]; s = ps0[10]; /* fold the extra numerator term first */ + for (i=9;i>=0;i--) { /* Horner steps, highest power first */ + r = z*r + pr0[i]; + s = z*s + ps0[i]; + } + } else if (x > eight) { /* numerator pr1[0..11], denominator ps1[0..13] */ + r = pr1[11]; s = ps1[11]+z*(ps1[12]+z*ps1[13]); /* fold the two extra denominator terms first */ + for (i=10;i>=0;i--) { + r = z*r + pr1[i]; + s = z*s + ps1[i]; + } + } else if (x > five) { /* x > 5.0 */ + r = pr2[11]; s = ps2[11]+z*(ps2[12]+z*ps2[13]); /* same 12/14-term shape as the pr1/ps1 case */ + for (i=10;i>=0;i--) { + r = z*r + pr2[i]; + s = z*s + 
ps2[i]; + } + } else if (x>3.5L) { /* from here on both tables have 13 terms */ + r = pr3[12]; s = ps3[12]; + for (i=11;i>=0;i--) { + r = z*r + pr3[i]; + s = z*s + ps3[i]; + } + } else if (x>2.5L) { + r = pr4[12]; s = ps4[12]; + for (i=11;i>=0;i--) { + r = z*r + pr4[i]; + s = z*s + ps4[i]; + } + } else if (x> (1.0L/0.5625L)) { /* 1/0.5625 = 1.777... */ + r = pr5[12]; s = ps5[12]; + for (i=11;i>=0;i--) { + r = z*r + pr5[i]; + s = z*s + ps5[i]; + } + } else { /* assume x > 1.28 */ + r = pr6[12]; s = ps6[12]; + for (i=11;i>=0;i--) { + r = z*r + pr6[i]; + s = z*s + ps6[i]; + } + } + return r/s; +} + + +static const GENERIC qr0[12] = { /* [16, inf] */ + -1.249999999999999999999999999999999972972e-0001L, + -1.425179595545670577414395762503991596897e+0002L, + -6.312499645625970845534460257936222407219e+0004L, + -1.411374326457208384315121243698814446848e+0007L, + -1.735034212758873581410984757860787252842e+0009L, + -1.199777647512789489421826342485055280680e+0011L, + -4.596025334081655714499860409699100373644e+0012L, + -9.262525628201284107792924477031653399187e+0013L, + -8.858394728685039245344398842180662867639e+0014L, + -3.267527953687534887623740622709505972113e+0015L, + -2.664222971186311967587129347029450062019e+0015L, + 3.442464060723987869585180095344504100204e+0014L, +}; +static const GENERIC qs0[11] = { + 1.0e0L, + 1.140729613936536461931516610003185687881e+0003L, + 5.056665510442299351009198186490085803580e+0005L, + 1.132041763825642787943941650522718199115e+0008L, + 1.394570111872581606392620678214246479767e+0010L, + 9.677945218152264789534431079563744378421e+0011L, + 3.731140327851536828225143058896348502096e+0013L, + 7.612785951064869291722846681020881676410e+0014L, + 7.476077016406764891730191004811863975940e+0015L, + 2.951246482613592035421503427100393831709e+0016L, + 3.108361803691811711136854587074302034901e+0016L, +}; +static const GENERIC qr1[12] = { /* [8, 16 ] */ + -1.249999999999999999997949010383433818157e-0001L, + -9.051215166393822640636752244895124126934e+0001L, + -2.620782703428148837671179031904208303947e+0004L, 
+ -3.975571261553504457766177974508785790884e+0006L, + -3.479029330759311306270072218074074994090e+0008L, + -1.823955008124268573036216746186239829089e+0010L, + -5.765932697111801375765156029221568664435e+0011L, + -1.079843680798742592954002192417934779114e+0013L, + -1.146893630504592739082205764611581332897e+0014L, + -6.367016059683898464936104447282880704182e+0014L, + -1.583109041961213490464459111903484209098e+0015L, + -1.230149555764242473103128650135795639412e+0015L, +}; +static const GENERIC qs1[14] = { + 1.0e0L, + 7.246831508115058112438579847778014458432e+0002L, + 2.100854184439168518399383786306927037611e+0005L, + 3.192636418837951507430188285940994235122e+0007L, + 2.801558443383354674538443461124434216152e+0009L, + 1.475026997664373739293483927250653467487e+0011L, + 4.694486824913954608552363821799927145318e+0012L, + 8.890350100919200250838438709601547334021e+0013L, + 9.626844429082905144874701068760469752067e+0014L, + 5.541110744600460773528263862687521642140e+0015L, + 1.486500494789452556727470329232123096563e+0016L, + 1.415840104845959400365430773732093899210e+0016L, + 1.780866095241517418081312567239682336483e+0015L, + -2.359230917384889357887631544079990129494e+0014L, +}; +static const GENERIC qr2[12] = { /* [5, 8] */ + -1.249999999999999531937744362527772181614e-0001L, + -4.944373897356969774839375977239241573966e+0001L, + -7.728449175433465285314261650078450473909e+0003L, + -6.262574329612752346336901434651220705903e+0005L, + -2.900948220220943306027235217424380672732e+0007L, + -7.988719647634192770463917157562874119535e+0008L, + -1.318228171927181389547760026626357012375e+0010L, + -1.282439773983029245309263271945424928196e+0011L, + -7.050925570827818040186149940257918845138e+0011L, + -2.021751882573871990004205616874202684429e+0012L, + -2.592939962400668552384333900573812635658e+0012L, + -1.038267109518891262840601514932972850326e+0012L, +}; +static const GENERIC qs2[14] = { + 1.0e0L, + 3.961358492885570003202784022894248952116e+0002L, + 
6.205788738864701882828752634586510926968e+0004L, + 5.045715603932670286550673813011764406749e+0006L, + 2.349248611362658323353343389430968751429e+0008L, + 6.520244524415828635917683553721880063911e+0009L, + 1.089111211223507719337067159886281887722e+0011L, + 1.080406000905359867958779409414903018610e+0012L, + 6.135645280895514703514154680623769562148e+0012L, + 1.862433040246625874245867151368643668215e+0013L, + 2.667780805786648888840777888702193708994e+0013L, + 1.394401107289087774765300711809313112824e+0013L, + 1.093247500616320375562898297156722445484e+0012L, + -7.228875530378928722826604216491493780775e+0010L, +}; +static const GENERIC qr3[13] = { /* [3.5 5] */ + -1.249999999999473067748420379578481661075e-0001L, + -3.044549048635289351913574324803250977998e+0001L, + -2.890081140649769078496693003524681440869e+0003L, + -1.404922456817202235879343275330529107684e+0005L, + -3.862746614385573443518177403617349281869e+0006L, + -6.257517309110249049201133708911155047689e+0007L, + -6.031451330920839916987079782727323477520e+0008L, + -3.411542405173830611454025765755854382346e+0009L, + -1.089392478149726672133014498723021526099e+0010L, + -1.824934078420210941290140903415956782726e+0010L, + -1.400780278304358710423481070486939531139e+0010L, + -3.716484136064917363926635716743771092093e+0009L, + -1.397591075296425529970434890954904331580e+0008L, +}; +static const GENERIC qs3[13] = { + 1.0e0L, + 2.441498613904962049391000187014945858042e+0002L, + 2.326188882072370711500164222341514337043e+0004L, + 1.137138213121231338494977104659239578165e+0006L, + 3.152918070735662728722998452605364253517e+0007L, + 5.172877993426507259314270488444013595108e+0008L, + 5.083086439731669807455961078856470774115e+0009L, + 2.961842732066434123119325521139476909941e+0010L, + 9.912185866862440735829781856081353151390e+0010L, + 1.793560561251622234430564181567297983598e+0011L, + 1.577090119341228122525265108497940403073e+0011L, + 5.509910306780166194333889999985463681636e+0010L, + 
4.761691134078874491202320181517936758141e+0009L, +}; +static const GENERIC qr4[13] = { /* [2.5 3.5] */ + -1.249999999928567734339745043490705340835e-0001L, + -1.967201748731419063051601624435565528481e+0001L, + -1.186329146714562236407099740615528170707e+0003L, + -3.607736959222941810356301491152457934060e+0004L, + -6.119200717978104904932828468575194267125e+0005L, + -6.037847781158358226670305078652205586384e+0006L, + -3.503558153336140359700536720393565984740e+0007L, + -1.180196478268225718757218523746787309773e+0008L, + -2.221860232085134915841426363505169680528e+0008L, + -2.173372505452747585296176761701746236760e+0008L, + -9.649364865061237558517730539506568013963e+0007L, + -1.465429227847933034546039640094862650385e+0007L, + -3.083003197920262085170581866246663380607e+0005L, +}; +static const GENERIC qs4[13] = { /* [2.5 3.5] */ + 1.0e0L, + 1.579620773732259142752614142139986854055e+0002L, + 9.581372220329138733203879503753685054968e+0003L, + 2.939598672379108095776114131010825885308e+0005L, + 5.052183049314542218630341818692588448168e+0006L, + 5.083497695595206639433839326338971980149e+0007L, + 3.036385361800553388049719014005099206516e+0008L, + 1.067826481452753409910563785161661492137e+0009L, + 2.145644125557118044720741775125319669272e+0009L, + 2.324115615959719949363946673491552216799e+0009L, + 1.223262962112070757966959855619847011146e+0009L, + 2.569765553318495423738478585947110270709e+0008L, + 1.354744744299227127897905787732636565504e+0007L, +}; +static const GENERIC qr5[13] = { /* [1.777.., 2.5] */ + -1.249999995936639697637680428174576069971e-0001L, + -1.260846055371311453485891923426489068315e+0001L, + -4.772398467544467480801174330290141578895e+0002L, + -8.939852599990298486613760833996490599724e+0003L, + -9.184070787149542050979542226446134243197e+0004L, + -5.406038945018274458362637897739280435171e+0005L, + -1.845896544705190261018653728678171084418e+0006L, + -3.613616990680809501878667570653308071547e+0006L, + 
-3.908782978135693252252557720414348623779e+0006L, + -2.173711022517323927109138170588442768176e+0006L, + -5.431253130679918485836408549007856244495e+0005L, + -4.591098546452684510082591587275940765959e+0004L, + -5.244711364168207806835520057792229646578e+0002L, +}; +static const GENERIC qs5[13] = { /* [1.777.., 2.5] */ + 1.0e0L, + 1.014536210851290878350892750972474861447e+0002L, + 3.875547510687135314064434160096139681076e+0003L, + 7.361913122670079814955259281995617732580e+0004L, + 7.720288944218771126581086539585529314636e+0005L, + 4.681529554446752496404431433608306558038e+0006L, + 1.667882621940503925455031252308367745820e+0007L, + 3.469403153761399881888272620855305156241e+0007L, + 4.096992047964210711867089384719947863019e+0007L, + 2.596804755829217449311530735959560630554e+0007L, + 7.983933774697889238154465064019410763845e+0006L, + 9.818133816979900819087242425280757938152e+0005L, + 3.061083930868694396013541535670745443560e+0004L, +}; + +static const GENERIC qr6[13] = { /* [1.28, 1.777..] */ + -1.249999881577289001807137282824929082771e-0001L, + -7.998273510053110759610810594119533619282e+0000L, + -1.872481955335172543369089617771565632719e+0002L, + -2.122116786726300805079874003303799646812e+0003L, + -1.293850285839529282503178263484773478457e+0004L, + -4.445024742266316181033354192262529356093e+0004L, + -8.730161378334357767668344467356505347070e+0004L, + -9.706222895172078442801444972505315054736e+0004L, + -5.896325518259858270165531513618195321041e+0004L, + -1.823172034368108822276420827074668832233e+0004L, + -2.509304178635055926638833040337472387175e+0003L, + -1.156608965715779237316769828941729964099e+0002L, + -7.028005789650731396887346826397785210442e-0001L, +}; +static const GENERIC qs6[13] = { /* [1.28, 1.777..] 
*/ + 1.0e0L, + 6.457211085058064845601261321277721075900e+0001L, + 1.534005216588011210342824555136008682950e+0003L, + 1.777217999176441782593357660462379097171e+0004L, + 1.118372652642469468091084810263231199696e+0005L, + 4.015242433858461813142365748386473605294e+0005L, + 8.377081045517098645448616514388280497673e+0005L, + 1.011495020008010352575398009604164287337e+0006L, + 6.886722075290430568652227875200208955970e+0005L, + 2.504735189948021472047157148613171956537e+0005L, + 4.408138920171044846941001844352009817062e+0004L, + 3.105572178072115145673058722853640854884e+0003L, + 5.588294821118916113437396504573817033678e+0001L, +}; /* + * qzero(x): evaluate t*(r/s) with t = 1/x, where r/s is a rational + * approximation in z = t*t built from the qrN/qsN coefficient pair of the + * interval that contains x. For x > huge it returns -0.125/x directly. + * The last branch assumes x > 1.28; smaller arguments are not rejected here. + * NOTE(review): presumably the Q(0,x) factor of the large-argument expansion + * used by j0l/y0l -- the callers are outside this chunk, confirm there. + */ +static GENERIC qzero(x) +GENERIC x; +{ + GENERIC s,r,t,z; + int i; + if (x>huge) return -0.125L/x; + t = one/x; z = t*t; /* z = 1/x^2 is the polynomial variable */ + if (x>sixteen) { /* numerator qr0[0..11], denominator qs0[0..10] */ + r = z*qr0[11]+qr0[10]; s = qs0[10]; /* fold the extra numerator term first */ + for (i=9;i>=0;i--) { /* Horner steps, highest power first */ + r = z*r + qr0[i]; + s = z*s + qs0[i]; + } + } else if (x>eight) { /* numerator qr1[0..11], denominator qs1[0..13] */ + r = qr1[11]; s = qs1[11]+z*(qs1[12]+z*qs1[13]); /* fold the two extra denominator terms first */ + for (i=10;i>=0;i--) { + r = z*r + qr1[i]; + s = z*s + qs1[i]; + } + } else if (x>five) { /* assume x > 5.0 */ + r = qr2[11]; s = qs2[11]+z*(qs2[12]+z*qs2[13]); + for (i=10;i>=0;i--) { + r = z*r + qr2[i]; + s = z*s + qs2[i]; + } + } else if (x>3.5L) { /* from here on both tables have 13 terms */ + r = qr3[12]; s = qs3[12]; + for (i=11;i>=0;i--) { + r = z*r + qr3[i]; + s = z*s + qs3[i]; + } + } else if (x>2.5L) { + r = qr4[12]; s = qs4[12]; + for (i=11;i>=0;i--) { + r = z*r + qr4[i]; + s = z*s + qs4[i]; + } + } else if (x> (1.0L/0.5625L)) { /* 1/0.5625 = 1.777... */ + r = qr5[12]; s = qs5[12]; + for (i=11;i>=0;i--) { + r = z*r + qr5[i]; + s = z*s + qs5[i]; + } + } else { /* assume x > 1.28 */ + r = qr6[12]; s = qs6[12]; + for (i=11;i>=0;i--) { + r = z*r + qr6[i]; + s = z*s + qs6[i]; + } + } + return t*(r/s); /* the 1/x factor is applied outside the rational part */ +} diff --git a/usr/src/lib/libm/common/Q/j1l.c b/usr/src/lib/libm/common/Q/j1l.c new file mode 100644 index 0000000000..9009b4fb16 --- /dev/null +++ b/usr/src/lib/libm/common/Q/j1l.c @@ -0,0 +1,734 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of 
the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * floating point Bessel's function of the first and second kinds + * of order one: j1(x),y1(x); + * + * Special cases: + * y0(0)=y1(0)=yn(n,0) = -inf with division by zero signal; + * y0(-ve)=y1(-ve)=yn(n,-ve) are NaN with invalid signal. 
+ */ + +#pragma weak j1l = __j1l +#pragma weak y1l = __y1l + +#include "libm.h" +#include "longdouble.h" + +#define GENERIC long double +static const GENERIC +zero = 0.0L, +small = 1.0e-9L, +tiny = 1.0e-38L, +one = 1.0L, +five = 5.0L, +invsqrtpi= 5.641895835477562869480794515607725858441e-0001L, +tpi = 0.636619772367581343075535053490057448L; + +static GENERIC pone(GENERIC); +static GENERIC qone(GENERIC); + +static const GENERIC r0[7] = { + -6.249999999999999999999999999999999627320e-0002L, + 1.940606727194041716205384618494641565464e-0003L, + -3.005630423155733701856481469986459043883e-0005L, + 2.345586219403918667468341047369572169358e-0007L, + -9.976809285885253587529010109133336669724e-0010L, + 2.218743258363623946078958783775107473381e-0012L, + -2.071079656218700604767650924103578046280e-0015L, +}; +static const GENERIC s0[7] = { + 1.0e0L, + 1.061695903156199920738051277075003059555e-0002L, + 5.521860513111180371566951179398862692060e-0005L, + 1.824214367413754193524107877084979441407e-0007L, + 4.098957778439576834818838198039029353925e-0010L, + 6.047735079699666389853240090925264056197e-0013L, + 4.679044728878836197247923279512047035041e-0016L, +}; + +/* + * j1l(x): Bessel function of the first kind of order one, long double. + * + * Non-finite x returns one/x. Otherwise the sign of x is saved in sgn, + * the value is computed for |x|, and the result is negated when x was + * negative (j1 is an odd function). Three ranges are used: + * |x| > 1.28 : trigonometric form built from pone(), qone(), + * sinl() and cosl() + * |x| <= small : two-term series x/2 - x^3/16 (x/2 alone below tiny) + * otherwise : x/2 + x*z*(r0(z)/s0(z)) with z = x*x + */ +GENERIC +j1l(x) GENERIC x;{ + GENERIC z, d, s,c,ss,cc,r; + int i, sgn; + + if (!finitel(x)) return one/x; + sgn = signbitl(x); /* remembered so every path can restore the sign */ + x = fabsl(x); + if (x > 1.28L) { + s = sinl(x); + c = cosl(x); + /* + * j1(x) = sqrt(2/(pi*x))*(p1(x)*cos(x0)-q1(x)*sin(x0)) + * where x0 = x-3pi/4 + * Better formula: + * cos(x0) = cos(x)cos(3pi/4)+sin(x)sin(3pi/4) + * = 1/sqrt(2) * (sin(x) - cos(x)) + * sin(x0) = sin(x)cos(3pi/4)-cos(x)sin(3pi/4) + * = -1/sqrt(2) * (cos(x) + sin(x)) + * To avoid cancellation, use + * sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x)) + * to compute the worse one. 
+ */ + if (x>1.0e2450L) { /* x+x may overflow */ + ss = -s-c; + cc = s-c; + } else if (signbitl(s) != signbitl(c)) { + cc = s - c; + ss = cosl(x+x)/cc; + } else { + ss = -s-c; + cc = cosl(x+x)/ss; + } + /* + * j1(x) = 1/sqrt(pi*x) * (P(1,x)*cc - Q(1,x)*ss) + * y1(x) = 1/sqrt(pi*x) * (P(1,x)*ss + Q(1,x)*cc) + */ + /* + * Beyond 1.0e120 only the cc term is kept. Both cases share + * the sign fix-up below, so j1l(-x) == -j1l(x) also holds for + * huge negative arguments (the early return used to skip it). + */ + if (x>1.0e120L) d = (invsqrtpi*cc)/sqrtl(x); + else d = invsqrtpi*(pone(x)*cc-qone(x)*ss)/sqrtl(x); + if (sgn == 0) return d; else return -d; + } + if (x<=small) { + if (x<=tiny) d = 0.5L*x; + /* j1(x) = x/2 - x^3/16 + ...; 0.0625 agrees with r0[0] used below */ + else d = x*(0.5L-x*x*0.0625L); + if (sgn == 0) return d; else return -d; + } + z = x*x; + r = r0[6]; + s = s0[6]; + for (i=5;i>=0;i--) { /* Horner evaluation of r0(z) and s0(z) */ + r = r*z + r0[i]; + s = s*z + s0[i]; + } + d = x*0.5L+x*(z*(r/s)); + if (sgn == 0) return d; else return -d; +} + +static const GENERIC u0[7] = { + -1.960570906462389484060557273467558703503e-0001L, + 5.166389353148318460304315890665450006495e-0002L, + -2.229699464105910913337190798743451115604e-0003L, + 3.625437034548863342715657067759078267158e-0005L, + -2.689902826993117212255524537353883987171e-0007L, + 9.304570592456930912969387719010256018466e-0010L, + -1.234878126794286643318321347997500346131e-0012L, +}; +static const GENERIC v0[8] = { + 1.0e0L, + 1.369394302535807332517110204820556695644e-0002L, + 9.508438148097659501433367062605935379588e-0005L, + 4.399007309420092056052714797296467565655e-0007L, + 1.488083087443756398305819693177715000787e-0009L, + 3.751609832625793536245746965768587624922e-0012L, + 6.680926434086257291872903276124244131448e-0015L, + 6.676602383908906988160099057991121446058e-0018L, +}; + +/* + * y1l(x): Bessel function of the second kind of order one, long double. + * + * NaN returns x+x; x == 0 returns -one/zero; x < 0 returns zero/zero; + * a non-finite x above 1.28 returns zero. Otherwise: + * x > 1.28 : trigonometric form built from pone(), qone(), + * sinl() and cosl() + * x <= tiny : -tpi/x + * otherwise : x*(u0(z)/v0(z)) + tpi*(j1l(x)*logl(x) - 1/x), z = x*x + */ +GENERIC +y1l(x) GENERIC x;{ + GENERIC z, s,c,ss,cc,u,v; + int i; + + if (isnanl(x)) return x+x; + if (x <= zero) { + if (x == zero) + return -one/zero; + else + return zero/zero; + } + if (x > 1.28L) { + if (!finitel(x)) return zero; + s = sinl(x); + c = cosl(x); + /* + * j1(x) = sqrt(2/(pi*x))*(p1(x)*cos(x0)-q1(x)*sin(x0)) + * where x0 = x-3pi/4 + * Better formula: + * cos(x0) = cos(x)cos(3pi/4)+sin(x)sin(3pi/4) + * = 
1/sqrt(2) * (sin(x) - cos(x)) + * sin(x0) = sin(x)cos(3pi/4)-cos(x)sin(3pi/4) + * = -1/sqrt(2) * (cos(x) + sin(x)) + * To avoid cancellation, use + * sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x)) + * to compute the worse one. + */ + if (x>1.0e2450L) { /* x+x may overflow */ + ss = -s-c; + cc = s-c; + } else if (signbitl(s) != signbitl(c)) { + cc = s - c; + ss = cosl(x+x)/cc; + } else { + ss = -s-c; + cc = cosl(x+x)/ss; + } + /* + * j1(x) = 1/sqrt(pi*x) * (P(1,x)*cc - Q(1,x)*ss) + * y1(x) = 1/sqrt(pi*x) * (P(1,x)*ss + Q(1,x)*cc) + */ + if (x>1.0e91L) return (invsqrtpi*ss)/sqrtl(x); /* only the ss term is kept here */ + return invsqrtpi*(pone(x)*ss+qone(x)*cc)/sqrtl(x); + } + if (x<=tiny) { + return (-tpi/x); + } + z = x*x; + u = u0[6]; v = v0[6]+z*v0[7]; /* v0 has one more term than u0 */ + for (i=5;i>=0;i--) { /* Horner evaluation of u0(z) and v0(z) */ + u = u*z + u0[i]; + v = v*z + v0[i]; + } + return(x*(u/v) + tpi*(j1l(x)*logl(x)-one/x)); +} + +static const GENERIC pr0[12] = { + 1.000000000000000000000000000000000000267e+0000L, + 1.060717875045891455602180843276758003035e+0003L, + 4.344347542892127024446687712181105852335e+0005L, + 8.915680220724007016377924252717410457094e+0007L, + 9.969502259938406062809873257569171272819e+0009L, + 6.200290193138613035646510338707386316595e+0011L, + 2.105978548788015119851815854422247330118e+0013L, + 3.696635772784601239371730810311998368948e+0014L, + 3.015913097920694682057958412534134515156e+0015L, + 9.370298471339353098123277427328592725921e+0015L, + 7.190349005196335967340799265074029443057e+0015L, + 2.736097786240689996880391074927552517982e+0014L, +}; +static const GENERIC ps0[11] = { + 1.0e0L, + 1.060600687545891455602180843276758095107e+0003L, + 4.343106093416975589147153906505338900961e+0005L, + 8.910605869002176566582072242244353399059e+0007L, + 9.959122058635087888690713917622056540190e+0009L, + 6.188744967234948231792482949171041843894e+0011L, + 2.098863976953783506401759873801990304907e+0013L, + 3.672870357018063196746729751479938908450e+0014L, + 2.975538419246824921049011529574385888420e+0015L, + 
9.063657659995043205018686029284479837091e+0015L, + 6.401953344314747916729366441508892711691e+0015L, +}; +static const GENERIC pr1[12] = { + 1.000000000000000000000023667524130660984e+0000L, + 6.746154419979618754354803488126452971204e+0002L, + 1.811210781083390154857018330296145970502e+0005L, + 2.533098390379924268038005329095287842244e+0007L, + 2.029683619805342145252338570875424600729e+0009L, + 9.660859662192711465301069401598929980319e+0010L, + 2.743396238644831519934098967716621316316e+0012L, + 4.553097354140854377931023170263455246288e+0013L, + 4.210245069852219757476169864974870720374e+0014L, + 1.987334056229596485076645967176169801727e+0015L, + 4.067120052787096893838970455751338930462e+0015L, + 2.486539606380406398310845264910691398133e+0015L, +}; +static const GENERIC ps1[14] = { + 1.0e0L, + 6.744982544979618754355808680196859521782e+0002L, + 1.810421795396966762032155290441364740350e+0005L, + 2.530986460644310651529583759699988435573e+0007L, + 2.026743276048023121360249288818290224145e+0009L, + 9.637461924407405935245269407052641341836e+0010L, + 2.732378628423766417402292797028314160831e+0012L, + 4.522345274960527124354844364012184278488e+0013L, + 4.160650668341743132685335758415469856545e+0014L, + 1.943730242988858208243492424892435901211e+0015L, + 3.880228532692127989901131618598067450001e+0015L, + 2.178020816161154615841000173683302999728e+0015L, + -8.994062666842225551554346698171600634173e+0013L, + 1.368520368508851253495764806934619574990e+0013L, +}; +static const GENERIC pr2[12] = { + 1.000000000000000006938651621840396237282e+0000L, + 3.658416291850404981407101077037948144698e+0002L, + 5.267073772170356547709794670602812447537e+0004L, + 3.912012101226837463014925210735894620442e+0006L, + 1.651295648974103957193874928714180765625e+0008L, + 4.114901144480797609972484998142146783499e+0009L, + 6.092524309766036681542980572526335147672e+0010L, + 5.263913178071282616719249969074134570577e+0011L, + 2.538408581124324223367341020538081330994e+0012L, + 
6.288607929360291027895126983015365677648e+0012L, + 6.848330048211148419047055075386525945280e+0012L, + 2.290309646838867941423178163991423244690e+0012L, +}; +static const GENERIC ps2[14] = { + 1.0e0L, + 3.657244416850405086459410165762319861856e+0002L, + 5.262802358425023243992387075861237306312e+0004L, + 3.905896813959919648136295861661483848364e+0006L, + 1.646791907791461220742694842108202772763e+0008L, + 4.096132803064256022224954120208201437344e+0009L, + 6.046665195915950447544429445730680236759e+0010L, + 5.198061739781991313414052212328653295168e+0011L, + 2.484233851814333966401527626421254279796e+0012L, + 6.047868806925315879339651539434315255940e+0012L, + 6.333103831254091652501642567294101813354e+0012L, + 1.875143098754284994467609936924685024968e+0012L, + -5.238330920563392692965412762508813601534e+0010L, + 4.656888609439364725427789198383779259957e+0009L, +}; +static const GENERIC pr3[13] = { + 1.000000000000009336887318068056137842897e+0000L, + 2.242719942728459588488051572002835729183e+0002L, + 1.955450611382026550266257737331095691092e+0004L, + 8.707143293993619899395400562409175590739e+0005L, + 2.186267894487004565948324289010954505316e+0007L, + 3.224328510541957792360691585667502864688e+0008L, + 2.821057355151380597331792896882741364897e+0009L, + 1.445371387295422404365584793796028979840e+0010L, + 4.181743160669891357783011002656658107864e+0010L, + 6.387371088767993119325536137794535513922e+0010L, + 4.575619999412716078064070587767416436396e+0010L, + 1.228415651211639160620284441690503550842e+0010L, + 7.242170349875563053436050532153112882072e+0008L, +}; +static const GENERIC ps3[13] = { + 1.0e0L, + 2.241548067728529551049804610486061401070e+0002L, + 1.952838216795552145132137932931237181307e+0004L, + 8.684574926493185744628127341069974575526e+0005L, + 2.176357771067037962940853412819852189164e+0007L, + 3.199958682356132977319258783167122100567e+0008L, + 2.786218931525334687844675219914201872570e+0009L, + 
1.416283776951741549631417572317916039767e+0010L, + 4.042962659271567948735676834609348842922e+0010L, + 6.028168462646694510083847222968444402161e+0010L, + 4.118410226794641413833887606580085281111e+0010L, + 9.918735736297038430744161253338202230263e+0009L, + 4.092967198238098023219124487437130332038e+0008L, +}; +static const GENERIC pr4[13] = { + 1.000000000001509220978157399042059553390e+0000L, + 1.437551868378147851133499996323782607787e+0002L, + 7.911335537418177296041518061404505428004e+0003L, + 2.193710939115317214716518908935756104804e+0005L, + 3.390662495136730962513489796538274984382e+0006L, + 3.048655347929348891006070609293884274789e+0007L, + 1.613781633489496606354045161527450975195e+0008L, + 4.975089835037230277110156150038482159988e+0008L, + 8.636047087015115403880904418339566323264e+0008L, + 7.918202912328366140110671223076949101509e+0008L, + 3.423294665798984733439650311722794853294e+0008L, + 5.621904953441963961040503934782662613621e+0007L, + 2.086303543310240260758670404509484499793e+0006L, +}; +static const GENERIC ps4[13] = { + 1.0e0L, + 1.436379993384532371670493319591847362304e+0002L, + 7.894647154785430678061053848847436659499e+0003L, + 2.184659753392097529008981741550878586174e+0005L, + 3.366109083305465176803513738147049499361e+0006L, + 3.011911545968996817697665866587226343186e+0007L, + 1.582262913779689851316760148459414895301e+0008L, + 4.819268809494937919217938589530138201770e+0008L, + 8.201355762990450679702837123432527154830e+0008L, + 7.268232093982510937417446421282341425212e+0008L, + 2.950911909015572933262131323934036480462e+0008L, + 4.242839924305934423010858966540621219396e+0007L, + 1.064387620445090779182117666330405186866e+0006L, +}; +static const GENERIC pr5[13] = { + 1.000000000102434805241171427253847353861e+0000L, + 9.129332257083629259060502249025963234821e+0001L, + 3.132238483586953037576119377504557191413e+0003L, + 5.329782528269307971278943122454171107861e+0004L, + 4.988460157184117790692873002103052944145e+0005L, + 
2.686602071615786816147010334256047469378e+0006L, + 8.445418526028961197703799808701268301831e+0006L, + 1.536575358646141157475725889907900827390e+0007L, + 1.568405818236523821796862770586544811945e+0007L, + 8.450876239888770102387618667362302173547e+0006L, + 2.154414900139567328424026827163203446077e+0006L, + 2.105656926565043898888460254808062352205e+0005L, + 4.739165011023396507022134303736862812975e+0003L, +}; +static const GENERIC ps5[13] = { + 1.0e0L, + 9.117613509595327476509152673394703847793e+0001L, + 3.121697972484015639301279229281770795147e+0003L, + 5.294447222735893568040911873834576440255e+0004L, + 4.930368882192772335798256684110887882807e+0005L, + 2.634854685641165298302167435798357437768e+0006L, + 8.185462775400326393555896157031818280918e+0006L, + 1.462417423080215192609668642663030667086e+0007L, + 1.450624993985851675982860844153954896015e+0007L, + 7.460467647561995283219086567162006113864e+0006L, + 1.754210981405612478869227142579056338965e+0006L, + 1.463286721155271971526264914524746699596e+0005L, + 2.155894725796702015341211116579827039459e+0003L, +}; +static const GENERIC pr6[13] = { + 1.000000003564855546741735920315743157129e+0000L, + 5.734003934862540458119423509909510288366e+0001L, + 1.209572491935850486086559692291796887976e+0003L, + 1.243398391422281247933674779163660286838e+0004L, + 6.930996755181437937258220998601708278787e+0004L, + 2.198067659532757598646722249966767620099e+0005L, + 4.033659432712058633933179115820576858455e+0005L, + 4.257759657219008027016047206574574358678e+0005L, + 2.511917395876004349480721277445763916389e+0005L, + 7.813756153070623654178731651381881953552e+0004L, + 1.152069173381127881385588092905864352891e+0004L, + 6.548580782804088553777816037551523398082e+0002L, + 8.668725370116906132327542766127938496880e+0000L, +}; +static const GENERIC ps6[13] = { + 1.0e0L, + 5.722285236357114566499221525736286205184e+0001L, + 1.203010842878317935444582950620339570506e+0003L, + 
1.230058335378583550155825502172435371208e+0004L, + 6.800998550607861288865300438648089894412e+0004L, + 2.130767829599304262987769347536850885921e+0005L, + 3.840483466643916681759936972992155310026e+0005L, + 3.947432373459225542861819148108081160393e+0005L, + 2.237816239393081111481588434457838526738e+0005L, + 6.545820495124419723398946273790921540774e+0004L, + 8.729563630320892741500726213278834737196e+0003L, + 4.130762660291894753450174794196998813709e+0002L, + 3.480368898672684645130335786015075595598e+0000L, +}; +static const GENERIC sixteen = 16.0L; +static const GENERIC eight = 8.0L; +static const GENERIC huge = 1.0e30L; +/* + * pone(x): the P(1,x) factor that j1l() and y1l() use for x > 1.28. + * Evaluates the rational approximation r/s in z = 1/x^2, picking the + * prN/psN coefficient pair from the interval that contains x. + * For x > huge (1.0e30) it returns one without evaluating a polynomial. + * The last branch assumes x > 1.28; smaller arguments are not rejected here. + */ +static GENERIC pone(x) +GENERIC x; +{ + GENERIC s,r,t,z; + int i; + if (x>huge) return one; + t = one/x; z = t*t; /* z = 1/x^2 is the polynomial variable */ + if (x>sixteen) { /* numerator pr0[0..11], denominator ps0[0..10] */ + r = z*pr0[11]+pr0[10]; s = ps0[10]; /* fold the extra numerator term first */ + for (i=9;i>=0;i--) { /* Horner steps, highest power first */ + r = z*r + pr0[i]; + s = z*s + ps0[i]; + } + } else if (x>eight) { /* numerator pr1[0..11], denominator ps1[0..13] */ + r = pr1[11]; s = ps1[11]+z*(ps1[12]+z*ps1[13]); /* fold the two extra denominator terms first */ + for (i=10;i>=0;i--) { + r = z*r + pr1[i]; + s = z*s + ps1[i]; + } + } else if (x>five) { /* same 12/14-term shape as the pr1/ps1 case */ + r = pr2[11]; s = ps2[11]+z*(ps2[12]+z*ps2[13]); + for (i=10;i>=0;i--) { + r = z*r + pr2[i]; + s = z*s + ps2[i]; + } + } else if (x>3.5L) { /* from here on both tables have 13 terms */ + r = pr3[12]; s = ps3[12]; + for (i=11;i>=0;i--) { + r = z*r + pr3[i]; + s = z*s + ps3[i]; + } + } else if (x>2.5L) { + r = pr4[12]; s = ps4[12]; + for (i=11;i>=0;i--) { + r = z*r + pr4[i]; + s = z*s + ps4[i]; + } + } else if (x> (1.0L/0.5625L)) { /* 1/0.5625 = 1.777... */ + r = pr5[12]; s = ps5[12]; + for (i=11;i>=0;i--) { + r = z*r + pr5[i]; + s = z*s + ps5[i]; + } + } else { /* assume x > 1.28 */ + r = pr6[12]; s = ps6[12]; + for (i=11;i>=0;i--) { + r = z*r + pr6[i]; + s = z*s + ps6[i]; + } + } + return r/s; +} + + +static const GENERIC qr0[12] = { + 3.749999999999999999999999999999999971033e-0001L, + 4.256726035237050601607682277433094262226e+0002L, + 1.875976490812878489192409978945401066066e+0005L, + 4.170314268048041914273603680317745592790e+0007L, + 5.092750132543855817293451118974555746551e+0009L, + 
3.494749676278488654103505795794139483404e+0011L, + 1.327062148257437316997667817096694173709e+0013L, + 2.648993138273427226907503742066551150490e+0014L, + 2.511695665909547412222430494473998127684e+0015L, + 9.274694506662289043224310499164702306096e+0015L, + 8.150904170663663829331320302911792892002e+0015L, + -5.001918733707662355772037829620388765122e+0014L, +}; +static const GENERIC qs0[11] = { + 1.0e0L, + 1.135400380229880160428715273982155760093e+0003L, + 5.005701183877126164326765545516590744360e+0005L, + 1.113444200113712167984337603933040102987e+0008L, + 1.361074819925223062778717565699039471124e+0010L, + 9.355750985802849484438933905325982809653e+0011L, + 3.563462786008988825003965543857998084828e+0013L, + 7.155145113900094163648726863803802910454e+0014L, + 6.871266835834472758055559013851843654113e+0015L, + 2.622030899226736712644974988157345234092e+0016L, + 2.602912729172876330650077021706139707746e+0016L, +}; +static const GENERIC qr1[12] = { + 3.749999999999999999997762458207284405806e-0001L, + 2.697883998881706839929255517498189980485e+0002L, + 7.755195925781028489386938870473834411019e+0004L, + 1.166777762104017777198211072895528968355e+0007L, + 1.011504772984321168320010084520261069362e+0009L, + 5.246007703574156853577754571720205550010e+0010L, + 1.637692549885592683166116551691266537647e+0012L, + 3.022303623698185669912990310925039382495e+0013L, + 3.154769927290655684846107030265909987946e+0014L, + 1.715819913441554770089730934808123360921e+0015L, + 4.165044355759732622273534445131736188510e+0015L, + 3.151381420874174705643100381708086287596e+0015L, +}; +static const GENERIC qs1[14] = { + 1.0e0L, + 7.197091705351218239785633172408276982828e+0002L, + 2.070012799599548685544883041297609861055e+0005L, + 3.117014815317656221871840152778458754516e+0007L, + 2.705719678902554974863325877025902971727e+0009L, + 1.406113614727345726925060648750867264098e+0011L, + 4.403777536067131320363005978631674817359e+0012L, + 
8.170725690209322283061499386703167242894e+0013L, + 8.609458844975495289227794126964431210566e+0014L, + 4.766766367015473481257280600694952920204e+0015L, + 1.202286587943342194863557940888115641650e+0016L, + 1.012474328306200909525063936061756024120e+0016L, + 6.183552022678917858273222879615824070703e+0014L, + -9.756731548558226997573737400988225722740e+0013L, +}; +static const GENERIC qr2[12] = { + 3.749999999999999481245647262226994293189e-0001L, + 1.471366807289771354491181140167359026735e+0002L, + 2.279432486768448220142080962843526951250e+0004L, + 1.828943048523771225163804043356958285893e+0006L, + 8.379828388647823135832220596417725010837e+0007L, + 2.279814029335044024585393671278378022053e+0009L, + 3.711653952257118120832817785271466441420e+0010L, + 3.557650914518554549916730572553105048068e+0011L, + 1.924583483146095896259774329498934160650e+0012L, + 5.424386256063736390759567088291887140278e+0012L, + 6.839325621241776786206509704671746841737e+0012L, + 2.702169563144001166291686452305436313971e+0012L, +}; +static const GENERIC qs2[14] = { + 1.0e0L, + 3.926379194439388135703211933895203191089e+0002L, + 6.089148804106598297488336063007609312276e+0004L, + 4.893546162973278583711376356041614150645e+0006L, + 2.247571119114497845046388801813832219404e+0008L, + 6.137635663350177751290469334200757872645e+0009L, + 1.005115019784102856424493519524998953678e+0011L, + 9.725664462014503832860151384604677240620e+0011L, + 5.345525100485511116148634192844434636072e+0012L, + 1.549944007398946691720862738173956994779e+0013L, + 2.067148441178952625710302124163264760362e+0013L, + 9.401565402641963611295119487242595462301e+0012L, + 3.548217088622398274748837287769709374385e+0011L, + -2.934470341719047120076509938432417352365e+0010L, +}; +static const GENERIC qr3[13] = { + 3.749999999999412724084579833297451472091e-0001L, + 9.058478580291706212422978492938435582527e+0001L, + 8.524056033161038750461083666711724381171e+0003L, + 4.105967158629109427753434569223631014730e+0005L, + 
1.118326603378531348259783091972623333657e+0007L, + 1.794636683403578918528064904714132329343e+0008L, + 1.714314157463635959556133236004368896724e+0009L, + 9.622092032236084846572067257267661456030e+0009L, + 3.057759524485859159957762858780768355020e+0010L, + 5.129306780754798531609621454415938890020e+0010L, + 3.999122002794961070680636194346316041352e+0010L, + 1.122298454643493485989721564358100345388e+0010L, + 5.603981987645989709668830968522362582221e+0008L, +}; +static const GENERIC qs3[13] = { + 1.0e0L, + 2.418328663076578169836155170053634419922e+0002L, + 2.279620205900121042587523541281272875520e+0004L, + 1.100984222585729521470129014992217092794e+0006L, + 3.010743223679247091004262516286654516282e+0007L, + 4.860925542827367817289619265215599433996e+0008L, + 4.686668111035348691982715864307839581243e+0009L, + 2.668701788405102017427214705946730894074e+0010L, + 8.677395746106802640390580944836650584903e+0010L, + 1.511936455574951790658498795945106643036e+0011L, + 1.260845604432623478002018696873608353093e+0011L, + 4.052692278419853853911440231600864589805e+0010L, + 2.965516519212226064983267822243329694729e+0009L, +}; +static const GENERIC qr4[13] = { + 3.749999999919234164154669754440123072618e-0001L, + 5.844218580776819864791168253485055101858e+0001L, + 3.489273514092912982675669411371435670220e+0003L, + 1.050523637774575684509663430018995479594e+0005L, + 1.764549172059701565500717319792780115289e+0006L, + 1.725532438844133795028063102681497371154e+0007L, + 9.938114847359778539965140247590176334874e+0007L, + 3.331710808184595545396883770200772842314e+0008L, + 6.271970557641881511609560444872797282698e+0008L, + 6.188529798677357075020774923903737913285e+0008L, + 2.821905302742849974509982167877885011629e+0008L, + 4.615467358646911976773290256984329814896e+0007L, + 1.348140608731546467396685802693380693275e+0006L, +}; +static const GENERIC qs4[13] = { + 1.0e0L, + 1.561192663112345185261418296389902133372e+0002L, + 
9.346678031144098270547225423124213083072e+0003L, + 2.825851246482293547838023847601704751590e+0005L, + 4.776572711622156091710902891124911556293e+0006L, + 4.715106953717135402977938048006267859302e+0007L, + 2.753962350894311316439652227611209035193e+0008L, + 9.428501434615463207768964787500411575223e+0008L, + 1.832650858775206787088236896454141572617e+0009L, + 1.901697378939743226948920874296595242257e+0009L, + 9.433322226854293780627188599226380812725e+0008L, + 1.808520540608671608680284520798858587370e+0008L, + 7.983342331736662753157217446919462398008e+0006L, +}; +static const GENERIC qr5[13] = { + 3.749999995331364437028988850515190446719e-0001L, + 3.739356381766559882677514593041627547911e+0001L, + 1.399562500629413529355265462912819802551e+0003L, + 2.594154053098947925345332218062210111753e+0004L, + 2.640149879297408640394163979394594318371e+0005L, + 1.542471854873199142031889093591449397995e+0006L, + 5.242272868972053374067572098992335425895e+0006L, + 1.025834487769410221329633071426044839935e+0007L, + 1.116553924239448940142230579060124209622e+0007L, + 6.318076065595910176374916303525884653514e+0006L, + 1.641218086168640408527639735915512881785e+0006L, + 1.522369793529178644168813882912134706444e+0005L, + 2.526530541062297200914180060208669584055e+0003L, +}; +static const GENERIC qs5[13] = { + 1.0e0L, + 9.998960735935075380397545659016287506660e+0001L, + 3.758767417842043742686475060540416737562e+0003L, + 7.013652806952306520121959742519780781653e+0004L, + 7.208949808818615099246529616211730446850e+0005L, + 4.272753927109614455417836186072202009252e+0006L, + 1.482524411356470699336129814111025434703e+0007L, + 2.988750366665678233425279237627700803473e+0007L, + 3.396957890261080492694709150553619185065e+0007L, + 2.050652487738593004111578091156304540386e+0007L, + 5.900504120811732547616511555946279451316e+0006L, + 6.563391409260160897024498082273183468347e+0005L, + 1.692629845012790205348966731477187041419e+0004L, +}; +static const GENERIC qr6[13] = { + 
3.749999861516664133157566870858975421296e-0001L, + 2.367863756747764863120797431599473468918e+0001L, + 5.476715802114976248882067325630793143777e+0002L, + 6.143190357869842894025012945444096170251e+0003L, + 3.716250534677997850513733595140463851730e+0004L, + 1.270883463823876752138326905022875657430e+0005L, + 2.495301449636814481646371665429083801388e+0005L, + 2.789578988212952248340486296254398601942e+0005L, + 1.718247946911109055931819087137397324634e+0005L, + 5.458973214011665714330326732204106364229e+0004L, + 7.912102686687948786048943339759596652813e+0003L, + 4.077961006160866935722030715149087938091e+0002L, + 3.765206972770245085551057237882528510428e+0000L, +}; +static const GENERIC qs6[13] = { + 1.0e0L, + 6.341646532940517305641893852673926809601e+0001L, + 1.477058277414040790932597537920671025359e+0003L, + 1.674406564031044491436044253393536487604e+0004L, + 1.028516501369755949895050806908994650768e+0005L, + 3.593620042532885295087463507733285434207e+0005L, + 7.267924991381020915185873399453724799625e+0005L, + 8.462277510768818399961191426205006083088e+0005L, + 5.514399892230892163373611895645500250514e+0005L, + 1.898084241009259353540620272932188102299e+0005L, + 3.102941242117739015721984123081026253068e+0004L, + 1.958971184431466907681440650181421086143e+0003L, + 2.878853357310495087181721613889455121867e+0001L, +}; +static GENERIC qone(x) +GENERIC x; +{ + GENERIC s,r,t,z; + int i; + if (x>huge) return 0.375L/x; + t = one/x; z = t*t; + if (x>sixteen) { + r = z*qr0[11]+qr0[10]; s = qs0[10]; + for (i=9;i>=0;i--) { + r = z*r + qr0[i]; + s = z*s + qs0[i]; + } + } else if (x>eight) { + r = qr1[11]; s = qs1[11]+z*(qs1[12]+z*qs1[13]); + for (i=10;i>=0;i--) { + r = z*r + qr1[i]; + s = z*s + qs1[i]; + } + } else if (x>five) { /* x > 5.0 */ + r = qr2[11]; s = qs2[11]+z*(qs2[12]+z*qs2[13]); + for (i=10;i>=0;i--) { + r = z*r + qr2[i]; + s = z*s + qs2[i]; + } + } else if (x>3.5L) { + r = qr3[12]; s = qs3[12]; + for (i=11;i>=0;i--) { + r = z*r + qr3[i]; + s = z*s + 
qs3[i]; + } + } else if (x>2.5L) { + r = qr4[12]; s = qs4[12]; + for (i=11;i>=0;i--) { + r = z*r + qr4[i]; + s = z*s + qs4[i]; + } + } else if (x> (1.0L/0.5625L)) { + r = qr5[12]; s = qs5[12]; + for (i=11;i>=0;i--) { + r = z*r + qr5[i]; + s = z*s + qs5[i]; + } + } else { /* assume x > 1.28 */ + r = qr6[12]; s = qs6[12]; + for (i=11;i>=0;i--) { + r = z*r + qr6[i]; + s = z*s + qs6[i]; + } + } + return t*(r/s); +} diff --git a/usr/src/lib/libm/common/Q/jnl.c b/usr/src/lib/libm/common/Q/jnl.c new file mode 100644 index 0000000000..40d5d061e5 --- /dev/null +++ b/usr/src/lib/libm/common/Q/jnl.c @@ -0,0 +1,289 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#if defined(ELFOBJ) +#pragma weak jnl = __jnl +#pragma weak ynl = __ynl +#endif + +/* + * floating point Bessel's function of the 1st and 2nd kind + * of order n: jn(n,x),yn(n,x); + * + * Special cases: + * y0(0)=y1(0)=yn(n,0) = -inf with division by zero signal; + * y0(-ve)=y1(-ve)=yn(n,-ve) are NaN with invalid signal. + * Note 2. 
About jn(n,x), yn(n,x) + * For n=0, j0(x) is called, + * for n=1, j1(x) is called, + * for n<x, forward recursion us used starting + * from values of j0(x) and j1(x). + * for n>x, a continued fraction approximation to + * j(n,x)/j(n-1,x) is evaluated and then backward + * recursion is used starting from a supposed value + * for j(n,x). The resulting value of j(0,x) is + * compared with the actual value to correct the + * supposed value of j(n,x). + * + * yn(n,x) is similar in all respects, except + * that forward recursion is used for all + * values of n>1. + * + */ + +#include "libm.h" +#include "longdouble.h" +#include <float.h> /* LDBL_MAX */ + +#define GENERIC long double + +static const GENERIC +invsqrtpi = 5.641895835477562869480794515607725858441e-0001L, +two = 2.0L, +zero = 0.0L, +one = 1.0L; + +GENERIC +jnl(n, x) int n; GENERIC x; { + int i, sgn; + GENERIC a, b, temp, z, w; + + /* + * J(-n,x) = (-1)^n * J(n, x), J(n, -x) = (-1)^n * J(n, x) + * Thus, J(-n,x) = J(n,-x) + */ + if (n < 0) { + n = -n; + x = -x; + } + if (n == 0) + return (j0l(x)); + if (n == 1) + return (j1l(x)); + if (x != x) + return (x+x); + if ((n&1) == 0) + sgn = 0; /* even n */ + else + sgn = signbitl(x); /* old n */ + x = fabsl(x); + if (x == zero || !finitel(x)) b = zero; + else if ((GENERIC)n <= x) { + /* + * Safe to use + * J(n+1,x)=2n/x *J(n,x)-J(n-1,x) + */ + if (x > 1.0e91L) { + /* + * x >> n**2 + * Jn(x) = cos(x-(2n+1)*pi/4)*sqrt(2/x*pi) + * Yn(x) = sin(x-(2n+1)*pi/4)*sqrt(2/x*pi) + * Let s=sin(x), c=cos(x), + * xn=x-(2n+1)*pi/4, sqt2 = sqrt(2),then + * + * n sin(xn)*sqt2 cos(xn)*sqt2 + * ---------------------------------- + * 0 s-c c+s + * 1 -s-c -c+s + * 2 -s+c -c-s + * 3 s+c c-s + */ + switch (n&3) { + case 0: temp = cosl(x)+sinl(x); break; + case 1: temp = -cosl(x)+sinl(x); break; + case 2: temp = -cosl(x)-sinl(x); break; + case 3: temp = cosl(x)-sinl(x); break; + } + b = invsqrtpi*temp/sqrtl(x); + } else { + a = j0l(x); + b = j1l(x); + for (i = 1; i < n; i++) { + temp = b; + 
b = b*((GENERIC)(i+i)/x) - a; /* avoid underflow */ + a = temp; + } + } + } else { + if (x < 1e-17L) { /* use J(n,x) = 1/n!*(x/2)^n */ + b = powl(0.5L*x, (GENERIC)n); + if (b != zero) { + for (a = one, i = 1; i <= n; i++) a *= (GENERIC)i; + b = b/a; + } + } else { + /* use backward recurrence */ + /* + * x x^2 x^2 + * J(n,x)/J(n-1,x) = ---- ------ ------ ..... + * 2n - 2(n+1) - 2(n+2) + * + * 1 1 1 + * (for large x) = ---- ------ ------ ..... + * 2n 2(n+1) 2(n+2) + * -- - ------ - ------ - + * x x x + * + * Let w = 2n/x and h=2/x, then the above quotient + * is equal to the continued fraction: + * 1 + * = ----------------------- + * 1 + * w - ----------------- + * 1 + * w+h - --------- + * w+2h - ... + * + * To determine how many terms needed, let + * Q(0) = w, Q(1) = w(w+h) - 1, + * Q(k) = (w+k*h)*Q(k-1) - Q(k-2), + * When Q(k) > 1e4 good for single + * When Q(k) > 1e9 good for double + * When Q(k) > 1e17 good for quaduple + */ + /* determin k */ + GENERIC t, v; + double q0, q1, h, tmp; int k, m; + w = (n+n)/(double)x; h = 2.0/(double)x; + q0 = w; z = w+h; q1 = w*z - 1.0; k = 1; + while (q1 < 1.0e17) { + k += 1; z += h; + tmp = z*q1 - q0; + q0 = q1; + q1 = tmp; + } + m = n+n; + for (t = zero, i = 2*(n+k); i >= m; i -= 2) t = one/(i/x-t); + a = t; + b = one; + /* + * estimate log((2/x)^n*n!) = n*log(2/x)+n*ln(n) + * hence, if n*(log(2n/x)) > ... 
+ * single 8.8722839355e+01 + * double 7.09782712893383973096e+02 + * long double 1.1356523406294143949491931077970765006170e+04 + * then recurrent value may overflow and the result is + * likely underflow to zero + */ + tmp = n; + v = two/x; + tmp = tmp*logl(fabsl(v*tmp)); + if (tmp < 1.1356523406294143949491931077970765e+04L) { + for (i = n-1; i > 0; i--) { + temp = b; + b = ((i+i)/x)*b - a; + a = temp; + } + } else { + for (i = n-1; i > 0; i--) { + temp = b; + b = ((i+i)/x)*b - a; + a = temp; + if (b > 1e1000L) { + a /= b; + t /= b; + b = 1.0; + } + } + } + b = (t*j0l(x)/b); + } + } + if (sgn == 1) + return (-b); + else + return (b); +} + +GENERIC ynl(n, x) +int n; GENERIC x; { + int i; + int sign; + GENERIC a, b, temp; + + if (x != x) + return (x+x); + if (x <= zero) { + if (x == zero) + return (-one/zero); + else + return (zero/zero); + } + sign = 1; + if (n < 0) { + n = -n; + if ((n&1) == 1) sign = -1; + } + if (n == 0) + return (y0l(x)); + if (n == 1) + return (sign*y1l(x)); + if (!finitel(x)) + return (zero); + + if (x > 1.0e91L) { /* x >> n**2 + Jn(x) = cos(x-(2n+1)*pi/4)*sqrt(2/x*pi) + Yn(x) = sin(x-(2n+1)*pi/4)*sqrt(2/x*pi) + Let s = sin(x), c = cos(x), + xn = x-(2n+1)*pi/4, sqt2 = sqrt(2), then + + n sin(xn)*sqt2 cos(xn)*sqt2 + ---------------------------------- + 0 s-c c+s + 1 -s-c -c+s + 2 -s+c -c-s + 3 s+c c-s + */ + switch (n&3) { + case 0: temp = sinl(x)-cosl(x); break; + case 1: temp = -sinl(x)-cosl(x); break; + case 2: temp = -sinl(x)+cosl(x); break; + case 3: temp = sinl(x)+cosl(x); break; + } + b = invsqrtpi*temp/sqrtl(x); + } else { + a = y0l(x); + b = y1l(x); + /* + * fix 1262058 and take care of non-default rounding + */ + for (i = 1; i < n; i++) { + temp = b; + b *= (GENERIC) (i + i) / x; + if (b <= -LDBL_MAX) + break; + b -= a; + a = temp; + } + } + if (sign > 0) + return (b); + else + return (-b); +} diff --git a/usr/src/lib/libm/common/Q/lgammal.c b/usr/src/lib/libm/common/Q/lgammal.c new file mode 100644 index 0000000000..f54621dc99 --- 
/dev/null +++ b/usr/src/lib/libm/common/Q/lgammal.c @@ -0,0 +1,48 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak lgammal = __lgammal + +/* + * long double lgammal(long double x); + */ + +#include "libm.h" +#include "longdouble.h" + +extern int signgam; +extern int signgaml; + +long double +lgammal(long double x) { + long double y = __k_lgammal(x, &signgaml); + + signgam = signgaml; /* SUSv3 requires the setting of signgam */ + return (y); +} diff --git a/usr/src/lib/libm/common/Q/lgammal_r.c b/usr/src/lib/libm/common/Q/lgammal_r.c new file mode 100644 index 0000000000..bba4986004 --- /dev/null +++ b/usr/src/lib/libm/common/Q/lgammal_r.c @@ -0,0 +1,42 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
/*
 * lgammal_r(x, signgamlp): variant of lgammal that takes the sign
 * destination from the caller.  It forwards both arguments unchanged to
 * __k_lgammal() and returns its result; no global state is touched
 * here.
 */
long double
lgammal_r(long double x, int *signgamlp)
{
	long double result;

	result = __k_lgammal(x, signgamlp);
	return (result);
}
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#if defined(ELFOBJ) +#pragma weak log10l = __log10l +#endif + +/* + * log10l(X) + * + * Method : + * Let log10_2hi = leading 98(SPARC)/49(x86) bits of log10(2) and + * log10_2lo = log10(2) - log10_2hi, + * ivln10 = 1/log(10) rounded. + * Then + * n = ilogb(x), + * if (n<0) n = n+1; + * x = scalbn(x,-n); + * LOG10(x) := n*log10_2hi + (n*log10_2lo + ivln10*log(x)) + * + * Note1: + * For fear of destroying log10(10**n)=n, the rounding mode is + * set to Round-to-Nearest. + * + * Special cases: + * log10(x) is NaN with signal if x < 0; + * log10(+INF) is +INF with no signal; log10(0) is -INF with signal; + * log10(NaN) is that NaN with no signal; + * log10(10**N) = N for N=0,1,...,22. + * + * Constants: + * The hexadecimal values are the intended ones for the following constants. + * The decimal values may be used, provided that the compiler will convert + * from decimal to binary accurately enough to produce the hexadecimal values + * shown. + */ + +#include "libm.h" +#include "longdouble.h" + +#if defined(__x86) +#define __swapRD __swap87RD +#endif +extern enum fp_direction_type __swapRD(enum fp_direction_type); + +static const long double + zero = 0.0L, + ivln10 = 4.342944819032518276511289189166050822944e-0001L, + one = 1.0L, +#if defined(__x86) + log10_2hi = 3.010299956639803653501985536422580480576e-01L, + log10_2lo = 8.298635403410822349787106337291183585413e-16L; +#elif defined(__sparc) + log10_2hi = 3.010299956639811952137388947242098603469e-01L, + log10_2lo = 2.831664213089468167896664371953210945664e-31L; +#else +#error Unknown Architecture! 
+#endif + +long double +log10l(long double x) { + long double y, z; + enum fp_direction_type rd; + int n; + + if (!finitel(x)) + return (x + fabsl(x)); /* x is +-INF or NaN */ + else if (x > zero) { + n = ilogbl(x); + if (n < 0) + n += 1; + rd = __swapRD(fp_nearest); + y = n; + x = scalbnl(x, -n); + z = y * log10_2lo + ivln10 * logl(x); + z += y * log10_2hi; + if (rd != fp_nearest) + (void) __swapRD(rd); + return (z); + } else if (x == zero) /* -INF */ + return (-one / zero); + else /* x <0, return NaN */ + return (zero / zero); +} diff --git a/usr/src/lib/libm/common/Q/log1pl.c b/usr/src/lib/libm/common/Q/log1pl.c new file mode 100644 index 0000000000..f00c5eb4ed --- /dev/null +++ b/usr/src/lib/libm/common/Q/log1pl.c @@ -0,0 +1,217 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifdef __LITTLE_ENDIAN +#define H0(x) *(3 + (int *) &x) +#define H1(x) *(2 + (int *) &x) +#define H2(x) *(1 + (int *) &x) +#define H3(x) *(int *) &x +#else +#define H0(x) *(int *) &x +#define H1(x) *(1 + (int *) &x) +#define H2(x) *(2 + (int *) &x) +#define H3(x) *(3 + (int *) &x) +#endif + +/* + * log1pl(x) + * Table look-up algorithm by modifying logl.c + * By K.C. Ng, July 6, 1995 + * + * (a). For 1+x in [31/33,33/31], using a special approximation: + * s = x/(2.0+x); ... here |s| <= 0.03125 + * z = s*s; + * return x-s*(x-z*(B1+z*(B2+z*(B3+z*(B4+...+z*B9)...)))); + * (i.e., x is in [-2/33,2/31]) + * + * (b). Otherwise, normalize 1+x = 2^n * 1.f. + * Here we may need a correction term for 1+x rounded. + * Use a 6-bit table look-up: find a 6 bit g that match f to 6.5 bits, + * then + * log(1+x) = n*ln2 + log(1.g) + log(1.f/1.g). + * Here the leading and trailing values of log(1.g) are obtained from + * a size-64 table. + * For log(1.f/1.g), let s = (1.f-1.g)/(1.f+1.g). Note that + * 1.f = 2^-n(1+x) + * + * then + * log(1.f/1.g) = log((1+s)/(1-s)) = 2s + 2/3 s^3 + 2/5 s^5 +... + * Note that |s|<2**-8=0.00390625. We use an odd s-polynomial + * approximation to compute log(1.f/1.g): + * s*(A1+s^2*(A2+s^2*(A3+s^2*(A4+s^2*(A5+s^2*(A6+s^2*A7)))))) + * (Precision is 2**-136.91 bits, absolute error) + * + * CAUTION: + * For x>=1, compute 1+x will lost one bit (OK). + * For x in [-0.5,-1), 1+x is exact. + * For x in (-0.5,-2/33]U[2/31,1), up to 4 last bits of x will be lost + * in 1+x. Therefore, to recover the lost bits, one need to compute + * 1.f-1.g accurately. 
+ * + * Let hx = HI(x), m = (hx>>16)-0x3fff (=ilogbl(x)), note that + * -2/33 = -0.0606...= 2^-5 * 1.939..., + * 2/31 = 0.09375 = 2^-4 * 1.500..., + * so for x in (-0.5,-2/33], -5<=m<=-2, n= -1, 1+f=2*(1+x) + * for x in [2/33,1), -4<=m<=-1, n= 0, f=x + * + * In short: + * if x>0, let g: hg= ((hx + (0x200<<(-m)))>>(10-m))<<(10-m) + * then 1.f-1.g = x-g + * if x<0, let g': hg' =((ix-(0x200)<<(-m-1))>>(9-m))<<(9-m) + * (ix=hx&0x7fffffff) + * then 1.f-1.g = 2*(g'+x), + * + * (c). The final result is computed by + * (n*ln2_hi+_TBL_logl_hi[j]) + + * ( (n*ln2_lo+_TBL_logl_lo[j]) + s*(A1+...) ) + * + * Note. + * For ln2_hi and _TBL_logl_hi[j], we force their last 32 bit to be zero + * so that n*ln2_hi + _TBL_logl_hi[j] is exact. Here + * _TBL_logl_hi[j] + _TBL_logl_lo[j] match log(1+j*2**-6) to 194 bits + * + * + * Special cases: + * log(x) is NaN with signal if x < 0 (including -INF) ; + * log(+INF) is +INF; log(0) is -INF with signal; + * log(NaN) is that NaN with no signal. + * + * Constants: + * The hexadecimal values are the intended ones for the following constants. + * The decimal values may be used, provided that the compiler will convert + * from decimal to binary accurately enough to produce the hexadecimal values + * shown. 
+ */ + +#pragma weak log1pl = __log1pl + +#include "libm.h" + +extern const long double _TBL_logl_hi[], _TBL_logl_lo[]; + +static const long double +zero = 0.0L, +one = 1.0L, +two = 2.0L, +ln2hi = 6.931471805599453094172319547495844850203e-0001L, +ln2lo = 1.667085920830552208890449330400379754169e-0025L, +A1 = 2.000000000000000000000000000000000000024e+0000L, +A2 = 6.666666666666666666666666666666091393804e-0001L, +A3 = 4.000000000000000000000000407167070220671e-0001L, +A4 = 2.857142857142857142730077490612903681164e-0001L, +A5 = 2.222222222222242577702836920812882605099e-0001L, +A6 = 1.818181816435493395985912667105885828356e-0001L, +A7 = 1.538537835211839751112067512805496931725e-0001L, +B1 = 6.666666666666666666666666666666961498329e-0001L, +B2 = 3.999999999999999999999999990037655042358e-0001L, +B3 = 2.857142857142857142857273426428347457918e-0001L, +B4 = 2.222222222222222221353229049747910109566e-0001L, +B5 = 1.818181818181821503532559306309070138046e-0001L, +B6 = 1.538461538453809210486356084587356788556e-0001L, +B7 = 1.333333344463358756121456892645178795480e-0001L, +B8 = 1.176460904783899064854645174603360383792e-0001L, +B9 = 1.057293869956598995326368602518056990746e-0001L; + +long double +log1pl(long double x) { + long double f, s, z, qn, h, t, y, g; + int i, j, ix, iy, n, hx, m; + + hx = H0(x); + ix = hx & 0x7fffffff; + if (ix < 0x3ffaf07c) { /* |x|<2/33 */ + if (ix <= 0x3f8d0000) { /* x <= 2**-114, return x */ + if ((int) x == 0) + return (x); + } + s = x / (two + x); /* |s|<2**-8 */ + z = s * s; + return (x - s * (x - z * (B1 + z * (B2 + z * (B3 + z * (B4 + + z * (B5 + z * (B6 + z * (B7 + z * (B8 + z * B9)))))))))); + } + if (ix >= 0x7fff0000) { /* x is +inf or NaN */ + return (x + fabsl(x)); + } + if (hx < 0 && ix >= 0x3fff0000) { + if (ix > 0x3fff0000 || (H1(x) | H2(x) | H3(x)) != 0) + x = zero; + return (x / zero); /* log1p(x) is NaN if x<-1 */ + /* log1p(-1) is -inf */ + } + if (ix >= 0x7ffeffff) + y = x; /* avoid spurious overflow */ + else + y = 
one + x; + iy = H0(y); + n = ((iy + 0x200) >> 16) - 0x3fff; + iy = (iy & 0x0000ffff) | 0x3fff0000; /* scale 1+x to [1,2] */ + H0(y) = iy; + z = zero; + m = (ix >> 16) - 0x3fff; + /* HI(1+x) = (((hx&0xffff)|0x10000)>>(-m))|0x3fff0000 */ + if (n == 0) { /* x in [2/33,1) */ + g = zero; + H0(g) = ((hx + (0x200 << (-m))) >> (10 - m)) << (10 - m); + t = x - g; + i = (((((hx & 0xffff) | 0x10000) >> (-m)) | 0x3fff0000) + + 0x200) >> 10; + H0(z) = i << 10; + + } else if ((1 + n) == 0 && (ix < 0x3ffe0000)) { /* x in (-0.5,-2/33] */ + g = zero; + H0(g) = ((ix + (0x200 << (-m - 1))) >> (9 - m)) << (9 - m); + t = g + x; + t = t + t; + /* + * HI(2*(1+x)) = + * ((0x10000-(((hx&0xffff)|0x10000)>>(-m)))<<1)|0x3fff0000 + */ + /* + * i = + * ((((0x10000-(((hx&0xffff)|0x10000)>>(-m)))<<1)|0x3fff0000)+ + * 0x200)>>10; H0(z)=i<<10; + */ + z = two * (one - g); + i = H0(z) >> 10; + } else { + i = (iy + 0x200) >> 10; + H0(z) = i << 10; + t = y - z; + } + + s = t / (y + z); + j = i & 0x3f; + z = s * s; + qn = (long double) n; + t = qn * ln2lo + _TBL_logl_lo[j]; + h = qn * ln2hi + _TBL_logl_hi[j]; + f = t + s * (A1 + z * (A2 + z * (A3 + z * (A4 + z * (A5 + z * (A6 + + z * A7)))))); + return (h + f); +} diff --git a/usr/src/lib/libm/common/Q/log2l.c b/usr/src/lib/libm/common/Q/log2l.c new file mode 100644 index 0000000000..08023126f3 --- /dev/null +++ b/usr/src/lib/libm/common/Q/log2l.c @@ -0,0 +1,66 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * log2l(x) + * RETURN THE BASE 2 LOGARITHM OF X + * + * Method: + * purge off 0,INF, and NaN. + * n = ilogb(x) + * if (n<0) n+=1 + * z = scalbn(x,-n) + * LOG2(x) = n + (1/ln2)*log(x) + */ + +#pragma weak log2l = __log2l + +#include "libm.h" +#include "longdouble.h" + +static const long double + zero = 0.0L, + half = 0.5L, + one = 1.0L, + invln2 = 1.442695040888963407359924681001892137427e+0000L; + +long double +log2l(long double x) { + int n; + + if (x == zero || !finitel(x)) + return (logl(x)); + n = ilogbl(x); + if (n < 0) + n += 1; + x = scalbnl(x, -n); + if (x == half) + return (n - one); + return (n + invln2 * logl(x)); +} diff --git a/usr/src/lib/libm/common/Q/logbl.c b/usr/src/lib/libm/common/Q/logbl.c new file mode 100644 index 0000000000..62bb59e6e8 --- /dev/null +++ b/usr/src/lib/libm/common/Q/logbl.c @@ -0,0 +1,83 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#if defined(ELFOBJ) +#pragma weak logbl = __logbl +#endif + +#include "libm.h" +#include "xpg6.h" /* __xpg6 */ +#define _C99SUSv3_logb _C99SUSv3_logb_subnormal_is_like_ilogb + +#if defined(__sparc) +#define ISNORMALL(k, x) (k != 0x7fff) /* assuming k != 0 */ +#define X86PDNRM(k, x) +#define XSCALE_OFFSET 0x406f /* 0x3fff + 112 */ +static const long double xscale = 5192296858534827628530496329220096.0L; + /* 2^112 */ +#elif defined(__x86) +/* + * if pseudo-denormal, replace by the equivalent normal + */ +#define X86PDNRM(k, x) if (k == 0 && (((int *) &x)[1] & 0x80000000) != 0) \ + ((int *) &x)[2] |= k = 1 +#if defined(HANDLE_UNSUPPORTED) /* assuming k != 0 */ +#define ISNORMALL(k, x) (k != 0x7fff && (((int *) &x)[1] & 0x80000000) != 0) +#else +#define ISNORMALL(k, x) (k != 0x7fff) +#endif +#define XSCALE_OFFSET 0x403e /* 0x3fff + 63 */ +static const long double xscale = 9223372036854775808.0L; /* 2^63 */ +#endif + +static long double +raise_division(long double v) { +#pragma STDC FENV_ACCESS ON + static const long double zero = 0.0L; + return (v / zero); +} + +long double +logbl(long double x) { + int k = XBIASED_EXP(x); + + X86PDNRM(k, x); + if (k == 0) { + if (ISZEROL(x)) + return (raise_division(-1.0L)); + else if ((__xpg6 & _C99SUSv3_logb) != 0) { + x *= xscale; /* scale up by 2^112 or 2^63 */ + return (long double) (XBIASED_EXP(x) - XSCALE_OFFSET); + } else + return (-16382.L); + } else if (ISNORMALL(k, x)) + return ((long double) (k - 0x3fff)); + else + return (x * x); +} diff --git a/usr/src/lib/libm/common/Q/logl.c 
b/usr/src/lib/libm/common/Q/logl.c
new file mode 100644
index 0000000000..1cdf224c07
--- /dev/null
+++ b/usr/src/lib/libm/common/Q/logl.c
@@ -0,0 +1,173 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma weak logl = __logl
+
+/*
+ * logl(x)
+ * Table look-up algorithm
+ * By K.C. Ng, March 6, 1989
+ *
+ * (a). For x in [31/33,33/31], using a special approximation:
+ *	f = x - 1;
+ *	s = f/(2.0+f);		... here |s| <= 0.03125
+ *	z = s*s;
+ *	return f-s*(f-z*(B1+z*(B2+z*(B3+z*(B4+...+z*B9)...))));
+ *
+ * (b). Otherwise, normalize x = 2^n * 1.f.
+ *	Use a 6-bit table look-up: find a 6 bit g that match f to 6.5 bits,
+ *	then
+ *	    log(x) = n*ln2 + log(1.g) + log(1.f/1.g).
+ *	Here the leading and trailing values of log(1.g) are obtained from
+ *	a size-64 table.
+ *	For log(1.f/1.g), let s = (1.f-1.g)/(1.f+1.g), then
+ *	    log(1.f/1.g) = log((1+s)/(1-s)) = 2s + 2/3 s^3 + 2/5 s^5 +...
+ *	Note that |s|<2**-8=0.00390625. We use an odd s-polynomial
+ *	approximation to compute log(1.f/1.g):
+ *		s*(A1+s^2*(A2+s^2*(A3+s^2*(A4+s^2*(A5+s^2*(A6+s^2*A7))))))
+ *	(Precision is 2**-136.91 bits, absolute error)
+ *
+ * (c). The final result is computed by
+ *		(n*ln2_hi+_TBL_logl_hi[j]) +
+ *			( (n*ln2_lo+_TBL_logl_lo[j]) + s*(A1+...) )
+ *
+ * Note.
+ *	For ln2_hi and _TBL_logl_hi[j], we force their last 32 bit to be zero
+ *	so that n*ln2_hi + _TBL_logl_hi[j] is exact. Here
+ *	_TBL_logl_hi[j] + _TBL_logl_lo[j] match log(1+j*2**-6) to 194 bits
+ *
+ *
+ * Special cases:
+ *	log(x) is NaN with signal if x < 0 (including -INF) ;
+ *	log(+INF) is +INF; log(0) is -INF with signal;
+ *	log(NaN) is that NaN with no signal.
+ *
+ * Constants:
+ * The hexadecimal values are the intended ones for the following constants.
+ * The decimal values may be used, provided that the compiler will convert
+ * from decimal to binary accurately enough to produce the hexadecimal values
+ * shown.
+ */
+
+#include "libm.h"
+
+extern const long double _TBL_logl_hi[], _TBL_logl_lo[];
+
+static const long double
+	zero = 0.0L,
+	one = 1.0L,
+	two = 2.0L,
+	two113 = 10384593717069655257060992658440192.0L, /* subnormal scale */
+	ln2hi = 6.931471805599453094172319547495844850203e-0001L,
+	ln2lo = 1.667085920830552208890449330400379754169e-0025L,
+	A1 = 2.000000000000000000000000000000000000024e+0000L,
+	A2 = 6.666666666666666666666666666666091393804e-0001L,
+	A3 = 4.000000000000000000000000407167070220671e-0001L,
+	A4 = 2.857142857142857142730077490612903681164e-0001L,
+	A5 = 2.222222222222242577702836920812882605099e-0001L,
+	A6 = 1.818181816435493395985912667105885828356e-0001L,
+	A7 = 1.538537835211839751112067512805496931725e-0001L,
+	B1 = 6.666666666666666666666666666666961498329e-0001L,
+	B2 = 3.999999999999999999999999990037655042358e-0001L,
+	B3 = 2.857142857142857142857273426428347457918e-0001L,
+	B4 = 2.222222222222222221353229049747910109566e-0001L,
+	B5 = 1.818181818181821503532559306309070138046e-0001L,
+	B6 = 1.538461538453809210486356084587356788556e-0001L,
+	B7 = 1.333333344463358756121456892645178795480e-0001L,
+	B8 = 1.176460904783899064854645174603360383792e-0001L,
+	B9 = 1.057293869956598995326368602518056990746e-0001L;
+/*
+ * Natural logarithm of x; the algorithm, the special cases and the role of
+ * the A and B coefficients are described in the comment at the top of this
+ * file.  x is examined and rewritten through the int pointer px, and the
+ * table point 1.g is built in z through pz.
+ */
+long double
+logl(long double x) {
+	long double f, s, z, qn, h, t;
+	int *px = (int *) &x;
+	int *pz = (int *) &z;
+	int i, j, ix, i0, i1, n;
+
+	/*
+	 * get long double precision word ordering: i0 indexes the word
+	 * holding sign and exponent, i1 the word at the opposite end.
+	 * Words 1 and 2 sit in the middle under either ordering, which
+	 * is why they are indexed directly below.
+	 */
+	if (*(int *) &one == 0) {
+		i0 = 3;
+		i1 = 0;
+	} else {
+		i0 = 0;
+		i1 = 3;
+	}
+
+	n = 0;
+	ix = px[i0];
+	if (ix > 0x3ffee0f8) {			/* if x > 31/33 */
+		if (ix < 0x3fff1084) {		/* if x < 33/31 */
+			f = x - one;
+			z = f * f;	/* NOTE(review): overwritten below */
+			if (((ix - 0x3fff0000) | px[i1] | px[2] | px[1]) == 0) {
+				return (zero);	/* log(1)= +0 */
+			}
+			s = f / (two + f);	/* |s| <= 2**-5, see (a) */
+			z = s * s;
+			return (f - s * (f - z * (B1 + z * (B2 + z * (B3 +
+			    z * (B4 + z * (B5 + z * (B6 + z * (B7 +
+			    z * (B8 + z * B9))))))))));
+		}
+		if (ix >= 0x7fff0000)
+			return (x + x);		/* x is +inf or NaN */
+		goto LARGE_N;
+	}
+	if (ix >= 0x00010000)
+		goto LARGE_N;
+	i = ix & 0x7fffffff;
+	if ((i | px[i1] | px[2] | px[1]) == 0) {
+		px[i0] |= 0x80000000;	/* force the sign bit: x = -0 */
+		return (one / x);	/* log(0.0) = -inf */
+	}
+	if (ix < 0) {
+		if ((unsigned) ix >= 0xffff0000)
+			return (x - x);		/* x is -inf or NaN */
+		return (zero / zero);		/* log(x<0) is NaN */
+	}
+	/* subnormal x */
+	x *= two113;
+	n = -113;
+	ix = px[i0];
+LARGE_N:
+	n += ((ix + 0x200) >> 16) - 0x3fff;
+	ix = (ix & 0x0000ffff) | 0x3fff0000;	/* scale x to [1,2] */
+	px[i0] = ix;
+	i = ix + 0x200;
+	pz[i0] = i & 0xfffffc00;	/* z = table point 1.g of step (b) */
+	pz[i1] = pz[1] = pz[2] = 0;
+	s = (x - z) / (x + z);
+	j = (i >> 10) & 0x3f;		/* table index, 0..63 */
+	z = s * s;
+	qn = (long double) n;
+	t = qn * ln2lo + _TBL_logl_lo[j];
+	h = qn * ln2hi + _TBL_logl_hi[j];
+	f = t + s * (A1 + z * (A2 + z * (A3 + z * (A4 + z * (A5 +
+	    z * (A6 + z * A7))))));
+	return (h + f);			/* step (c) */
+}
diff --git a/usr/src/lib/libm/common/Q/longdouble.h b/usr/src/lib/libm/common/Q/longdouble.h
new file mode 100644
index 0000000000..5aca94ded1
---
/dev/null +++ b/usr/src/lib/libm/common/Q/longdouble.h @@ -0,0 +1,161 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _Q_LONGDOUBLE_H +#define _Q_LONGDOUBLE_H + +#include <sys/ieeefp.h> + +extern long double __k_cosl(long double, long double); +extern long double __k_lgammal(long double, int *); +extern long double __k_sincosl(long double, long double, long double *); +extern long double __k_sinl(long double, long double); +extern long double __k_tanl(long double, long double, int); +extern long double __poly_libmq(long double, int, const long double *); +extern int __rem_pio2l(long double, long double *); + +extern long double acosdl(long double); +extern long double acoshl(long double); +extern long double acosl(long double); +extern long double acospil(long double); +extern long double acospl(long double); +extern long double aintl(long double); +extern long double anintl(long double); +extern long double annuityl(long double, long double); +extern long double asindl(long double); +extern long double asinhl(long double); +extern long double asinl(long double); +extern long double asinpil(long double); +extern long double asinpl(long double); +extern long double atan2dl(long double, long double); +extern long double atan2l(long double, long double); +extern long double atan2pil(long double, long double); +extern long double atandl(long double); +extern long double atanhl(long double); +extern long double atanl(long double); +extern long double atanpil(long double); +extern long double atanpl(long double); +extern long double cbrtl(long double); +extern long double ceill(long double); +extern long double compoundl(long double, long double); +extern long double copysignl(long double, long double); +extern long double cosdl(long double); +extern long double coshl(long double); +extern long double cosl(long double); +extern long double cospil(long double); +extern long double cospl(long double); +extern long double erfcl(long double); +extern long double erfl(long double); +extern long double exp10l(long double); +extern long double exp2l(long double); +extern long 
double expl(long double); +extern long double expm1l(long double); +extern long double fabsl(long double); +extern int finitel(long double); +extern long double floorl(long double); +extern long double fmodl(long double, long double); +extern enum fp_class_type fp_classl(long double); +extern long double gammal(long double); +extern long double hypotl(long double, long double); +extern int ilogbl(long double); +extern long double infinityl(void); +extern int irintl(long double); +extern int isinfl(long double); +extern int isnanl(long double); +extern int isnormall(long double); +extern int issubnormall(long double); +extern int iszerol(long double); +extern long double j0l(long double); +extern long double j1l(long double); +extern long double jnl(int, long double); +extern long double lgammal(long double); +extern long double log10l(long double); +extern long double log1pl(long double); +extern long double log2l(long double); +extern long double logbl(long double); +extern long double logl(long double); +extern long double max_normall(void); +extern long double max_subnormall(void); +extern long double min_normall(void); +extern long double min_subnormall(void); +extern long double nextafterl(long double, long double); +extern int nintl(long double); +extern long double pow_li(long double *, int *); +extern long double powl(long double, long double); +extern long double quiet_nanl(long); +extern long double remainderl(long double, long double); +extern long double rintl(long double); +extern long double scalbl(long double, long double); +extern long double scalbnl(long double, int); +extern long double signaling_nanl(long); +extern int signbitl(long double); +extern long double significandl(long double); +extern void sincosdl(long double, long double *, long double *); +extern void sincosl(long double, long double *, long double *); +extern void sincospil(long double, long double *, long double *); +extern void sincospl(long double, long double *, long double *); 
+extern long double sindl(long double); +extern long double sinhl(long double); +extern long double sinl(long double); +extern long double sinpil(long double); +extern long double sinpl(long double); +extern long double sqrtl(long double); +extern long double tandl(long double); +extern long double tanhl(long double); +extern long double tanl(long double); +extern long double tanpil(long double); +extern long double tanpl(long double); +extern long double y0l(long double); +extern long double y1l(long double); +extern long double ynl(int, long double); + +extern long double q_copysign_(long double *, long double *); +extern long double q_fabs_(long double *); +extern int iq_finite_(long double *); +extern long double q_fmod_(long double *, long double *); +extern enum fp_class_type iq_fp_class_(long double *); +extern int iq_ilogb_(long double *); +extern long double q_infinity_(void); +extern int iq_isinf_(long double *); +extern int iq_isnan_(long double *); +extern int iq_isnormal_(long double *); +extern int iq_issubnormal_(long double *); +extern int iq_iszero_(long double *); +extern long double q_max_normal_(void); +extern long double q_max_subnormal_(void); +extern long double q_min_normal_(void); +extern long double q_min_subnormal_(void); +extern long double q_nextafter_(long double *, long double *); +extern long double q_quiet_nan_(long *); +extern long double q_remainder_(long double *, long double *); +extern long double q_scalbn_(long double *, int *); +extern long double q_signaling_nan_(long *); +extern int iq_signbit_(long double *); + +#endif /* _Q_LONGDOUBLE_H */ diff --git a/usr/src/lib/libm/common/Q/nextafterl.c b/usr/src/lib/libm/common/Q/nextafterl.c new file mode 100644 index 0000000000..1f6e40d398 --- /dev/null +++ b/usr/src/lib/libm/common/Q/nextafterl.c @@ -0,0 +1,119 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). 
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#if defined(ELFOBJ)
+#pragma weak nextafterl = __nextafterl
+#endif
+
+#include "libm.h"
+#include <float.h>	/* LDBL_MAX, LDBL_MIN */
+
+#if defined(__sparc)
+#define	n0	0	/* word masked with XSGNMSK below */
+#define	n1	1
+#define	n2	2
+#define	n3	3	/* word incremented/decremented first */
+#define	X86PDNRM1(x)	/* expands to nothing on sparc */
+#define	INC(px)	{ /* add 1 at word n3, carrying towards n0 on wrap */ \
+			if (++px[n3] == 0) \
+				if (++px[n2] == 0) \
+					if (++px[n1] == 0) \
+						++px[n0]; \
+		}
+#define	DEC(px)	{ /* subtract 1 at word n3, borrowing towards n0 */ \
+			if (--px[n3] == 0xffffffff) \
+				if (--px[n2] == 0xffffffff) \
+					if (--px[n1] == 0xffffffff) \
+						--px[n0]; \
+		}
+#elif defined(__x86)
+#define	n0	2	/* word masked with XSGNMSK below */
+#define	n1	1
+#define	n2	0
+#define	n3	0	/* same word as n2 on x86 */
+/*
+ * if pseudo-denormal, replace by the equivalent normal
+ */
+#define	X86PDNRM1(x)	if (XBIASED_EXP(x) == 0 && (((int *) &x)[1] & \
+			    0x80000000) != 0) \
+				((int *) &x)[2] |= 1
+#define	INC(px)	{ /* on carry out of word n1, reset it to 0x80000000 */ \
+			if (++px[n2] == 0) \
+				if ((++px[n1] & ~0x80000000) == 0) \
+					px[n1] = 0x80000000, ++px[n0]; \
+		}
+#define	DEC(px)	{ /* on borrow into n0, re-set top bit of n1 if n0 != 0 */ \
+			if (--px[n2] == 0xffffffff) \
+				if (--px[n1] == 0x7fffffff) \
+					if ((--px[n0] & 0x7fff) != 0) \
+						px[n1] |= 0x80000000; \
+		}
+#endif
+/*
+ * nextafterl(x, y): step x by one unit in the last place towards y.
+ *
+ *	x == y		-> y is returned
+ *	x or y is NaN	-> x * y is returned
+ *	x is zero	-> x is rebuilt as: word n0 = py[n0] & XSGNMSK,
+ *			   words n1/n2 = 0, word n3 = 1
+ *	otherwise	-> the words of x are stepped with INC() or DEC():
+ *			   for x with (px[n0] & XSGNMSK) == 0, DEC when
+ *			   x > y else INC; for the other sign, DEC when
+ *			   x < y else INC
+ *
+ * XSGNMSK, ISZEROL and XBIASED_EXP come from libm.h (not visible here).
+ */
+long double
+nextafterl(long double x, long double y) {
+	int *px = (int *) &x;
+	int *py = (int *) &y;
+
+	if (x == y)
+		return (y);		/* C99 requirement */
+	if (x != x || y != y)
+		return (x * y);		/* at least one operand is NaN */
+
+	if (ISZEROL(x)) {	/* x == 0.0 */
+		px[n0] = py[n0] & XSGNMSK;
+		px[n1] = px[n2] = 0;
+		px[n3] = 1;
+	} else {
+		X86PDNRM1(x);
+		if ((px[n0] & XSGNMSK) == 0) {	/* x > 0.0 */
+			if (x > y)	/* x > y */
+				DEC(px)
+			else
+				INC(px)
+		} else {
+			if (x < y)	/* x < y */
+				DEC(px)
+			else
+				INC(px)
+		}
+	}
+#ifndef lint
+	{
+		/*
+		 * dummy is written but never read; the products are
+		 * presumably evaluated only for the floating-point
+		 * exceptions they raise when the result's exponent
+		 * field is 0 or 0x7fff -- TODO confirm.
+		 */
+		volatile long double dummy;
+		int k = XBIASED_EXP(x);
+
+		if (k == 0)
+			dummy = LDBL_MIN * copysignl(LDBL_MIN, x);
+		else if (k == 0x7fff)
+			dummy = LDBL_MAX * copysignl(LDBL_MAX, x);
+	}
+#endif
+	return (x);
+}
diff --git a/usr/src/lib/libm/common/Q/powl.c b/usr/src/lib/libm/common/Q/powl.c
new file mode 100644
index 0000000000..b88e42a1ca
--- /dev/null
+++ b/usr/src/lib/libm/common/Q/powl.c
@@ -0,0 +1,320 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ */ + +#if defined(ELFOBJ) +#pragma weak powl = __powl +#endif + +#include "libm.h" +#include "xpg6.h" /* __xpg6 */ +#define _C99SUSv3_pow _C99SUSv3_pow_treats_Inf_as_an_even_int + +#if defined(__sparc) +#define i0 0 +#define i1 1 +#define i2 2 +#define i3 3 + +static const long double zero = 0.0L, one = 1.0L, two = 2.0L; + +extern const long double _TBL_logl_hi[], _TBL_logl_lo[]; + +static const long double + two113 = 10384593717069655257060992658440192.0L, + ln2hi = 6.931471805599453094172319547495844850203e-0001L, + ln2lo = 1.667085920830552208890449330400379754169e-0025L, + A2 = 6.666666666666666666666666666666091393804e-0001L, + A3 = 4.000000000000000000000000407167070220671e-0001L, + A4 = 2.857142857142857142730077490612903681164e-0001L, + A5 = 2.222222222222242577702836920812882605099e-0001L, + A6 = 1.818181816435493395985912667105885828356e-0001L, + A7 = 1.538537835211839751112067512805496931725e-0001L, + B1 = 6.666666666666666666666666666666666667787e-0001L, + B2 = 3.999999999999999999999999999999848524411e-0001L, + B3 = 2.857142857142857142857142865084581075070e-0001L, + B4 = 2.222222222222222222222010781800643808497e-0001L, + B5 = 1.818181818181818185051442171337036403674e-0001L, + B6 = 1.538461538461508363540720286292008207673e-0001L, + B7 = 1.333333333506731842033180638329317108428e-0001L, + B8 = 1.176469984587418890634302788283946761670e-0001L, + B9 = 1.053794891561452331722969901564862497132e-0001L; + +static long double +logl_x(long double x, long double *w) { + long double f, f1, v, s, z, qn, h, t; + int *px = (int *) &x; + int *pz = (int *) &z; + int i, j, ix, n; + + n = 0; + ix = px[i0]; + if (ix > 0x3ffef03f && ix < 0x3fff0820) { /* 65/63 > x > 63/65 */ + f = x - one; + z = f * f; + if (((ix - 0x3fff0000) | px[i1] | px[i2] | px[i3]) == 0) { + *w = zero; + return (zero); /* log(1)= +0 */ + } + qn = one / (two + f); + s = f * qn; /* |s|<2**-6 */ + v = s * s; + h = (long double) (2.0 * (double) s); + f1 = (long double) ((double) f); + t = ((two * 
(f - h) - h * f1) - h * (f - f1)) * qn + + s * (v * (B1 + v * (B2 + v * (B3 + v * (B4 + + v * (B5 + v * (B6 + v * (B7 + v * (B8 + v * B9))))))))); + s = (long double) ((double) (h + t)); + *w = t - (s - h); + return (s); + } + if (ix < 0x00010000) { /* subnormal x */ + x *= two113; + n = -113; + ix = px[i0]; + } + /* LARGE_N */ + n += ((ix + 0x200) >> 16) - 0x3fff; + ix = (ix & 0x0000ffff) | 0x3fff0000; /* scale x to [1,2] */ + px[i0] = ix; + i = ix + 0x200; + pz[i0] = i & 0xfffffc00; + pz[i1] = pz[i2] = pz[i3] = 0; + qn = one / (x + z); + f = x - z; + s = f * qn; + f1 = (long double) ((double) f); + h = (long double) (2.0 * (double) s); + t = qn * ((two * (f - z * h) - h * f1) - h * (f - f1)); + j = (i >> 10) & 0x3f; + v = s * s; + qn = (long double) n; + t += qn * ln2lo + _TBL_logl_lo[j]; + t += s * (v * (A2 + v * (A3 + v * (A4 + v * (A5 + v * (A6 + + v * A7)))))); + v = qn * ln2hi + _TBL_logl_hi[j]; + s = h + v; + t += (h - (s - v)); + z = (long double) ((double) (s + t)); + *w = t - (z - s); + return (z); +} + +extern const long double _TBL_expl_hi[], _TBL_expl_lo[]; +static const long double + invln2_32 = 4.616624130844682903551758979206054839765e+1L, + ln2_32hi = 2.166084939249829091928849858592451515688e-2L, + ln2_32lo = 5.209643502595475652782654157501186731779e-27L, + ln2_64 = 1.083042469624914545964425189778400898568e-2L; + +long double +powl(long double x, long double y) { + long double z, ax; + long double y1, y2, w1, w2; + int sbx, sby, j, k, yisint, m; + int hx, lx, hy, ly, ahx, ahy; + int *pz = (int *) &z; + int *px = (int *) &x; + int *py = (int *) &y; + + hx = px[i0]; + lx = px[i1] | px[i2] | px[i3]; + hy = py[i0]; + ly = py[i1] | py[i2] | py[i3]; + ahx = hx & ~0x80000000; + ahy = hy & ~0x80000000; + + if ((ahy | ly) == 0) + return (one); /* x**+-0 = 1 */ + else if (hx == 0x3fff0000 && lx == 0 && + (__xpg6 & _C99SUSv3_pow) != 0) + return (one); /* C99: 1**anything = 1 */ + else if (ahx > 0x7fff0000 || (ahx == 0x7fff0000 && lx != 0) || + ahy > 
0x7fff0000 || (ahy == 0x7fff0000 && ly != 0)) + return (x + y); /* +-NaN return x+y */ + /* includes Sun: 1**NaN = NaN */ + sbx = (unsigned) hx >> 31; + sby = (unsigned) hy >> 31; + ax = fabsl(x); + /* + * determine if y is an odd int when x < 0 + * yisint = 0 ... y is not an integer + * yisint = 1 ... y is an odd int + * yisint = 2 ... y is an even int + */ + yisint = 0; + if (sbx) { + if (ahy >= 0x40700000) /* if |y|>=2**113 */ + yisint = 2; /* even integer y */ + else if (ahy >= 0x3fff0000) { + k = (ahy >> 16) - 0x3fff; /* exponent */ + if (k > 80) { + j = ((unsigned) py[i3]) >> (112 - k); + if ((j << (112 - k)) == py[i3]) + yisint = 2 - (j & 1); + } else if (k > 48) { + j = ((unsigned) py[i2]) >> (80 - k); + if ((j << (80 - k)) == py[i2]) + yisint = 2 - (j & 1); + } else if (k > 16) { + j = ((unsigned) py[i1]) >> (48 - k); + if ((j << (48 - k)) == py[i1]) + yisint = 2 - (j & 1); + } else if (ly == 0) { + j = ahy >> (16 - k); + if ((j << (16 - k)) == ahy) + yisint = 2 - (j & 1); + } + } + } + + /* special value of y */ + if (ly == 0) { + if (ahy == 0x7fff0000) { /* y is +-inf */ + if (((ahx - 0x3fff0000) | lx) == 0) { + if ((__xpg6 & _C99SUSv3_pow) != 0) + return (one); + /* C99: (-1)**+-inf = 1 */ + else + return (y - y); + /* Sun: (+-1)**+-inf = NaN */ + } else if (ahx >= 0x3fff0000) + /* (|x|>1)**+,-inf = inf,0 */ + return (sby == 0 ? y : zero); + else /* (|x|<1)**-,+inf = inf,0 */ + return (sby != 0 ? 
-y : zero); + } else if (ahy == 0x3fff0000) { /* y is +-1 */ + if (sby != 0) + return (one / x); + else + return (x); + } else if (hy == 0x40000000) /* y is 2 */ + return (x * x); + else if (hy == 0x3ffe0000) { /* y is 0.5 */ + if (!((ahx | lx) == 0 || ((ahx - 0x7fff0000) | lx) == + 0)) + return (sqrtl(x)); + } + } + + /* special value of x */ + if (lx == 0) { + if (ahx == 0x7fff0000 || ahx == 0 || ahx == 0x3fff0000) { + /* x is +-0,+-inf,+-1 */ + z = ax; + if (sby == 1) + z = one / z; /* z = 1/|x| if y is negative */ + if (sbx == 1) { + if (ahx == 0x3fff0000 && yisint == 0) + z = zero / zero; + /* (-1)**non-int is NaN */ + else if (yisint == 1) + z = -z; /* (x<0)**odd = -(|x|**odd) */ + } + return (z); + } + } + + /* (x<0)**(non-int) is NaN */ + if (sbx == 1 && yisint == 0) + return (zero / zero); /* should be volatile */ + + /* Now ax is finite, y is finite */ + /* first compute log(ax) = w1+w2, with 53 bits w1 */ + w1 = logl_x(ax, &w2); + + /* split up y into y1+y2 and compute (y1+y2)*(w1+w2) */ + if (ly == 0 || ahy >= 0x43fe0000) { + y1 = y * w1; + y2 = y * w2; + } else { + y1 = (long double) ((double) y); + y2 = (y - y1) * w1 + y * w2; + y1 *= w1; + } + z = y1 + y2; + j = pz[i0]; + if ((unsigned) j >= 0xffff0000) { /* NaN or -inf */ + if (sbx == 1 && yisint == 1) + return (one / z); + else + return (-one / z); + } else if ((j & ~0x80000000) < 0x3fc30000) { /* |x|<2^-60 */ + if (sbx == 1 && yisint == 1) + return (-one - z); + else + return (one + z); + } else if (j > 0) { + if (j > 0x400d0000) { + if (sbx == 1 && yisint == 1) + return (scalbnl(-one, 20000)); + else + return (scalbnl(one, 20000)); + } + k = (int) (invln2_32 * (z + ln2_64)); + } else { + if ((unsigned) j > 0xc00d0000) { + if (sbx == 1 && yisint == 1) + return (scalbnl(-one, -20000)); + else + return (scalbnl(one, -20000)); + } + k = (int) (invln2_32 * (z - ln2_64)); + } + j = k & 0x1f; + m = k >> 5; + { + /* rational approximation coeffs for [-(ln2)/64,(ln2)/64] */ + long double + t1 = 
1.666666666666666666666666666660876387437e-1L, + t2 = -2.777777777777777777777707812093173478756e-3L, + t3 = 6.613756613756613482074280932874221202424e-5L, + t4 = -1.653439153392139954169609822742235851120e-6L, + t5 = 4.175314851769539751387852116610973796053e-8L; + long double t = (long double) k; + + w1 = (y2 - (t * ln2_32hi - y1)) - t * ln2_32lo; + t = w1 * w1; + w2 = (w1 - t * (t1 + t * (t2 + t * (t3 + t * (t4 + t * t5))))) - + two; + z = _TBL_expl_hi[j] - ((_TBL_expl_hi[j] * (w1 + w1)) / w2 - + _TBL_expl_lo[j]); + } + j = m + (pz[i0] >> 16); + if (j && (unsigned) j < 0x7fff) + pz[i0] += m << 16; + else + z = scalbnl(z, m); + + if (sbx == 1 && yisint == 1) + z = -z; /* (-ve)**(odd int) */ + return (z); +} +#else +#error Unsupported Architecture +#endif /* defined(__sparc) */ diff --git a/usr/src/lib/libm/common/Q/remainderl.c b/usr/src/lib/libm/common/Q/remainderl.c new file mode 100644 index 0000000000..82d47328ca --- /dev/null +++ b/usr/src/lib/libm/common/Q/remainderl.c @@ -0,0 +1,88 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 
+ * Use is subject to license terms.
+ */
+
+#pragma weak remainderl = __remainderl
+
+#include "libm.h"
+#include "longdouble.h"
+
+/*
+ * remainderl(x,p)
+ * returns x REM p = x - [x/p]*p as if in infinitely
+ * precise arithmetic, where [x/p] is the (infinite bit)
+ * integer nearest x/p (in half way case choose the even one).
+ * Method :
+ *	Based on fmodl() return x-[x/p]chopped*p exactly.
+ *
+ * Steps taken by the code:
+ *	1. NaN p returns x + p; non-finite x returns x - x.
+ *	2. p is replaced by |p|; when p <= hfmax (so that p + p is
+ *	   representable) x is first reduced with fmodl(x, p + p).
+ *	3. The sign of x is saved and x replaced by |x|.
+ *	4. x is compared against p/2 and p is subtracted up to twice.
+ *	   For p < dbmin the comparison is made as x + x against p
+ *	   instead of x against half * p.
+ *	5. The saved sign is reapplied.
+ */
+
+#define	HFMAX	5.948657476786158825428796633140035080982e+4931L
+#define	DBMIN	6.724206286224187012525355634643505205196e-4932L
+
+static const long double
+	zero = 0.0L,
+	half = 0.5L,
+	hfmax = HFMAX,	/* half of the maximum number */
+	dbmin = DBMIN;	/* double of the minimum (normal) number */
+/*
+ * IEEE-style remainder of x with respect to p; see the description above.
+ */
+long double
+remainderl(long double x, long double p) {
+	long double hp;
+	int sx;
+
+	if (isnanl(p))
+		return (x + p);
+	if (!finitel(x))
+		return (x - x);
+	p = fabsl(p);
+	if (p <= hfmax)
+		x = fmodl(x, p + p);	/* now |x| < 2|p| */
+	sx = signbitl(x);
+	x = fabsl(x);
+	if (p < dbmin) {
+		/* tiny p: compare 2x with p rather than x with p/2 */
+		if (x + x > p) {
+			if (x == p)
+				x = zero;
+			else
+				x -= p;	/* avoid x-x=-0 in RM mode */
+			if (x + x >= p)
+				x -= p;
+		}
+	} else {
+		hp = half * p;
+		if (x > hp) {
+			if (x == p)
+				x = zero;
+			else
+				x -= p;	/* avoid x-x=-0 in RM mode */
+			if (x >= hp)
+				x -= p;
+		}
+	}
+	return (sx == 0 ? x : -x);	/* restore the sign saved above */
+}
diff --git a/usr/src/lib/libm/common/Q/rintl.c b/usr/src/lib/libm/common/Q/rintl.c
new file mode 100644
index 0000000000..a721e4231b
--- /dev/null
+++ b/usr/src/lib/libm/common/Q/rintl.c
@@ -0,0 +1,75 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma weak rintl = __rintl
+
+/*
+ * rintl(long double x) return x rounded to integral according to
+ * the prevailing rounding direction
+ *
+ * NOTE: aintl(x), anintl(x), ceill(x), floorl(x), and rintl(x) return result
+ * with the same sign as x's, including 0.0L.
+ *
+ * Method: for |x| < 2^112, add and then subtract 2^112 carrying the sign
+ * of x.  The addition is done with the precision mode switched to
+ * fp_extended through __swapRP(), and the previous mode is restored
+ * afterwards.  A sum equal to the signed 2^112 means x rounded to zero,
+ * and a zero with the sign of x is returned.
+ */
+
+#include "libm.h"
+#include "longdouble.h"
+
+extern enum fp_precision_type __swapRP(enum fp_precision_type);
+
+static const double one = 1.0;	/* probed below to find the word order */
+static const long double qzero = 0.0L;
+/*
+ * Round x to an integral value in the current rounding direction.
+ * Non-finite x returns x + x; |x| >= 2^112 is returned unchanged.
+ */
+long double
+rintl(long double x) {
+	enum fp_precision_type rp;
+	long double t, w, two112;
+	int *pt = (int *) &two112;
+
+	if (!finitel(x))
+		return (x + x);
+
+	/* a non-zero first word of 1.0 selects word 0 for the exponent */
+	if (*(int *) &one != 0) {	/* set two112 = 2^112 */
+		pt[0] = 0x406f0000;
+		pt[1] = pt[2] = pt[3] = 0;
+	} else {
+		pt[3] = 0x406f0000;
+		pt[0] = pt[1] = pt[2] = 0;
+	}
+
+	if (fabsl(x) >= two112)
+		return (x);	/* already an integer */
+	t = copysignl(two112, x);
+	rp = __swapRP(fp_extended);	/* make sure precision is long double */
+	w = x + t;	/* x+sign(x)*2^112 rounded to integer */
+	(void) __swapRP(rp);	/* restore precision mode */
+	if (w == t)
+		return (copysignl(qzero, x));	/* x rounded to zero */
+	else
+		return (w - t);
+}
diff --git a/usr/src/lib/libm/common/Q/rndintl.c b/usr/src/lib/libm/common/Q/rndintl.c
new file mode 100644
index 0000000000..50e7ed1f77
--- /dev/null
+++ b/usr/src/lib/libm/common/Q/rndintl.c
@@ -0,0 +1,116 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of
this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak aintl = __aintl +#pragma weak anintl = __anintl +#pragma weak irintl = __irintl +#pragma weak nintl = __nintl + +/* + * aintl(x) return x chopped to integral value + * anintl(x) return sign(x)*(|x|+0.5) chopped to integral value + * irintl(x) return rint(x) in integer format + * nintl(x) return anint(x) in integer format + * + * NOTE: aintl(x), anintl(x), ceill(x), floorl(x), and rintl(x) return result + * with the same sign as x's, including 0.0. 
+ */
+
+#include "libm.h"
+#include "longdouble.h"
+
+extern enum fp_direction_type __swapRD(enum fp_direction_type);
+
+static const long double qone = 1.0L, qhalf = 0.5L, qmhalf = -0.5L;
+/*
+ * aintl(x): x chopped to an integral value.  rintl(|x|) is taken first
+ * and corrected by -1 when rounding moved it above |x|; the sign of x is
+ * then copied onto the result.  Non-finite x returns x + x.
+ */
+long double
+aintl(long double x) {
+	long double t, w;
+
+	if (!finitel(x))
+		return (x + x);
+	w = fabsl(x);
+	t = rintl(w);
+	if (t <= w)
+		return (copysignl(t, x));	/* NaN or already aint(|x|) */
+	else	/* |t|>|x| case */
+		return (copysignl(t - qone, x));	/* |t-1|*sign(x) */
+}
+/*
+ * anintl(x): sign(x) * (|x| + 0.5) chopped to an integral value.
+ * t = rintl(|x|) is corrected using z = t - |x|: more than 0.5 above
+ * subtracts 1, 0.5 or more below adds 1.  Non-finite x returns x + x.
+ */
+long double
+anintl(long double x) {
+	long double t, w, z;
+
+	if (!finitel(x))
+		return (x + x);
+	w = fabsl(x);
+	t = rintl(w);
+	if (t == w)
+		return (copysignl(t, x));	/* |x| is already integral */
+	z = t - w;
+	if (z > qhalf)
+		t = t - qone;
+	else if (z <= qmhalf)
+		t = t + qone;
+	return (copysignl(t, x));
+}
+/*
+ * irintl(x): rintl(x) converted to int.  The current rounding direction
+ * is read by swapping in fp_nearest and immediately swapping it back; the
+ * conversion of rintl(x) is only made when x lies inside the bounds listed
+ * for that direction.  Any other x (including NaN, for which every
+ * comparison is false) falls through to the conversion of a signed 1.0e100.
+ */
+int
+irintl(long double x) {
+	enum fp_direction_type rd;
+
+	rd = __swapRD(fp_nearest);
+	(void) __swapRD(rd);	/* restore Rounding Direction */
+	switch (rd) {
+	case fp_nearest:
+		if (x < 2147483647.5L && x >= -2147483648.5L)
+			return ((int)rintl(x));
+		break;
+	case fp_tozero:
+		if (x < 2147483648.0L && x > -2147483649.0L)
+			return ((int)rintl(x));
+		break;
+	case fp_positive:
+		if (x <= 2147483647.0L && x > -2147483649.0L)
+			return ((int)rintl(x));
+		break;
+	case fp_negative:
+		if (x < 2147483648.0L && x >= -2147483648.0L)
+			return ((int)rintl(x));
+		break;
+	}
+	/* out of range: presumably converted to raise invalid -- confirm */
+	return ((int)copysignl(1.0e100L, x));
+}
+/*
+ * nintl(x): anintl(x) converted to int when x is strictly between
+ * -2147483648.5 and 2147483647.5; otherwise the conversion of a signed
+ * 1.0e100 is returned, as in irintl().
+ */
+int
+nintl(long double x) {
+	if ((x < 2147483647.5L) && (x > -2147483648.5L))
+		return ((int)anintl(x));
+	else
+		return ((int)copysignl(1.0e100L, x));
+}
diff --git a/usr/src/lib/libm/common/Q/scalbl.c b/usr/src/lib/libm/common/Q/scalbl.c
new file mode 100644
index 0000000000..7346954a95
--- /dev/null
+++ b/usr/src/lib/libm/common/Q/scalbl.c
@@ -0,0 +1,68 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak scalbl = __scalbl + +/* + * scalbl(x,n): return x * 2^n by manipulating exponent. + */ + +#include "libm.h" + +#define n0 0 + +long double +scalbl(long double x, long double fn) { + int *py = (int *) &fn, n; + long double z; + + if (isnanl(x) || isnanl(fn)) + return (x * fn); + + /* + * fn is inf or NaN + */ + if ((py[n0] & 0x7fff0000) == 0x7fff0000) { + if ((py[n0] & 0x80000000) != 0) + return (x / (-fn)); + else + return (x * fn); + } + if (rintl(fn) != fn) + return ((fn - fn) / (fn - fn)); + if (fn > 65000.0L) + z = scalbnl(x, 65000); + else if (-fn > 65000.0L) + z = scalbnl(x, -65000); + else { + n = (int) fn; + z = scalbnl(x, n); + } + return (z); +} diff --git a/usr/src/lib/libm/common/Q/scalbnl.c b/usr/src/lib/libm/common/Q/scalbnl.c new file mode 100644 index 0000000000..9351040f0b --- /dev/null +++ b/usr/src/lib/libm/common/Q/scalbnl.c @@ -0,0 +1,85 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#if defined(ELFOBJ) +#pragma weak scalbnl = __scalbnl +#endif + +#include "libm.h" +#include <float.h> /* LDBL_MAX, LDBL_MIN */ +#include <stdlib.h> /* abs */ + +#if defined(__sparc) +#define XSET_EXP(k, x) ((int *) &x)[0] = (((int *) &x)[0] & ~0x7fff0000) | \ + (k << 16) +#define ISINFNANL(k, x) (k == 0x7fff) +#define XTWOT_OFFSET 113 +static const long double xtwot = 10384593717069655257060992658440192.0L, + /* 2^113 */ + twomtm1 = 4.814824860968089632639944856462318296E-35L; /* 2^-114 */ +#elif defined(__x86) +#define XSET_EXP(k, x) ((int *) &x)[2] = (((int *) &x)[2] & ~0x7fff) | k +#if defined(HANDLE_UNSUPPORTED) +#define ISINFNANL(k, x) (k == 0x7fff || k != 0 && \ + (((int *) &x)[1] & 0x80000000) == 0) +#else +#define ISINFNANL(k, x) (k == 0x7fff) +#endif +#define XTWOT_OFFSET 64 +static const long double xtwot = 18446744073709551616.0L, /* 2^64 */ + twomtm1 = 2.7105054312137610850186E-20L; /* 2^-65 */ +#endif + +long double +scalbnl(long double x, int n) { + int k = XBIASED_EXP(x); + + if (ISINFNANL(k, x)) + return (x + x); + if (ISZEROL(x) || n == 0) + return (x); + if (k == 0) { + x *= xtwot; + k = XBIASED_EXP(x) - XTWOT_OFFSET; + } + if ((unsigned) abs(n) >= 131072) /* 
cast to unsigned for -2^31 */ + n >>= 1; /* avoid subsequent integer overflow */ + k += n; + if (k > 0x7ffe) + return (LDBL_MAX * copysignl(LDBL_MAX, x)); + if (k <= -XTWOT_OFFSET - 1) + return (LDBL_MIN * copysignl(LDBL_MIN, x)); + if (k > 0) { + XSET_EXP(k, x); + return (x); + } + k += XTWOT_OFFSET + 1; + XSET_EXP(k, x); + return (x * twomtm1); +} diff --git a/usr/src/lib/libm/common/Q/signgaml.c b/usr/src/lib/libm/common/Q/signgaml.c new file mode 100644 index 0000000000..77aeecf389 --- /dev/null +++ b/usr/src/lib/libm/common/Q/signgaml.c @@ -0,0 +1,34 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma weak signgaml = __signgaml + +#include "libm.h" + +int signgaml = 0; diff --git a/usr/src/lib/libm/common/Q/significandl.c b/usr/src/lib/libm/common/Q/significandl.c new file mode 100644 index 0000000000..8140463a2e --- /dev/null +++ b/usr/src/lib/libm/common/Q/significandl.c @@ -0,0 +1,42 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#if defined(ELFOBJ) +#pragma weak significandl = __significandl +#endif + +#include "libm.h" + +long double +significandl(long double x) { + if (ISZEROL(x) || XBIASED_EXP(x) == 0x7fff) /* 0/+-Inf/NaN */ + return (x + x); + else + return (scalbnl(x, -ilogbl(x))); +} diff --git a/usr/src/lib/libm/common/Q/sincosl.c b/usr/src/lib/libm/common/Q/sincosl.c new file mode 100644 index 0000000000..95fafc9523 --- /dev/null +++ b/usr/src/lib/libm/common/Q/sincosl.c @@ -0,0 +1,98 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). 
+ * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * sincosl(x) + * Table look-up algorithm by K.C. Ng, November, 1989. + * + * kernel function: + * __k_sincosl ... sin and cos function on [-pi/4,pi/4] + * __rem_pio2l ... argument reduction routine + * + * Method. + * Let S and C denote the sin and cos respectively on [-PI/4, +PI/4]. + * 1. Assume the argument x is reduced to y1+y2 = x-k*pi/2 in + * [-pi/2 , +pi/2], and let n = k mod 4. + * 2. Let S=S(y1+y2), C=C(y1+y2). Depending on n, we have + * + * n sin(x) cos(x) tan(x) + * ---------------------------------------------------------- + * 0 S C S/C + * 1 C -S -C/S + * 2 -S -C S/C + * 3 -C S -C/S + * ---------------------------------------------------------- + * + * Special cases: + * Let trig be any of sin, cos, or tan. + * trig(+-INF) is NaN, with signals; + * trig(NaN) is that NaN; + * + * Accuracy: + * computer TRIG(x) returns trig(x) nearly rounded. 
+ */ + +#pragma weak sincosl = __sincosl + +#include "libm.h" +#include "longdouble.h" + +void +sincosl(long double x, long double *s, long double *c) { + long double y[2], z = 0.0L; + int n, ix; + + ix = *(int *) &x; /* High word of x */ + + /* |x| ~< pi/4 */ + ix &= 0x7fffffff; + if (ix <= 0x3ffe9220) + *s = __k_sincosl(x, z, c); + else if (ix >= 0x7fff0000) + *s = *c = x - x; /* trig(Inf or NaN) is NaN */ + else { /* argument reduction needed */ + n = __rem_pio2l(x, y); + switch (n & 3) { + case 0: + *s = __k_sincosl(y[0], y[1], c); + break; + case 1: + *c = -__k_sincosl(y[0], y[1], s); + break; + case 2: + *s = -__k_sincosl(y[0], y[1], c); + *c = -*c; + break; + case 3: + *c = __k_sincosl(y[0], y[1], s); + *s = -*s; + break; + } + } +} diff --git a/usr/src/lib/libm/common/Q/sincospil.c b/usr/src/lib/libm/common/Q/sincospil.c new file mode 100644 index 0000000000..fda3d45490 --- /dev/null +++ b/usr/src/lib/libm/common/Q/sincospil.c @@ -0,0 +1,195 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma weak sincospil = __sincospil + +/* + * void sincospil(long double x, long double *s, long double *c) + * *s = sinl(pi*x); *c = cosl(pi*x); + * + * Algorithm, 10/17/2002, K.C. Ng + * ------------------------------ + * Let y = |4x|, z = floor(y), and n = (int)(z mod 8.0) (displayed in binary). + * 1. If y == z, then x is a multiple of 1/4. Return the following values: + * --------------------------------------------------- + * n x mod 2 sin(x*pi) cos(x*pi) tan(x*pi) + * --------------------------------------------------- + * 000 0.00 +0 ___ +1 ___ +0 + * 001 0.25 +\/0.5 +\/0.5 +1 + * 010 0.50 +1 ___ +0 ___ +inf + * 011 0.75 +\/0.5 -\/0.5 -1 + * 100 1.00 -0 ___ -1 ___ +0 + * 101 1.25 -\/0.5 -\/0.5 +1 + * 110 1.50 -1 ___ -0 ___ +inf + * 111 1.75 -\/0.5 +\/0.5 -1 + * --------------------------------------------------- + * 2. Otherwise, + * --------------------------------------------------- + * n t sin(x*pi) cos(x*pi) tan(x*pi) + * --------------------------------------------------- + * 000 (y-z)/4 sinpi(t) cospi(t) tanpi(t) + * 001 (z+1-y)/4 cospi(t) sinpi(t) 1/tanpi(t) + * 010 (y-z)/4 cospi(t) -sinpi(t) -1/tanpi(t) + * 011 (z+1-y)/4 sinpi(t) -cospi(t) -tanpi(t) + * 100 (y-z)/4 -sinpi(t) -cospi(t) tanpi(t) + * 101 (z+1-y)/4 -cospi(t) -sinpi(t) 1/tanpi(t) + * 110 (y-z)/4 -cospi(t) sinpi(t) -1/tanpi(t) + * 111 (z+1-y)/4 -sinpi(t) cospi(t) -tanpi(t) + * --------------------------------------------------- + * + * NOTE. This program computes sinpi/cospi(t<0.25) by __k_sin/cos(pi*t, 0.0). + * This will return a result with error slightly more than one ulp (but less + * than 2 ulp). If one wants an accurate result, one may break up pi*t in + * high (tpi_h) and low (tpi_l) parts and call __k_sin/cos(tpi_h, tpi_l) + * instead. 
+ */ + +#include "libm.h" +#include "longdouble.h" + +#define I(q, m) ((int *) &(q))[m] +#define U(q, m) ((unsigned *) &(q))[m] +#if defined(__LITTLE_ENDIAN) || defined(__x86) +#define LDBL_MOST_SIGNIF_I(ld) ((I(ld, 2) << 16) | (0xffff & (I(ld, 1) >> 15))) +#define LDBL_LEAST_SIGNIF_U(ld) U(ld, 0) +#define PREC 64 +#define PRECM1 63 +#define PRECM2 62 +static const long double twoPRECM2 = 9.223372036854775808000000000000000e+18L; +#else +#define LDBL_MOST_SIGNIF_I(ld) I(ld, 0) +#define LDBL_LEAST_SIGNIF_U(ld) U(ld, sizeof (long double) / sizeof (int) - 1) +#define PREC 113 +#define PRECM1 112 +#define PRECM2 111 +static const long double twoPRECM2 = 5.192296858534827628530496329220096e+33L; +#endif + +static const long double +zero = 0.0L, +quater = 0.25L, +one = 1.0L, +pi = 3.141592653589793238462643383279502884197e+0000L, +sqrth = 0.707106781186547524400844362104849039284835937688474, +tiny = 1.0e-100; + +void +sincospil(long double x, long double *s, long double *c) { + long double y, z, t; + int hx, n, k; + unsigned lx; + + hx = LDBL_MOST_SIGNIF_I(x); + lx = LDBL_LEAST_SIGNIF_U(x); + k = ((hx & 0x7fff0000) >> 16) - 0x3fff; + if (k >= PRECM2) { /* |x| >= 2**(Prec-2) */ + if (k >= 16384) { + *s = *c = x - x; + } else { + if (k >= PREC) { + *s = zero; + *c = one; + } else if (k == PRECM1) { + if ((lx & 1) == 0) { + *s = zero; + *c = one; + } else { + *s = -zero; + *c = -one; + } + } else { /* k = Prec - 2 */ + if ((lx & 1) == 0) { + *s = zero; + *c = one; + } else { + *s = one; + *c = zero; + } + if ((lx & 2) != 0) { + *s = -*s; + *c = -*c; + } + } + } + } else if (k < -2) /* |x| < 0.25 */ + *s = __k_sincosl(pi * fabsl(x), zero, c); + else { + /* y = |4x|, z = floor(y), and n = (int)(z mod 8.0) */ + y = 4.0L * fabsl(x); + if (k < PRECM2) { + z = y + twoPRECM2; + n = LDBL_LEAST_SIGNIF_U(z) & 7; /* 3 LSb of z */ + t = z - twoPRECM2; + k = 0; + if (t == y) + k = 1; + else if (t > y) { + n -= 1; + t = quater + (y - t) * quater; + } else + t = (y - t) * quater; + } 
else { /* k = Prec-3 */ + n = LDBL_LEAST_SIGNIF_U(y) & 7; /* 3 LSb of z */ + k = 1; + } + if (k) { /* x = N/4 */ + if ((n & 1) != 0) + *s = *c = sqrth + tiny; + else + if ((n & 2) == 0) { + *s = zero; + *c = one; + } else { + *s = one; + *c = zero; + } + if ((n & 4) != 0) + *s = -*s; + if (((n + 1) & 4) != 0) + *c = -*c; + } else { + if ((n & 1) != 0) + t = quater - t; + if (((n + (n & 1)) & 2) == 0) + *s = __k_sincosl(pi * t, zero, c); + else + *c = __k_sincosl(pi * t, zero, s); + if ((n & 4) != 0) + *s = -*s; + if (((n + 2) & 4) != 0) + *c = -*c; + } + } + if (hx < 0) + *s = -*s; +} +#undef U +#undef LDBL_LEAST_SIGNIF_U +#undef I +#undef LDBL_MOST_SIGNIF_I diff --git a/usr/src/lib/libm/common/Q/sinhl.c b/usr/src/lib/libm/common/Q/sinhl.c new file mode 100644 index 0000000000..dc37e457a7 --- /dev/null +++ b/usr/src/lib/libm/common/Q/sinhl.c @@ -0,0 +1,90 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma weak sinhl = __sinhl + +#include "libm.h" +#include "longdouble.h" + +/* + * sinhl(X) + * RETURN THE HYPERBOLIC SINE OF X + * + * Method : + * 1. reduce x to non-negative by sinhl(-x) = - sinhl(x). + * 2. + * + * expm1l(x) + expm1l(x)/(expm1l(x)+1) + * 0 <= x <= lnovft : sinhl(x) := -------------------------------- + * 2 + * + * lnovft <= x < INF : sinhl(x) := expl(x-MEP1*ln2)*2**ME + * + * here + * lnovft: logrithm of the overflow threshold + * = MEP1*ln2 chopped to machine precision. + * ME maximum exponent + * MEP1 maximum exponent plus 1 + * + * Special cases: + * sinhl(x) is x if x is +INF, -INF, or NaN. + * only sinhl(0)=0 is exact for finite argument. + * + */ + +#define ME 16383 +#define MEP1 16384 +#define LNOVFT 1.135652340629414394949193107797076342845e+4L + /* last 32 bits of LN2HI is zero */ +#define LN2HI 6.931471805599453094172319547495844850203e-0001L +#define LN2LO 1.667085920830552208890449330400379754169e-0025L + +static const long double + half = 0.5L, + one = 1.0L, + ln2hi = LN2HI, + ln2lo = LN2LO, + lnovftL = LNOVFT; + +long double +sinhl(long double x) { + long double r, t; + + if (!finitel(x)) + return (x + x); /* sinh of NaN or +-INF is itself */ + r = fabsl(x); + if (r < lnovftL) { + t = expm1l(r); + r = copysignl((t + t / (one + t)) * half, x); + } else { + r = copysignl(expl((r - MEP1 * ln2hi) - MEP1 * ln2lo), x); + r = scalbnl(r, ME); + } + return (r); +} diff --git a/usr/src/lib/libm/common/Q/sinl.c b/usr/src/lib/libm/common/Q/sinl.c new file mode 100644 index 0000000000..302b9f4f1b --- /dev/null +++ b/usr/src/lib/libm/common/Q/sinl.c @@ -0,0 +1,93 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * sinl(x) + * Table look-up algorithm by K.C. Ng, November, 1989. + * + * kernel function: + * __k_sinl ... sin function on [-pi/4,pi/4] + * __k_cosl ... cos function on [-pi/4,pi/4] + * __rem_pio2l ... argument reduction routine + * + * Method. + * Let S and C denote the sin and cos respectively on [-PI/4, +PI/4]. + * 1. Assume the argument x is reduced to y1+y2 = x-k*pi/2 in + * [-pi/2 , +pi/2], and let n = k mod 4. + * 2. Let S=S(y1+y2), C=C(y1+y2). Depending on n, we have + * + * n sin(x) cos(x) tan(x) + * ---------------------------------------------------------- + * 0 S C S/C + * 1 C -S -C/S + * 2 -S -C S/C + * 3 -C S -C/S + * ---------------------------------------------------------- + * + * Special cases: + * Let trig be any of sin, cos, or tan. + * trig(+-INF) is NaN, with signals; + * trig(NaN) is that NaN; + * + * Accuracy: + * computer TRIG(x) returns trig(x) nearly rounded. 
+ */ + +#pragma weak sinl = __sinl + +#include "libm.h" +#include "longdouble.h" + +long double +sinl(long double x) { + long double y[2], z = 0.0L; + int n, ix; + + ix = *(int *) &x; /* High word of x */ + ix &= 0x7fffffff; + if (ix <= 0x3ffe9220) /* |x| ~< pi/4 */ + return (__k_sinl(x, z)); + else if (ix >= 0x7fff0000) /* sin(Inf or NaN) is NaN */ + return (x - x); + else { /* argument reduction needed */ + n = __rem_pio2l(x, y); + switch (n & 3) { + case 0: + return (__k_sinl(y[0], y[1])); + case 1: + return (__k_cosl(y[0], y[1])); + case 2: + return (-__k_sinl(y[0], y[1])); + case 3: + return (-__k_cosl(y[0], y[1])); + } + } + /* NOTREACHED */ + return 0.0L; +} diff --git a/usr/src/lib/libm/common/Q/sinpil.c b/usr/src/lib/libm/common/Q/sinpil.c new file mode 100644 index 0000000000..b708069de5 --- /dev/null +++ b/usr/src/lib/libm/common/Q/sinpil.c @@ -0,0 +1,169 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma weak sinpil = __sinpil + +/* + * long double sinpil(long double x), + * return long double precision sinl(pi*x). + * + * Algorithm, 10/17/2002, K.C. Ng + * ------------------------------ + * Let y = |4x|, z = floor(y), and n = (int)(z mod 8.0) (displayed in binary). + * 1. If y == z, then x is a multiple of 1/4. Return the following values: + * --------------------------------------------------- + * n x mod 2 sin(x*pi) cos(x*pi) tan(x*pi) + * --------------------------------------------------- + * 000 0.00 +0 ___ +1 ___ +0 + * 001 0.25 +\/0.5 +\/0.5 +1 + * 010 0.50 +1 ___ +0 ___ +inf + * 011 0.75 +\/0.5 -\/0.5 -1 + * 100 1.00 -0 ___ -1 ___ +0 + * 101 1.25 -\/0.5 -\/0.5 +1 + * 110 1.50 -1 ___ -0 ___ +inf + * 111 1.75 -\/0.5 +\/0.5 -1 + * --------------------------------------------------- + * 2. Otherwise, + * --------------------------------------------------- + * n t sin(x*pi) cos(x*pi) tan(x*pi) + * --------------------------------------------------- + * 000 (y-z)/4 sinpi(t) cospi(t) tanpi(t) + * 001 (z+1-y)/4 cospi(t) sinpi(t) 1/tanpi(t) + * 010 (y-z)/4 cospi(t) -sinpi(t) -1/tanpi(t) + * 011 (z+1-y)/4 sinpi(t) -cospi(t) -tanpi(t) + * 100 (y-z)/4 -sinpi(t) -cospi(t) tanpi(t) + * 101 (z+1-y)/4 -cospi(t) -sinpi(t) 1/tanpi(t) + * 110 (y-z)/4 -cospi(t) sinpi(t) -1/tanpi(t) + * 111 (z+1-y)/4 -sinpi(t) cospi(t) -tanpi(t) + * --------------------------------------------------- + * + * NOTE. This program computes sinpi/cospi(t<0.25) by __k_sin/cos(pi*t, 0.0). + * This will return a result with error slightly more than one ulp (but less + * than 2 ulp). If one wants an accurate result, one may break up pi*t in + * high (tpi_h) and low (tpi_l) parts and call __k_sin/cos(tpi_h, tpi_l) + * instead. 
+ */ + +#include "libm.h" +#include "longdouble.h" + +#define I(q, m) ((int *) &(q))[m] +#define U(q, m) ((unsigned *) &(q))[m] +#if defined(__LITTLE_ENDIAN) || defined(__x86) +#define LDBL_MOST_SIGNIF_I(ld) ((I(ld, 2) << 16) | (0xffff & (I(ld, 1) >> 15))) +#define LDBL_LEAST_SIGNIF_U(ld) U(ld, 0) +#define PREC 64 +#define PRECM1 63 +#define PRECM2 62 +static const long double twoPRECM2 = 9.223372036854775808000000000000000e+18L; +#else +#define LDBL_MOST_SIGNIF_I(ld) I(ld, 0) +#define LDBL_LEAST_SIGNIF_U(ld) U(ld, sizeof (long double) / sizeof (int) - 1) +#define PREC 113 +#define PRECM1 112 +#define PRECM2 111 +static const long double twoPRECM2 = 5.192296858534827628530496329220096e+33L; +#endif + +static const long double +zero = 0.0L, +quater = 0.25L, +one = 1.0L, +pi = 3.141592653589793238462643383279502884197e+0000L, +sqrth = 0.707106781186547524400844362104849039284835937688474, +tiny = 1.0e-100; + +long double +sinpil(long double x) { + long double y, z, t; + int hx, n, k; + unsigned lx; + + hx = LDBL_MOST_SIGNIF_I(x); + lx = LDBL_LEAST_SIGNIF_U(x); + k = ((hx & 0x7fff0000) >> 16) - 0x3fff; + if (k >= PRECM2) { /* |x| >= 2**(Prec-2) */ + if (k >= 16384) + y = x - x; + else { + if (k >= PREC) + y = zero; + else if (k == PRECM1) + y = (lx & 1) == 0 ? zero: -zero; + else { /* k = Prec - 2 */ + y = (lx & 1) == 0 ? zero : one; + if ((lx & 2) != 0) + y = -y; + } + } + } else if (k < -2) /* |x| < 0.25 */ + y = __k_sinl(pi * fabsl(x), zero); + else { + /* y = |4x|, z = floor(y), and n = (int)(z mod 8.0) */ + y = 4.0L * fabsl(x); + if (k < PRECM2) { + z = y + twoPRECM2; + n = LDBL_LEAST_SIGNIF_U(z) & 7; /* 3 LSb of z */ + t = z - twoPRECM2; + k = 0; + if (t == y) + k = 1; + else if (t > y) { + n -= 1; + t = quater + (y - t) * quater; + } else + t = (y - t) * quater; + } else { /* k = Prec-3 */ + n = LDBL_LEAST_SIGNIF_U(y) & 7; /* 3 LSb of z */ + k = 1; + } + if (k) { /* x = N/4 */ + if ((n & 1) != 0) + y = sqrth + tiny; + else + y = (n & 2) == 0 ? 
zero : one; + if ((n & 4) != 0) + y = -y; + } else { + if ((n & 1) != 0) + t = quater - t; + if (((n + (n & 1)) & 2) == 0) + y = __k_sinl(pi * t, zero); + else + y = __k_cosl(pi * t, zero); + if ((n & 4) != 0) + y = -y; + } + } + return (hx >= 0 ? y : -y); +} +#undef U +#undef LDBL_LEAST_SIGNIF_U +#undef I +#undef LDBL_MOST_SIGNIF_I diff --git a/usr/src/lib/libm/common/Q/sqrtl.c b/usr/src/lib/libm/common/Q/sqrtl.c new file mode 100644 index 0000000000..30c8f5e097 --- /dev/null +++ b/usr/src/lib/libm/common/Q/sqrtl.c @@ -0,0 +1,479 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma weak sqrtl = __sqrtl + +#include "libm.h" +#include "longdouble.h" + +extern int __swapTE(int); +extern int __swapEX(int); +extern enum fp_direction_type __swapRD(enum fp_direction_type); + +/* + * in struct longdouble, msw consists of + * unsigned short sgn:1; + * unsigned short exp:15; + * unsigned short frac1:16; + */ + +#ifdef __LITTLE_ENDIAN + +/* array indices used to access words within a double */ +#define HIWORD 1 +#define LOWORD 0 + +/* structure used to access words within a quad */ +union longdouble { + struct { + unsigned int frac4; + unsigned int frac3; + unsigned int frac2; + unsigned int msw; + } l; + long double d; +}; + +/* default NaN returned for sqrt(neg) */ +static const union longdouble + qnan = { 0xffffffff, 0xffffffff, 0xffffffff, 0x7fffffff }; + +/* signalling NaN used to raise invalid */ +static const union { + unsigned u[2]; + double d; +} snan = { 0, 0x7ff00001 }; + +#else + +/* array indices used to access words within a double */ +#define HIWORD 0 +#define LOWORD 1 + +/* structure used to access words within a quad */ +union longdouble { + struct { + unsigned int msw; + unsigned int frac2; + unsigned int frac3; + unsigned int frac4; + } l; + long double d; +}; + +/* default NaN returned for sqrt(neg) */ +static const union longdouble + qnan = { 0x7fffffff, 0xffffffff, 0xffffffff, 0xffffffff }; + +/* signalling NaN used to raise invalid */ +static const union { + unsigned u[2]; + double d; +} snan = { 0x7ff00001, 0 }; + +#endif /* __LITTLE_ENDIAN */ + + +static const double + zero = 0.0, + half = 0.5, + one = 1.0, + huge = 1.0e300, + tiny = 1.0e-300, + two36 = 6.87194767360000000000e+10, + two30 = 1.07374182400000000000e+09, + two6 = 6.40000000000000000000e+01, + two4 = 1.60000000000000000000e+01, + twom18 = 3.81469726562500000000e-06, + twom28 = 3.72529029846191406250e-09, + twom42 = 2.27373675443232059479e-13, + twom60 = 8.67361737988403547206e-19, + twom62 = 2.16840434497100886801e-19, + twom66 = 
1.35525271560688054251e-20, + twom90 = 8.07793566946316088742e-28, + twom113 = 9.62964972193617926528e-35, + twom124 = 4.70197740328915003187e-38; + + +/* +* Extract the exponent and normalized significand (represented as +* an array of five doubles) from a finite, nonzero quad. +* +* The five doubles are written to s[0] through s[4]; the return value is +* the exponent with the 0x3fff bias subtracted. +*/ +static int +__q_unpack(const union longdouble *x, double *s) +{ + union { + double d; + unsigned int l[2]; + } u; + double b; + unsigned int lx, w[3]; + int ex; + + /* get the normalized significand and exponent; when the exponent field is zero, leading all-zero fraction words are skipped first and the fraction is then shifted left one bit at a time until bit 0x10000 of lx is set, adjusting ex to match */ + ex = (int) ((x->l.msw & 0x7fffffff) >> 16); + lx = x->l.msw & 0xffff; + if (ex) + { + lx |= 0x10000; + w[0] = x->l.frac2; + w[1] = x->l.frac3; + w[2] = x->l.frac4; + } + else + { + if (lx | (x->l.frac2 & 0xfffe0000)) + { + w[0] = x->l.frac2; + w[1] = x->l.frac3; + w[2] = x->l.frac4; + ex = 1; + } + else if (x->l.frac2 | (x->l.frac3 & 0xfffe0000)) + { + lx = x->l.frac2; + w[0] = x->l.frac3; + w[1] = x->l.frac4; + w[2] = 0; + ex = -31; + } + else if (x->l.frac3 | (x->l.frac4 & 0xfffe0000)) + { + lx = x->l.frac3; + w[0] = x->l.frac4; + w[1] = w[2] = 0; + ex = -63; + } + else + { + lx = x->l.frac4; + w[0] = w[1] = w[2] = 0; + ex = -95; + } + while ((lx & 0x10000) == 0) + { + lx = (lx << 1) | (w[0] >> 31); + w[0] = (w[0] << 1) | (w[1] >> 31); + w[1] = (w[1] << 1) | (w[2] >> 31); + w[2] <<= 1; + ex--; + } + } + + /* extract the significand into five doubles: for each slice, u is first loaded with a fixed bit pattern whose low word is zero and saved in b, the slice's bits are then stored into u, and b is subtracted back out */ + u.l[HIWORD] = 0x42300000; + u.l[LOWORD] = 0; + b = u.d; + u.l[LOWORD] = lx; + s[0] = u.d - b; + + u.l[HIWORD] = 0x40300000; + u.l[LOWORD] = 0; + b = u.d; + u.l[LOWORD] = w[0] & 0xffffff00; + s[1] = u.d - b; + + u.l[HIWORD] = 0x3e300000; + u.l[LOWORD] = 0; + b = u.d; + u.l[HIWORD] |= w[0] & 0xff; + u.l[LOWORD] = w[1] & 0xffff0000; + s[2] = u.d - b; + + u.l[HIWORD] = 0x3c300000; + u.l[LOWORD] = 0; + b = u.d; + u.l[HIWORD] |= w[1] & 0xffff; + u.l[LOWORD] = w[2] & 0xff000000; + s[3] = u.d - b; + + u.l[HIWORD] = 0x3c300000; + u.l[LOWORD] = 0; + b = u.d; + u.l[LOWORD] = w[2] & 0xffffff; + s[4] = u.d - b; + + return ex - 
0x3fff; +} + + +/* +* Pack an exponent and array of three doubles representing a finite, +* nonzero number into a quad. Assume the sign is already there and +* the rounding mode has been fudged accordingly. +* +* z[0] through z[2] hold the significand, exp is the unbiased exponent +* and rm is the rounding direction used for the final rounding decision. +* *inexact is set to 1 when nonzero bits remain after the last fraction +* word; it is left untouched otherwise. +*/ +static void +__q_pack(const double *z, int exp, enum fp_direction_type rm, + union longdouble *x, int *inexact) +{ + union { + double d; + unsigned int l[2]; + } u; + double s[3], t, t2; + unsigned int msw, frac2, frac3, frac4; + + /* bias exponent and strip off integer bit */ + exp += 0x3fff; + s[0] = z[0] - one; + s[1] = z[1]; + s[2] = z[2]; + + /* + * chop the significand to obtain the fraction; + * use round-to-minus-infinity to ensure chopping + */ + (void) __swapRD(fp_negative); + + /* extract the first eighty bits of fraction */ + t = s[1] + s[2]; + u.d = two36 + (s[0] + t); + msw = u.l[LOWORD]; + s[0] -= (u.d - two36); + + u.d = two4 + (s[0] + t); + frac2 = u.l[LOWORD]; + s[0] -= (u.d - two4); + + u.d = twom28 + (s[0] + t); + frac3 = u.l[LOWORD]; + s[0] -= (u.d - twom28); + + /* condense the remaining fraction; errors here won't matter */ + t = s[0] + s[1]; + s[1] = ((s[0] - t) + s[1]) + s[2]; + s[0] = t; + + /* get the last word of fraction */ + u.d = twom60 + (s[0] + s[1]); + frac4 = u.l[LOWORD]; + s[0] -= (u.d - twom60); + + /* + * keep track of what's left for rounding; note that + * t2 will be non-negative due to rounding mode + */ + t = s[0] + s[1]; + t2 = (s[0] - t) + s[1]; + + if (t != zero) + { + *inexact = 1; + + /* decide whether to round the fraction up */ + if (rm == fp_positive || (rm == fp_nearest && (t > twom113 || + (t == twom113 && (t2 != zero || frac4 & 1))))) + { + /* round up and renormalize if necessary; the carry ripples from frac4 upward, and a carry out of the 16-bit msw field bumps the exponent */ + if (++frac4 == 0) + if (++frac3 == 0) + if (++frac2 == 0) + if (++msw == 0x10000) + { + msw = 0; + exp++; + } + } + } + + /* assemble the result; msw and the exponent are OR-ed in so bits already set in x->l.msw are kept */ + x->l.msw |= msw | (exp << 16); + x->l.frac2 = frac2; + x->l.frac3 = frac3; + x->l.frac4 = frac4; +} + + +/* +* Compute the square root of x and place the TP result in s. +* x supplies five doubles, x[0] through x[4]; s receives three, +* s[0] through s[2]. 
+*/ +static void +__q_tp_sqrt(const double *x, double *s) +{ + double c, rr, r[3], tt[3], t[5]; + + /* approximate the divisor for the Newton iteration: rr = half / sqrt(x[0] + x[1] + x[2]) */ + c = sqrt((x[0] + x[1]) + x[2]); + rr = half / c; + + /* compute the first five "digits" of the square root; each t[i] comes from a (v + K) - K step with its own constant K, and r[0], r[1] carry the running remainder */ + t[0] = (c + two30) - two30; + tt[0] = t[0] + t[0]; + r[0] = ((x[0] - t[0] * t[0]) + x[1]) + x[2]; + + t[1] = (rr * (r[0] + x[3]) + two6) - two6; + tt[1] = t[1] + t[1]; + r[0] -= tt[0] * t[1]; + r[1] = x[3] - t[1] * t[1]; + c = (r[1] + twom18) - twom18; + r[0] += c; + r[1] = (r[1] - c) + x[4]; + + t[2] = (rr * (r[0] + r[1]) + twom18) - twom18; + tt[2] = t[2] + t[2]; + r[0] -= tt[0] * t[2]; + r[1] -= tt[1] * t[2]; + c = (r[1] + twom42) - twom42; + r[0] += c; + r[1] = (r[1] - c) - t[2] * t[2]; + + t[3] = (rr * (r[0] + r[1]) + twom42) - twom42; + r[0] = ((r[0] - tt[0] * t[3]) + r[1]) - tt[1] * t[3]; + r[1] = -tt[2] * t[3]; + c = (r[1] + twom90) - twom90; + r[0] += c; + r[1] = (r[1] - c) - t[3] * t[3]; + + t[4] = (rr * (r[0] + r[1]) + twom66) - twom66; + + /* here we just need to get the sign of the remainder */ + c = (((((r[0] - tt[0] * t[4]) - tt[1] * t[4]) + r[1]) + - tt[2] * t[4]) - (t[3] + t[3]) * t[4]) - t[4] * t[4]; + + /* reduce to three doubles */ + t[0] += t[1]; + t[1] = t[2] + t[3]; + t[2] = t[4]; + + /* if the third term might lie on a rounding boundary, perturb it (by twom124, in the direction given by the sign of the remainder c) */ + if (c != zero && t[2] == (twom62 + t[2]) - twom62) + { + if (c < zero) + t[2] -= twom124; + else + t[2] += twom124; + } + + /* condense the square root; when the middle term s[1] comes out zero, t[2] is folded into s[0] and s[1] and s[2] is set to zero */ + c = t[1] + t[2]; + t[2] += (t[1] - c); + t[1] = c; + c = t[0] + t[1]; + s[1] = t[1] + (t[0] - c); + s[0] = c; + if (s[1] == zero) + { + c = s[0] + t[2]; + s[1] = t[2] + (s[0] - c); + s[0] = c; + s[2] = zero; + } + else + { + c = s[1] + t[2]; + s[2] = t[2] + (s[1] - c); + s[1] = c; + } +} + + +long double +sqrtl(long double ldx) +{ + union longdouble x; + volatile double t; + double xx[5], zz[3]; + enum fp_direction_type rm; + int ex, inexact, exc, traps; + + /* clear cexc (done with arithmetic on the volatile t) */ 
+ t = zero; + t -= zero; + + /* check for zero operand; the sign bit is masked off, so zeros of either sign are returned unchanged */ + x.d = ldx; + if (!((x.l.msw & 0x7fffffff) | x.l.frac2 | x.l.frac3 | x.l.frac4)) + return ldx; + + /* handle nan and inf cases; a nan comes back with bit 0x8000 of msw set */ + if ((x.l.msw & 0x7fffffff) >= 0x7fff0000) + { + if ((x.l.msw & 0xffff) | x.l.frac2 | x.l.frac3 | x.l.frac4) + { + if (!(x.l.msw & 0x8000)) + { + /* snan, signal invalid */ + t += snan.d; + } + x.l.msw |= 0x8000; + return x.d; + } + if (x.l.msw & 0x80000000) + { + /* sqrt(-inf), signal invalid */ + t = -one; + t = sqrt(t); + return qnan.d; + } + /* sqrt(inf), return inf */ + return x.d; + } + + /* handle negative numbers: evaluate sqrt(-one) on the volatile t and return qnan */ + if (x.l.msw & 0x80000000) + { + t = -one; + t = sqrt(t); + return qnan.d; + } + + /* now x is finite, positive; NOTE(review): __swapTE, __swapEX and __swapRD presumably return the previous trap-enable, exception and rounding settings, since their results are handed back to the same routines after packing -- confirm against their definitions */ + + traps = __swapTE(0); + exc = __swapEX(0); + rm = __swapRD(fp_nearest); + + ex = __q_unpack(&x, xx); + if (ex & 1) + { + /* make exponent even by doubling every term of the significand */ + xx[0] += xx[0]; + xx[1] += xx[1]; + xx[2] += xx[2]; + xx[3] += xx[3]; + xx[4] += xx[4]; + ex--; + } + __q_tp_sqrt(xx, zz); + + /* put everything together; the (now even) exponent is halved and rm is passed on for the final rounding */ + x.l.msw = 0; + inexact = 0; + __q_pack(zz, ex >> 1, rm, &x, &inexact); + + (void) __swapRD(rm); + (void) __swapEX(exc); + (void) __swapTE(traps); + if (inexact) + { + t = huge; + t += tiny; + } + return x.d; +} diff --git a/usr/src/lib/libm/common/Q/tanhl.c b/usr/src/lib/libm/common/Q/tanhl.c new file mode 100644 index 0000000000..08283bded9 --- /dev/null +++ b/usr/src/lib/libm/common/Q/tanhl.c @@ -0,0 +1,100 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#if defined(ELFOBJ) +#pragma weak tanhl = __tanhl +#endif + +/* + * tanhl(x) returns the hyperbolic tangent of x + * + * Method : + * 1. reduce x to non-negative: tanhl(-x) = - tanhl(x). + * 2. + * 0 < x <= small : tanhl(x) := x + * -expm1l(-2x) + * small < x <= 1 : tanhl(x) := -------------- + * expm1l(-2x) + 2 + * 2 + * 1 <= x <= threshold : tanhl(x) := 1 - --------------- + * expm1l(2x) + 2 + * threshold < x <= INF : tanhl(x) := 1. + * + * where + * single : small = 1.e-5 threshold = 11.0 + * double : small = 1.e-10 threshold = 22.0 + * quad : small = 1.e-20 threshold = 45.0 + * + * Note: threshold was chosen so that + * fl(1.0+2/(expm1(2*threshold)+2)) == 1. + * + * Special cases: + * tanhl(NaN) is NaN; + * only tanhl(0.0)=0.0 is exact for finite argument. 
+ */ + +#include "libm.h" +#include "longdouble.h" + +static const long double small = 1.0e-20L, one = 1.0, two = 2.0, +#ifndef lint + big = 1.0e+20L, +#endif + threshold = 45.0L; + +long double +tanhl(long double x) { + long double t, y, z; + int signx; + volatile long double dummy; + + if (isnanl(x)) + return (x + x); /* x is NaN */ + signx = signbitl(x); + t = fabsl(x); + z = one; + if (t <= threshold) { + if (t > one) + z = one - two / (expm1l(t + t) + two); + else if (t > small) { + y = expm1l(-t - t); + z = -y / (y + two); + } else { +#ifndef lint + dummy = t + big; + /* inexact if t != 0 */ +#endif + return (x); + } + } else if (!finitel(t)) + return (copysignl(one, x)); + else + return (signx ? -z + small * small : z - small * small); + return (signx ? -z : z); +} diff --git a/usr/src/lib/libm/common/Q/tanl.c b/usr/src/lib/libm/common/Q/tanl.c new file mode 100644 index 0000000000..9fdd78ed8c --- /dev/null +++ b/usr/src/lib/libm/common/Q/tanl.c @@ -0,0 +1,81 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 
+ * Use is subject to license terms. + */ + +/* + * tanl(x) + * Table look-up algorithm by K.C. Ng, November, 1989. + * + * kernel function: + * __k_tanl ... tangent function on [-pi/4,pi/4] + * __rem_pio2l ... argument reduction routine + * + * Method. + * Let S and C denote the sin and cos respectively on [-PI/4, +PI/4]. + * 1. Assume the argument x is reduced to y1+y2 = x-k*pi/2 in + * [-pi/2 , +pi/2], and let n = k mod 4. + * 2. Let S=S(y1+y2), C=C(y1+y2). Depending on n, we have + * + * n sin(x) cos(x) tan(x) + * ---------------------------------------------------------- + * 0 S C S/C + * 1 C -S -C/S + * 2 -S -C S/C + * 3 -C S -C/S + * ---------------------------------------------------------- + * + * Special cases: + * Let trig be any of sin, cos, or tan. + * trig(+-INF) is NaN, with signals; + * trig(NaN) is that NaN; + * + * Accuracy: + * computer TRIG(x) returns trig(x) nearly rounded. + */ + +#pragma weak tanl = __tanl + +#include "libm.h" +#include "longdouble.h" + +long double +tanl(long double x) { + long double y[2], z = 0.0L; + int n, ix; + + ix = *(int *) &x; /* High word of x */ + ix &= 0x7fffffff; + if (ix <= 0x3ffe9220) /* |x| ~< pi/4 */ + return (__k_tanl(x, z, 0)); + else if (ix >= 0x7fff0000) /* trig(Inf or NaN) is NaN */ + return (x - x); + else { /* argument reduction needed */ + n = __rem_pio2l(x, y); + return (__k_tanl(y[0], y[1], (n & 1))); + } +} diff --git a/usr/src/lib/libm/common/R/_TBL_r_atan_.c b/usr/src/lib/libm/common/R/_TBL_r_atan_.c new file mode 100644 index 0000000000..c5caa798e1 --- /dev/null +++ b/usr/src/lib/libm/common/R/_TBL_r_atan_.c @@ -0,0 +1,75 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * Table of constants for r_atan_(). + * By K.C. Ng, March 9, 1989 + * + * _TBL_r_atan_hi[] and _TBL_r_atan_lo[] each hold 65 floats. + * NOTE(review): the hi entries look like atan() samples (the first is + * close to atan(0.5), the 17th to pi/4 and the last to atan(8)), and + * each lo entry looks like a small correction to the hi entry with the + * same index; confirm against r_atan_(), which is not visible here. + */ + +#include "libm.h" + +const float _TBL_r_atan_hi[] = { + 4.636476040e-01, 4.883339405e-01, 5.123894811e-01, 5.358112454e-01, + 5.585992932e-01, 5.807563663e-01, 6.022873521e-01, 6.231993437e-01, + 6.435011029e-01, 6.632030010e-01, 6.823165417e-01, 7.008544207e-01, + 7.188299894e-01, 7.362574339e-01, 7.531512976e-01, 7.695264816e-01, + 7.853981853e-01, 8.156919479e-01, 8.441540003e-01, 8.709034324e-01, + 8.960554004e-01, 9.197195768e-01, 9.420000315e-01, 9.629943371e-01, + 9.827937484e-01, 1.001483083e+00, 1.019141316e+00, 1.035841227e+00, + 1.051650167e+00, 1.066630363e+00, 1.080839038e+00, 1.094328880e+00, + 1.107148767e+00, 1.130953789e+00, 1.152572036e+00, 1.172273874e+00, + 1.190289974e+00, 1.206817389e+00, 1.222025275e+00, 1.236059427e+00, + 1.249045730e+00, 1.261093378e+00, 1.272297382e+00, 1.282740831e+00, + 1.292496681e+00, 1.301628828e+00, 1.310193896e+00, 1.318242073e+00, + 1.325817704e+00, 1.339705706e+00, 1.352127433e+00, 1.363300085e+00, + 1.373400807e+00, 1.382574797e+00, 1.390942812e+00, 1.398605466e+00, + 1.405647635e+00, 1.412141085e+00, 1.418146968e+00, 1.423717976e+00, + 1.428899288e+00, 1.433730125e+00, 1.438244820e+00, 1.442473054e+00, + 1.446441293e+00, +}; + +const float _TBL_r_atan_lo[] = { + 
+5.012158688e-09, +1.055042365e-08, -2.075691974e-08, -7.480973174e-09, + +2.211159789e-08, -1.268522887e-08, -5.950149262e-09, -1.374726910e-08, + +5.868937336e-09, -8.316245470e-09, +1.320299514e-08, -1.277747597e-08, + +1.018833551e-08, -4.909868068e-09, -1.660708016e-08, -1.222759671e-09, + -2.185569414e-08, -2.462078896e-08, -1.416911655e-08, +2.470642002e-08, + -1.580020736e-08, +2.851478520e-08, +8.908211058e-09, -6.400973085e-09, + -2.513142405e-08, +5.292293181e-08, +2.785247055e-08, +2.643104224e-08, + +4.603683834e-08, +1.851388043e-09, -3.735403453e-08, +2.701113111e-08, + -4.872354964e-08, -4.477816518e-08, -3.857382325e-08, +6.845639611e-09, + -2.453011483e-08, -1.824929363e-08, +4.798058129e-08, +6.221672777e-08, + +4.276110843e-08, +4.185424007e-09, +1.285398099e-08, +4.836914869e-08, + -1.342359379e-08, +5.960489879e-09, +3.875391386e-08, -2.204224536e-08, + -4.053271141e-08, -4.604370218e-08, -5.190222652e-08, +1.529194549e-08, + -4.043566193e-08, +2.481348993e-08, +1.503647518e-08, +4.638297924e-08, + +1.392036975e-08, -2.006252586e-08, +3.051175312e-08, -4.209960824e-09, + -1.598675681e-08, +2.705746205e-08, -2.514289044e-08, +4.517691110e-08, + +3.948537852e-08, +}; diff --git a/usr/src/lib/libm/common/R/__cosf.c b/usr/src/lib/libm/common/R/__cosf.c new file mode 100644 index 0000000000..c17cc00b7f --- /dev/null +++ b/usr/src/lib/libm/common/R/__cosf.c @@ -0,0 +1,86 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include "libm.h" + +/* INDENT OFF */ +/* + * float __k_cos(double x); + * kernel (float) cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164 + * Input x is in double and assumed to be bounded by ~pi/4 in magnitude. + * + * Method: Let z = x * x, then + * C(x) = (C0 + C1*z + C2*z*z) * (C3 + C4*z + z*z) + * where + * C0 = 1.09349482127188401868272000389539985058873853699e-0003 + * C1 = -5.03324285989964979398034700054920226866107675091e-0004 + * C2 = 2.43792880266971107750418061559602239831538067410e-0005 + * C3 = 9.14499072605666582228127405245558035523741471271e+0002 + * C4 = -3.63151270591815439197122504991683846785293207730e+0001 + * + * The remez error is bound by |cos(x) - C(x)| < 2**(-34.2) + * + * Constants: + * The hexadecimal values are the intended ones for the following constants. + * The decimal values may be used, provided that the compiler will convert + * from decimal to binary accurately enough to produce the hexadecimal values + * shown. 
+ */ +/* INDENT ON */ + +static const double q[] = { +/* C0 = */ 1.09349482127188401868272000389539985058873853699e-0003, +/* C1 = */ -5.03324285989964979398034700054920226866107675091e-0004, +/* C2 = */ 2.43792880266971107750418061559602239831538067410e-0005, +/* C3 = */ 9.14499072605666582228127405245558035523741471271e+0002, +/* C4 = */ -3.63151270591815439197122504991683846785293207730e+0001, +}; + +#define C0 q[0] +#define C1 q[1] +#define C2 q[2] +#define C3 q[3] +#define C4 q[4] + +float +__k_cosf(double x) { + float ft; + double z; + int hx; + + hx = ((int *) &x)[HIWORD]; /* hx = leading x */ + if ((hx & ~0x80000000) < 0x3f100000) { /* |x| < 2**-14 */ + ft = (float) 1; + if (((int) x) == 0) /* raise inexact if x != 0 */ + return (ft); + } + z = x * x; + ft = (float) (((C0 + z * C1) + (z * z) * C2) * (C3 + z * (C4 + z))); + return (ft); +} diff --git a/usr/src/lib/libm/common/R/__sincosf.c b/usr/src/lib/libm/common/R/__sincosf.c new file mode 100644 index 0000000000..89c12eeab9 --- /dev/null +++ b/usr/src/lib/libm/common/R/__sincosf.c @@ -0,0 +1,101 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 
+ */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include "libm.h" + +/* INDENT OFF */ +/* + * void __k_sincosf(double x, float *s, float *c); + * kernel (float) sincos function on [-pi/4, pi/4], pi/4 ~ 0.785398164 + * Input x is in double and assumed to be bounded by ~pi/4 in magnitude. + * + * Method: Let z = x * x, then + * S(x) = x(S0 + S1*z)(S2 + S3*z + z*z) + * C(x) = (C0 + C1*z + C2*z*z) * (C3 + C4*z + z*z) + * where + * S0 = 1.85735322054308378716204874632872525989806770558e-0003 + * S1 = -1.95035094218403635082921458859320791358115801259e-0004 + * S2 = 5.38400550766074785970952495168558701485841707252e+0002 + * S3 = -3.31975110777873728964197739157371509422022905947e+0001 + * C0 = 1.09349482127188401868272000389539985058873853699e-0003 + * C1 = -5.03324285989964979398034700054920226866107675091e-0004 + * C2 = 2.43792880266971107750418061559602239831538067410e-0005 + * C3 = 9.14499072605666582228127405245558035523741471271e+0002 + * C4 = -3.63151270591815439197122504991683846785293207730e+0001 + * + * The remez error in S is bound by |(sin(x) - S(x))/x| < 2**(-28.2) + * The remez error in C is bound by |cos(x) - C(x)| < 2**(-34.2) + * + * Constants: + * The hexadecimal values are the intended ones for the following constants. + * The decimal values may be used, provided that the compiler will convert + * from decimal to binary accurately enough to produce the hexadecimal values + * shown. 
+ */ +/* INDENT ON */ + +static const double q[] = { +/* S0 = */ 1.85735322054308378716204874632872525989806770558e-0003, +/* S1 = */ -1.95035094218403635082921458859320791358115801259e-0004, +/* S2 = */ 5.38400550766074785970952495168558701485841707252e+0002, +/* S3 = */ -3.31975110777873728964197739157371509422022905947e+0001, +/* C0 = */ 1.09349482127188401868272000389539985058873853699e-0003, +/* C1 = */ -5.03324285989964979398034700054920226866107675091e-0004, +/* C2 = */ 2.43792880266971107750418061559602239831538067410e-0005, +/* C3 = */ 9.14499072605666582228127405245558035523741471271e+0002, +/* C4 = */ -3.63151270591815439197122504991683846785293207730e+0001, +}; + + +#define S0 q[0] +#define S1 q[1] +#define S2 q[2] +#define S3 q[3] +#define C0 q[4] +#define C1 q[5] +#define C2 q[6] +#define C3 q[7] +#define C4 q[8] + +void +__k_sincosf(double x, float *s, float *c) { + double z; + int hx; + + hx = ((int *) &x)[HIWORD]; /* hx = leading x */ + /* small argument */ + if ((hx & ~0x80000000) < 0x3f100000) { /* if |x| < 2**-14 */ + *s = (float) x; *c = (float) 1; + if ((int) x == 0) /* raise inexact if x != 0 */ + return; + } + z = x * x; + *s = (float) ((x * (S0 + z * S1)) * (S2 + z * (S3 + z))); + *c = (float) (((C0 + z * C1) + (z * z) * C2) * (C3 + z * (C4 + z))); +} diff --git a/usr/src/lib/libm/common/R/__sinf.c b/usr/src/lib/libm/common/R/__sinf.c new file mode 100644 index 0000000000..381ebb9512 --- /dev/null +++ b/usr/src/lib/libm/common/R/__sinf.c @@ -0,0 +1,83 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include "libm.h" + +/* INDENT OFF */ +/* + * float __k_sin(double x); + * kernel (float) sin function on [-pi/4, pi/4], pi/4 ~ 0.785398164 + * Input x is in double and assumed to be bounded by ~pi/4 in magnitude. + * + * Method: Let z = x * x, then + * S(x) = x(S0 + S1*z)(S2 + S3*z + z*z) + * where + * S0 = 1.85735322054308378716204874632872525989806770558e-0003, + * S1 = -1.95035094218403635082921458859320791358115801259e-0004, + * S2 = 5.38400550766074785970952495168558701485841707252e+0002, + * S3 = -3.31975110777873728964197739157371509422022905947e+0001, + * + * The remez error is bound by |(sin(x) - S(x))/x| < 2**(-28.2) + * + * Constants: + * The hexadecimal values are the intended ones for the following constants. + * The decimal values may be used, provided that the compiler will convert + * from decimal to binary accurately enough to produce the hexadecimal values + * shown. 
+ */ +/* INDENT ON */ + +static const double q[] = { +/* S0 = */ 1.85735322054308378716204874632872525989806770558e-0003, +/* S1 = */ -1.95035094218403635082921458859320791358115801259e-0004, +/* S2 = */ 5.38400550766074785970952495168558701485841707252e+0002, +/* S3 = */ -3.31975110777873728964197739157371509422022905947e+0001, +}; + +#define S0 q[0] +#define S1 q[1] +#define S2 q[2] +#define S3 q[3] + +float +__k_sinf(double x) { + float ft; + double z; + int hx; + + hx = ((int *) &x)[HIWORD]; /* hx = leading x */ + if ((hx & ~0x80000000) < 0x3f100000) { /* if |x| < 2**-14 */ + ft = (float) x; + if ((int) x == 0) /* raise inexact if x != 0 */ + return (ft); + } + z = x * x; + ft = (float) ((x * (S0 + z * S1)) * (S2 + z * (S3 + z))); + return (ft); +} diff --git a/usr/src/lib/libm/common/R/__tanf.c b/usr/src/lib/libm/common/R/__tanf.c new file mode 100644 index 0000000000..e42ec914e0 --- /dev/null +++ b/usr/src/lib/libm/common/R/__tanf.c @@ -0,0 +1,96 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#include "libm.h" + +/* INDENT OFF */ +/* + * float __k_tan(double x); + * kernel (float) tan function on [-pi/4, pi/4], pi/4 ~ 0.785398164 + * Input x is in double and assumed to be bounded by ~pi/4 in magnitude. + * + * Constants: + * The hexadecimal values are the intended ones for the following constants. + * The decimal values may be used, provided that the compiler will convert + * from decimal to binary accurately enough to produce the hexadecimal values + * shown. + */ + +static const double q[] = { +/* one */ 1.0, +/* P0 */ 4.46066928428959230679140546271810308098793029785e-0003, +/* P1 */ 4.92165316309189027066395283327437937259674072266e+0000, +/* P2 */ -7.11410648161473480044492134766187518835067749023e-0001, +/* P3 */ 4.08549808374053391446523164631798863410949707031e+0000, +/* P4 */ 2.50411070398050927821032018982805311679840087891e+0000, +/* P5 */ 1.11492064560251158411574579076841473579406738281e+0001, +/* P6 */ -1.50565540968422650891511693771462887525558471680e+0000, +/* P7 */ -1.81484378878349295050043110677506774663925170898e+0000, +/* T0 */ 3.333335997532835641297409611782510896641e-0001, +/* T1 */ 2.999997598248363761541668282006867229939e+00, +}; +/* INDENT ON */ + +#define one q[0] +#define P0 q[1] +#define P1 q[2] +#define P2 q[3] +#define P3 q[4] +#define P4 q[5] +#define P5 q[6] +#define P6 q[7] +#define P7 q[8] +#define T0 q[9] +#define T1 q[10] + +float +__k_tanf(double x, int n) { + float ft = 0.0; + double z, w; + int ix; + + ix = ((int *) &x)[HIWORD] & ~0x80000000; /* ix = leading |x| */ + /* small argument */ + if (ix < 0x3f800000) { /* if |x| < 0.0078125 = 2**-7 */ + if (ix < 0x3f100000) { /* if |x| < 2**-14 */ + if ((int) x == 0) { /* raise inexact if x != 0 */ + ft = n == 0 ? (float) x : (float) (-one / x); + } + return (ft); + } + z = (x * T0) * (T1 + x * x); + ft = n == 0 ? 
(float) z : (float) (-one / z); + return (ft); + } + z = x * x; + w = ((P0 * x) * (P1 + z * (P2 + z)) * (P3 + z * (P4 + z))) + * (P5 + z * (P6 + z * (P7 + z))); + ft = n == 0 ? (float) w : (float) (-one / w); + return (ft); +} diff --git a/usr/src/lib/libm/common/R/acosf.c b/usr/src/lib/libm/common/R/acosf.c new file mode 100644 index 0000000000..06d4cffd28 --- /dev/null +++ b/usr/src/lib/libm/common/R/acosf.c @@ -0,0 +1,43 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma weak acosf = __acosf + +#include "libm.h" + +static const float zero = 0.0f; + +float +acosf(float x) { + int ix; + + ix = *(int *)&x & ~0x80000000; + if (ix > 0x3f800000) /* |x| > 1 or x is nan */ + return ((x * zero) / zero); + return ((float)acos((double)x)); +} diff --git a/usr/src/lib/libm/common/R/acoshf.c b/usr/src/lib/libm/common/R/acoshf.c new file mode 100644 index 0000000000..7e0fdac2b1 --- /dev/null +++ b/usr/src/lib/libm/common/R/acoshf.c @@ -0,0 +1,43 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma weak acoshf = __acoshf + +#include "libm.h" + +static const float zero = 0.0f; + +float +acoshf(float x) { + int hx; + + hx = *(int *)&x; + if (hx < 0x3f800000 || hx > 0x7f800000) /* x < 1 or x is nan */ + return ((x * zero) / zero); + return ((float)acosh((double)x)); +} diff --git a/usr/src/lib/libm/common/R/asinf.c b/usr/src/lib/libm/common/R/asinf.c new file mode 100644 index 0000000000..1588c25f24 --- /dev/null +++ b/usr/src/lib/libm/common/R/asinf.c @@ -0,0 +1,43 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma weak asinf = __asinf + +#include "libm.h" + +static const float zero = 0.0f; + +float +asinf(float x) { + int ix; + + ix = *(int *)&x & ~0x80000000; + if (ix > 0x3f800000) /* |x| > 1 or x is nan */ + return ((x * zero) / zero); + return ((float)asin((double)x)); +} diff --git a/usr/src/lib/libm/common/R/asinhf.c b/usr/src/lib/libm/common/R/asinhf.c new file mode 100644 index 0000000000..6bc45572fb --- /dev/null +++ b/usr/src/lib/libm/common/R/asinhf.c @@ -0,0 +1,41 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma weak asinhf = __asinhf + +#include "libm.h" + +float +asinhf(float x) { + if (isnanf(x)) { + return (x * x); /* + -> * for Cheetah */ + } else { + return ((float) asinh((double) x)); + } +} diff --git a/usr/src/lib/libm/common/R/atan2f.c b/usr/src/lib/libm/common/R/atan2f.c new file mode 100644 index 0000000000..2650e5fc0b --- /dev/null +++ b/usr/src/lib/libm/common/R/atan2f.c @@ -0,0 +1,344 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak atan2f = __atan2f + +#include "libm.h" + +#if defined(__i386) && !defined(__amd64) +extern int __swapRP(int); +#endif + +/* + * For i = 0, ..., 192, let x[i] be the double precision number whose + * high order 32 bits are 0x3f900000 + (i << 16) and whose low order + * 32 bits are zero. Then TBL[i] := atan(x[i]) to double precision. 
+ */
+
+static const double TBL[] = {
+	1.56237286204768313e-02,
+	1.66000375562312640e-02,
+	1.75763148444955872e-02,
+	1.85525586258889763e-02,
+	1.95287670414137082e-02,
+	2.05049382324763683e-02,
+	2.14810703409090559e-02,
+	2.24571615089905717e-02,
+	2.34332098794675855e-02,
+	2.44092135955758099e-02,
+	2.53851708010611396e-02,
+	2.63610796402007873e-02,
+	2.73369382578244127e-02,
+	2.83127447993351995e-02,
+	2.92884974107309737e-02,
+	3.02641942386252458e-02,
+	3.12398334302682774e-02,
+	3.31909314971115949e-02,
+	3.51417768027967800e-02,
+	3.70923545503918164e-02,
+	3.90426499551669928e-02,
+	4.09926482452637811e-02,
+	4.29423346623621707e-02,
+	4.48916944623464972e-02,
+	4.68407129159696539e-02,
+	4.87893753095156174e-02,
+	5.07376669454602178e-02,
+	5.26855731431300420e-02,
+	5.46330792393594777e-02,
+	5.65801705891457105e-02,
+	5.85268325663017702e-02,
+	6.04730505641073168e-02,
+	6.24188099959573500e-02,
+	6.63088949198234884e-02,
+	7.01969710718705203e-02,
+	7.40829225490337306e-02,
+	7.79666338315423008e-02,
+	8.18479898030765457e-02,
+	8.57268757707448092e-02,
+	8.96031774848717461e-02,
+	9.34767811585894698e-02,
+	9.73475734872236709e-02,
+	1.01215441667466668e-01,
+	1.05080273416329528e-01,
+	1.08941956989865793e-01,
+	1.12800381201659389e-01,
+	1.16655435441069349e-01,
+	1.20507009691224562e-01,
+	1.24354994546761438e-01,
+	1.32039761614638762e-01,
+	1.39708874289163648e-01,
+	1.47361481088651630e-01,
+	1.54996741923940973e-01,
+	1.62613828597948568e-01,
+	1.70211925285474408e-01,
+	1.77790228992676075e-01,
+	1.85347949995694761e-01,
+	1.92884312257974672e-01,
+	2.00398553825878512e-01,
+	2.07889927202262986e-01,
+	2.15357699697738048e-01,
+	2.22801153759394521e-01,
+	2.30219587276843718e-01,
+	2.37612313865471242e-01,
+	2.44978663126864143e-01,
+	2.59629629408257512e-01,
+	2.74167451119658789e-01,
+	2.88587361894077410e-01,
+	3.02884868374971417e-01,
+	3.17055753209147029e-01,
+	3.31096076704132103e-01,
+	3.45002177207105132e-01,
+	3.58770670270572245e-01,
+	3.72398446676754202e-01,
+	3.85882669398073752e-01,
+	3.99220769575252543e-01,
+	4.12410441597387323e-01,
+	4.25449637370042266e-01,
+	4.38336559857957830e-01,
+	4.51069655988523499e-01,
+	4.63647609000806094e-01,
+	4.88333951056405535e-01,
+	5.12389460310737732e-01,
+	5.35811237960463704e-01,
+	5.58599315343562441e-01,
+	5.80756353567670414e-01,
+	6.02287346134964152e-01,
+	6.23199329934065904e-01,
+	6.43501108793284371e-01,
+	6.63202992706093286e-01,
+	6.82316554874748071e-01,
+	7.00854407884450192e-01,
+	7.18829999621624527e-01,
+	7.36257428981428097e-01,
+	7.53151280962194414e-01,
+	7.69526480405658297e-01,
+	7.85398163397448279e-01,
+	8.15691923316223422e-01,
+	8.44153986113171051e-01,
+	8.70903457075652976e-01,
+	8.96055384571343927e-01,
+	9.19719605350416858e-01,
+	9.42000040379463610e-01,
+	9.62994330680936206e-01,
+	9.82793723247329054e-01,
+	1.00148313569423464e+00,
+	1.01914134426634972e+00,
+	1.03584125300880014e+00,
+	1.05165021254837376e+00,
+	1.06663036531574362e+00,
+	1.08083900054116833e+00,
+	1.09432890732118993e+00,
+	1.10714871779409041e+00,
+	1.13095374397916038e+00,
+	1.15257199721566761e+00,
+	1.17227388112847630e+00,
+	1.19028994968253166e+00,
+	1.20681737028525249e+00,
+	1.22202532321098967e+00,
+	1.23605948947808186e+00,
+	1.24904577239825443e+00,
+	1.26109338225244039e+00,
+	1.27229739520871732e+00,
+	1.28274087974427076e+00,
+	1.29249666778978534e+00,
+	1.30162883400919616e+00,
+	1.31019393504755555e+00,
+	1.31824205101683711e+00,
+	1.32581766366803255e+00,
+	1.33970565959899957e+00,
+	1.35212738092095464e+00,
+	1.36330010035969384e+00,
+	1.37340076694501589e+00,
+	1.38257482149012589e+00,
+	1.39094282700241845e+00,
+	1.39860551227195762e+00,
+	1.40564764938026987e+00,
+	1.41214106460849531e+00,
+	1.41814699839963154e+00,
+	1.42371797140649403e+00,
+	1.42889927219073276e+00,
+	1.43373015248470903e+00,
+	1.43824479449822262e+00,
+	1.44247309910910193e+00,
+	1.44644133224813509e+00,
+	1.45368758222803240e+00,
+	1.46013910562100091e+00,
+	1.46591938806466282e+00,
+	1.47112767430373470e+00,
+	1.47584462045214027e+00,
+	1.48013643959415142e+00,
+	1.48405798811891154e+00,
+	1.48765509490645531e+00,
+	1.49096634108265924e+00,
+	1.49402443552511865e+00,
+	1.49685728913695626e+00,
+	1.49948886200960629e+00,
+	1.50193983749385196e+00,
+	1.50422816301907281e+00,
+	1.50636948736934317e+00,
+	1.50837751679893928e+00,
+	1.51204050407917401e+00,
+	1.51529782154917969e+00,
+	1.51821326518395483e+00,
+	1.52083793107295384e+00,
+	1.52321322351791322e+00,
+	1.52537304737331958e+00,
+	1.52734543140336587e+00,
+	1.52915374769630819e+00,
+	1.53081763967160667e+00,
+	1.53235373677370856e+00,
+	1.53377621092096650e+00,
+	1.53509721411557254e+00,
+	1.53632722579538861e+00,
+	1.53747533091664934e+00,
+	1.53854944435964280e+00,
+	1.53955649336462841e+00,
+	1.54139303859089161e+00,
+	1.54302569020147562e+00,
+	1.54448660954197448e+00,
+	1.54580153317597646e+00,
+	1.54699130060982659e+00,
+	1.54807296595325550e+00,
+	1.54906061995310385e+00,
+	1.54996600675867957e+00,
+	1.55079899282174605e+00,
+	1.55156792769518947e+00,
+	1.55227992472688747e+00,
+	1.55294108165534417e+00,
+	1.55355665560036682e+00,
+	1.55413120308095598e+00,
+	1.55466869295126031e+00,
+	1.55517259817441977e+00,
+};
+
+static const double
+	pio4 = 7.8539816339744827900e-01,	/* pi/4 */
+	pio2 = 1.5707963267948965580e+00,	/* pi/2 */
+	negpi = -3.1415926535897931160e+00,	/* -pi */
+	q1 = -3.3333333333296428046e-01,	/* t^3 coefficient, near -1/3 */
+	q2 = 1.9999999186853752618e-01,	/* t^5 coefficient, near 1/5 */
+	zero = 0.0;
+
+static const float two24 = 16777216.0;	/* 2^24, used to scale subnormals */
+/*
+ * atan2f(fy, fx): single-precision two-argument arc tangent.  The work
+ * is carried out in double precision and narrowed to float on return.
+ *
+ * Outline of the code below:
+ *  1. Both operands are made nonnegative and, if fy has the larger
+ *     magnitude bits, exchanged, so that hx >= hy afterwards.  "a" is
+ *     set to the base angle (0, -pi, pi/2 or -pi/2) and "sign" records
+ *     whether the value returned is -a instead of a.
+ *  2. NaN, infinite and widely separated operands, and a zero y, are
+ *     answered directly.
+ *  3. Otherwise "base" is formed from the difference of the bit
+ *     patterns with four mantissa bits kept, TBL[] supplies the angle
+ *     for it, and a two-term odd polynomial in
+ *     t = (y - x*base) / (x + y*base) supplies the remainder.
+ *
+ * NOTE(review): the *(int *)& accesses read and write float objects
+ * through int lvalues; this relies on the compiler tolerating such
+ * aliasing -- confirm the build flags before changing optimisation
+ * settings.
+ */
+float
+atan2f(float fy, float fx)
+{
+	double a, t, s, dbase;
+	float x, y, base;
+	int i, k, hx, hy, ix, iy, sign;
+#if defined(__i386) && !defined(__amd64)
+	int rp;		/* value returned by __swapRP(), restored at exit */
+#endif
+
+	iy = *(int *)&fy;	/* raw bit patterns of the arguments */
+	ix = *(int *)&fx;
+	hy = iy & ~0x80000000;	/* the same with the sign bit cleared */
+	hx = ix & ~0x80000000;
+
+	sign = 0;
+	if (hy > hx) {		/* fy is the larger: exchange x and y */
+		x = fy;
+		y = fx;
+		i = hx;
+		hx = hy;
+		hy = i;
+		if (iy < 0) {
+			x = -x;
+			sign = 1;
+		}
+		if (ix < 0) {
+			y = -y;
+			a = pio2;
+		} else {
+			a = -pio2;
+			sign = 1 - sign;
+		}
+	} else {
+		y = fy;
+		x = fx;
+		if (iy < 0) {
+			y = -y;
+			sign = 1;
+		}
+		if (ix < 0) {
+			x = -x;
+			a = negpi;
+			sign = 1 - sign;
+		} else {
+			a = zero;
+		}
+	}
+	/*
+	 * x is NaN or infinite, or hx exceeds hy by at least 0x0c800000
+	 * (25 in the exponent field): answer without the table.  Only
+	 * when the base angle is exactly zero does y / x contribute.
+	 */
+	if (hx >= 0x7f800000 || hx - hy >= 0x0c800000) {
+		if (hx >= 0x7f800000) {
+			if (hx > 0x7f800000) /* nan */
+				return (x * y);
+			else if (hy >= 0x7f800000)	/* both infinite */
+				a += pio4;
+		} else if ((int)a == 0) {
+			a = (double)y / x;
+		}
+		return ((float)((sign)? -a : a));
+	}
+
+	if (hy < 0x00800000) {	/* y is zero or subnormal */
+		if (hy == 0)
+			return ((float)((sign)? -a : a));
+		/* scale subnormal y */
+		y *= two24;
+		x *= two24;
+		hy = *(int *)&y;	/* refresh the bit patterns */
+		hx = *(int *)&x;
+	}
+
+#if defined(__i386) && !defined(__amd64)
+	rp = __swapRP(fp_extended);	/* presumably: extended precision */
+#endif
+	k = (hy - hx + 0x3f800000) & 0xfff80000;	/* 4 mantissa bits kept */
+	if (k >= 0x3c800000) {	/* |y/x| >= 1/64 */
+		*(int *)&base = k;
+		k = (k - 0x3c800000) >> 19;	/* table index */
+		a += TBL[k];
+	} else {
+		/*
+		 * For some reason this is faster on USIII than just
+		 * doing t = y/x in this case.  With base = 0 the
+		 * expression for t below reduces to y / x.
+		 */
+		*(int *)&base = 0;
+	}
+	dbase = (double)base;
+	t = (y - x * dbase) / (x + y * dbase);	/* tan(atan(y/x)-atan(base)) */
+	s = t * t;
+	a = (a + t) + t * s * (q1 + s * q2);	/* a + t + q1*t^3 + q2*t^5 */
+#if defined(__i386) && !defined(__amd64)
+	if (rp != fp_extended)	/* put back what __swapRP() returned */
+		(void) __swapRP(rp);
+#endif
+	return ((float)((sign)? -a : a));
+}
diff --git a/usr/src/lib/libm/common/R/atan2pif.c b/usr/src/lib/libm/common/R/atan2pif.c
new file mode 100644
index 0000000000..76f104db17
--- /dev/null
+++ b/usr/src/lib/libm/common/R/atan2pif.c
@@ -0,0 +1,52 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak atan2pif = __atan2pif + +#include "libm.h" + +static const double invpi = 0.3183098861837906715377675; + +float +atan2pif(float y, float x) { + int ix, iy, hx, hy; + + ix = *(int *)&x; + iy = *(int *)&y; + hx = ix & ~0x80000000; + hy = iy & ~0x80000000; + if (hx > 0x7f800000 || hy > 0x7f800000) /* x or y is nan */ + return (x * y); + if ((hx | hy) == 0) { + /* x and y are both zero */ + if (ix == 0) + return (y); + return ((iy == 0)? 1.0f : -1.0f); + } + return ((float)(invpi * atan2((double)y, (double)x))); +} diff --git a/usr/src/lib/libm/common/R/atanf.c b/usr/src/lib/libm/common/R/atanf.c new file mode 100644 index 0000000000..b68c48cca8 --- /dev/null +++ b/usr/src/lib/libm/common/R/atanf.c @@ -0,0 +1,196 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma weak atanf = __atanf
+
+/* INDENT OFF */
+/*
+ * float atanf(float x);
+ * Table look-up algorithm
+ * By K.C. Ng, March 9, 1989
+ *
+ * Algorithm.
+ *
+ * The algorithm is based on atan(x)=atan(y)+atan((x-y)/(1+x*y)).
+ * We use poly1(x) to approximate atan(x) for x in [0,1/8] with
+ * error (relative)
+ *	|(atan(x)-poly1(x))/x|<= 2^-115.94	long double
+ *	|(atan(x)-poly1(x))/x|<= 2^-58.85	double
+ *	|(atan(x)-poly1(x))/x|<= 2^-25.53	float
+ * and use poly2(x) to approximate atan(x) for x in [0,1/65] with
+ * error (absolute)
+ *	|atan(x)-poly2(x)|<= 2^-122.15	long double
+ *	|atan(x)-poly2(x)|<= 2^-64.79	double
+ *	|atan(x)-poly2(x)|<= 2^-35.36	float
+ * and use poly3(x) to approximate atan(x) for x in [1/8,7/16] with
+ * error (relative, only for single precision)
+ *	|(atan(x)-poly1(x))/x|<= 2^-25.53	float
+ *
+ * Here poly1-3 are odd polynomials with the following form:
+ *		x + x^3*(a1+x^2*(a2+...))
+ *
+ * (0). Purge off Inf and NaN and 0
+ * (1). Reduce x to positive by atan(x) = -atan(-x).
+ * (2). For x <= 1/8, use
+ *	(2.1) if x < 2^(-prec/2-2), atan(x) = x with inexact
+ *	(2.2) Otherwise
+ *		atan(x) = poly1(x)
+ * (3). For x >= 8 then
+ *	(3.1) if x >= 2^(prec+2), atan(x) = atan(inf) - pio2lo
+ *	(3.2) if x >= 2^(prec/3+2), atan(x) = atan(inf) - 1/x
+ *	(3.3) if x > 65, atan(x) = atan(inf) - poly2(1/x)
+ *	(3.4) Otherwise, atan(x) = atan(inf) - poly1(1/x)
+ *
+ * (4). Now x is in (0.125, 8)
+ *	Find y that matches x to 4.5 bit after binary (easy).
+ *	If iy is the high word of y, then
+ *		single : j = (iy - 0x3e000000) >> 19
+ *		(single is modified to (iy-0x3f000000)>>19)
+ *		double : j = (iy - 0x3fc00000) >> 16
+ *		quad : j = (iy - 0x3ffc0000) >> 12
+ *
+ *	Let s = (x-y)/(1+x*y). Then
+ *	atan(x) = atan(y) + poly1(s)
+ *		= _TBL_r_atan_hi[j] + (_TBL_r_atan_lo[j] + poly2(s) )
+ *
+ *	Note. |s| <= 1.5384615385e-02 = 1/65. Maximum occurs at x = 1.03125
+ *
+ */
+
+#include "libm.h"
+
+extern const float _TBL_r_atan_hi[], _TBL_r_atan_lo[];
+static const float
+	big = 1.0e37F,
+	one = 1.0F,
+	p1 = -3.333185951111688247225368498733544672172e-0001F,
+	p2 = 1.969352894213455405211341983203180636021e-0001F,
+	q1 = -3.332921964095646819563419704110132937456e-0001F,
+	a1 = -3.333323465223893614063523351509338934592e-0001F,
+	a2 = 1.999425625935277805494082274808174062403e-0001F,
+	a3 = -1.417547090509737780085769846290301788559e-0001F,
+	a4 = 1.016250813871991983097273733227432685084e-0001F,
+	a5 = -5.137023693688358515753093811791755221805e-0002F,
+	pio2hi = 1.570796371e+0000F,	/* leading part of pi/2 */
+	pio2lo = -4.371139000e-0008F;	/* correction added to pio2hi */
+/* INDENT ON */
+/*
+ * atanf(xx): single-precision arc tangent.
+ *
+ * The argument is classified by ix, its bit pattern with the sign bit
+ * removed, and handled by one of four paths:
+ *	ix <  0x3e000000 (|x| < 1/8)	x itself, or an odd polynomial
+ *					in x using p1 and p2
+ *	ix >= 0x41000000 (|x| >= 8)	pio2hi - y, where y is built from
+ *					1/|x| and pio2lo
+ *	ix <  0x3f000000 (|x| < 1/2)	odd polynomial in x using a1..a5
+ *	otherwise			_TBL_r_atan_hi/lo entry for y,
+ *					|x| rounded to four mantissa bits,
+ *					plus a polynomial in
+ *					s = (x - y) / (1 + x*y)
+ * A NaN argument returns x * x.  Negative arguments give the negated
+ * result of the corresponding positive argument.
+ */
+float
+atanf(float xx) {
+	float x, y, z, r, p, s;
+	volatile double dummy;	/* sink for the flag-raising add below */
+	int ix, iy, sign, j;
+
+	x = xx;
+	ix = *(int *) &x;
+	sign = ix & 0x80000000;	/* sign bit of x */
+	ix ^= sign;		/* bit pattern of |x| */
+
+	/* for |x| < 1/8 */
+	if (ix < 0x3e000000) {
+		if (ix < 0x38800000) {	/* if |x| < 2**(-prec/2-2) */
+			dummy = big + x;	/* get inexact flag if x != 0 */
+#ifdef lint
+			dummy = dummy;
+#endif
+			return (x);
+		}
+		z = x * x;
+		if (ix < 0x3c000000) {	/* if |x| < 2**(-prec/4-1) */
+			x = x + (x * z) * p1;
+			return (x);
+		} else {
+			x = x + (x * z) * (p1 + z * p2);
+			return (x);
+		}
+	}
+
+	/* for |x| >= 8.0 */
+	if (ix >= 0x41000000) {
+		*(int *) &x = ix;	/* x = |x| from here on */
+		if (ix < 0x42820000) {	/* x < 65 */
+			r = one / x;
+			z = r * r;
+			y = r * (one + z * (p1 + z * p2)); /* poly1 */
+			y -= pio2lo;
+		} else if (ix < 0x44800000) {	/* x < 2**(prec/3+2) */
+			r = one / x;
+			z = r * r;
+			y = r * (one + z * q1);	/* poly2 */
+			y -= pio2lo;
+		} else if (ix < 0x4c800000) {	/* x < 2**(prec+2) */
+			y = one / x - pio2lo;
+		} else if (ix < 0x7f800000) {	/* x < inf */
+			y = -pio2lo;
+		} else {		/* x is inf or NaN */
+			if (ix > 0x7f800000) {
+				return (x * x);	/* - -> * for Cheetah */
+			}
+			y = -pio2lo;
+		}
+
+		if (sign == 0)	/* reattach the sign of the argument */
+			x = pio2hi - y;
+		else
+			x = y - pio2hi;
+		return (x);
+	}
+
+
+	/* now x is between 1/8 and 8 */
+	if (ix < 0x3f000000) {	/* between 1/8 and 1/2 */
+		z = x * x;
+		x = x + (x * z) * (a1 + z * (a2 + z * (a3 + z * (a4 +
+		    z * a5))));
+		return (x);
+	}
+	*(int *) &x = ix;	/* x = |x| */
+	iy = (ix + 0x00040000) & 0x7ff80000;	/* round to 4 mantissa bits */
+	*(int *) &y = iy;
+	j = (iy - 0x3f000000) >> 19;	/* table index; 0 when y is 1/2 */
+
+	if (ix == iy)
+		p = x - y;	/* p=0.0 */
+	else {
+		if (sign == 0)
+			s = (x - y) / (one + x * y);
+		else	/* negated s, so that p carries the final sign */
+			s = (y - x) / (one + x * y);
+		z = s * s;
+		p = s * (one + z * q1);
+	}
+	if (sign == 0) {
+		r = p + _TBL_r_atan_lo[j];
+		x = r + _TBL_r_atan_hi[j];
+	} else {
+		r = p - _TBL_r_atan_lo[j];
+		x = r - _TBL_r_atan_hi[j];
+	}
+	return (x);
+}
diff --git a/usr/src/lib/libm/common/R/atanhf.c b/usr/src/lib/libm/common/R/atanhf.c
new file mode 100644
index 0000000000..2d150aa9b2
--- /dev/null
+++ b/usr/src/lib/libm/common/R/atanhf.c
@@ -0,0 +1,45 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak atanhf = __atanhf + +#include "libm.h" + +static const float zero = 0.0f; + +float +atanhf(float x) { + int ix; + + ix = *((int *)&x) & ~0x80000000; + if (ix > 0x3f800000) /* |x| > 1 or x is nan */ + return ((x * zero) / zero); + if (ix == 0x3f800000) /* |x| == 1 */ + return (x / zero); + return ((float)atanh((double)x)); +} diff --git a/usr/src/lib/libm/common/R/besself.c b/usr/src/lib/libm/common/R/besself.c new file mode 100644 index 0000000000..720e4eb47f --- /dev/null +++ b/usr/src/lib/libm/common/R/besself.c @@ -0,0 +1,807 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma weak j0f = __j0f +#pragma weak j1f = __j1f +#pragma weak jnf = __jnf +#pragma weak y0f = __y0f +#pragma weak y1f = __y1f +#pragma weak ynf = __ynf + +#include "libm.h" +#include <float.h> + +#if defined(__i386) && !defined(__amd64) +extern int __swapRP(int); +#endif + +static const float + zerof = 0.0f, + onef = 1.0f; + +static const double C[] = { + 0.0, + -0.125, + 0.25, + 0.375, + 0.5, + 1.0, + 2.0, + 8.0, + 0.5641895835477562869480794515607725858441, /* 1/sqrt(pi) */ + 0.636619772367581343075535053490057448, /* 2/pi */ + 1.0e9, +}; + +#define zero C[0] +#define neighth C[1] +#define quarter C[2] +#define three8 C[3] +#define half C[4] +#define one C[5] +#define two C[6] +#define eight C[7] +#define isqrtpi C[8] +#define tpi C[9] +#define big C[10] + +static const double Cj0y0[] = { + 0.4861344183386052721391238447e5, /* pr */ + 0.1377662549407112278133438945e6, + 0.1222466364088289731869114004e6, + 0.4107070084315176135583353374e5, + 0.5026073801860637125889039915e4, + 0.1783193659125479654541542419e3, + 0.88010344055383421691677564e0, + 0.4861344183386052721414037058e5, /* ps */ + 0.1378196632630384670477582699e6, + 0.1223967185341006542748936787e6, + 0.4120150243795353639995862617e5, + 0.5068271181053546392490184353e4, + 0.1829817905472769960535671664e3, + 1.0, + -0.1731210995701068539185611951e3, /* qr */ + -0.5522559165936166961235240613e3, + -0.5604935606637346590614529613e3, + -0.2200430300226009379477365011e3, + -0.323869355375648849771296746e2, + -0.14294979207907956223499258e1, + -0.834690374102384988158918e-2, + 0.1107975037248683865326709645e5, /* qs */ + 0.3544581680627082674651471873e5, + 0.3619118937918394132179019059e5, + 0.1439895563565398007471485822e5, + 0.2190277023344363955930226234e4, + 0.106695157020407986137501682e3, + 1.0, +}; + +#define pr Cj0y0 +#define ps (Cj0y0+7) +#define qr (Cj0y0+14) +#define qs (Cj0y0+21) + +static const double Cj0[] = { + -2.500000000000003622131880894830476755537e-0001, /* r0 */ + 
1.095597547334830263234433855932375353303e-0002, + -1.819734750463320921799187258987098087697e-0004, + 9.977001946806131657544212501069893930846e-0007, + 1.0, /* s0 */ + 1.867609810662950169966782360588199673741e-0002, + 1.590389206181565490878430827706972074208e-0004, + 6.520867386742583632375520147714499522721e-0007, + 9.999999999999999942156495584397047660949e-0001, /* r1 */ + -2.389887722731319130476839836908143731281e-0001, + 1.293359476138939027791270393439493640570e-0002, + -2.770985642343140122168852400228563364082e-0004, + 2.905241575772067678086738389169625218912e-0006, + -1.636846356264052597969042009265043251279e-0008, + 5.072306160724884775085431059052611737827e-0011, + -8.187060730684066824228914775146536139112e-0014, + 5.422219326959949863954297860723723423842e-0017, + 1.0, /* s1 */ + 1.101122772686807702762104741932076228349e-0002, + 6.140169310641649223411427764669143978228e-0005, + 2.292035877515152097976946119293215705250e-0007, + 6.356910426504644334558832036362219583789e-0010, + 1.366626326900219555045096999553948891401e-0012, + 2.280399586866739522891837985560481180088e-0015, + 2.801559820648939665270492520004836611187e-0018, + 2.073101088320349159764410261466350732968e-0021, +}; + +#define r0 Cj0 +#define s0 (Cj0+4) +#define r1 (Cj0+8) +#define s1 (Cj0+17) + +static const double Cy0[] = { + -7.380429510868722526754723020704317641941e-0002, /* u0 */ + 1.772607102684869924301459663049874294814e-0001, + -1.524370666542713828604078090970799356306e-0002, + 4.650819100693891757143771557629924591915e-0004, + -7.125768872339528975036316108718239946022e-0006, + 6.411017001656104598327565004771515257146e-0008, + -3.694275157433032553021246812379258781665e-0010, + 1.434364544206266624252820889648445263842e-0012, + -3.852064731859936455895036286874139896861e-0015, + 7.182052899726138381739945881914874579696e-0018, + -9.060556574619677567323741194079797987200e-0021, + 7.124435467408860515265552217131230511455e-0024, + 
-2.709726774636397615328813121715432044771e-0027, + 1.0, /* v0 */ + 4.678678931512549002587702477349214886475e-0003, + 9.486828955529948534822800829497565178985e-0006, + 1.001495929158861646659010844136682454906e-0008, + 4.725338116256021660204443235685358593611e-0012, +}; + +#define u0 Cy0 +#define v0 (Cy0+13) + +static const double Cj1y1[] = { + -0.4435757816794127857114720794e7, /* pr0 */ + -0.9942246505077641195658377899e7, + -0.6603373248364939109255245434e7, + -0.1523529351181137383255105722e7, + -0.1098240554345934672737413139e6, + -0.1611616644324610116477412898e4, + -0.4435757816794127856828016962e7, /* ps0 */ + -0.9934124389934585658967556309e7, + -0.6585339479723087072826915069e7, + -0.1511809506634160881644546358e7, + -0.1072638599110382011903063867e6, + -0.1455009440190496182453565068e4, + 0.3322091340985722351859704442e5, /* qr0 */ + 0.8514516067533570196555001171e5, + 0.6617883658127083517939992166e5, + 0.1849426287322386679652009819e5, + 0.1706375429020768002061283546e4, + 0.3526513384663603218592175580e2, + 0.7087128194102874357377502472e6, /* qs0 */ + 0.1819458042243997298924553839e7, + 0.1419460669603720892855755253e7, + 0.4002944358226697511708610813e6, + 0.3789022974577220264142952256e5, + 0.8638367769604990967475517183e3, +}; + +#define pr0 Cj1y1 +#define ps0 (Cj1y1+6) +#define qr0 (Cj1y1+12) +#define qs0 (Cj1y1+18) + +static const double Cj1[] = { + -6.250000000000002203053200981413218949548e-0002, /* a0 */ + 1.600998455640072901321605101981501263762e-0003, + -1.963888815948313758552511884390162864930e-0005, + 8.263917341093549759781339713418201620998e-0008, + 1.0e0, /* b0 */ + 1.605069137643004242395356851797873766927e-0002, + 1.149454623251299996428500249509098499383e-0004, + 3.849701673735260970379681807910852327825e-0007, + 4.999999999999999995517408894340485471724e-0001, + -6.003825028120475684835384519945468075423e-0002, + 2.301719899263321828388344461995355419832e-0003, + -4.208494869238892934859525221654040304068e-0005, + 
4.377745135188837783031540029700282443388e-0007, + -2.854106755678624335145364226735677754179e-0009, + 1.234002865443952024332943901323798413689e-0011, + -3.645498437039791058951273508838177134310e-0014, + 7.404320596071797459925377103787837414422e-0017, + -1.009457448277522275262808398517024439084e-0019, + 8.520158355824819796968771418801019930585e-0023, + -3.458159926081163274483854614601091361424e-0026, + 1.0e0, /* b1 */ + 4.923499437590484879081138588998986303306e-0003, + 1.054389489212184156499666953501976688452e-0005, + 1.180768373106166527048240364872043816050e-0008, + 5.942665743476099355323245707680648588540e-0012, +}; + +#define a0 Cj1 +#define b0 (Cj1+4) +#define a1 (Cj1+8) +#define b1 (Cj1+20) + +static const double Cy1[] = { + -1.960570906462389461018983259589655961560e-0001, /* c0 */ + 4.931824118350661953459180060007970291139e-0002, + -1.626975871565393656845930125424683008677e-0003, + 1.359657517926394132692884168082224258360e-0005, + 1.0e0, /* d0 */ + 2.565807214838390835108224713630901653793e-0002, + 3.374175208978404268650522752520906231508e-0004, + 2.840368571306070719539936935220728843177e-0006, + 1.396387402048998277638900944415752207592e-0008, + -1.960570906462389473336339614647555351626e-0001, /* c1 */ + 5.336268030335074494231369159933012844735e-0002, + -2.684137504382748094149184541866332033280e-0003, + 5.737671618979185736981543498580051903060e-0005, + -6.642696350686335339171171785557663224892e-0007, + 4.692417922568160354012347591960362101664e-0009, + -2.161728635907789319335231338621412258355e-0011, + 6.727353419738316107197644431844194668702e-0014, + -1.427502986803861372125234355906790573422e-0016, + 2.020392498726806769468143219616642940371e-0019, + -1.761371948595104156753045457888272716340e-0022, + 7.352828391941157905175042420249225115816e-0026, + 1.0e0, /* d1 */ + 5.029187436727947764916247076102283399442e-0003, + 1.102693095808242775074856548927801750627e-0005, + 1.268035774543174837829534603830227216291e-0008, + 
6.579416271766610825192542295821308730206e-0012, +}; + +#define c0 Cy1 +#define d0 (Cy1+4) +#define c1 (Cy1+9) +#define d1 (Cy1+21) /* c0, d0, c1, d1: coefficient runs inside Cy1 starting at elements 0, 4, 9 and 21; read by __k_y1f() */ + + +/* core of j0f computation; assumes fx is finite. The sign of fx is discarded (only |fx| is used) and the value is returned as a double for the caller to round to float. */ +static double +__k_j0f(float fx) +{ + double x, z, s, c, ss, cc, r, t, p0, q0; + int ix, i; + + ix = *(int *)&fx & ~0x80000000; /* bit pattern of |fx|; the hex thresholds below are compared against it */ + x = fabs((double)fx); + if (ix > 0x41000000) { + /* x > 8; see comments in j0.c */ + s = sin(x); + c = cos(x); + if (signbit(s) != signbit(c)) { + ss = s - c; + cc = -cos(x + x) / ss; + } else { + cc = s + c; + ss = -cos(x + x) / cc; + } + if (ix > 0x501502f9) { + /* x > 1.0e10 */ + p0 = one; + q0 = neighth / x; + } else { + t = eight / x; + z = t * t; + p0 = (pr[0] + z * (pr[1] + z * (pr[2] + z * (pr[3] + + z * (pr[4] + z * (pr[5] + z * pr[6])))))) / + (ps[0] + z * (ps[1] + z * (ps[2] + z * (ps[3] + + z * (ps[4] + z * (ps[5] + z)))))); + q0 = ((qr[0] + z * (qr[1] + z * (qr[2] + z * (qr[3] + + z * (qr[4] + z * (qr[5] + z * qr[6])))))) / + (qs[0] + z * (qs[1] + z * (qs[2] + z * (qs[3] + + z * (qs[4] + z * (qs[5] + z))))))) * t; + } + return (isqrtpi * (p0 * cc - q0 * ss) / sqrt(x)); + } + if (ix <= 0x3727c5ac) { + /* x <= 1.0e-5 */ + if (ix <= 0x219392ef) /* x <= 1.0e-18 */ + return (one - x); + return (one - x * x * quarter); + } + z = x * x; + if (ix <= 0x3fa3d70a) { + /* x <= 1.28 */ + r = r0[0] + z * (r0[1] + z * (r0[2] + z * r0[3])); + s = s0[0] + z * (s0[1] + z * (s0[2] + z * s0[3])); + return (one + z * (r / s)); + } + /* remaining range, 1.28 < x <= 8: ratio of two degree-8 polynomials in z, each evaluated by Horner's rule */ r = r1[8]; + s = s1[8]; + for (i = 7; i >= 0; i--) { + r = r * z + r1[i]; + s = s * z + s1[i]; + } + return (r / s); +} + +/* j0f: screens out NaN and infinite arguments, then rounds the double produced by __k_j0f() to float. */ float +j0f(float fx) +{ + float f; + int ix; +#if defined(__i386) && !defined(__amd64) + int rp; +#endif + + ix = *(int *)&fx & ~0x80000000; + if (ix >= 0x7f800000) { /* nan or inf */ + if (ix > 0x7f800000) + return (fx * fx); /* nan */ + return (zerof); /* infinity of either sign */ + } + +#if defined(__i386) && !defined(__amd64) + /* 32-bit x86 only: select fp_extended through __swapRP() and keep the value it returns in rp (treated below as the previous setting, to be put back before returning) */ rp = __swapRP(fp_extended); +#endif + f = (float)__k_j0f(fx); +#if defined(__i386) && !defined(__amd64) + if (rp != 
fp_extended) + (void) __swapRP(rp); +#endif + return (f); +} + +/* core of y0f computation; assumes fx is finite and positive; returns a double that y0f() and ynf() consume */ +static double +__k_y0f(float fx) +{ + double x, z, s, c, ss, cc, t, p0, q0, u, v; + int ix, i; + + ix = *(int *)&fx; + x = (double)fx; + if (ix > 0x41000000) { + /* x > 8; see comments in j0.c */ + s = sin(x); + c = cos(x); + if (signbit(s) != signbit(c)) { + ss = s - c; + cc = -cos(x + x) / ss; + } else { + cc = s + c; + ss = -cos(x + x) / cc; + } + if (ix > 0x501502f9) { + /* x > 1.0e10 */ + p0 = one; + q0 = neighth / x; + } else { + t = eight / x; + z = t * t; + p0 = (pr[0] + z * (pr[1] + z * (pr[2] + z * (pr[3] + + z * (pr[4] + z * (pr[5] + z * pr[6])))))) / + (ps[0] + z * (ps[1] + z * (ps[2] + z * (ps[3] + + z * (ps[4] + z * (ps[5] + z)))))); + q0 = ((qr[0] + z * (qr[1] + z * (qr[2] + z * (qr[3] + + z * (qr[4] + z * (qr[5] + z * qr[6])))))) / + (qs[0] + z * (qs[1] + z * (qs[2] + z * (qs[3] + + z * (qs[4] + z * (qs[5] + z))))))) * t; + } + return (isqrtpi * (p0 * ss + q0 * cc) / sqrt(x)); + } + if (ix <= 0x219392ef) /* x <= 1.0e-18 */ + return (u0[0] + tpi * log(x)); + z = x * x; + /* u: degree-12 polynomial in z by Horner's rule; v: degree-4 polynomial in z */ u = u0[12]; + for (i = 11; i >= 0; i--) + u = u * z + u0[i]; + v = v0[0] + z * (v0[1] + z * (v0[2] + z * (v0[3] + z * v0[4]))); + return (u / v + tpi * (__k_j0f(fx) * log(x))); +} + +/* y0f: handles NaN, zero, negative and positive-infinite arguments itself and hands every other value to __k_y0f(). */ float +y0f(float fx) +{ + float f; + int ix; +#if defined(__i386) && !defined(__amd64) + int rp; +#endif + + ix = *(int *)&fx; + if ((ix & ~0x80000000) > 0x7f800000) /* nan */ + return (fx * fx); + if (ix <= 0) { /* zero or negative */ + /* every bit except the sign is clear, so fx is a zero */ if ((ix << 1) == 0) + return (-onef / zerof); + return (zerof / zerof); /* fx < 0 */ + } + if (ix == 0x7f800000) /* +inf */ + return (zerof); + +#if defined(__i386) && !defined(__amd64) + rp = __swapRP(fp_extended); +#endif + f = (float)__k_y0f(fx); +#if defined(__i386) && !defined(__amd64) + if (rp != fp_extended) + (void) __swapRP(rp); +#endif + return (f); +} + +/* core of j1f computation; assumes fx is finite */ +static double +__k_j1f(float fx) +{ + 
double x, z, s, c, ss, cc, r, t, p1, q1; + int i, ix, sgn; + + ix = *(int *)&fx; + sgn = (unsigned)ix >> 31; /* 1 when the sign bit of fx is set; every return below negates the result in that case */ + ix &= ~0x80000000; + x = fabs((double)fx); + if (ix > 0x41000000) { + /* x > 8; see comments in j1.c */ + s = sin(x); + c = cos(x); + if (signbit(s) != signbit(c)) { + cc = s - c; + ss = cos(x + x) / cc; + } else { + ss = -s - c; + cc = cos(x + x) / ss; + } + if (ix > 0x501502f9) { + /* x > 1.0e10 */ + p1 = one; + q1 = three8 / x; + } else { + t = eight / x; + z = t * t; + p1 = (pr0[0] + z * (pr0[1] + z * (pr0[2] + z * + (pr0[3] + z * (pr0[4] + z * pr0[5]))))) / + (ps0[0] + z * (ps0[1] + z * (ps0[2] + z * + (ps0[3] + z * (ps0[4] + z * (ps0[5] + z)))))); + q1 = ((qr0[0] + z * (qr0[1] + z * (qr0[2] + z * + (qr0[3] + z * (qr0[4] + z * qr0[5]))))) / + (qs0[0] + z * (qs0[1] + z * (qs0[2] + z * + (qs0[3] + z * (qs0[4] + z * (qs0[5] + z))))))) * t; + } + t = isqrtpi * (p1 * cc - q1 * ss) / sqrt(x); + return ((sgn)? -t : t); + } + if (ix <= 0x3727c5ac) { + /* x <= 1.0e-5 */ + if (ix <= 0x219392ef) /* x <= 1.0e-18 */ + t = half * x; + else + t = x * (half + neighth * x * x); + return ((sgn)? -t : t); + } + z = x * x; + if (ix < 0x3fa3d70a) { + /* x < 1.28 */ + r = a0[0] + z * (a0[1] + z * (a0[2] + z * a0[3])); + s = b0[0] + z * (b0[1] + z * (b0[2] + z * b0[3])); + t = x * half + x * (z * (r / s)); + } else { + /* 1.28 <= x <= 8: degree-11 numerator by Horner's rule over a degree-4 denominator */ r = a1[11]; + for (i = 10; i >= 0; i--) + r = r * z + a1[i]; + s = b1[0] + z * (b1[1] + z * (b1[2] + z * (b1[3] + z * b1[4]))); + t = x * (r / s); + } + return ((sgn)? 
-t : t); +} + +/* j1f: returns onef / fx for NaN and infinite arguments; otherwise rounds the double produced by __k_j1f() to float. */ float +j1f(float fx) +{ + float f; + int ix; +#if defined(__i386) && !defined(__amd64) + int rp; +#endif + + ix = *(int *)&fx & ~0x80000000; + if (ix >= 0x7f800000) /* nan or inf */ + return (onef / fx); + +#if defined(__i386) && !defined(__amd64) + rp = __swapRP(fp_extended); +#endif + f = (float)__k_j1f(fx); +#if defined(__i386) && !defined(__amd64) + if (rp != fp_extended) + (void) __swapRP(rp); +#endif + return (f); +} + +/* core of y1f computation; assumes fx is finite and positive; returns a double that y1f() and ynf() consume */ +static double +__k_y1f(float fx) +{ + double x, z, s, c, ss, cc, u, v, p1, q1, t; + int i, ix; + + ix = *(int *)&fx; + x = (double)fx; + if (ix > 0x41000000) { + /* x > 8; see comments in j1.c */ + s = sin(x); + c = cos(x); + if (signbit(s) != signbit(c)) { + cc = s - c; + ss = cos(x + x) / cc; + } else { + ss = -s - c; + cc = cos(x + x) / ss; + } + if (ix > 0x501502f9) { + /* x > 1.0e10 */ + p1 = one; + q1 = three8 / x; + } else { + t = eight / x; + z = t * t; + p1 = (pr0[0] + z * (pr0[1] + z * (pr0[2] + z * + (pr0[3] + z * (pr0[4] + z * pr0[5]))))) / + (ps0[0] + z * (ps0[1] + z * (ps0[2] + z * + (ps0[3] + z * (ps0[4] + z * (ps0[5] + z)))))); + q1 = ((qr0[0] + z * (qr0[1] + z * (qr0[2] + z * + (qr0[3] + z * (qr0[4] + z * qr0[5]))))) / + (qs0[0] + z * (qs0[1] + z * (qs0[2] + z * + (qs0[3] + z * (qs0[4] + z * (qs0[5] + z))))))) * t; + } + return (isqrtpi * (p1 * ss + q1 * cc) / sqrt(x)); + } + if (ix <= 0x219392ef) /* x <= 1.0e-18 */ + return (-tpi / x); + z = x * x; + if (ix < 0x3fa3d70a) { + /* x < 1.28 */ + u = c0[0] + z * (c0[1] + z * (c0[2] + z * c0[3])); + v = d0[0] + z * (d0[1] + z * (d0[2] + z * (d0[3] + z * d0[4]))); + } else { + u = c1[11]; + for (i = 10; i >= 0; i--) + u = u * z + c1[i]; + v = d1[0] + z * (d1[1] + z * (d1[2] + z * (d1[3] + z * d1[4]))); + } + return (x * (u / v) + tpi * (__k_j1f(fx) * log(x) - one / x)); +} + +/* y1f: same argument screening as y0f(), then rounds the double produced by __k_y1f() to float. */ float +y1f(float fx) +{ + float f; + int ix; +#if defined(__i386) && !defined(__amd64) + int rp; +#endif + + ix = *(int *)&fx; + 
if ((ix & ~0x80000000) > 0x7f800000) /* nan */ + return (fx * fx); + if (ix <= 0) { /* zero or negative */ + if ((ix << 1) == 0) + return (-onef / zerof); + return (zerof / zerof); + } + if (ix == 0x7f800000) /* +inf */ + return (zerof); + +#if defined(__i386) && !defined(__amd64) + rp = __swapRP(fp_extended); +#endif + f = (float)__k_y1f(fx); +#if defined(__i386) && !defined(__amd64) + if (rp != fp_extended) + (void) __swapRP(rp); +#endif + return (f); +} + +/* jnf: order-n counterpart of j0f()/j1f(). Orders 0 and 1 are forwarded to j0f()/j1f(); for other orders the code uses the forward recurrence when n <= |fx|, the leading series term when |fx| < 1.0e-9, and the backward recurrence otherwise. */ float +jnf(int n, float fx) +{ + double a, b, temp, x, z, w, t, q0, q1, h; + float f; + int i, ix, sgn, m, k; +#if defined(__i386) && !defined(__amd64) + int rp; +#endif + + if (n < 0) { /* a negative order is evaluated as order -n at argument -fx */ + n = -n; + fx = -fx; + } + if (n == 0) + return (j0f(fx)); + if (n == 1) + return (j1f(fx)); + + ix = *(int *)&fx; + sgn = (n & 1)? ((unsigned)ix >> 31) : 0; /* only odd orders negate the result for a negative argument */ + ix &= ~0x80000000; + if (ix >= 0x7f800000) { /* nan or inf */ + if (ix > 0x7f800000) + return (fx * fx); + return ((sgn)? -zerof : zerof); + } + if ((ix << 1) == 0) /* fx is a zero */ + return ((sgn)? -zerof : zerof); + +#if defined(__i386) && !defined(__amd64) + rp = __swapRP(fp_extended); +#endif + fx = fabsf(fx); + x = (double)fx; + if ((double)n <= x) { + /* safe to use J(n+1,x) = 2n/x * J(n,x) - J(n-1,x) */ + a = __k_j0f(fx); + b = __k_j1f(fx); + for (i = 1; i < n; i++) { + temp = b; + b = b * ((double)(i + i) / x) - a; + a = temp; + } + f = (float)b; +#if defined(__i386) && !defined(__amd64) + if (rp != fp_extended) + (void) __swapRP(rp); +#endif + return ((sgn)? -f : f); + } + if (ix < 0x3089705f) { + /* x < 1.0e-9; use J(n,x) = 1/n! * (x / 2)^n */ + if (n > 6) + n = 6; /* result underflows to zero for n >= 6 */ + b = t = half * x; + a = one; + for (i = 2; i <= n; i++) { + b *= t; + a *= (double)i; + } + b /= a; + } else { + /* + * Use the backward recurrence: + * + * J(n,x)/J(n-1,x) = x/(2n - x^2/(2(n+1) - x^2/(2(n+2) - ...))) + * + * Let w = 2n/x and h = 2/x. 
Then the above quotient + * is equal to the continued fraction: + * + * 1/(w - 1/(w+h - 1/(w+2h - ...))) + * + * To determine how many terms are needed, run the + * recurrence + * + * Q(0) = w, + * Q(1) = w(w+h) - 1, + * Q(k) = (w+k*h)*Q(k-1) - Q(k-2). + * + * Then when Q(k) > 1e4, k is large enough for single + * precision. + */ +/* XXX NOT DONE - rework this */ + w = (n + n) / x; + h = two / x; + q0 = w; + z = w + h; + q1 = w * z - one; + k = 1; + while (q1 < big) { + k++; + z += h; + temp = z * q1 - q0; + q0 = q1; + q1 = temp; + } + m = n + n; + t = zero; + /* evaluate the continued fraction from the innermost retained term outwards; t ends up as the J(n,x)/J(n-1,x) estimate */ for (i = (n + k) << 1; i >= m; i -= 2) + t = one / ((double)i / x - t); + a = t; + b = one; + /* + * estimate log((2/x)^n*n!) = n*log(2/x)+n*ln(n) + * hence, if n*(log(2n/x)) > ... + * single 8.8722839355e+01 + * double 7.09782712893383973096e+02 + * then the recurrence value may overflow and the result + * will likely underflow to zero + */ + temp = (double)n; + temp *= log((two / x) * temp); + if (temp < 7.09782712893383973096e+02) { + for (i = n - 1; i > 0; i--) { + temp = b; + b = b * ((double)(i + i) / x) - a; + a = temp; + } + } else { + for (i = n - 1; i > 0; i--) { + temp = b; + b = b * ((double)(i + i) / x) - a; + a = temp; + /* divide a and t by b and reset b to one so b cannot overflow; the quotient t / b used below is unchanged by this */ if (b > 1.0e100) { + a /= b; + t /= b; + b = one; + } + } + } + b = (t * __k_j0f(fx) / b); + } + f = (float)b; +#if defined(__i386) && !defined(__amd64) + if (rp != fp_extended) + (void) __swapRP(rp); +#endif + return ((sgn)? -f : f); +} + +/* ynf: order-n counterpart of y0f()/y1f(). A negative order is replaced by its absolute value, with the result negated when that order is odd; orders above 1 are reached by forward recurrence from __k_y0f() and __k_y1f(). */ float +ynf(int n, float fx) +{ + double a, b, temp, x; + float f; + int i, sign, ix; +#if defined(__i386) && !defined(__amd64) + int rp; +#endif + + sign = 0; + if (n < 0) { + n = -n; + if (n & 1) + sign = 1; + } + if (n == 0) + return (y0f(fx)); + if (n == 1) + return ((sign)? 
-y1f(fx) : y1f(fx)); + + ix = *(int *)&fx; + if ((ix & ~0x80000000) > 0x7f800000) /* nan */ + return (fx * fx); + if (ix <= 0) { /* zero or negative */ + if ((ix << 1) == 0) + return (-onef / zerof); + return (zerof / zerof); + } + if (ix == 0x7f800000) /* +inf */ + return (zerof); + +#if defined(__i386) && !defined(__amd64) + rp = __swapRP(fp_extended); +#endif + a = __k_y0f(fx); + b = __k_y1f(fx); + x = (double)fx; + for (i = 1; i < n; i++) { + temp = b; + b *= (double)(i + i) / x; + /* b is already at or below -DBL_MAX, so the recurrence is abandoned and the float conversion below supplies the result */ if (b <= -DBL_MAX) + break; + b -= a; + a = temp; + } + f = (float)b; +#if defined(__i386) && !defined(__amd64) + if (rp != fp_extended) + (void) __swapRP(rp); +#endif + return ((sign)? -f : f); +} diff --git a/usr/src/lib/libm/common/R/cbrtf.c b/usr/src/lib/libm/common/R/cbrtf.c new file mode 100644 index 0000000000..ca00310791 --- /dev/null +++ b/usr/src/lib/libm/common/R/cbrtf.c @@ -0,0 +1,42 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma weak cbrtf = __cbrtf + +#include "libm.h" + +/* Single-precision cube root: widens x to double, calls cbrt() and converts the result back to float. When FPADD_TRAPS_INCOMPLETE_ON_NAN is defined, a NaN argument is answered with x * x instead of being passed to cbrt(). */ float +cbrtf(float x) { +#if defined(FPADD_TRAPS_INCOMPLETE_ON_NAN) + if (isnanf(x)) + return (x * x); + else +#endif + return ((float) cbrt((double) x)); +} diff --git a/usr/src/lib/libm/common/R/copysignf.c b/usr/src/lib/libm/common/R/copysignf.c new file mode 100644 index 0000000000..84ae2e31b4 --- /dev/null +++ b/usr/src/lib/libm/common/R/copysignf.c @@ -0,0 +1,42 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#if defined(ELFOBJ) +#pragma weak copysignf = __copysignf +#endif + +#include "libm.h" + +/* Returns a float made of every bit of x except bit 31, combined with bit 31 of y, i.e. the magnitude of x carrying the sign of y. No floating-point arithmetic is performed; the bits are moved through int lvalues. NOTE(review): this relies on int being 32 bits wide and on the compiler tolerating float objects being accessed through int pointers - confirm the build flags allow that. */ float +copysignf(float x, float y) { + float w; + + *(int *) &w = (*(int *) &x & ~0x80000000) | (*(int *) &y & 0x80000000); + return (w); +} diff --git a/usr/src/lib/libm/common/R/cosf.c b/usr/src/lib/libm/common/R/cosf.c new file mode 100644 index 0000000000..34b436be56 --- /dev/null +++ b/usr/src/lib/libm/common/R/cosf.c @@ -0,0 +1,148 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma weak cosf = __cosf + +/* + * See sincosf.c + */ + +#include "libm.h" + +extern const int _TBL_ipio2_inf[]; +extern int __rem_pio2m(double *, double *, int, int, int, const int *); +#if defined(__i386) && !defined(__amd64) +extern int __swapRP(int); +#endif + +/* C[0..3] are used below as S0..S3 (sine kernel), C[4..8] as C0..C4 (cosine kernel), C[9..12] as the argument-reduction constants */ static const double C[] = { + 1.85735322054308378716204874632872525989806770558e-0003, + -1.95035094218403635082921458859320791358115801259e-0004, + 5.38400550766074785970952495168558701485841707252e+0002, + -3.31975110777873728964197739157371509422022905947e+0001, + 1.09349482127188401868272000389539985058873853699e-0003, + -5.03324285989964979398034700054920226866107675091e-0004, + 2.43792880266971107750418061559602239831538067410e-0005, + 9.14499072605666582228127405245558035523741471271e+0002, + -3.63151270591815439197122504991683846785293207730e+0001, + 0.636619772367581343075535, /* 2^ -1 * 1.45F306DC9C883 */ + 0.5, + 1.570796326734125614166, /* 2^ 0 * 1.921FB54400000 */ + 6.077100506506192601475e-11, /* 2^-34 * 1.0B4611A626331 */ +}; + +#define S0 C[0] +#define S1 C[1] +#define S2 C[2] +#define S3 C[3] +#define C0 C[4] +#define C1 C[5] +#define C2 C[6] +#define C3 C[7] +#define C4 C[8] +#define invpio2 C[9] +#define half C[10] +#define pio2_1 C[11] +#define pio2_t C[12] + +/* Single-precision cosine computed in double. |x| < 3*pi/4 is handled directly by the two polynomial kernels; up to 2^19*pi the argument is reduced with the two-part constant pio2_1 + pio2_t; larger finite arguments go through __rem_pio2m(); Inf and NaN return x / x. */ float +cosf(float x) +{ + double y, z, w; + float f; + int n, ix, hx, hy; + volatile int i; + + hx = *((int *)&x); + ix = hx & 0x7fffffff; + + y = (double)x; + + if (ix <= 0x4016cbe4) { /* |x| < 3*pi/4 */ + if (ix <= 0x3f490fdb) { /* |x| < pi/4 */ + if (ix <= 0x39800000) { /* |x| <= 2**-12 */ + /* NOTE(review): the converted value is never read; presumably this volatile store is kept only for the side effect of the conversion - confirm */ i = (int)y; +#ifdef lint + i = i; +#endif + return (1.0f); + } + z = y * y; + return ((float)(((C0 + z * C1) + (z * z) * C2) * + (C3 + z * (C4 + z)))); + } else if (hx > 0) { + /* pi/4 < x < 3*pi/4: subtract pio2_1 then pio2_t and return minus the sine kernel of the remainder */ y = (y - pio2_1) - pio2_t; + z = y * y; + return ((float)-((y * (S0 + z * S1)) * + (S2 + z * (S3 + z)))); + } else { + /* same range for negative x: add pio2_1 then pio2_t and return the sine kernel of the result */ y = (y + pio2_1) + pio2_t; + z = y * y; + return ((float)((y * (S0 + z * S1)) * + (S2 + z * (S3 + z)))); + } + } else if (ix <= 
0x49c90fdb) { /* |x| < 2^19*pi */ +#if defined(__i386) && !defined(__amd64) + int rp; + + rp = __swapRP(fp_extended); +#endif + w = y * invpio2; + /* n = w rounded to the nearest integer: half is added with the sign of x and the cast truncates */ if (hx < 0) + n = (int)(w - half); + else + n = (int)(w + half); + y = (y - n * pio2_1) - n * pio2_t; + n++; /* advance the quadrant index by one so the sin/cos selection below produces a cosine */ +#if defined(__i386) && !defined(__amd64) + if (rp != fp_extended) + (void) __swapRP(rp); +#endif + } else { + if (ix >= 0x7f800000) + return (x / x); /* cos(Inf or NaN) is NaN */ + /* split y into an exponent-derived n and a rescaled w for __rem_pio2m(), which leaves the reduced argument in y; its return value plus one is the quadrant index */ hy = ((int *)&y)[HIWORD]; + n = ((hy >> 20) & 0x7ff) - 1046; + ((int *)&w)[HIWORD] = (hy & 0xfffff) | 0x41600000; + ((int *)&w)[LOWORD] = ((int *)&y)[LOWORD]; + n = __rem_pio2m(&w, &y, n, 1, 0, _TBL_ipio2_inf) + 1; + } + + /* bit 0 of n picks the kernel, bit 1 picks the sign of the result */ if (n & 1) { + /* compute cos y */ + z = y * y; + f = (float)(((C0 + z * C1) + (z * z) * C2) * + (C3 + z * (C4 + z))); + } else { + /* compute sin y */ + z = y * y; + f = (float)((y * (S0 + z * S1)) * (S2 + z * (S3 + z))); + } + + return ((n & 2)? -f : f); +} diff --git a/usr/src/lib/libm/common/R/coshf.c b/usr/src/lib/libm/common/R/coshf.c new file mode 100644 index 0000000000..0c715f6fcf --- /dev/null +++ b/usr/src/lib/libm/common/R/coshf.c @@ -0,0 +1,50 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak coshf = __coshf + +#include "libm.h" + +/* Single-precision hyperbolic cosine: Inf/NaN and arguments with |x| >= 128 are handled locally, everything else is cosh() of the widened argument converted back to float. */ float +coshf(float x) { + double c; + float w; + int ix; + + ix = *(int *)&x & ~0x80000000; + if (ix >= 0x7f800000) { + /* x is Inf or NaN: x * x gives +Inf for an infinity of either sign and propagates a NaN */ + return (x * x); + } + if (ix >= 0x43000000) /* |x| >= 128: coshf(x) trivially overflows, so use a double far outside float range and let the conversion below overflow */ + c = 1.0e100; + else + c = cosh((double)x); + w = (float)c; + return (w); +} diff --git a/usr/src/lib/libm/common/R/erff.c b/usr/src/lib/libm/common/R/erff.c new file mode 100644 index 0000000000..ae58e3477e --- /dev/null +++ b/usr/src/lib/libm/common/R/erff.c @@ -0,0 +1,69 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 
+ * Use is subject to license terms. + */ + +#pragma weak erff = __erff +#pragma weak erfcf = __erfcf + +#include "libm.h" + +#if defined(__i386) && !defined(__amd64) +extern int __swapRP(int); +#endif + +/* erff: returns x * x for a NaN argument; otherwise erf() of the widened argument converted back to float. */ float +erff(float x) { + int ix; + + ix = *(int *)&x & ~0x80000000; /* bit pattern of |x| */ + if (ix > 0x7f800000) /* x is NaN */ + return (x * x); + return ((float)erf((double)x)); +} + +/* erfcf: same NaN handling as erff(), with erfc() as the double-precision worker. Unlike erff(), on 32-bit x86 the erfc() call and the conversion to float are bracketed by __swapRP(fp_extended) and a restore of the value it returned. */ float +erfcf(float x) { + float f; + int ix; +#if defined(__i386) && !defined(__amd64) + int rp; +#endif + + ix = *(int *)&x & ~0x80000000; + if (ix > 0x7f800000) /* x is NaN */ + return (x * x); + +#if defined(__i386) && !defined(__amd64) + rp = __swapRP(fp_extended); +#endif + f = (float)erfc((double)x); +#if defined(__i386) && !defined(__amd64) + if (rp != fp_extended) + (void) __swapRP(rp); +#endif + return (f); +} diff --git a/usr/src/lib/libm/common/R/exp10f.c b/usr/src/lib/libm/common/R/exp10f.c new file mode 100644 index 0000000000..b402899a0d --- /dev/null +++ b/usr/src/lib/libm/common/R/exp10f.c @@ -0,0 +1,44 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 
+ * Use is subject to license terms. + */ + +#pragma weak exp10f = __exp10f + +#include "libm.h" + +extern double exp10(double); /* declared here rather than taken from a header */ + +/* Single-precision wrapper around exp10(): widens x to double, calls exp10() and converts the result back to float. When FPADD_TRAPS_INCOMPLETE_ON_NAN is defined, a NaN argument is answered with x * x instead of being passed to exp10(). */ float +exp10f(float x) { +#if defined(FPADD_TRAPS_INCOMPLETE_ON_NAN) + if (isnanf(x)) + return (x * x); + else +#endif + return ((float) exp10((double) x)); +} diff --git a/usr/src/lib/libm/common/R/exp2f.c b/usr/src/lib/libm/common/R/exp2f.c new file mode 100644 index 0000000000..61d6d133d6 --- /dev/null +++ b/usr/src/lib/libm/common/R/exp2f.c @@ -0,0 +1,42 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma weak exp2f = __exp2f + +#include "libm.h" + +/* Single-precision wrapper around exp2(): widens x to double, calls exp2() and converts the result back to float. When FPADD_TRAPS_INCOMPLETE_ON_NAN is defined, a NaN argument is answered with x * x instead of being passed to exp2(). */ float +exp2f(float x) { +#if defined(FPADD_TRAPS_INCOMPLETE_ON_NAN) + if (isnanf(x)) + return (x * x); + else +#endif + return ((float) exp2((double) x)); +} diff --git a/usr/src/lib/libm/common/R/expf.c b/usr/src/lib/libm/common/R/expf.c new file mode 100644 index 0000000000..179612aa7c --- /dev/null +++ b/usr/src/lib/libm/common/R/expf.c @@ -0,0 +1,401 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak expf = __expf + +/* INDENT OFF */ +/* + * float expf(float x); + * Code by K.C. Ng for SUN 5.0 libmopt + * 11/5/99 + * Method : + * 1. For |x| >= 2^7, either underflow/overflow. + * More precisely: + * x > 88.722839355...(0x42B17218) => overflow; + * x < -103.97207642..(0xc2CFF1B4) => underflow. + * 2. For |x| < 2^-6, use polynomial + * exp(x) = 1 + x + p1*x^2 + p2*x^3 + * 3. Otherwise, write |x|=(1+r)*2^n, where 0<=r<1. + * Let t = 2^n * (1+r) .... x > 0; + * t = 2^n * (1-r) .... x < 0. 
(x= -2**(n+1)+t)
+ * Since -6 <= n <= 6, we may break t into
+ * six 6-bit chunks:
+ *
+ *     t = j1*2 + j2*2^-5 + j3*2^-11 + j4*2^-17 + j5*2^-23 + j6*2^-29
+ *
+ * where 0 <= ji < 64 for i = 1,...,6, i.e. chunk i has
+ * weight 2^(7-6i).
+ * Note that since t has only 24 significant bits,
+ * either j1 or j6 must be 0.
+ * One may define ji by (int)(t / 2^(7-6i)) mod 64
+ * mathematically. In actual implementation, they can
+ * be obtained by manipulating the exponent and
+ * mantissa bits as follows:
+ * Let ix = (HEX(x)&0x007fffff)|0x00800000.
+ * If n>=0, let ix=ix<<n, then j6 = 0 and
+ *     ji = (ix>>(30-6i)) mod 64 ...i=1,...,5
+ * Otherwise, let ix=ix<<(n+6), then j1 = 0 and
+ *     ji = (ix>>(36-6i)) mod 64 ...i=2,...,6
+ *
+ * 4. Compute exp(t) by table look-up method.
+ * Precompute ET[k] = exp(j*2^(7-6i)), k=j+64*(6-i).
+ * Then
+ *     exp(t) = ET[j1+320]*ET[j2+256]*ET[j3+192]*
+ *              ET[j4+128]*ET[j5+64]*ET[j6]
+ *
+ * 5. If x < 0, return exp(-2^(n+1)) * exp(t). Note that
+ * -6 <= n <= 6. Let k = n + 6, then we can
+ * precompute
+ *     EN[k] = exp(-2^(k-5)) = exp(-2^(n+1)) for k=0,1,...,12.
+ *
+ *
+ * Special cases:
+ * exp(INF) is INF, exp(NaN) is NaN;
+ * exp(-INF) = 0;
+ * for finite argument, only exp(0) = 1 is exact.
+ *
+ * Accuracy:
+ * All calculations are done in double precision except for
+ * the case |x| < 2^-6. When |x| < 2^-6, the error is less
+ * than 0.55 ulp. When |x| >= 2^-6 and the result is normal,
+ * the error is less than 0.51 ulp. When FDTOS_TRAPS_... is
+ * defined and the result is subnormal, the error can be as
+ * large as 0.75 ulp.
+ */ +/* INDENT ON */ + +#include "libm.h" + +/* + * ET[k] = exp(j*2^(7-6i)) , where j = k mod 64, i = k/64 + */ +static const double ET[] = { + 1.00000000000000000000e+00, 1.00000000186264514923e+00, + 1.00000000372529029846e+00, 1.00000000558793544769e+00, + 1.00000000745058059692e+00, 1.00000000931322574615e+00, + 1.00000001117587089539e+00, 1.00000001303851604462e+00, + 1.00000001490116119385e+00, 1.00000001676380656512e+00, + 1.00000001862645171435e+00, 1.00000002048909686359e+00, + 1.00000002235174201282e+00, 1.00000002421438716205e+00, + 1.00000002607703253332e+00, 1.00000002793967768255e+00, + 1.00000002980232283178e+00, 1.00000003166496798102e+00, + 1.00000003352761335229e+00, 1.00000003539025850152e+00, + 1.00000003725290365075e+00, 1.00000003911554879998e+00, + 1.00000004097819417126e+00, 1.00000004284083932049e+00, + 1.00000004470348446972e+00, 1.00000004656612984100e+00, + 1.00000004842877499023e+00, 1.00000005029142036150e+00, + 1.00000005215406551073e+00, 1.00000005401671088201e+00, + 1.00000005587935603124e+00, 1.00000005774200140252e+00, + 1.00000005960464655175e+00, 1.00000006146729192302e+00, + 1.00000006332993707225e+00, 1.00000006519258244353e+00, + 1.00000006705522759276e+00, 1.00000006891787296404e+00, + 1.00000007078051811327e+00, 1.00000007264316348454e+00, + 1.00000007450580863377e+00, 1.00000007636845400505e+00, + 1.00000007823109937632e+00, 1.00000008009374452556e+00, + 1.00000008195638989683e+00, 1.00000008381903526811e+00, + 1.00000008568168063938e+00, 1.00000008754432578861e+00, + 1.00000008940697115989e+00, 1.00000009126961653116e+00, + 1.00000009313226190244e+00, 1.00000009499490705167e+00, + 1.00000009685755242295e+00, 1.00000009872019779422e+00, + 1.00000010058284316550e+00, 1.00000010244548853677e+00, + 1.00000010430813368600e+00, 1.00000010617077905728e+00, + 1.00000010803342442856e+00, 1.00000010989606979983e+00, + 1.00000011175871517111e+00, 1.00000011362136054238e+00, + 1.00000011548400591366e+00, 1.00000011734665128493e+00, 
+ 1.00000000000000000000e+00, 1.00000011920929665621e+00, + 1.00000023841860752327e+00, 1.00000035762793260119e+00, + 1.00000047683727188996e+00, 1.00000059604662538959e+00, + 1.00000071525599310007e+00, 1.00000083446537502141e+00, + 1.00000095367477115360e+00, 1.00000107288418149665e+00, + 1.00000119209360605055e+00, 1.00000131130304481530e+00, + 1.00000143051249779091e+00, 1.00000154972196497738e+00, + 1.00000166893144637470e+00, 1.00000178814094198287e+00, + 1.00000190735045180190e+00, 1.00000202655997583179e+00, + 1.00000214576951407253e+00, 1.00000226497906652412e+00, + 1.00000238418863318657e+00, 1.00000250339821405987e+00, + 1.00000262260780914403e+00, 1.00000274181741843904e+00, + 1.00000286102704194491e+00, 1.00000298023667966163e+00, + 1.00000309944633158921e+00, 1.00000321865599772764e+00, + 1.00000333786567807692e+00, 1.00000345707537263706e+00, + 1.00000357628508140806e+00, 1.00000369549480438991e+00, + 1.00000381470454158261e+00, 1.00000393391429298617e+00, + 1.00000405312405860059e+00, 1.00000417233383842586e+00, + 1.00000429154363246198e+00, 1.00000441075344070896e+00, + 1.00000452996326316679e+00, 1.00000464917309983548e+00, + 1.00000476838295071502e+00, 1.00000488759281580542e+00, + 1.00000500680269510667e+00, 1.00000512601258861878e+00, + 1.00000524522249634174e+00, 1.00000536443241827556e+00, + 1.00000548364235442023e+00, 1.00000560285230477575e+00, + 1.00000572206226934213e+00, 1.00000584127224811937e+00, + 1.00000596048224110746e+00, 1.00000607969224830640e+00, + 1.00000619890226971620e+00, 1.00000631811230533685e+00, + 1.00000643732235516836e+00, 1.00000655653241921073e+00, + 1.00000667574249746394e+00, 1.00000679495258992802e+00, + 1.00000691416269660294e+00, 1.00000703337281748873e+00, + 1.00000715258295258536e+00, 1.00000727179310189285e+00, + 1.00000739100326541120e+00, 1.00000751021344314040e+00, + 1.00000000000000000000e+00, 1.00000762942363508046e+00, + 1.00001525890547848796e+00, 1.00002288844553022251e+00, + 
1.00003051804379095024e+00, 1.00003814770026133729e+00, + 1.00004577741494138365e+00, 1.00005340718783175546e+00, + 1.00006103701893311886e+00, 1.00006866690824547383e+00, + 1.00007629685576948653e+00, 1.00008392686150582307e+00, + 1.00009155692545448346e+00, 1.00009918704761613384e+00, + 1.00010681722799144033e+00, 1.00011444746658040295e+00, + 1.00012207776338368781e+00, 1.00012970811840196106e+00, + 1.00013733853163522269e+00, 1.00014496900308413885e+00, + 1.00015259953274937565e+00, 1.00016023012063093311e+00, + 1.00016786076672947736e+00, 1.00017549147104567453e+00, + 1.00018312223357952462e+00, 1.00019075305433191581e+00, + 1.00019838393330284809e+00, 1.00020601487049298761e+00, + 1.00021364586590300050e+00, 1.00022127691953288675e+00, + 1.00022890803138353455e+00, 1.00023653920145494389e+00, + 1.00024417042974778091e+00, 1.00025180171626271175e+00, + 1.00025943306099973640e+00, 1.00026706446395974304e+00, + 1.00027469592514273167e+00, 1.00028232744454959047e+00, + 1.00028995902218031944e+00, 1.00029759065803558471e+00, + 1.00030522235211605242e+00, 1.00031285410442172257e+00, + 1.00032048591495348333e+00, 1.00032811778371155675e+00, + 1.00033574971069616488e+00, 1.00034338169590819589e+00, + 1.00035101373934764979e+00, 1.00035864584101541475e+00, + 1.00036627800091149076e+00, 1.00037391021903676602e+00, + 1.00038154249539146257e+00, 1.00038917482997580244e+00, + 1.00039680722279067382e+00, 1.00040443967383629875e+00, + 1.00041207218311289928e+00, 1.00041970475062136359e+00, + 1.00042733737636191371e+00, 1.00043497006033499375e+00, + 1.00044260280254104778e+00, 1.00045023560298029786e+00, + 1.00045786846165363215e+00, 1.00046550137856127272e+00, + 1.00047313435370366363e+00, 1.00048076738708124900e+00, + 1.00000000000000000000e+00, 1.00048840047869447289e+00, + 1.00097703949241645383e+00, 1.00146591715766675179e+00, + 1.00195503359100279717e+00, 1.00244438890903908579e+00, + 1.00293398322844673487e+00, 1.00342381666595459322e+00, + 1.00391388933834746489e+00, 
1.00440420136246855165e+00, + 1.00489475285521656645e+00, 1.00538554393354861993e+00, + 1.00587657471447822211e+00, 1.00636784531507639251e+00, + 1.00685935585247099411e+00, 1.00735110644384739942e+00, + 1.00784309720644804642e+00, 1.00833532825757243856e+00, + 1.00882779971457803292e+00, 1.00932051169487890796e+00, + 1.00981346431594687374e+00, 1.01030665769531102782e+00, + 1.01080009195055753324e+00, 1.01129376719933050666e+00, + 1.01178768355933157430e+00, 1.01228184114831898377e+00, + 1.01277624008410960244e+00, 1.01327088048457714109e+00, + 1.01376576246765282008e+00, 1.01426088615132625748e+00, + 1.01475625165364347069e+00, 1.01525185909270931894e+00, + 1.01574770858668572693e+00, 1.01624380025379235093e+00, + 1.01674013421230657883e+00, 1.01723671058056375216e+00, + 1.01773352947695694404e+00, 1.01823059101993673714e+00, + 1.01872789532801233392e+00, 1.01922544251975000229e+00, + 1.01972323271377418585e+00, 1.02022126602876750390e+00, + 1.02071954258347008526e+00, 1.02121806249668067856e+00, + 1.02171682588725554197e+00, 1.02221583287410910934e+00, + 1.02271508357621376817e+00, 1.02321457811260052573e+00, + 1.02371431660235789884e+00, 1.02421429916463280207e+00, + 1.02471452591863054771e+00, 1.02521499698361440167e+00, + 1.02571571247890602763e+00, 1.02621667252388526492e+00, + 1.02671787723799012859e+00, 1.02721932674071725344e+00, + 1.02772102115162167202e+00, 1.02822296059031659254e+00, + 1.02872514517647339893e+00, 1.02922757502982276101e+00, + 1.02973025027015285815e+00, 1.03023317101731093359e+00, + 1.03073633739120262831e+00, 1.03123974951179242510e+00, + 1.00000000000000000000e+00, 1.03174340749910276038e+00, + 1.06449445891785954288e+00, 1.09828514030782575794e+00, + 1.13314845306682632220e+00, 1.16911844616950433284e+00, + 1.20623024942098067136e+00, 1.24452010776609522935e+00, + 1.28402541668774139438e+00, 1.32478475872886569675e+00, + 1.36683794117379631139e+00, 1.41022603492571074746e+00, + 1.45499141461820125087e+00, 1.50117780000012279729e+00, 
+ 1.54883029863413312910e+00, 1.59799544995063325104e+00, + 1.64872127070012819416e+00, 1.70105730184840076014e+00, + 1.75505465696029849809e+00, 1.81076607211938722664e+00, + 1.86824595743222232613e+00, 1.92755045016754467113e+00, + 1.98873746958229191684e+00, 2.05186677348797674725e+00, + 2.11700001661267478426e+00, 2.18420081081561789915e+00, + 2.25353478721320854561e+00, 2.32506966027712103084e+00, + 2.39887529396709808793e+00, 2.47502376996302508871e+00, + 2.55358945806292680913e+00, 2.63464908881563086851e+00, + 2.71828182845904553488e+00, 2.80456935623722669604e+00, + 2.89359594417176113623e+00, 2.98544853936535581340e+00, + 3.08021684891803104733e+00, 3.17799342753883840018e+00, + 3.27887376793867346692e+00, 3.38295639409246895468e+00, + 3.49034295746184142217e+00, 3.60113833627217561073e+00, + 3.71545073794110392029e+00, 3.83339180475841034834e+00, + 3.95507672292057721464e+00, 4.08062433502646015882e+00, + 4.21015725614395996956e+00, 4.34380199356104235164e+00, + 4.48168907033806451778e+00, 4.62395315278208052234e+00, + 4.77073318196760265408e+00, 4.92217250943229078786e+00, + 5.07841903718008147450e+00, 5.23962536212848917216e+00, + 5.40594892514116676097e+00, 5.57755216479125959239e+00, + 5.75460267600573072144e+00, 5.93727337374560715233e+00, + 6.12574266188198635064e+00, 6.32019460743274397174e+00, + 6.52081912033011246166e+00, 6.72781213889469142941e+00, + 6.94137582119703555605e+00, 7.16171874249371143151e+00, + 1.00000000000000000000e+00, 7.38905609893065040694e+00, + 5.45981500331442362040e+01, 4.03428793492735110249e+02, + 2.98095798704172830185e+03, 2.20264657948067178950e+04, + 1.62754791419003915507e+05, 1.20260428416477679275e+06, + 8.88611052050787210464e+06, 6.56599691373305097222e+07, + 4.85165195409790277481e+08, 3.58491284613159179688e+09, + 2.64891221298434715271e+10, 1.95729609428838775635e+11, + 1.44625706429147509766e+12, 1.06864745815244628906e+13, + 7.89629601826806875000e+13, 5.83461742527454875000e+14, + 
4.31123154711519500000e+15, 3.18559317571137560000e+16, + 2.35385266837020000000e+17, 1.73927494152050099200e+18, + 1.28516001143593082880e+19, 9.49611942060244828160e+19, + 7.01673591209763143680e+20, 5.18470552858707204506e+21, + 3.83100800071657691546e+22, 2.83075330327469394756e+23, + 2.09165949601299610311e+24, 1.54553893559010391826e+25, + 1.14200738981568423454e+26, 8.43835666874145383188e+26, + 6.23514908081161674391e+27, 4.60718663433129178064e+28, + 3.40427604993174075827e+29, 2.51543867091916687979e+30, + 1.85867174528412788702e+31, 1.37338297954017610775e+32, + 1.01480038811388874615e+33, 7.49841699699012090701e+33, + 5.54062238439350983445e+34, 4.09399696212745451138e+35, + 3.02507732220114256223e+36, 2.23524660373471497416e+37, + 1.65163625499400180987e+38, 1.22040329431784083418e+39, + 9.01762840503429851945e+39, 6.66317621641089618500e+40, + 4.92345828601205826106e+41, 3.63797094760880474988e+42, + 2.68811714181613560943e+43, 1.98626483613765434356e+44, + 1.46766223015544238535e+45, 1.08446385529002313207e+46, + 8.01316426400059069850e+46, 5.92097202766466993617e+47, + 4.37503944726134096988e+48, 3.23274119108485947460e+49, + 2.38869060142499127023e+50, 1.76501688569176554670e+51, + 1.30418087839363225614e+52, 9.63666567360320166416e+52, + 7.12058632688933793173e+53, 5.26144118266638596909e+54, +}; + +/* + * EN[k] = exp(-2^(k-5)) + */ +static const double EN[] = { + 9.69233234476344129860e-01, 9.39413062813475807644e-01, + 8.82496902584595455110e-01, 7.78800783071404878477e-01, + 6.06530659712633424263e-01, 3.67879441171442334024e-01, + 1.35335283236612702318e-01, 1.83156388887341786686e-02, + 3.35462627902511853224e-04, 1.12535174719259116458e-07, + 1.26641655490941755372e-14, 1.60381089054863792659e-28, +#if defined(FDTOS_TRAPS_INCOMPLETE_IN_FNS_MODE) + 2.96555550007072683578e-38, /* exp(-128) scaled up by 2^60 */ +#else + 2.57220937264241481170e-56, +#endif +}; + +static const float F[] = { + 0.0f, + 1.0f, + 5.0000000951292138e-01F, + 
1.6666518897347284e-01F, + 3.4028234663852885981170E+38F, + 1.1754943508222875079688E-38F, +#if defined(FDTOS_TRAPS_INCOMPLETE_IN_FNS_MODE) + 8.67361737988403547205962240695953369140625e-19F +#endif +}; + +#define zero F[0] +#define one F[1] +#define p1 F[2] +#define p2 F[3] +#define big F[4] +#define tiny F[5] +#if defined(FDTOS_TRAPS_INCOMPLETE_IN_FNS_MODE) +#define twom60 F[6] +#endif + +float +expf(float xf) { + double w, p, q; + int hx, ix, n; + + hx = *(int *)&xf; + ix = hx & ~0x80000000; + + if (ix < 0x3c800000) { /* |x| < 2**-6 */ + if (ix < 0x38800000) /* |x| < 2**-14 */ + return (one + xf); + return (one + (xf + (xf * xf) * (p1 + xf * p2))); + } + + n = ix >> 23; /* biased exponent */ + + if (n >= 0x86) { /* |x| >= 2^7 */ + if (n >= 0xff) { /* x is nan or +-inf */ + if (hx == 0xff800000) + return (zero); /* exp(-inf)=0 */ + return (xf * xf); /* exp(nan/inf) is nan or inf */ + } + if (hx > 0) + return (big * big); /* overflow */ + else + return (tiny * tiny); /* underflow */ + } + + ix -= n << 23; + if (hx > 0) + ix += 0x800000; + else + ix = 0x800000 - ix; + if (n >= 0x7f) { /* n >= 0 */ + ix <<= n - 0x7f; + w = ET[(ix & 0x3f) + 64] * ET[((ix >> 6) & 0x3f) + 128]; + p = ET[((ix >> 12) & 0x3f) + 192] * + ET[((ix >> 18) & 0x3f) + 256]; + q = ET[((ix >> 24) & 0x3f) + 320]; + } else { + ix <<= n - 0x79; + w = ET[ix & 0x3f] * ET[((ix >> 6) & 0x3f) + 64]; + p = ET[((ix >> 12) & 0x3f) + 128] * + ET[((ix >> 18) & 0x3f) + 192]; + q = ET[((ix >> 24) & 0x3f) + 256]; + } + xf = (float)((w * p) * (hx < 0 ? 
q * EN[n - 0x79] : q)); +#if defined(FDTOS_TRAPS_INCOMPLETE_IN_FNS_MODE) + if ((unsigned)hx >= 0xc2800000u) { + if ((unsigned)hx >= 0xc2aeac50) { /* force underflow */ + volatile float t = tiny; + t *= t; + } + return (xf * twom60); + } +#endif + return (xf); +} diff --git a/usr/src/lib/libm/common/R/expm1f.c b/usr/src/lib/libm/common/R/expm1f.c new file mode 100644 index 0000000000..6643a2000a --- /dev/null +++ b/usr/src/lib/libm/common/R/expm1f.c @@ -0,0 +1,42 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma weak expm1f = __expm1f + +#include "libm.h" + +float +expm1f(float x) { +#if defined(FPADD_TRAPS_INCOMPLETE_ON_NAN) + if (isnanf(x)) + return (x * x); + else +#endif + return ((float) expm1((double) x)); +} diff --git a/usr/src/lib/libm/common/R/fabsf.c b/usr/src/lib/libm/common/R/fabsf.c new file mode 100644 index 0000000000..40eb86b6db --- /dev/null +++ b/usr/src/lib/libm/common/R/fabsf.c @@ -0,0 +1,38 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak fabsf = __fabsf + +#include "libm.h" + +float +fabsf(float x) { + *(int *) &x &= ~0x80000000; + return (x); +} diff --git a/usr/src/lib/libm/common/R/floorf.c b/usr/src/lib/libm/common/R/floorf.c new file mode 100644 index 0000000000..f1c3c93f3d --- /dev/null +++ b/usr/src/lib/libm/common/R/floorf.c @@ -0,0 +1,111 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). 
+ * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak ceilf = __ceilf +#pragma weak floorf = __floorf + +/* INDENT OFF */ +/* + * ceilf(x) returns the least integral value (in float) not less than x + * floorf(x) returns the greatest integral value (in float) not greater than x + * + * NOTE: ceilf(x) and floorf(x) return result + * with the same sign as x's, including 0.0F. 
+ */ + +#include "libm.h" + +static const float xf[] = { +/* ZEROF */ 0.0f, +/* ONEF */ 1.0f, +/* MONEF */ -1.0f, +/* HUGEF */ 1.0e30f, +}; + +#define ZEROF xf[0] +#define ONEF xf[1] +#define MONEF xf[2] +#define HUGEF xf[3] +/* INDENT ON */ + +float +ceilf(float x) { + volatile float dummy; + int hx, k, j, ix; + + hx = *(int *) &x; + ix = hx & ~0x80000000; + k = ix >> 23; + if (((k - 127) ^ (k - 150)) < 0) { + k = (1 << (150 - k)) - 1; + if ((k & hx) != 0) + dummy = HUGEF + x; /* raise inexact */ + j = k & (~(hx >> 31)); + *(int *) &x = (hx + j) & ~k; + return (x); + } else if (k <= 126) { + dummy = HUGEF + x; + if (hx > 0) + return (ONEF); + else if (ix == 0) + return (x); + else + return (-ZEROF); + } else + /* signal invalid if x is a SNaN */ + return (x * ONEF); /* +0 -> *1 for Cheetah */ +} + +float +floorf(float x) { + volatile float dummy; + int hx, k, j, ix; + + hx = *(int *) &x; + ix = hx & ~0x80000000; + k = ix >> 23; + if (((k - 127) ^ (k - 150)) < 0) { + k = (1 << (150 - k)) - 1; + if ((k & hx) != 0) + dummy = HUGEF + x; /* raise inexact */ + j = k & (hx >> 31); + *(int *) &x = (hx + j) & ~k; + return (x); + } else if (k <= 126) { + dummy = HUGEF + x; + if (hx > 0) + return (ZEROF); + else if (ix == 0) + return (x); + else + return (MONEF); + } else + /* signal invalid if x is a SNaN */ + return (x * ONEF); /* +0 -> *1 for Cheetah */ +} diff --git a/usr/src/lib/libm/common/R/fmodf.c b/usr/src/lib/libm/common/R/fmodf.c new file mode 100644 index 0000000000..d19bb3f2f8 --- /dev/null +++ b/usr/src/lib/libm/common/R/fmodf.c @@ -0,0 +1,176 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak fmodf = __fmodf + +#include "libm.h" + +/* INDENT OFF */ +static const int + is = (int)0x80000000, + im = 0x007fffff, + ii = 0x7f800000, + iu = 0x00800000; +/* INDENT ON */ + +static const float zero = 0.0; + +float +fmodf(float x, float y) { + float w; + int hx, ix, iy, iz, k, ny, nd; + + hx = *(int *)&x; + ix = hx & 0x7fffffff; + iy = *(int *)&y & 0x7fffffff; + + /* purge off exception values */ + if (ix >= ii || iy > ii || iy == 0) { + w = x * y; + w = w / w; + } else if (ix <= iy) { + if (ix < iy) + w = x; /* return x if |x|<|y| */ + else + w = zero * x; /* return sign(x)*0.0 */ + } else { + /* INDENT OFF */ + /* + * scale x,y to "normal" with + * ny = exponent of y + * nd = exponent of x minus exponent of y + */ + /* INDENT ON */ + ny = iy >> 23; + k = ix >> 23; + + /* special case for subnormal y or x */ + if (ny == 0) { + ny = 1; + while (iy < iu) { + ny -= 1; + iy += iy; + } + nd = k - ny; + if (k == 0) { + nd += 1; + while (ix < iu) { + nd -= 1; + ix += ix; + } + } else { + ix = iu | (ix & im); + } + } else { + nd = k - ny; + ix = iu | (ix & im); + iy = iu | (iy & im); + } + + /* fix point fmod for normalized ix and iy */ + /* INDENT OFF */ + /* + * while (nd--) { + * iz = ix - iy; + * if (iz < 0) + * ix = ix + ix; + * else if (iz == 0) { + * *(int *) &w = is & hx; + * return w; + * 
} + * else + * ix = iz + iz; + * } + */ + /* INDENT ON */ + /* unroll the above loop 4 times to gain performance */ + k = nd >> 2; + nd -= k << 2; + while (k--) { + iz = ix - iy; + if (iz >= 0) + ix = iz + iz; + else + ix += ix; + iz = ix - iy; + if (iz >= 0) + ix = iz + iz; + else + ix += ix; + iz = ix - iy; + if (iz >= 0) + ix = iz + iz; + else + ix += ix; + iz = ix - iy; + if (iz >= 0) + ix = iz + iz; + else + ix += ix; + if (iz == 0) { + *(int *)&w = is & hx; + return (w); + } + } + while (nd--) { + iz = ix - iy; + if (iz >= 0) + ix = iz + iz; + else + ix += ix; + } + /* end of unrolling */ + + iz = ix - iy; + if (iz >= 0) + ix = iz; + + /* convert back to floating value and restore the sign */ + if (ix == 0) { + *(int *)&w = is & hx; + return (w); + } + while (ix < iu) { + ix += ix; + ny -= 1; + } + while (ix > (iu + iu)) { + ny += 1; + ix >>= 1; + } + if (ny > 0) { + *(int *)&w = (is & hx) | (ix & im) | (ny << 23); + } else { + /* subnormal output */ + k = -ny + 1; + ix >>= k; + *(int *)&w = (is & hx) | ix; + } + } + return (w); +} diff --git a/usr/src/lib/libm/common/R/gammaf.c b/usr/src/lib/libm/common/R/gammaf.c new file mode 100644 index 0000000000..89a6a6f406 --- /dev/null +++ b/usr/src/lib/libm/common/R/gammaf.c @@ -0,0 +1,36 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak gammaf = __gammaf + +#include "libm.h" + +float +gammaf(float x) { + return (lgammaf(x)); +} diff --git a/usr/src/lib/libm/common/R/gammaf_r.c b/usr/src/lib/libm/common/R/gammaf_r.c new file mode 100644 index 0000000000..4a1c317c4b --- /dev/null +++ b/usr/src/lib/libm/common/R/gammaf_r.c @@ -0,0 +1,36 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma weak gammaf_r = __gammaf_r + +#include "libm.h" + +float +gammaf_r(float x, int *signgamfp) { + return (lgammaf_r(x, signgamfp)); +} diff --git a/usr/src/lib/libm/common/R/hypotf.c b/usr/src/lib/libm/common/R/hypotf.c new file mode 100644 index 0000000000..87729c5344 --- /dev/null +++ b/usr/src/lib/libm/common/R/hypotf.c @@ -0,0 +1,64 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak hypotf = __hypotf + +#include "libm.h" + +float +hypotf(float x, float y) { + double dx, dy; + float w; + int ix, iy; + + ix = (*(int *) &x) & 0x7fffffff; + iy = (*(int *) &y) & 0x7fffffff; + if (ix >= 0x7f800000) { + if (ix == 0x7f800000) + *(int *) &w = x == y ? iy : ix; /* w = |x| = inf */ + else if (iy == 0x7f800000) + *(int *) &w = x == y ? ix : iy; /* w = |y| = inf */ + else + w = fabsf(x) * fabsf(y); /* + -> * for Cheetah */ + } else if (iy >= 0x7f800000) { + if (iy == 0x7f800000) + *(int *) &w = x == y ? 
ix : iy; /* w = |y| = inf */ + else + w = fabsf(x) * fabsf(y); /* + -> * for Cheetah */ + } else if (ix == 0) + *(int *) &w = iy; /* w = |y| */ + else if (iy == 0) + *(int *) &w = ix; /* w = |x| */ + else { + dx = (double) x; + dy = (double) y; + w = (float) sqrt(dx * dx + dy * dy); + } + return (w); +} diff --git a/usr/src/lib/libm/common/R/ilogbf.c b/usr/src/lib/libm/common/R/ilogbf.c new file mode 100644 index 0000000000..2579d024ed --- /dev/null +++ b/usr/src/lib/libm/common/R/ilogbf.c @@ -0,0 +1,90 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#if defined(ELFOBJ) +#pragma weak ilogbf = __ilogbf +#endif + +#include "libm.h" +#include "xpg6.h" /* __xpg6 */ + +#if defined(USE_FPSCALE) || defined(__x86) +static const float two25 = 33554432.0F; +#else +/* + * v: a non-zero subnormal |x| + */ +static int +ilogbf_subnormal(unsigned v) { + int r = -126 - 23; + + if (v & 0xffff0000) + r += 16, v >>= 16; + if (v & 0xff00) + r += 8, v >>= 8; + if (v & 0xf0) + r += 4, v >>= 4; + v <<= 1; + return (r + ((0xffffaa50 >> v) & 0x3)); +} +#endif /* defined(USE_FPSCALE) */ + +static int +raise_invalid(int v) { /* SUSv3 requires ilogbf(0,+/-Inf,NaN) raise invalid */ +#ifndef lint + if ((__xpg6 & _C99SUSv3_ilogb_0InfNaN_raises_invalid) != 0) { + static const double zero = 0.0; + volatile double dummy; + + dummy = zero / zero; + } +#endif + return (v); +} + +int +ilogbf(float x) { + int k = *((int *) &x) & ~0x80000000; + + if (k < 0x00800000) { + if (k == 0) + return (raise_invalid(0x80000001)); + else { +#if defined(USE_FPSCALE) || defined(__x86) + x *= two25; + return (((*((int *) &x) & 0x7f800000) >> 23) - 152); +#else + return (ilogbf_subnormal(k)); +#endif + } + } else if (k < 0x7f800000) + return ((k >> 23) - 127); + else + return (raise_invalid(0x7fffffff)); +} diff --git a/usr/src/lib/libm/common/R/isnanf.c b/usr/src/lib/libm/common/R/isnanf.c new file mode 100644 index 0000000000..b2dd090a19 --- /dev/null +++ b/usr/src/lib/libm/common/R/isnanf.c @@ -0,0 +1,40 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#if defined(ELFOBJ) +#pragma weak isnanf = __isnanf +#pragma weak _isnanf = __isnanf +#endif + +#include "libm.h" + +int +isnanf(float x) { + return ((*(int *) &x & ~0x80000000) > 0x7f800000); +} diff --git a/usr/src/lib/libm/common/R/lgammaf.c b/usr/src/lib/libm/common/R/lgammaf.c new file mode 100644 index 0000000000..bcc4ea03a7 --- /dev/null +++ b/usr/src/lib/libm/common/R/lgammaf.c @@ -0,0 +1,44 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma weak lgammaf = __lgammaf + +#include "libm.h" + +extern int signgamf; + +float +lgammaf(float x) { + float y; + + if (isnanf(x)) + return (x * x); + y = (float)__k_lgamma((double)x, &signgamf); + signgam = signgamf; /* SUSv3 requires the setting of signgam */ + return (y); +} diff --git a/usr/src/lib/libm/common/R/lgammaf_r.c b/usr/src/lib/libm/common/R/lgammaf_r.c new file mode 100644 index 0000000000..1c3cf27d2c --- /dev/null +++ b/usr/src/lib/libm/common/R/lgammaf_r.c @@ -0,0 +1,38 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma weak lgammaf_r = __lgammaf_r + +#include "libm.h" + +float +lgammaf_r(float x, int *signgamfp) { + if (isnanf(x)) + return (x * x); + return ((float)__k_lgamma((double)x, signgamfp)); +} diff --git a/usr/src/lib/libm/common/R/log10f.c b/usr/src/lib/libm/common/R/log10f.c new file mode 100644 index 0000000000..595ad9bf8b --- /dev/null +++ b/usr/src/lib/libm/common/R/log10f.c @@ -0,0 +1,55 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma weak log10f = __log10f + +#include "libm.h" + +static const float zero = 0.0f, mone = -1.0f; + +float +log10f(float x) { + int hx, ix; + float w; + + hx = *(int *)&x; + ix = hx & ~0x80000000; + if (ix > 0x7f800000) + return (x * x); + if (ix == 0x7f800000) + return (x + x * x); + if (ix == 0) { + w = mone; + return (w / zero); + } + if (hx < 0) { + w = zero; + return (w / zero); + } + return ((float)log10((double)x)); +} diff --git a/usr/src/lib/libm/common/R/log1pf.c b/usr/src/lib/libm/common/R/log1pf.c new file mode 100644 index 0000000000..0cb5b073e7 --- /dev/null +++ b/usr/src/lib/libm/common/R/log1pf.c @@ -0,0 +1,52 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma weak log1pf = __log1pf + +#include "libm.h" + +static const float zero = 0.0f; + +float +log1pf(float x) { + int ix; + + ix = *(int *)&x; + if (ix >= 0x7f800000) { + /* x is +inf or nan */ + return (x * x); + } + if (ix < 0) { + ix &= ~0x80000000; + if (ix == 0x3f800000) /* x is -1 */ + return (x / zero); + if (ix > 0x3f800000) /* x is < -1 or nan */ + return ((x * zero) / zero); + } + return ((float)log1p((double)x)); +} diff --git a/usr/src/lib/libm/common/R/log2f.c b/usr/src/lib/libm/common/R/log2f.c new file mode 100644 index 0000000000..cd168ffc3b --- /dev/null +++ b/usr/src/lib/libm/common/R/log2f.c @@ -0,0 +1,42 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma weak log2f = __log2f + +#include "libm.h" + +float +log2f(float x) { +#if defined(FPADD_TRAPS_INCOMPLETE_ON_NAN) + if (isnanf(x)) + return (x * x); + else +#endif + return ((float) log2((double) x)); +} diff --git a/usr/src/lib/libm/common/R/logbf.c b/usr/src/lib/libm/common/R/logbf.c new file mode 100644 index 0000000000..bf6851ef68 --- /dev/null +++ b/usr/src/lib/libm/common/R/logbf.c @@ -0,0 +1,87 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#if defined(ELFOBJ) +#pragma weak logbf = __logbf +#endif + +#include "libm.h" +#include "xpg6.h" /* __xpg6 */ +#define _C99SUSv3_logb _C99SUSv3_logb_subnormal_is_like_ilogb + +#if defined(USE_FPSCALE) || defined(__x86) +static const float two25 = 33554432.0F; +#else +/* + * v: a non-zero subnormal |x| + */ +static int +ilogbf_subnormal(unsigned v) { + int r = -126 - 23; + + if (v & 0xffff0000) + r += 16, v >>= 16; + if (v & 0xff00) + r += 8, v >>= 8; + if (v & 0xf0) + r += 4, v >>= 4; + v <<= 1; + return (r + ((0xffffaa50 >> v) & 0x3)); +} +#endif /* defined(USE_FPSCALE) */ + +static float +raise_division(float t) { +#pragma STDC FENV_ACCESS ON + static const float zero = 0.0F; + return (t / zero); +} + +float +logbf(float x) { + int k = *((int *) &x) & ~0x80000000; + + if (k < 0x00800000) { + if (k == 0) + return (raise_division(-1.0F)); + else if ((__xpg6 & _C99SUSv3_logb) != 0) { +#if defined(USE_FPSCALE) || defined(__x86) + x *= two25; + return ((float) (((*((int *) &x) & 0x7f800000) >> 23) - + 152)); +#else + return ((float) ilogbf_subnormal(k)); +#endif + } else + return (-126.F); + } else if (k < 0x7f800000) + return ((float) ((k >> 23) - 127)); + else + return (x * x); +} diff --git a/usr/src/lib/libm/common/R/logf.c b/usr/src/lib/libm/common/R/logf.c new file mode 100644 index 0000000000..d746260917 --- /dev/null +++ b/usr/src/lib/libm/common/R/logf.c @@ -0,0 +1,148 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
/*
 * Algorithm:
 *
 * Let y = x rounded to six significant bits.  Then for any choice
 * of e and z such that y = 2^e z, we have
 *
 * log(x) = e log(2) + log(z) + log(1+(x-y)/y)
 *
 * Note that (x-y)/y = (x'-y')/y' for any scaled x' = sx, y' = sy;
 * in particular, we can take s to be the power of two that makes
 * ulp(x') = 1.
 *
 * From a table, obtain l = log(z) and r = 1/y'.  For |s| <= 2^-6,
 * approximate log(1+s) by a polynomial p(s) where p(s) := s+s*s*
 * (K1+s*(K2+s*K3)).  Then we compute the expression above as
 * e*ln2 + l + p(r*(x'-y')) all evaluated in double precision.
 *
 * When x is subnormal, we first scale it to the normal range,
 * adjusting e accordingly.
 *
 * Accuracy:
 *
 * The largest error is less than 0.6 ulps.
 */

/*
 * For i = 0, ..., 12,
 *   TBL[2i] = log(1 + i/32) and TBL[2i+1] = 2^-23 / (1 + i/32)
 *
 * For i = 13, ..., 32,
 *   TBL[2i] = log(1/2 + i/64) and TBL[2i+1] = 2^-23 / (1 + i/32)
 */
static const double TBL[] = {
	0.000000000000000000e+00, 1.192092895507812500e-07,
	3.077165866675368733e-02, 1.155968868371212153e-07,
	6.062462181643483994e-02, 1.121969784007352926e-07,
	8.961215868968713805e-02, 1.089913504464285680e-07,
	1.177830356563834557e-01, 1.059638129340277719e-07,
	1.451820098444978890e-01, 1.030999260979729787e-07,
	1.718502569266592284e-01, 1.003867701480263102e-07,
	1.978257433299198675e-01, 9.781275040064102225e-08,
	2.231435513142097649e-01, 9.536743164062500529e-08,
	2.478361639045812692e-01, 9.304139672256097884e-08,
	2.719337154836417580e-01, 9.082612537202380448e-08,
	2.954642128938358980e-01, 8.871388989825581272e-08,
	3.184537311185345887e-01, 8.669766512784091150e-08,
	-3.522205935893520934e-01, 8.477105034722222546e-08,
	-3.302416868705768671e-01, 8.292820142663043248e-08,
	-3.087354816496132859e-01, 8.116377160904255122e-08,
	-2.876820724517809014e-01, 7.947285970052082892e-08,
	-2.670627852490452536e-01, 7.785096460459183052e-08,
	-2.468600779315257843e-01, 7.629394531250000159e-08,
	-2.270574506353460753e-01, 7.479798560049019504e-08,
	-2.076393647782444896e-01, 7.335956280048077330e-08,
	-1.885911698075500298e-01, 7.197542010613207272e-08,
	-1.698990367953974734e-01, 7.064254195601851460e-08,
	-1.515498981272009327e-01, 6.935813210227272390e-08,
	-1.335313926245226268e-01, 6.811959402901785336e-08,
	-1.158318155251217008e-01, 6.692451343201754014e-08,
	-9.844007281325252434e-02, 6.577064251077586116e-08,
	-8.134563945395240081e-02, 6.465588585805084723e-08,
	-6.453852113757117814e-02, 6.357828776041666578e-08,
	-4.800921918636060631e-02, 6.253602074795082293e-08,
	-3.174869831458029812e-02, 6.152737525201612732e-08,
	-1.574835696813916761e-02, 6.055075024801586965e-08,
	0.000000000000000000e+00, 5.960464477539062500e-08,
};

/* log(2) followed by the polynomial coefficients K3, K2, K1 */
static const double C[] = {
	6.931471805599452862e-01,
	-2.49887584306188944706e-01,
	3.33368809981254554946e-01,
	-5.00000008402474976565e-01
};

#define	ln2	C[0]
#define	K3	C[1]
#define	K2	C[2]
#define	K1	C[3]

/*
 * logf(x): single-precision natural logarithm (see Algorithm above).
 *
 *	NaN or +Inf	-> x * x
 *	-Inf		-> x * 0 (NaN)
 *	+-0		-> -1/0
 *	x < 0		-> 0/0 (NaN)
 *
 * The bits of x are read through a union rather than a casted pointer,
 * so the access is well defined under the C aliasing rules.  The local
 * exponent accumulator is named e so that it does not shadow exp().
 */
float
logf(float x)
{
	union {
		float f;
		int i;
	} u;
	double v, t;
	float f;
	int hx, ix, i, e, iy;

	u.f = x;
	hx = u.i;
	ix = hx & 0x7fffffff;

	if (ix >= 0x7f800000)	/* nan or inf */
		return ((hx < 0)? x * 0.0f : x * x);

	e = 0;
	if (hx < 0x00800000) {	/* negative, zero, or subnormal */
		if (hx <= 0) {
			f = 0.0f;
			return ((ix == 0)? -1.0f / f : f / f);
		}

		/* subnormal; scale by 2^149 */
		u.f = (float)ix;
		ix = u.i;
		e = -149;
	}

	/*
	 * Fractions at or above 0x320000 are counted in the next binade,
	 * which is why the upper half of TBL holds log(1/2 + i/64).
	 * NOTE: the difference can be negative, so this relies on >>
	 * being an arithmetic shift for signed int.
	 */
	e += (ix - 0x3f320000) >> 23;
	ix &= 0x007fffff;
	iy = (ix + 0x20000) & 0xfffc0000;	/* round to a multiple of 2^18 */
	i = iy >> 17;				/* even index: TBL[i], TBL[i+1] */
	t = ln2 * (double)e + TBL[i];
	v = (double)(ix - iy) * TBL[i + 1];
	v += (v * v) * (K1 + v * (K2 + v * K3));
	f = (float)(t + v);
	return (f);
}
/*
 * nextafterf(x, y): the float adjacent to x in the direction of y.
 *
 *	x or y NaN			-> x * y
 *	x == y, or both are zeros	-> y (C99 requirement)
 *	x == +-0, y non-zero		-> smallest subnormal with y's sign
 *
 * Otherwise the bit pattern of x is stepped by one: away from zero when
 * y has the same sign and larger magnitude, toward zero in every other
 * case.  An infinite result raises overflow and a zero or subnormal
 * result raises underflow, by squaring a volatile temporary.
 *
 * All bit access goes through a union rather than casted pointers, so
 * it is well defined under the C aliasing rules.
 */
float
nextafterf(float x, float y)
{
	union {
		float f;
		int i;
	} ux, uy, uw;
	int ix, iy, iz;

	ux.f = x;
	uy.f = y;
	ix = ux.i;
	iy = uy.i;
	if ((ix & 0x7fffffff) > 0x7f800000)
		return (x * y);		/* + -> * for Cheetah */
	if ((iy & 0x7fffffff) > 0x7f800000)
		return (y * x);		/* + -> * for Cheetah */
	if (ix == iy || (unsigned)(ix | iy) == 0x80000000U)
		return (y);		/* C99 requirement */
	if ((ix & 0x7fffffff) == 0) {
		iz = (int)(1U | ((unsigned)iy & 0x80000000U));
	} else if (ix > 0) {
		if (ix > iy)
			iz = ix - 1;
		else
			iz = ix + 1;
	} else {
		if (iy < 0 && ix < iy)
			iz = ix + 1;
		else
			iz = ix - 1;
	}
	uw.i = iz;
	ix = iz & 0x7f800000;
	if (ix == 0x7f800000) {
		/* raise overflow: square the largest finite float */
		volatile float t = 0x1.fffffep+127f;	/* bits 0x7f7fffff */

		t *= t;
	} else if (ix == 0) {
		/* raise underflow: square the smallest normal float */
		volatile float t = 0x1p-126f;		/* bits 0x00800000 */

		t *= t;
	}
	return (uw.f);
}
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak powf = __powf + +#include "libm.h" +#include "xpg6.h" /* __xpg6 */ +#define _C99SUSv3_pow _C99SUSv3_pow_treats_Inf_as_an_even_int + +#if defined(__i386) && !defined(__amd64) +extern int __swapRP(int); +#endif + +/* INDENT OFF */ +static const double + ln2 = 6.93147180559945286227e-01, /* 0x3fe62e42, 0xfefa39ef */ + invln2 = 1.44269504088896338700e+00, /* 0x3ff71547, 0x652b82fe */ + dtwo = 2.0, + done = 1.0, + dhalf = 0.5, + d32 = 32.0, + d1_32 = 0.03125, + A0 = 1.999999999813723303647511146995966439250e+0000, + A1 = 6.666910817935858533770138657139665608610e-0001, + t0 = 2.000000000004777489262405315073203746943e+0000, + t1 = 1.666663408349926379873111932994250726307e-0001; + +static const double S[] = { + 1.00000000000000000000e+00, /* 3FF0000000000000 */ + 1.02189714865411662714e+00, /* 3FF059B0D3158574 */ + 1.04427378242741375480e+00, /* 3FF0B5586CF9890F */ + 1.06714040067682369717e+00, /* 3FF11301D0125B51 */ + 1.09050773266525768967e+00, /* 3FF172B83C7D517B */ + 1.11438674259589243221e+00, /* 3FF1D4873168B9AA */ + 1.13878863475669156458e+00, /* 3FF2387A6E756238 */ + 1.16372485877757747552e+00, /* 3FF29E9DF51FDEE1 */ + 1.18920711500272102690e+00, /* 3FF306FE0A31B715 */ + 1.21524735998046895524e+00, /* 3FF371A7373AA9CB */ + 1.24185781207348400201e+00, /* 3FF3DEA64C123422 */ + 
1.26905095719173321989e+00, /* 3FF44E086061892D */ + 1.29683955465100964055e+00, /* 3FF4BFDAD5362A27 */ + 1.32523664315974132322e+00, /* 3FF5342B569D4F82 */ + 1.35425554693689265129e+00, /* 3FF5AB07DD485429 */ + 1.38390988196383202258e+00, /* 3FF6247EB03A5585 */ + 1.41421356237309514547e+00, /* 3FF6A09E667F3BCD */ + 1.44518080697704665027e+00, /* 3FF71F75E8EC5F74 */ + 1.47682614593949934623e+00, /* 3FF7A11473EB0187 */ + 1.50916442759342284141e+00, /* 3FF82589994CCE13 */ + 1.54221082540794074411e+00, /* 3FF8ACE5422AA0DB */ + 1.57598084510788649659e+00, /* 3FF93737B0CDC5E5 */ + 1.61049033194925428347e+00, /* 3FF9C49182A3F090 */ + 1.64575547815396494578e+00, /* 3FFA5503B23E255D */ + 1.68179283050742900407e+00, /* 3FFAE89F995AD3AD */ + 1.71861929812247793414e+00, /* 3FFB7F76F2FB5E47 */ + 1.75625216037329945351e+00, /* 3FFC199BDD85529C */ + 1.79470907500310716820e+00, /* 3FFCB720DCEF9069 */ + 1.83400808640934243066e+00, /* 3FFD5818DCFBA487 */ + 1.87416763411029996256e+00, /* 3FFDFC97337B9B5F */ + 1.91520656139714740007e+00, /* 3FFEA4AFA2A490DA */ + 1.95714412417540017941e+00, /* 3FFF50765B6E4540 */ +}; + +static const double TBL[] = { + 0.00000000000000000e+00, + 3.07716586667536873e-02, + 6.06246218164348399e-02, + 8.96121586896871380e-02, + 1.17783035656383456e-01, + 1.45182009844497889e-01, + 1.71850256926659228e-01, + 1.97825743329919868e-01, + 2.23143551314209765e-01, + 2.47836163904581269e-01, + 2.71933715483641758e-01, + 2.95464212893835898e-01, + 3.18453731118534589e-01, + 3.40926586970593193e-01, + 3.62905493689368475e-01, + 3.84411698910332056e-01, + 4.05465108108164385e-01, + 4.26084395310900088e-01, + 4.46287102628419530e-01, + 4.66089729924599239e-01, + 4.85507815781700824e-01, + 5.04556010752395312e-01, + 5.23248143764547868e-01, + 5.41597282432744409e-01, + 5.59615787935422659e-01, + 5.77315365034823613e-01, + 5.94707107746692776e-01, + 6.11801541105992941e-01, + 6.28608659422374094e-01, + 6.45137961373584701e-01, + 6.61398482245365016e-01, + 
6.77398823591806143e-01, +}; + +static const float zero = 0.0F, one = 1.0F, huge = 1.0e25f, tiny = 1.0e-25f; +/* INDENT ON */ + +float +powf(float x, float y) { + float fx = x, fy = y; + float fz; + int ix, iy, jx, jy, k, iw, yisint; + + ix = *(int *)&x; + iy = *(int *)&y; + jx = ix & ~0x80000000; + jy = iy & ~0x80000000; + + if (jy == 0) + return (one); /* x**+-0 = 1 */ + else if (ix == 0x3f800000 && (__xpg6 & _C99SUSv3_pow) != 0) + return (one); /* C99: 1**anything = 1 */ + else if (((0x7f800000 - jx) | (0x7f800000 - jy)) < 0) + return (fx * fy); /* at least one of x or y is NaN */ + /* includes Sun: 1**NaN = NaN */ + /* INDENT OFF */ + /* + * determine if y is an odd int + * yisint = 0 ... y is not an integer + * yisint = 1 ... y is an odd int + * yisint = 2 ... y is an even int + */ + /* INDENT ON */ + yisint = 0; + if (ix < 0) { + if (jy >= 0x4b800000) { + yisint = 2; /* |y|>=2**24: y must be even */ + } else if (jy >= 0x3f800000) { + k = (jy >> 23) - 0x7f; /* exponent */ + iw = jy >> (23 - k); + if ((iw << (23 - k)) == jy) + yisint = 2 - (iw & 1); + } + } + + /* special value of y */ + if ((jy & ~0x7f800000) == 0) { + if (jy == 0x7f800000) { /* y is +-inf */ + if (jx == 0x3f800000) { + if ((__xpg6 & _C99SUSv3_pow) != 0) + fz = one; + /* C99: (-1)**+-inf is 1 */ + else + fz = fy - fy; + /* Sun: (+-1)**+-inf = NaN */ + } else if (jx > 0x3f800000) { + /* (|x|>1)**+,-inf = inf,0 */ + if (iy > 0) + fz = fy; + else + fz = zero; + } else { /* (|x|<1)**-,+inf = inf,0 */ + if (iy < 0) + fz = -fy; + else + fz = zero; + } + return (fz); + } else if (jy == 0x3f800000) { /* y is +-1 */ + if (iy < 0) + fx = one / fx; /* y is -1 */ + return (fx); + } else if (iy == 0x40000000) { /* y is 2 */ + return (fx * fx); + } else if (iy == 0x3f000000) { /* y is 0.5 */ + if (jx != 0 && jx != 0x7f800000) + return (sqrtf(x)); + } + } + + /* special value of x */ + if ((jx & ~0x7f800000) == 0) { + if (jx == 0x7f800000 || jx == 0 || jx == 0x3f800000) { + /* x is +-0,+-inf,-1; set fz = 
|x|**y */ + *(int *)&fz = jx; + if (iy < 0) + fz = one / fz; + if (ix < 0) { + if (jx == 0x3f800000 && yisint == 0) { + /* (-1)**non-int is NaN */ + fz = zero; + fz /= fz; + } else if (yisint == 1) { + /* (x<0)**odd = -(|x|**odd) */ + fz = -fz; + } + } + return (fz); + } + } + + /* (x<0)**(non-int) is NaN */ + if (ix < 0 && yisint == 0) { + fz = zero; + return (fz / fz); + } + + /* + * compute exp(y*log(|x|)) + * fx = *(float *) &jx; + * fz = (float) exp(((double) fy) * log((double) fx)); + */ + { + double dx, dy, dz, ds; + int *px = (int *)&dx, *pz = (int *)&dz, i, n, m; +#if defined(__i386) && !defined(__amd64) + int rp = __swapRP(fp_extended); +#endif + + fx = *(float *)&jx; + dx = (double)fx; + + /* compute log(x)/ln2 */ + i = px[HIWORD] + 0x4000; + n = (i >> 20) - 0x3ff; + pz[HIWORD] = i & 0xffff8000; + pz[LOWORD] = 0; + ds = (dx - dz) / (dx + dz); + i = (i >> 15) & 0x1f; + dz = ds * ds; + dy = invln2 * (TBL[i] + ds * (A0 + dz * A1)); + if (n == 0) + dz = (double)fy * dy; + else + dz = (double)fy * (dy + (double)n); + + /* compute exp2(dz=y*ln(x)) */ + i = pz[HIWORD]; + if ((i & ~0x80000000) >= 0x40640000) { /* |z| >= 160.0 */ + fz = (i > 0)? huge : tiny; + if (ix < 0 && yisint == 1) + fz *= -fz; /* (-ve)**(odd int) */ + else + fz *= fz; +#if defined(__i386) && !defined(__amd64) + if (rp != fp_extended) + (void) __swapRP(rp); +#endif + return (fz); + } + + n = (int)(d32 * dz + (i > 0 ? 
dhalf : -dhalf)); + i = n & 0x1f; + m = n >> 5; + dy = ln2 * (dz - d1_32 * (double)n); + dx = S[i] * (done - (dtwo * dy) / (dy * (done - dy * t1) - t0)); + if (m != 0) + px[HIWORD] += m << 20; + fz = (float)dx; +#if defined(__i386) && !defined(__amd64) + if (rp != fp_extended) + (void) __swapRP(rp); +#endif + } + + /* end of computing exp(y*log(x)) */ + if (ix < 0 && yisint == 1) + fz = -fz; /* (-ve)**(odd int) */ + return (fz); +} diff --git a/usr/src/lib/libm/common/R/remainderf.c b/usr/src/lib/libm/common/R/remainderf.c new file mode 100644 index 0000000000..e701902608 --- /dev/null +++ b/usr/src/lib/libm/common/R/remainderf.c @@ -0,0 +1,47 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#if defined(ELFOBJ) +#pragma weak remainderf = __remainderf +#endif + +#include "libm.h" + +float +remainderf(float x, float y) { + if (isnanf(x) || isnanf(y)) + return (x * y); + if (y == 0.0f || (*(int *) &x & ~0x80000000) == 0x7f800000) { + /* y is 0 or x is infinite; raise invalid and return NaN */ + y = 0.0f; + *(int *) &x = 0x7f800000; + return (x * y); + } + return ((float) remainder((double) x, (double) y)); +} diff --git a/usr/src/lib/libm/common/R/rintf.c b/usr/src/lib/libm/common/R/rintf.c new file mode 100644 index 0000000000..c958dfd942 --- /dev/null +++ b/usr/src/lib/libm/common/R/rintf.c @@ -0,0 +1,166 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
/* INDENT OFF */
/*
 * aintf(x)	return x chopped to integral value
 * anintf(x)	return sign(x)*(|x|+0.5) chopped to integral value
 * irintf(x)	return rint(x) in integer format
 * nintf(x)	return anint(x) in integer format
 * rintf(x)	return x rounded to integral according to the rounding direction
 *
 * NOTE: rintf(x), aintf(x) and anintf(x) return results with the same sign as
 * x's, including 0.0.
 *
 * All bit access goes through a union rather than casted pointers, and
 * sign-dependent choices are written as comparisons instead of masks
 * built by right-shifting negative ints.
 */

static const float xf[] = {
/* ZEROF */	0.0f,
/* TWO_23F */	8.3886080000e6f,
/* MTWO_23F */	-8.3886080000e6f,
/* ONEF */	1.0f,
/* MONEF */	-1.0f,
/* HALFF */	0.5f,
/* MHALFF */	-0.5f,
/* HUGEF */	1.0e30f,
};

#define	ZEROF		xf[0]
#define	TWO_23F		xf[1]
#define	MTWO_23F	xf[2]
#define	ONEF		xf[3]
#define	MONEF		xf[4]
#define	HALFF		xf[5]
#define	MHALFF		xf[6]
#define	HUGEF		xf[7]
/* INDENT ON */

/*
 * Truncate x toward zero.  Arguments whose biased exponent is 150 or
 * more (|x| >= 2^23, Inf, NaN) are returned as x * 1.
 */
float
aintf(float x)
{
	union {
		float f;
		int i;
	} u;
	int hx, k;
	float y;

	u.f = x;
	hx = u.i;
	k = (hx & 0x7fffffff) >> 23;
	if (k < 150) {
		y = (float)((int)x);
		/*
		 * make sure y has the same sign of x when |x|<1
		 * (i.e., y=0.0)
		 */
		return ((k < 127 && hx < 0) ? -y : y);
	}
	/* signal invalid if x is a SNaN */
	return (x * ONEF);		/* +0 -> *1 for Cheetah */
}

/*
 * Round x to the nearest integral value, halfway cases away from zero.
 * The rounding is done on the bit pattern; inexact is raised by a
 * throw-away addition.
 */
float
anintf(float x)
{
	union {
		float f;
		int i;
	} u;
	volatile float dummy;
	int hx, k, j, ix;

	u.f = x;
	hx = u.i;
	ix = hx & 0x7fffffff;
	k = ix >> 23;
	if (k >= 127 && k < 150) {
		j = 1 << (149 - k);	/* bit worth 0.5 */
		k = j + j - 1;		/* mask of all fraction bits */
		if ((k & hx) != 0)
			dummy = HUGEF + x;	/* raise inexact */
		u.i = (hx + j) & ~k;
		return (u.f);
	} else if (k <= 126) {
		dummy = HUGEF + x;
		/* |x| in [0.5, 1) rounds to +-1, anything smaller to +-0 */
		u.i = (int)((k == 126 ? 0x3f800000U : 0U) |
		    (0x80000000U & (unsigned)hx));
		return (u.f);
	}
	/* signal invalid if x is a SNaN */
	return (x * ONEF);		/* +0 -> *1 for Cheetah */
}

/*
 * Round x to an int in the current rounding direction by adding and
 * subtracting +-2^23.  When the biased exponent is 150 or more, zero
 * is added instead.
 */
int
irintf(float x)
{
	union {
		float f;
		int i;
	} u;
	float v;
	int hx, k;

	u.f = x;
	hx = u.i;
	k = (hx & 0x7fffffff) >> 23;
	if (k < 150)
		v = (hx < 0) ? MTWO_23F : TWO_23F;
	else
		v = ZEROF;
	return ((int)((float)(x + v) - v));
}

/*
 * Round x to the nearest int, halfway cases away from zero.  The
 * significand is rounded directly for biased exponents 126..149;
 * every other argument goes through a plain conversion to int.
 */
int
nintf(float x)
{
	union {
		float f;
		int i;
	} u;
	int hx, ix, k, j, m, r;
	volatile float dummy;

	u.f = x;
	hx = u.i;
	k = (hx & 0x7fffffff) >> 23;
	if (k >= 126 && k < 150) {
		ix = (hx & 0x007fffff) | 0x800000;	/* significand */
		m = 149 - k;
		j = 1 << m;				/* bit worth 0.5 */
		if ((ix & (j + j - 1)) != 0)
			dummy = HUGEF + x;		/* raise inexact */
		r = (ix + j) >> (m + 1);
		return ((hx < 0) ? -r : r);
	}
	return ((int)x);
}

/*
 * Round x to an integral float in the current rounding direction by
 * adding and subtracting +-2^23.  A result that rounds to zero is
 * returned as 0 * x so that it keeps the sign of x.
 */
float
rintf(float x)
{
	union {
		float f;
		int i;
	} u;
	float w, v;
	int hx, k;

	u.f = x;
	hx = u.i;
	k = (hx & 0x7fffffff) >> 23;
#if defined(FPADD_TRAPS_INCOMPLETE_ON_NAN)
	if (k >= 150)
		return (x * ONEF);
	v = (hx < 0) ? MTWO_23F : TWO_23F;
#else
	if (k < 150)
		v = (hx < 0) ? MTWO_23F : TWO_23F;
	else
		v = ZEROF;
#endif
	w = (float)(x + v);
	if (k < 127 && w == v)
		return (ZEROF * x);
	else
		return (w - v);
}
/*
 * scalbf(x, y): x * 2^y for integral y.
 *
 *	x NaN, or y Inf/NaN	-> x / -y when y's sign bit is set,
 *				   otherwise x * y
 *	|y| >= 2^23		-> scalbnf(x, +-65000)
 *	y == +-0		-> x
 *	y not an integer	-> 0/0 (NaN)
 *
 * Whether y is an integer is decided from its bit pattern, so no
 * inexact exception is raised by the test.
 *
 * Operand bits are read through unions rather than casted pointers,
 * so the accesses are well defined under the C aliasing rules.
 */
float
scalbf(float x, float y)
{
	union {
		float f;
		int i;
	} ux, uy;
	int iy, hx, hy, n;

	ux.f = x;
	uy.f = y;
	iy = uy.i;
	hx = ux.i & 0x7fffffff;		/* bits of |x| */
	hy = iy & 0x7fffffff;		/* bits of |y| */

	if (hx > 0x7f800000 || hy >= 0x7f800000) {
		/* x is nan or y is inf or nan */
		return ((iy < 0)? x / -y : x * y);
	}

	/* see if y is an integer without raising inexact */
	if (hy >= 0x4b000000) {
		/* |y| >= 2^23, so it must be an integer */
		n = (iy < 0)? -65000 : 65000;
	} else if (hy < 0x3f800000) {
		/* |y| < 1, so it must be zero or non-integer */
		return ((hy == 0)? x : (x - x) / (x - x));
	} else {
		/* any set bit below the binary point means y is not integral */
		if (hy & ((1 << (0x96 - (hy >> 23))) - 1))
			return ((y - y) / (y - y));
		n = (int)y;
	}
	return (scalbnf(x, n));
}
/* 2^-25: final scale for results that land in the subnormal range */
static const float twom25f = 2.98023223876953125e-8F;
#if defined(USE_FPSCALE) || defined(__x86)
/* 2^23: scales a subnormal argument into the normal range */
static const float two23f = 8388608.0F;
#else
/*
 * Return the would-be biased exponent of a subnormal float.
 *
 * v: a non-zero subnormal |x|; returns [-22, 0]
 *
 * The position of the highest set bit is found by binary search over
 * 16, 8 and 4 bit groups; the last nibble is resolved through the
 * packed 2-bit lookup table 0xffffaa50.
 */
static int
ilogbf_biased(unsigned v)
{
	int r = -22;

	if (v & 0xffff0000) {
		r += 16;
		v >>= 16;
	}
	if (v & 0xff00) {
		r += 8;
		v >>= 8;
	}
	if (v & 0xf0) {
		r += 4;
		v >>= 4;
	}
	v <<= 1;
	return (r + ((0xffffaa50 >> v) & 0x3));
}
#endif	/* defined(USE_FPSCALE) */

/*
 * scalbnf(x, n): x * 2^n, computed by adjusting the exponent field.
 *
 *	x Inf or NaN	-> x + x (or x * x / x under
 *			   FPADD_TRAPS_INCOMPLETE_ON_NAN)
 *	x == +-0, n == 0 -> x
 *	overflow	-> FLT_MAX * +-FLT_MAX
 *	total underflow	-> FLT_MIN * +-FLT_MIN
 *
 * A subnormal argument is normalised first.  A result in the subnormal
 * range is built 25 binades too high and then multiplied by 2^-25, so
 * the final rounding is done by the hardware.
 *
 * The bits of x are accessed through a union rather than a casted
 * pointer.  The range check on n uses plain comparisons: abs(n) is
 * undefined for the most negative int.
 */
float
scalbnf(float x, int n)
{
	union {
		float f;
		int i;
	} u;
	int ix, k;

	u.f = x;
	ix = u.i & 0x7fffffff;
	k = ix >> 23;
	if (k == 0xff)
#if defined(FPADD_TRAPS_INCOMPLETE_ON_NAN)
		return (ix > 0x7f800000 ? x * x : x);
#else
		return (x + x);
#endif
	if (ix == 0 || n == 0)
		return (x);
	if (k == 0) {
#if defined(USE_FPSCALE) || defined(__x86)
		u.f = x * two23f;
		k = ((u.i & 0x7fffffff) >> 23) - 23;
#else
		k = ilogbf_biased(ix);
		/* move the leading significand bit up to bit 23 */
		u.i = (int)(((unsigned)u.i & 0x80000000U) |
		    ((unsigned)ix << (-k + 1)));
#endif
	}
	/*
	 * Halve very large |n| to avoid integer overflow in k + n; the
	 * result still overflows or underflows exactly as before.
	 */
	if (n >= 131072 || n <= -131072)
		n /= 2;
	k += n;
	if (k > 0xfe)
		return (FLT_MAX * copysignf(FLT_MAX, x));
	if (k <= -25)
		return (FLT_MIN * copysignf(FLT_MIN, x));
	if (k > 0) {
		u.i = (u.i & ~0x7f800000) | (k << 23);
		return (u.f);
	}
	k += 25;
	u.i = (u.i & ~0x7f800000) | (k << 23);
	return (u.f * twom25f);
}
+ */ + +#pragma weak signgamf = __signgamf + +#include "libm.h" + +int signgamf = 0; diff --git a/usr/src/lib/libm/common/R/significandf.c b/usr/src/lib/libm/common/R/significandf.c new file mode 100644 index 0000000000..1e9c81f7ec --- /dev/null +++ b/usr/src/lib/libm/common/R/significandf.c @@ -0,0 +1,48 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#if defined(ELFOBJ) +#pragma weak significandf = __significandf +#endif + +#include "libm.h" + +float +significandf(float x) { + int ix = *(int *) &x & ~0x80000000; + + if (ix == 0 || ix >= 0x7f800000) /* 0/+-Inf/NaN */ +#if defined(FPADD_TRAPS_INCOMPLETE_ON_NAN) + return (ix > 0x7f800000 ? 
x * x : x); +#else + return (x + x); +#endif + else + return (scalbnf(x, -ilogbf(x))); +} diff --git a/usr/src/lib/libm/common/R/sincosf.c b/usr/src/lib/libm/common/R/sincosf.c new file mode 100644 index 0000000000..73b8c731d4 --- /dev/null +++ b/usr/src/lib/libm/common/R/sincosf.c @@ -0,0 +1,187 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */
+
+#pragma weak sincosf = __sincosf
+
+/* INDENT OFF */
+/*
+ * For |x| < pi/4, let z = x * x, and approximate sin(x) by
+ *
+ *	S(x) = x(S0 + S1*z)(S2 + S3*z + z*z)
+ * where
+ *	S0 =  1.85735322054308378716204874632872525989806770558e-0003,
+ *	S1 = -1.95035094218403635082921458859320791358115801259e-0004,
+ *	S2 =  5.38400550766074785970952495168558701485841707252e+0002,
+ *	S3 = -3.31975110777873728964197739157371509422022905947e+0001,
+ *
+ * with error bounded by |(sin(x) - S(x))/x| < 2**(-28.2), and
+ * cos(x) by
+ *
+ *	C(x) = (C0 + C1*z + C2*z*z) * (C3 + C4*z + z*z)
+ * where
+ *	C0 =  1.09349482127188401868272000389539985058873853699e-0003
+ *	C1 = -5.03324285989964979398034700054920226866107675091e-0004
+ *	C2 =  2.43792880266971107750418061559602239831538067410e-0005
+ *	C3 =  9.14499072605666582228127405245558035523741471271e+0002
+ *	C4 = -3.63151270591815439197122504991683846785293207730e+0001
+ *
+ * with error bounded by |cos(x) - C(x)| < 2**(-34.2).
+ */
+/* INDENT ON */
+
+#include "libm.h"
+
+extern const int _TBL_ipio2_inf[];
+extern int __rem_pio2m(double *, double *, int, int, int, const int *);
+#if defined(__i386) && !defined(__amd64)
+extern int __swapRP(int);
+#endif
+/*
+ * All double constants used below live in one table and are reached
+ * through the macros that follow it: the S and C polynomial coefficients
+ * described above, then 2/pi, 0.5, and pi/2 split into a leading part
+ * (pio2_1) and a small tail (pio2_t) whose sum approximates pi/2.
+ */
+static const double C[] = {
+	1.85735322054308378716204874632872525989806770558e-0003,	/* S0 */
+	-1.95035094218403635082921458859320791358115801259e-0004,	/* S1 */
+	5.38400550766074785970952495168558701485841707252e+0002,	/* S2 */
+	-3.31975110777873728964197739157371509422022905947e+0001,	/* S3 */
+	1.09349482127188401868272000389539985058873853699e-0003,	/* C0 */
+	-5.03324285989964979398034700054920226866107675091e-0004,	/* C1 */
+	2.43792880266971107750418061559602239831538067410e-0005,	/* C2 */
+	9.14499072605666582228127405245558035523741471271e+0002,	/* C3 */
+	-3.63151270591815439197122504991683846785293207730e+0001,	/* C4 */
+	0.636619772367581343075535, /* 2^ -1 * 1.45F306DC9C883 */
+	0.5,
+	1.570796326734125614166, /* 2^ 0 * 1.921FB54400000 */
+	6.077100506506192601475e-11, /* 2^-34 * 1.0B4611A626331 */
+};
+
+#define	S0	C[0]
+#define	S1	C[1]
+#define	S2	C[2]
+#define	S3	C[3]
+#define	C0	C[4]
+#define	C1	C[5]
+#define	C2	C[6]
+#define	C3	C[7]
+#define	C4	C[8]
+#define	invpio2	C[9]
+#define	half	C[10]
+#define	pio2_1	C[11]
+#define	pio2_t	C[12]
+/*
+ * sincosf(x, s, c): store the single-precision sine of x in *s and the
+ * cosine of x in *c.  The argument is widened to double, all arithmetic
+ * is done in double, and each result is narrowed to float once.
+ *
+ *	|x| <= 2**-12		*s = x and *c = 1.  The (int) conversion of
+ *				x is stored to a volatile, presumably so the
+ *				conversion is really performed and raises
+ *				inexact for nonzero x (TODO confirm).
+ *	|x| <= pi/4		S(x) and C(x) are evaluated directly.
+ *	pi/4 < |x| <= 3*pi/4	pi/2 (as pio2_1 + pio2_t) is removed from
+ *				|x| and the roles of S and C are exchanged,
+ *				with the sign chosen from the sign of x.
+ *	|x| <= 2^19*pi		n is x * invpio2 rounded to the nearest
+ *				integer and y = x - n * (pio2_1 + pio2_t).
+ *	x is Inf or NaN		both outputs receive x / x.
+ *	anything larger		__rem_pio2m() supplies y and n.
+ *
+ * After either general reduction, bit 1 of n negates both polynomial
+ * values and bit 0 of n exchanges them (with cos taking -S).
+ */
+void
+sincosf(float x, float *s, float *c)
+{
+	double y, z, w;
+	float f, g;
+	int n, ix, hx, hy;
+	volatile int i;
+
+	hx = *((int *)&x);		/* raw bits of x */
+	ix = hx & 0x7fffffff;		/* raw bits of |x| */
+
+	y = (double)x;
+
+	if (ix <= 0x4016cbe4) {		/* |x| < 3*pi/4 */
+		if (ix <= 0x3f490fdb) {	/* |x| < pi/4 */
+			if (ix <= 0x39800000) {	/* |x| <= 2**-12 */
+				i = (int)y;
+#ifdef lint
+				i = i;
+#endif
+				*s = x;
+				*c = 1.0f;
+				return;
+			}
+			z = y * y;
+			*s = (float)((y * (S0 + z * S1)) *
+			    (S2 + z * (S3 + z)));
+			*c = (float)(((C0 + z * C1) + (z * z) * C2) *
+			    (C3 + z * (C4 + z)));
+		} else if (hx > 0) {
+			y = (y - pio2_1) - pio2_t;	/* y = x - pi/2 */
+			z = y * y;
+			*s = (float)(((C0 + z * C1) + (z * z) * C2) *
+			    (C3 + z * (C4 + z)));
+			*c = (float)-((y * (S0 + z * S1)) *
+			    (S2 + z * (S3 + z)));
+		} else {
+			y = (y + pio2_1) + pio2_t;	/* y = x + pi/2 */
+			z = y * y;
+			*s = (float)-(((C0 + z * C1) + (z * z) * C2) *
+			    (C3 + z * (C4 + z)));
+			*c = (float)((y * (S0 + z * S1)) *
+			    (S2 + z * (S3 + z)));
+		}
+		return;
+	} else if (ix <= 0x49c90fdb) {	/* |x| < 2^19*pi */
+#if defined(__i386) && !defined(__amd64)
+		int rp;
+
+		rp = __swapRP(fp_extended);	/* returns the previous setting */
+#endif
+		w = y * invpio2;
+		if (hx < 0)
+			n = (int)(w - half);
+		else
+			n = (int)(w + half);
+		y = (y - n * pio2_1) - n * pio2_t;
+#if defined(__i386) && !defined(__amd64)
+		if (rp != fp_extended)
+			(void) __swapRP(rp);	/* put the saved setting back */
+#endif
+	} else {
+		if (ix >= 0x7f800000) {	/* x is Inf or NaN */
+			*s = *c = x / x;
+			return;
+		}
+		hy = ((int *)&y)[HIWORD];
+		n = ((hy >> 20) & 0x7ff) - 1046;	/* exponent of y, less 23 */
+		/* w = significand of |y| with exponent field 0x416 (2^23) */
+		((int *)&w)[HIWORD] = (hy & 0xfffff) | 0x41600000;
+		((int *)&w)[LOWORD] = ((int *)&y)[LOWORD];
+		n = __rem_pio2m(&w, &y, n, 1, 0, _TBL_ipio2_inf);
+		if (hy < 0) {		/* w dropped the sign of x; reapply it */
+			y = -y;
+			n = -n;
+		}
+	}
+
+	z = y * y;
+	f = (float)((y * (S0 + z * S1)) * (S2 + z * (S3 + z)));	/* S(y) */
+	g = (float)(((C0 + z * C1) + (z * z) * C2) *
+	    (C3 + z * (C4 + z)));				/* C(y) */
+	if (n & 2) {
+		f = -f;
+		g = -g;
+	}
+	if (n & 1) {
+		*s = g;
+		*c = -f;
+	} else {
+		*s = f;
+		*c = g;
+	}
+}
diff --git a/usr/src/lib/libm/common/R/sincospif.c b/usr/src/lib/libm/common/R/sincospif.c
new file mode 100644
index 0000000000..414feb52e1
--- /dev/null
+++ b/usr/src/lib/libm/common/R/sincospif.c
@@ -0,0 +1,51 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma weak sincospif = __sincospif
+
+#include "libm.h"
+
+extern void sincospi(double, double *, double *);
+/*
+ * sincospif(x, s, c): float wrapper around the double routine sincospi().
+ * x is widened to double, sincospi() is called, and its two results are
+ * narrowed to float and stored in *s and *c.  Judging by the names these
+ * are sin(pi*x) and cos(pi*x); sincospi() is defined elsewhere, so
+ * confirm there.
+ *
+ * When FPADD_TRAPS_INCOMPLETE_ON_NAN is defined, a NaN argument is not
+ * passed on: x * x is stored in both outputs instead.
+ */
+void
+sincospif(float x, float *s, float *c) {
+	double ds, dc;
+
+#if defined(FPADD_TRAPS_INCOMPLETE_ON_NAN)
+	if (isnanf(x))
+		*s = *c = x * x;
+	else {
+#endif
+		sincospi((double) x, &ds, &dc);
+		*s = (float) ds;
+		*c = (float) dc;
+#if defined(FPADD_TRAPS_INCOMPLETE_ON_NAN)
+	}	/* closes the else opened under the same #if above */
+#endif
+}
diff --git a/usr/src/lib/libm/common/R/sinf.c b/usr/src/lib/libm/common/R/sinf.c
new file mode 100644
index 0000000000..fe194621ab
--- /dev/null
+++ b/usr/src/lib/libm/common/R/sinf.c
@@ -0,0 +1,151 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma weak sinf = __sinf
+
+/*
+ * See sincosf.c
+ */
+
+#include "libm.h"
+
+extern const int _TBL_ipio2_inf[];
+extern int __rem_pio2m(double *, double *, int, int, int, const int *);
+#if defined(__i386) && !defined(__amd64)
+extern int __swapRP(int);
+#endif
+/*
+ * Same table layout as in sincosf.c: sine coefficients S0-S3, cosine
+ * coefficients C0-C4, then 2/pi, 0.5, and pi/2 split into a leading part
+ * (pio2_1) and a small tail (pio2_t).
+ */
+static const double C[] = {
+	1.85735322054308378716204874632872525989806770558e-0003,	/* S0 */
+	-1.95035094218403635082921458859320791358115801259e-0004,	/* S1 */
+	5.38400550766074785970952495168558701485841707252e+0002,	/* S2 */
+	-3.31975110777873728964197739157371509422022905947e+0001,	/* S3 */
+	1.09349482127188401868272000389539985058873853699e-0003,	/* C0 */
+	-5.03324285989964979398034700054920226866107675091e-0004,	/* C1 */
+	2.43792880266971107750418061559602239831538067410e-0005,	/* C2 */
+	9.14499072605666582228127405245558035523741471271e+0002,	/* C3 */
+	-3.63151270591815439197122504991683846785293207730e+0001,	/* C4 */
+	0.636619772367581343075535, /* 2^ -1 * 1.45F306DC9C883 */
+	0.5,
+	1.570796326734125614166, /* 2^ 0 * 1.921FB54400000 */
+	6.077100506506192601475e-11, /* 2^-34 * 1.0B4611A626331 */
+};
+
+#define	S0	C[0]
+#define	S1	C[1]
+#define	S2	C[2]
+#define	S3	C[3]
+#define	C0	C[4]
+#define	C1	C[5]
+#define	C2	C[6]
+#define	C3	C[7]
+#define	C4	C[8]
+#define	invpio2	C[9]
+#define	half	C[10]
+#define	pio2_1	C[11]
+#define	pio2_t	C[12]
+/*
+ * sinf(x): single-precision sine, evaluated in double and narrowed to
+ * float once.
+ *
+ *	|x| <= 2**-12		x itself is returned.  The (int) conversion
+ *				of x is stored to a volatile, presumably so
+ *				it raises inexact for nonzero x (TODO
+ *				confirm).
+ *	|x| <= pi/4		the sine polynomial is evaluated at x.
+ *	pi/4 < |x| <= 3*pi/4	pi/2 (as pio2_1 + pio2_t) is removed from
+ *				|x| and the cosine polynomial is used, negated
+ *				when x is negative.
+ *	|x| <= 2^19*pi		n is x * invpio2 rounded to the nearest
+ *				integer and y = x - n * (pio2_1 + pio2_t).
+ *	x is Inf or NaN		x / x is returned.
+ *	anything larger		__rem_pio2m() supplies y and n.
+ *
+ * After either general reduction, bit 0 of n selects the cosine or sine
+ * polynomial and bit 1 of n negates the result.
+ */
+float
+sinf(float x)
+{
+	double y, z, w;
+	float f;
+	int n, ix, hx, hy;
+	volatile int i;
+
+	hx = *((int *)&x);		/* raw bits of x */
+	ix = hx & 0x7fffffff;		/* raw bits of |x| */
+
+	y = (double)x;
+
+	if (ix <= 0x4016cbe4) {		/* |x| < 3*pi/4 */
+		if (ix <= 0x3f490fdb) {	/* |x| < pi/4 */
+			if (ix <= 0x39800000) {	/* |x| <= 2**-12 */
+				i = (int)y;
+#ifdef lint
+				i = i;
+#endif
+				return (x);
+			}
+			z = y * y;
+			return ((float)((y * (S0 + z * S1)) *
+			    (S2 + z * (S3 + z))));
+		} else if (hx > 0) {
+			y = (y - pio2_1) - pio2_t;	/* y = x - pi/2 */
+			z = y * y;
+			return ((float)(((C0 + z * C1) + (z * z) * C2) *
+			    (C3 + z * (C4 + z))));
+		} else {
+			y = (y + pio2_1) + pio2_t;	/* y = x + pi/2 */
+			z = y * y;
+			return ((float)-(((C0 + z * C1) + (z * z) * C2) *
+			    (C3 + z * (C4 + z))));
+		}
+	} else if (ix <= 0x49c90fdb) {	/* |x| < 2^19*pi */
+#if defined(__i386) && !defined(__amd64)
+		int rp;
+
+		rp = __swapRP(fp_extended);	/* returns the previous setting */
+#endif
+		w = y * invpio2;
+		if (hx < 0)
+			n = (int)(w - half);
+		else
+			n = (int)(w + half);
+		y = (y - n * pio2_1) - n * pio2_t;
+#if defined(__i386) && !defined(__amd64)
+		if (rp != fp_extended)
+			(void) __swapRP(rp);	/* put the saved setting back */
+#endif
+	} else {
+		if (ix >= 0x7f800000)
+			return (x / x);	/* sin(Inf or NaN) is NaN */
+		hy = ((int *)&y)[HIWORD];
+		n = ((hy >> 20) & 0x7ff) - 1046;	/* exponent of y, less 23 */
+		/* w = significand of |y| with exponent field 0x416 (2^23) */
+		((int *)&w)[HIWORD] = (hy & 0xfffff) | 0x41600000;
+		((int *)&w)[LOWORD] = ((int *)&y)[LOWORD];
+		n = __rem_pio2m(&w, &y, n, 1, 0, _TBL_ipio2_inf);
+		if (hy < 0) {		/* w dropped the sign of x; reapply it */
+			y = -y;
+			n = -n;
+		}
+	}
+
+	if (n & 1) {
+		/* compute cos y */
+		z = y * y;
+		f = (float)(((C0 + z * C1) + (z * z) * C2) *
+		    (C3 + z * (C4 + z)));
+	} else {
+		/* compute sin y */
+		z = y * y;
+		f = (float)((y * (S0 + z * S1)) * (S2 + z * (S3 + z)));
+	}
+
+	return ((n & 2)? -f : f);
+}
diff --git a/usr/src/lib/libm/common/R/sinhf.c b/usr/src/lib/libm/common/R/sinhf.c
new file mode 100644
index 0000000000..2e807aaacd
--- /dev/null
+++ b/usr/src/lib/libm/common/R/sinhf.c
@@ -0,0 +1,51 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma weak sinhf = __sinhf
+
+#include "libm.h"
+/*
+ * sinhf(x): single-precision hyperbolic sine, computed through the
+ * double-precision sinh().
+ *
+ *	x is Inf or NaN		x * 1.0f is returned.
+ *	|x| >= 128		(raw bits >= 0x43000000) sinh() is not
+ *				called; a double of magnitude 1e100 carrying
+ *				the sign of x is narrowed to float instead,
+ *				so the overflow comes from that conversion.
+ *	otherwise		sinh((double)x) narrowed to float.
+ */
+float
+sinhf(float x) {
+	double s;
+	float w;
+	int hx, ix;
+
+	hx = *(int *)&x;		/* raw bits of x */
+	ix = hx & ~0x80000000;		/* raw bits of |x| */
+	if (ix >= 0x7f800000) {
+		/* sinhf(x) is x if x is +-Inf or NaN */
+		return (x * 1.0f);
+	}
+	if (ix >= 0x43000000)	/* sinhf(x) trivially overflows */
+		s = (hx < 0)? -1.0e100 : 1.0e100;
+	else
+		s = sinh((double)x);
+	w = (float)s;
+	return (w);
+}
diff --git a/usr/src/lib/libm/common/R/sqrtf.c b/usr/src/lib/libm/common/R/sqrtf.c
new file mode 100644
index 0000000000..125fd4b03d
--- /dev/null
+++ b/usr/src/lib/libm/common/R/sqrtf.c
@@ -0,0 +1,109 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc.
All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma weak sqrtf = __sqrtf
+
+#include "libm.h"
+
+#ifdef __INLINE
+
+extern float __inline_sqrtf(float);
+/*
+ * sqrtf(x), __INLINE build: forward directly to __inline_sqrtf(), which
+ * is declared here and defined elsewhere.
+ */
+float
+sqrtf(float x) {
+	return (__inline_sqrtf(x));
+}
+
+#else	/* defined(__INLINE) */
+
+static const float huge = 1.0e35F, tiny = 1.0e-35F, zero = 0.0f;
+/*
+ * sqrtf(x), portable build: integer, bit-by-bit square root.
+ *
+ *  - +-0 is returned unchanged; any other input whose sign bit is set
+ *    yields (w * zero) / zero; +Inf and NaN yield w * w.
+ *  - Otherwise x is scaled by 2**-m with m = ir_ilogb_(&w), and the
+ *    24-bit significand (implicit bit restored) is kept in ix.  If m is
+ *    odd the significand is doubled and m reduced by one, so n = m / 2
+ *    is exact.  (ir_ilogb_ and r_scalbn_ are presumably by-reference
+ *    ilogb/scalbn helpers declared through libm.h - TODO confirm.)
+ *  - The loop produces the root in q one bit per iteration, with one
+ *    extra low-order bit.  If a remainder is left, the huge/tiny sums
+ *    both raise inexact and reveal the rounding direction, which decides
+ *    how q is adjusted before the extra bit is shifted out.
+ *  - The result is assembled as a float with bits (q >> 1) + 0x3f000000
+ *    and scaled by 2**n.
+ */
+float
+sqrtf(float x) {
+	float dz, w;
+	int *pw = (int *)&w;
+	int ix, j, r, q, m, n, s, t;
+
+	w = x;
+	ix = pw[0];
+	if (ix <= 0) {
+		/* x is <= 0 or nan */
+		j = ix & 0x7fffffff;
+		if (j == 0)
+			return (w);	/* +-0 */
+		return ((w * zero) / zero);
+	}
+
+	if ((ix & 0x7f800000) == 0x7f800000) {
+		/* x is +inf or nan */
+		return (w * w);
+	}
+
+	m = ir_ilogb_(&w);
+	n = -m;
+	w = r_scalbn_(&w, (int *)&n);
+	ix = (pw[0] & 0x007fffff) | 0x00800000;	/* significand, implicit 1 set */
+	n = m / 2;
+	if ((n + n) != m) {		/* m is odd */
+		ix = ix + ix;
+		m -= 1;
+		n = m / 2;
+	}
+
+	/* generate sqrt(x) bit by bit */
+	ix <<= 1;
+	q = s = 0;
+	r = 0x01000000;			/* the result bit being tried */
+	for (j = 1; j <= 25; j++) {
+		t = s + r;
+		if (t <= ix) {
+			s = t + r;
+			ix -= t;
+			q += r;
+		}
+		ix <<= 1;
+		r >>= 1;
+	}
+	if (ix == 0)			/* no remainder: result is exact */
+		goto done;
+
+	/* raise inexact and determine the ambient rounding mode */
+	dz = huge - tiny;
+	if (dz < huge)
+		goto done;
+	dz = huge + tiny;
+	if (dz > huge)
+		q += 1;
+	q += (q & 1);
+
+done:
+	pw[0] = (q >> 1) + 0x3f000000;
+	return (r_scalbn_(&w, (int *)&n));
+}
+
+#endif	/* defined(__INLINE) */
diff --git a/usr/src/lib/libm/common/R/tanf.c b/usr/src/lib/libm/common/R/tanf.c
new file mode 100644
index 0000000000..cc57c66bbb
--- /dev/null
+++ b/usr/src/lib/libm/common/R/tanf.c
@@ -0,0 +1,159 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma weak tanf = __tanf
+
+#include "libm.h"
+
+extern const int _TBL_ipio2_inf[];
+extern int __rem_pio2m(double *, double *, int, int, int, const int *);
+#if defined(__i386) && !defined(__amd64)
+extern int __swapRP(int);
+#endif
+/*
+ * All double constants used by tanf(): 1.0, the factored polynomial
+ * coefficients P0-P7, the short-form coefficients T0 and T1 used for
+ * very small arguments, then 2/pi, 0.5, and pi/2 split into a leading
+ * part (pio2_1) and a small tail (pio2_t).
+ */
+static const double C[] = {
+	1.0,
+	4.46066928428959230679140546271810308098793029785e-0003,	/* P0 */
+	4.92165316309189027066395283327437937259674072266e+0000,	/* P1 */
+	-7.11410648161473480044492134766187518835067749023e-0001,	/* P2 */
+	4.08549808374053391446523164631798863410949707031e+0000,	/* P3 */
+	2.50411070398050927821032018982805311679840087891e+0000,	/* P4 */
+	1.11492064560251158411574579076841473579406738281e+0001,	/* P5 */
+	-1.50565540968422650891511693771462887525558471680e+0000,	/* P6 */
+	-1.81484378878349295050043110677506774663925170898e+0000,	/* P7 */
+	3.333335997532835641297409611782510896641e-0001,	/* T0 */
+	2.999997598248363761541668282006867229939e+00,		/* T1 */
+	0.636619772367581343075535, /* 2^ -1 * 1.45F306DC9C883 */
+	0.5,
+	1.570796326734125614166, /* 2^ 0 * 1.921FB54400000 */
+	6.077100506506192601475e-11, /* 2^-34 * 1.0B4611A626331 */
+};
+
+#define	one	C[0]
+#define	P0	C[1]
+#define	P1	C[2]
+#define	P2	C[3]
+#define	P3	C[4]
+#define	P4	C[5]
+#define	P5	C[6]
+#define	P6	C[7]
+#define	P7	C[8]
+#define	T0	C[9]
+#define	T1	C[10]
+#define	invpio2	C[11]
+#define	half	C[12]
+#define	pio2_1	C[13]
+#define	pio2_t	C[14]
+/*
+ * tanf(x): single-precision tangent, evaluated in double and narrowed to
+ * float once.
+ *
+ * Two approximations of tan(y) are used on a reduced argument y:
+ *	short form  (y * T0) * (T1 + y*y)		for |y| < 2**-7
+ *	long form   (P0*y)(P1 + z(P2 + z))(P3 + z(P4 + z))
+ *		    (P5 + z(P6 + z(P7 + z))), z = y*y	otherwise
+ *
+ *	|x| <= 2**-12		x itself is returned.  The (int) conversion
+ *				of x is stored to a volatile, presumably so
+ *				it raises inexact for nonzero x (TODO
+ *				confirm).
+ *	|x| <= pi/4		the approximation is applied to x.
+ *	pi/4 < |x| <= 3*pi/4	pi/2 (as pio2_1 + pio2_t) is removed from
+ *				|x| and -1 / (approximation) is returned.
+ *	|x| <= 2^19*pi		n is x * invpio2 rounded to the nearest
+ *				integer and y = x - n * (pio2_1 + pio2_t).
+ *	x is Inf or NaN		x / x is returned.
+ *	anything larger		__rem_pio2m() supplies y and n.
+ *
+ * After either general reduction the approximation t of tan(y) is
+ * returned as is when n is even and as -1 / t when n is odd.
+ */
+float
+tanf(float x)
+{
+	double y, z, w;
+	float f;
+	int n, ix, hx, hy;
+	volatile int i;
+
+	hx = *((int *)&x);		/* raw bits of x */
+	ix = hx & 0x7fffffff;		/* raw bits of |x| */
+
+	y = (double)x;
+
+	if (ix <= 0x4016cbe4) {		/* |x| < 3*pi/4 */
+		if (ix <= 0x3f490fdb) {	/* |x| < pi/4 */
+			if (ix < 0x3c000000) {	/* |x| < 2**-7 */
+				if (ix <= 0x39800000) {	/* |x| <= 2**-12 */
+					i = (int)y;
+#ifdef lint
+					i = i;
+#endif
+					return (x);
+				}
+				return ((float)((y * T0) * (T1 + y * y)));
+			}
+			z = y * y;
+			return ((float)(((P0 * y) * (P1 + z * (P2 + z)) *
+			    (P3 + z * (P4 + z))) *
+			    (P5 + z * (P6 + z * (P7 + z)))));
+		}
+		if (hx > 0)
+			y = (y - pio2_1) - pio2_t;	/* y = x - pi/2 */
+		else
+			y = (y + pio2_1) + pio2_t;	/* y = x + pi/2 */
+		hy = ((int *)&y)[HIWORD] & ~0x80000000;
+		if (hy < 0x3f800000) {	/* |y| < 2**-7 */
+			z = (y * T0) * (T1 + y * y);
+			return ((float)(-one / z));
+		}
+		z = y * y;
+		w = ((P0 * y) * (P1 + z * (P2 + z)) * (P3 + z * (P4 + z))) *
+		    (P5 + z * (P6 + z * (P7 + z)));
+		return ((float)(-one / w));
+	}
+
+	if (ix <= 0x49c90fdb) {	/* |x| < 2^19*pi */
+#if defined(__i386) && !defined(__amd64)
+		int rp;
+
+		rp = __swapRP(fp_extended);	/* returns the previous setting */
+#endif
+		w = y * invpio2;
+		if (hx < 0)
+			n = (int)(w - half);
+		else
+			n = (int)(w + half);
+		y = (y - n * pio2_1) - n * pio2_t;
+#if defined(__i386) && !defined(__amd64)
+		if (rp != fp_extended)
+			(void) __swapRP(rp);	/* put the saved setting back */
+#endif
+	} else {
+		if (ix >= 0x7f800000)
+			return (x / x);	/* tan(Inf or NaN) is NaN */
+		hy = ((int *)&y)[HIWORD];
+		n = ((hy >> 20) & 0x7ff) - 1046;	/* exponent of y, less 23 */
+		/* w = significand of |y| with exponent field 0x416 (2^23) */
+		((int *)&w)[HIWORD] = (hy & 0xfffff) | 0x41600000;
+		((int *)&w)[LOWORD] = ((int *)&y)[LOWORD];
+		n = __rem_pio2m(&w, &y, n, 1, 0, _TBL_ipio2_inf);
+		if (hy < 0) {		/* w dropped the sign of x; reapply it */
+			y = -y;
+			n = -n;
+		}
+	}
+
+	hy = ((int *)&y)[HIWORD] & ~0x80000000;
+	if (hy < 0x3f800000) {	/* |y| < 2**-7 */
+		z = (y * T0) * (T1 + y * y);
+		f = ((n & 1) == 0)? (float)z : (float)(-one / z);
+		return (f);
+	}
+	z = y * y;
+	w = ((P0 * y) * (P1 + z * (P2 + z)) * (P3 + z * (P4 + z))) *
+	    (P5 + z * (P6 + z * (P7 + z)));
+	f = ((n & 1) == 0)? (float)w : (float)(-one / w);
+	return (f);
+}
diff --git a/usr/src/lib/libm/common/R/tanhf.c b/usr/src/lib/libm/common/R/tanhf.c
new file mode 100644
index 0000000000..aed4b24613
--- /dev/null
+++ b/usr/src/lib/libm/common/R/tanhf.c
@@ -0,0 +1,42 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma weak tanhf = __tanhf
+
+#include "libm.h"
+/*
+ * tanhf(x): single-precision hyperbolic tangent, computed as the
+ * double-precision tanh() of x narrowed to float.  When
+ * FPADD_TRAPS_INCOMPLETE_ON_NAN is defined, a NaN argument is answered
+ * with x * x without calling tanh().
+ */
+float
+tanhf(float x) {
+#if defined(FPADD_TRAPS_INCOMPLETE_ON_NAN)
+	if (isnanf(x))
+		return (x * x);
+	else
+#endif
+		return ((float) tanh((double) x));
+}
diff --git a/usr/src/lib/libm/common/complex/cabs.c b/usr/src/lib/libm/common/complex/cabs.c
new file mode 100644
index 0000000000..a41233347e
--- /dev/null
+++ b/usr/src/lib/libm/common/complex/cabs.c
@@ -0,0 +1,183 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak cabs = __cabs + +#include "libm_synonyms.h" +#include <math.h> +#include "complex_wrapper.h" + +/* + * If C were the only standard we cared about, cabs could just call + * hypot. Unfortunately, various other standards say that hypot must + * call matherr and/or set errno to ERANGE when the result overflows. + * Since cabs should do neither of these things, we have to either + * make hypot a wrapper on another internal function or duplicate + * the hypot implementation here. I've chosen to do the latter. 
+ */
+
+static const double
+	zero = 0.0,
+	onep1u = 1.00000000000000022204e+00,	/* 0x3ff00000 1 = 1+2**-52 */
+	twom53 = 1.11022302462515654042e-16,	/* 0x3ca00000 0 = 2**-53 */
+	twom768 = 6.441148769597133308e-232,	/* 2^-768 */
+	two768 = 1.552518092300708935e+231;	/* 2^768 */
+/*
+ * cabs(z): magnitude of the complex number z, i.e. the value of
+ * sqrt(x*x + y*y) for x = D_RE(z) and y = D_IM(z), computed without
+ * going through hypot().
+ *
+ * Outline:
+ *  - The operands are ordered so that ax is the larger magnitude and ay
+ *    the smaller; ix/lx and iy/ly are the matching high/low words and
+ *    j is the difference of their biased exponents.
+ *  - If either operand is infinite, that infinity is returned; otherwise
+ *    a NaN operand gives ax * ay.
+ *  - When the exponents differ by more than 32, ax plus ay (scaled by
+ *    2**-53 if the difference is at most 53) is returned, so the result
+ *    is ax rounded with inexact raised.
+ *  - Large operands are pre-scaled by 2**-768 and tiny ones by 2**768;
+ *    iscale (2 or 1) records which, and the result is scaled back at the
+ *    end.
+ *  - The core computation picks one of three formulas, depending on the
+ *    low-order bits of the operands and on the rounding mode detected at
+ *    run time.
+ */
+double
+cabs(dcomplex z)
+{
+	double x, y, xh, yh, w, ax, ay;
+	int i, j, nx, ny, ix, iy, iscale = 0;
+	unsigned lx, ly;
+
+	x = D_RE(z);
+	y = D_IM(z);
+
+	ix = ((int *)&x)[HIWORD] & ~0x80000000;	/* high word of |x| */
+	lx = ((int *)&x)[LOWORD];
+	iy = ((int *)&y)[HIWORD] & ~0x80000000;	/* high word of |y| */
+	ly = ((int *)&y)[LOWORD];
+
+	/* force ax = |x| ~>~ ay = |y| */
+	if (iy > ix) {
+		ax = fabs(y);
+		ay = fabs(x);
+		i = ix;
+		ix = iy;
+		iy = i;
+		i = lx;
+		lx = ly;
+		ly = i;
+	} else {
+		ax = fabs(x);
+		ay = fabs(y);
+	}
+	nx = ix >> 20;			/* biased exponent of ax */
+	ny = iy >> 20;			/* biased exponent of ay */
+	j = nx - ny;
+
+	if (nx >= 0x5f3) {
+		/* x >= 2^500 (x*x or y*y may overflow) */
+		if (nx == 0x7ff) {
+			/* inf or NaN, signal of sNaN */
+			if (((ix - 0x7ff00000) | lx) == 0)
+				return ((ax == ay)? ay : ax);
+			else if (((iy - 0x7ff00000) | ly) == 0)
+				return ((ay == ax)? ax : ay);
+			else
+				return (ax * ay);
+		} else if (j > 32) {
+			/* x >> y */
+			if (j <= 53)
+				ay *= twom53;
+			ax += ay;
+			return (ax);
+		}
+		ax *= twom768;
+		ay *= twom768;
+		iscale = 2;		/* undo with two768 at the end */
+		ix -= 768 << 20;
+		iy -= 768 << 20;
+	} else if (ny < 0x23d) {
+		/* y < 2^-450 (x*x or y*y may underflow) */
+		if ((ix | lx) == 0)
+			return (ay);
+		if ((iy | ly) == 0)
+			return (ax);
+		if (j > 53)		/* x >> y */
+			return (ax + ay);
+		iscale = 1;		/* undo with twom768 at the end */
+		ax *= two768;
+		ay *= two768;
+		if (nx == 0) {
+			if (ax == zero)	/* guard subnormal flush to zero */
+				return (ax);
+			ix = ((int *)&ax)[HIWORD];
+		} else {
+			ix += 768 << 20;
+		}
+		if (ny == 0) {
+			if (ay == zero)	/* guard subnormal flush to zero */
+				return (ax * twom768);
+			iy = ((int *)&ay)[HIWORD];
+		} else {
+			iy += 768 << 20;
+		}
+		j = (ix >> 20) - (iy >> 20);
+		if (j > 32) {
+			/* x >> y */
+			if (j <= 53)
+				ay *= twom53;
+			return ((ax + ay) * twom768);
+		}
+	} else if (j > 32) {
+		/* x >> y */
+		if (j <= 53)
+			ay *= twom53;
+		return (ax + ay);
+	}
+
+	/*
+	 * Medium range ax and ay with max{|ax/ay|,|ay/ax|} bounded by 2^32.
+	 * First check rounding mode by comparing onep1u*onep1u with onep1u
+	 * + twom53.  Make sure the computation is done at run-time.
+	 */
+	if (((lx | ly) << 5) == 0) {	/* low 27 bits of both low words clear */
+		ay = ay * ay;
+		ax += ay / (ax + sqrt(ax * ax + ay));
+	} else if (onep1u * onep1u != onep1u + twom53) {
+		/* round-to-zero, positive, negative mode */
+		/* magic formula with less than an ulp error */
+		w = sqrt(ax * ax + ay * ay);
+		ax += ay / ((ax + w) / ay);
+	} else {
+		/* round-to-nearest mode */
+		w = ax - ay;
+		if (w > ay) {
+			/* xh = ax with its low word cleared */
+			((int *)&xh)[HIWORD] = ix;
+			((int *)&xh)[LOWORD] = 0;
+			ay = ay * ay + (ax - xh) * (ax + xh);
+			ax = sqrt(xh * xh + ay);
+		} else {
+			ax = ax + ax;
+			/* xh, yh = 2*ax and ay with their low words cleared */
+			((int *)&xh)[HIWORD] = ix + 0x00100000;
+			((int *)&xh)[LOWORD] = 0;
+			((int *)&yh)[HIWORD] = iy;
+			((int *)&yh)[LOWORD] = 0;
+			ay = w * w + ((ax - xh) * yh + (ay - yh) * ax);
+			ax = sqrt(xh * yh + ay);
+		}
+	}
+	if (iscale > 0) {
+		if (iscale == 1)
+			ax *= twom768;
+		else
+			ax *= two768;	/* must generate side effect here */
+	}
+	return (ax);
+}
diff --git a/usr/src/lib/libm/common/complex/cabsf.c b/usr/src/lib/libm/common/complex/cabsf.c
new file mode 100644
index 0000000000..a9f61027fb
--- /dev/null
+++ b/usr/src/lib/libm/common/complex/cabsf.c
@@ -0,0 +1,38 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2011 Nexenta Systems, Inc.
All rights reserved.
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma weak cabsf = __cabsf
+
+#include "libm.h"
+#include "complex_wrapper.h"
+/*
+ * cabsf(z): magnitude of the float complex z, obtained by passing its
+ * real part F_RE(z) and imaginary part F_IM(z) to hypotf().  F_RE and
+ * F_IM presumably come from complex_wrapper.h.
+ */
+float
+cabsf(fcomplex z) {
+	return (hypotf(F_RE(z), F_IM(z)));
+}
diff --git a/usr/src/lib/libm/common/complex/cabsl.c b/usr/src/lib/libm/common/complex/cabsl.c
new file mode 100644
index 0000000000..10b029f95c
--- /dev/null
+++ b/usr/src/lib/libm/common/complex/cabsl.c
@@ -0,0 +1,38 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma weak cabsl = __cabsl
+
+#include "libm.h"
+#include "complex_wrapper.h"
+/*
+ * cabsl(z): magnitude of the long double complex z, obtained by passing
+ * its real part LD_RE(z) and imaginary part LD_IM(z) to hypotl().  LD_RE
+ * and LD_IM presumably come from complex_wrapper.h.
+ */
+long double
+cabsl(ldcomplex z) {
+	return (hypotl(LD_RE(z), LD_IM(z)));
+}
diff --git a/usr/src/lib/libm/common/complex/cacos.c b/usr/src/lib/libm/common/complex/cacos.c
new file mode 100644
index 0000000000..4fccae23bb
--- /dev/null
+++ b/usr/src/lib/libm/common/complex/cacos.c
@@ -0,0 +1,404 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma weak cacos = __cacos
+
+/* INDENT OFF */
+/*
+ * dcomplex cacos(dcomplex z);
+ *
+ * Algorithm
+ * (based on T.E.Hull, Thomas F.
Fairgrieve and Ping Tak Peter Tang's
+ * paper "Implementing the Complex Arcsine and Arccosine Functions Using
+ * Exception Handling", ACM TOMS, Vol 23, pp 299-335)
+ *
+ * The principal value of the complex inverse cosine function cacos(z),
+ * where z = x+iy, can be defined by
+ *
+ *	cacos(z) = acos(B) - i sign(y) log (A + sqrt(A*A-1)),
+ *
+ * where the log function is the natural log, and
+ *
+ *	A = (1/2) sqrt((x+1)^2 + y^2) + (1/2) sqrt((x-1)^2 + y^2)
+ *
+ *	B = (1/2) sqrt((x+1)^2 + y^2) - (1/2) sqrt((x-1)^2 + y^2).
+ *
+ * The branch cuts are on the real line from -inf to -1 and from 1 to inf.
+ * The real and imaginary parts are based on Abramowitz and Stegun
+ * [Handbook of Mathematical Functions, 1972]. The sign of the imaginary
+ * part is chosen to match what is generally considered the principal
+ * value of this function.
+ *
+ * Notes:1. A is the average of the distances from z to the points (1,0)
+ *	and (-1,0) in the complex z-plane, and in particular A>=1.
+ *	2.
B is in [-1,1], and A*B = x
+ *
+ * Basic relations
+ *	cacos(conj(z)) = conj(cacos(z))
+ *	cacos(-z) = pi - cacos(z)
+ *	cacos( z) = pi/2 - casin(z)
+ *
+ * Special cases (conform to ISO/IEC 9899:1999(E)):
+ *	cacos(+-0 + i y ) = pi/2 - i y for y = +-0, +-inf, NaN
+ *	cacos( x + i inf) = pi/2 - i inf for all x
+ *	cacos( x + i NaN) = NaN + i NaN with invalid for non-zero finite x
+ *	cacos(-inf + i y ) = pi - i inf for finite +y
+ *	cacos( inf + i y ) = 0 - i inf for finite +y
+ *	cacos(-inf + i inf) = 3pi/4- i inf
+ *	cacos( inf + i inf) = pi/4 - i inf
+ *	cacos(+-inf+ i NaN) = NaN - i inf (sign of imaginary is unspecified)
+ *	cacos(NaN + i y ) = NaN + i NaN with invalid for finite y
+ *	cacos(NaN + i inf) = NaN - i inf
+ *	cacos(NaN + i NaN) = NaN + i NaN
+ *
+ * Special Regions (better formula for accuracy and for avoiding spurious
+ * overflow or underflow) (all x and y are assumed nonnegative):
+ *  case 1: y = 0
+ *  case 2: tiny y relative to x-1: y <= ulp(0.5)*|x-1|
+ *  case 3: tiny y: y < 4 sqrt(u), where u = minimum normal number
+ *  case 4: huge y relative to x+1: y >= (1+x)/ulp(0.5)
+ *  case 5: huge x and y: x and y >= sqrt(M)/8, where M = maximum normal number
+ *  case 6: tiny x: x < 4 sqrt(u)
+ *  --------
+ *  case 1 & 2. y=0 or y/|x-1| is tiny. We have
+ *
+ *	sqrt((x+-1)^2 + y^2) = |x+-1| * sqrt(1 + (y/|x+-1|)^2)
+ *
+ *			     ~ |x+-1| * (1 + (1/2) * (y/|x+-1|)^2)
+ *
+ *			     = |x+-1| + y^2/(2|x+-1|).
+ *
+ *	Consequently, it is not difficult to see that
+ *
+ *	     [ 1 + y^2/(2(1+x)(1-x)),		if x < 1,
+ *	A ~= [ x,				if x = 1 (y = 0),
+ *	     [ x + x*y^2/(2(x+1)(x-1)) ~ x,	if x > 1
+ *
+ *	and hence
+ *
+ *	A + sqrt(A^2 - 1) ~ 1 + y/sqrt((x+1)(1-x)) + y^2/(2(x+1)(1-x)),
+ *						if x < 1,
+ *
+ *			  ~ x + sqrt((x-1)*(x+1)), if x >= 1.
+ *
+ *		  [ x(1 - y^2/(2(1+x)(1-x))) ~ x,	if x < 1,
+ *	B = x/A ~ [ 1,					if x = 1,
+ *		  [ 1 - y^2/(2(x+1)(x-1)),		if x > 1,
+ *	Thus
+ *			[ acos(x) - i y/sqrt((1-x)*(1+x)), if x < 1,
+ *			[
+ *	cacos(x+i*y)~	[ 0 - i 0, if x = 1,
+ *			[
+ *			[ y/sqrt(x*x-1) - i log(x+sqrt(x*x-1)), if x > 1.
+ *
+ *	Note: y/sqrt(x*x-1) ~ y/x when x >= 2**26.
+ *  case 3. y < 4 sqrt(u), where u = minimum normal x.
+ *	After case 1 and 2, this can only occur when x=1. When x=1, we have
+ *		A = (sqrt(4+y*y)+y)/2 ~ 1 + y/2 + y^2/8 + ...
+ *	and
+ *		B = 1/A = 1 - y/2 + y^2/8 + ...
+ *	Since
+ *		cos(sqrt(y)) ~ 1 - y/2 + ...
+ *	we have, for the real part,
+ *		acos(B) ~ acos(1 - y/2) ~ sqrt(y)
+ *	For the imaginary part,
+ *		log(A+sqrt(A*A-1)) ~ log(1+y/2+sqrt(2*y/2))
+ *				   = log(1+y/2+sqrt(y))
+ *				   = (y/2+sqrt(y)) - (y/2+sqrt(y))^2/2 + ...
+ *				   ~ sqrt(y) - y*(sqrt(y)+y/2)/2
+ *				   ~ sqrt(y)
+ *
+ *  case 4. y >= (x+1)/ulp(0.5). In this case, A ~ y and B ~ x/y. Thus
+ *		real part = acos(B) ~ pi/2
+ *	and
+ *		imag part = log(y+sqrt(y*y-one))
+ *
+ *  case 5. Both x and y are large: x and y > sqrt(M)/8, where M = maximum x
+ *	In this case,
+ *		A ~ sqrt(x*x+y*y)
+ *		B ~ x/sqrt(x*x+y*y).
+ *	Thus
+ *		real part = acos(B) = atan(y/x),
+ *		imag part = log(A+sqrt(A*A-1)) ~ log(2A)
+ *			  = log(2) + 0.5*log(x*x+y*y)
+ *			  = log(2) + log(y) + 0.5*log(1+(x/y)^2)
+ *
+ *  case 6. x < 4 sqrt(u). In this case, we have
+ *		A ~ sqrt(1+y*y), B = x/sqrt(1+y*y).
+ * Since B is tiny, we have + * real part = acos(B) ~ pi/2 + * imag part = log(A+sqrt(A*A-1)) = log (A+sqrt(y*y)) + * = log(y+sqrt(1+y*y)) + * = 0.5*log(y^2+2ysqrt(1+y^2)+1+y^2) + * = 0.5*log(1+2y(y+sqrt(1+y^2))); + * = 0.5*log1p(2y(y+A)); + * + * cacos(z) = acos(B) - i sign(y) log (A + sqrt(A*A-1)), + */ +/* INDENT ON */ + +#include "libm.h" +#include "complex_wrapper.h" + +/* INDENT OFF */ +static const double + zero = 0.0, + one = 1.0, + E = 1.11022302462515654042e-16, /* 2**-53 */ + ln2 = 6.93147180559945286227e-01, + pi = 3.1415926535897931159979634685, + pi_l = 1.224646799147353177e-16, + pi_2 = 1.570796326794896558e+00, + pi_2_l = 6.123233995736765886e-17, + pi_4 = 0.78539816339744827899949, + pi_4_l = 3.061616997868382943e-17, + pi3_4 = 2.356194490192344836998, + pi3_4_l = 9.184850993605148829195e-17, + Foursqrtu = 5.96667258496016539463e-154, /* 2**(-509) */ + Acrossover = 1.5, + Bcrossover = 0.6417, + half = 0.5; +/* INDENT ON */ + +dcomplex +cacos(dcomplex z) { + double x, y, t, R, S, A, Am1, B, y2, xm1, xp1, Apx; + int ix, iy, hx, hy; + unsigned lx, ly; + dcomplex ans; + + x = D_RE(z); + y = D_IM(z); + hx = HI_WORD(x); + lx = LO_WORD(x); + hy = HI_WORD(y); + ly = LO_WORD(y); + ix = hx & 0x7fffffff; + iy = hy & 0x7fffffff; + + /* x is 0 */ + if ((ix | lx) == 0) { + if (((iy | ly) == 0) || (iy >= 0x7ff00000)) { + D_RE(ans) = pi_2; + D_IM(ans) = -y; + return (ans); + } + } + + /* |y| is inf or NaN */ + if (iy >= 0x7ff00000) { + if (ISINF(iy, ly)) { /* cacos(x + i inf) = pi/2 - i inf */ + D_IM(ans) = -y; + if (ix < 0x7ff00000) { + D_RE(ans) = pi_2 + pi_2_l; + } else if (ISINF(ix, lx)) { + if (hx >= 0) + D_RE(ans) = pi_4 + pi_4_l; + else + D_RE(ans) = pi3_4 + pi3_4_l; + } else { + D_RE(ans) = x; + } + } else { /* cacos(x + i NaN) = NaN + i NaN */ + D_RE(ans) = y + x; + if (ISINF(ix, lx)) + D_IM(ans) = -fabs(x); + else + D_IM(ans) = y; + } + return (ans); + } + + x = fabs(x); + y = fabs(y); + + /* x is inf or NaN */ + if (ix >= 0x7ff00000) { /* x is inf or 
NaN */ + if (ISINF(ix, lx)) { /* x is INF */ + D_IM(ans) = -x; + if (iy >= 0x7ff00000) { + if (ISINF(iy, ly)) { + /* INDENT OFF */ + /* cacos(inf + i inf) = pi/4 - i inf */ + /* cacos(-inf+ i inf) =3pi/4 - i inf */ + /* INDENT ON */ + if (hx >= 0) + D_RE(ans) = pi_4 + pi_4_l; + else + D_RE(ans) = pi3_4 + pi3_4_l; + } else + /* INDENT OFF */ + /* cacos(inf + i NaN) = NaN - i inf */ + /* INDENT ON */ + D_RE(ans) = y + y; + } else + /* INDENT OFF */ + /* cacos(inf + iy ) = 0 - i inf */ + /* cacos(-inf+ iy ) = pi - i inf */ + /* INDENT ON */ + if (hx >= 0) + D_RE(ans) = zero; + else + D_RE(ans) = pi + pi_l; + } else { /* x is NaN */ + /* INDENT OFF */ + /* + * cacos(NaN + i inf) = NaN - i inf + * cacos(NaN + i y ) = NaN + i NaN + * cacos(NaN + i NaN) = NaN + i NaN + */ + /* INDENT ON */ + D_RE(ans) = x + y; + if (iy >= 0x7ff00000) { + D_IM(ans) = -y; + } else { + D_IM(ans) = x; + } + } + if (hy < 0) + D_IM(ans) = -D_IM(ans); + return (ans); + } + + if ((iy | ly) == 0) { /* region 1: y=0 */ + if (ix < 0x3ff00000) { /* |x| < 1 */ + D_RE(ans) = acos(x); + D_IM(ans) = zero; + } else { + D_RE(ans) = zero; + if (ix >= 0x43500000) /* |x| >= 2**54 */ + D_IM(ans) = ln2 + log(x); + else if (ix >= 0x3ff80000) /* x > Acrossover */ + D_IM(ans) = log(x + sqrt((x - one) * (x + + one))); + else { + xm1 = x - one; + D_IM(ans) = log1p(xm1 + sqrt(xm1 * (x + one))); + } + } + } else if (y <= E * fabs(x - one)) { /* region 2: y < tiny*|x-1| */ + if (ix < 0x3ff00000) { /* x < 1 */ + D_RE(ans) = acos(x); + D_IM(ans) = y / sqrt((one + x) * (one - x)); + } else if (ix >= 0x43500000) { /* |x| >= 2**54 */ + D_RE(ans) = y / x; + D_IM(ans) = ln2 + log(x); + } else { + t = sqrt((x - one) * (x + one)); + D_RE(ans) = y / t; + if (ix >= 0x3ff80000) /* x > Acrossover */ + D_IM(ans) = log(x + t); + else + D_IM(ans) = log1p((x - one) + t); + } + } else if (y < Foursqrtu) { /* region 3 */ + t = sqrt(y); + D_RE(ans) = t; + D_IM(ans) = t; + } else if (E * y - one >= x) { /* region 4 */ + D_RE(ans) = pi_2; + 
D_IM(ans) = ln2 + log(y); + } else if (ix >= 0x5fc00000 || iy >= 0x5fc00000) { /* x,y>2**509 */ + /* region 5: x+1 or y is very large (>= sqrt(max)/8) */ + t = x / y; + D_RE(ans) = atan(y / x); + D_IM(ans) = ln2 + log(y) + half * log1p(t * t); + } else if (x < Foursqrtu) { + /* region 6: x is very small, < 4sqrt(min) */ + D_RE(ans) = pi_2; + A = sqrt(one + y * y); + if (iy >= 0x3ff80000) /* if y > Acrossover */ + D_IM(ans) = log(y + A); + else + D_IM(ans) = half * log1p((y + y) * (y + A)); + } else { /* safe region */ + y2 = y * y; + xp1 = x + one; + xm1 = x - one; + R = sqrt(xp1 * xp1 + y2); + S = sqrt(xm1 * xm1 + y2); + A = half * (R + S); + B = x / A; + if (B <= Bcrossover) + D_RE(ans) = acos(B); + else { /* use atan and an accurate approx to a-x */ + Apx = A + x; + if (x <= one) + D_RE(ans) = atan(sqrt(half * Apx * (y2 / (R + + xp1) + (S - xm1))) / x); + else + D_RE(ans) = atan((y * sqrt(half * (Apx / (R + + xp1) + Apx / (S + xm1)))) / x); + } + if (A <= Acrossover) { + /* use log1p and an accurate approx to A-1 */ + if (x < one) + Am1 = half * (y2 / (R + xp1) + y2 / (S - xm1)); + else + Am1 = half * (y2 / (R + xp1) + (S + xm1)); + D_IM(ans) = log1p(Am1 + sqrt(Am1 * (A + one))); + } else { + D_IM(ans) = log(A + sqrt(A * A - one)); + } + } + if (hx < 0) + D_RE(ans) = pi - D_RE(ans); + if (hy >= 0) + D_IM(ans) = -D_IM(ans); + return (ans); +} diff --git a/usr/src/lib/libm/common/complex/cacosf.c b/usr/src/lib/libm/common/complex/cacosf.c new file mode 100644 index 0000000000..b693a44c05 --- /dev/null +++ b/usr/src/lib/libm/common/complex/cacosf.c @@ -0,0 +1,46 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak cacosf = __cacosf + +#include "libm.h" +#include "complex_wrapper.h" + +fcomplex +cacosf(fcomplex z) { + dcomplex dz, dans; + fcomplex ans; + + D_RE(dz) = (double) (F_RE(z)); + D_IM(dz) = (double) (F_IM(z)); + dans = cacos(dz); + F_RE(ans) = (float) (D_RE(dans)); + F_IM(ans) = (float) (D_IM(dans)); + return (ans); +} diff --git a/usr/src/lib/libm/common/complex/cacosh.c b/usr/src/lib/libm/common/complex/cacosh.c new file mode 100644 index 0000000000..6c6ffe612e --- /dev/null +++ b/usr/src/lib/libm/common/complex/cacosh.c @@ -0,0 +1,70 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak cacosh = __cacosh + +/* INDENT OFF */ +/* + * dcomplex cacosh(dcomplex z); + * cacosh z = +-i cacos z . + * In order to make conj(cacosh(z))=cacosh(conj(z)), + * we define + * cacosh z = sign(Im(z))*i cacos z . + * + */ +/* INDENT ON */ + +#include "libm.h" /* fabs/isnan/isinf/signbit */ +#include "complex_wrapper.h" + +/* need to work on special cases according to spec */ + +dcomplex +cacosh(dcomplex z) { + dcomplex w, ans; + double x, y; + + w = cacos(z); + x = D_RE(z); + y = D_IM(z); + if (isnan(y)) { + D_IM(ans) = y + y; + if (isinf(x)) + D_RE(ans) = fabs(x); + else + D_RE(ans) = y; + } else if (signbit(y) == 0) { + D_RE(ans) = -D_IM(w); + D_IM(ans) = D_RE(w); + } else { + D_RE(ans) = D_IM(w); + D_IM(ans) = -D_RE(w); + } + return (ans); +} diff --git a/usr/src/lib/libm/common/complex/cacoshf.c b/usr/src/lib/libm/common/complex/cacoshf.c new file mode 100644 index 0000000000..8feaf735e6 --- /dev/null +++ b/usr/src/lib/libm/common/complex/cacoshf.c @@ -0,0 +1,48 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak cacoshf = __cacoshf + +#include "libm.h" +#include "complex_wrapper.h" + +/* need to work on special cases according to spec */ + +fcomplex +cacoshf(fcomplex z) { + dcomplex dz, dans; + fcomplex ans; + + D_RE(dz) = (double) (F_RE(z)); + D_IM(dz) = (double) (F_IM(z)); + dans = cacosh(dz); + F_RE(ans) = (float) (D_RE(dans)); + F_IM(ans) = (float) (D_IM(dans)); + return (ans); +} diff --git a/usr/src/lib/libm/common/complex/cacoshl.c b/usr/src/lib/libm/common/complex/cacoshl.c new file mode 100644 index 0000000000..1e17db3f2e --- /dev/null +++ b/usr/src/lib/libm/common/complex/cacoshl.c @@ -0,0 +1,69 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak cacoshl = __cacoshl + +#include "libm.h" /* fabsl/isnanl/isinfl/signbitl */ +#include "complex_wrapper.h" +#include "longdouble.h" + +/* INDENT OFF */ +/* + * ldcomplex cacoshl(ldcomplex z); + * cacosh z = +-i cacos z . + * In order to make conj(cacosh(z))=cacosh(conj(z)), + * we define + * cacosh z = sign(Im(z))*i cacos z . + * + */ +/* INDENT ON */ + +ldcomplex +cacoshl(ldcomplex z) { + ldcomplex w, ans; + long double x, y; + + w = cacosl(z); + x = LD_RE(z); + y = LD_IM(z); + if (isnanl(y)) { + LD_IM(ans) = y + y; + if (isinfl(x)) + LD_RE(ans) = fabsl(x); + else + LD_RE(ans) = y; + } else if (signbitl(y) == 0) { + LD_RE(ans) = -LD_IM(w); + LD_IM(ans) = LD_RE(w); + } else { + LD_RE(ans) = LD_IM(w); + LD_IM(ans) = -LD_RE(w); + } + return (ans); +} diff --git a/usr/src/lib/libm/common/complex/cacosl.c b/usr/src/lib/libm/common/complex/cacosl.c new file mode 100644 index 0000000000..a10f9801dd --- /dev/null +++ b/usr/src/lib/libm/common/complex/cacosl.c @@ -0,0 +1,272 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak cacosl = __cacosl + +#include "libm.h" /* acosl/atanl/fabsl/isinfl/log1pl/logl/sqrtl */ +#include "complex_wrapper.h" +#include "longdouble.h" + +/* INDENT OFF */ +static const long double +zero = 0.0L, +one = 1.0L, +Acrossover = 1.5L, +Bcrossover = 0.6417L, +half = 0.5L, +ln2 = 6.931471805599453094172321214581765680755e-0001L, +Foursqrtu = 7.3344154702193886624856495681939326638255e-2466L, /* 2**-8189 */ +#if defined(__x86) +E = 5.4210108624275221700372640043497085571289e-20L, /* 2**-64 */ +pi = 3.141592653589793238295968524909085317631252110004425048828125L, +pi_l = 1.666748583704175665659172893706807721468195923078e-19L, +pi_2 = 1.5707963267948966191479842624545426588156260L, +pi_2_l = 8.3337429185208783282958644685340386073409796e-20L, +pi_4 = 0.78539816339744830957399213122727132940781302750110626220703125L, +pi_4_l = 4.166871459260439164147932234267019303670489807695410e-20L, +pi3_4 = 2.35619449019234492872197639368181398822343908250331878662109375L, +pi3_4_l = 1.250061437778131749244379670280105791101146942308e-19L; +#else +E = 9.6296497219361792652798897129246365926905e-35L, /* 2**-113 */ +pi = 3.1415926535897932384626433832795027974790680981372955730045043318L, +pi_l = 8.6718101301237810247970440260433519687623233462565303417759356862e-35L, +pi_2 = 1.5707963267948966192313216916397513987395340L, +pi_2_l = 4.3359050650618905123985220130216759843811616e-35L, +pi_4 = 
0.785398163397448309615660845819875699369767024534323893251126L, +pi_4_l = 2.167952532530945256199261006510837992190580836564132585443e-35L, +pi3_4 = 2.35619449019234492884698253745962709810930107360297167975337824L, +pi3_4_l = 6.503857597592835768597783019532513976571742509692397756331e-35L; +#endif +/* INDENT ON */ + +#if defined(__x86) +static const int ip1 = 0x40400000; /* 2**65 */ +#else +static const int ip1 = 0x40710000; /* 2**114 */ +#endif + +ldcomplex +cacosl(ldcomplex z) { + long double x, y, t, R, S, A, Am1, B, y2, xm1, xp1, Apx; + int ix, iy, hx, hy; + ldcomplex ans; + + x = LD_RE(z); + y = LD_IM(z); + hx = HI_XWORD(x); + hy = HI_XWORD(y); + ix = hx & 0x7fffffff; + iy = hy & 0x7fffffff; + + /* x is 0 */ + if (x == zero) { + if (y == zero || (iy >= 0x7fff0000)) { + LD_RE(ans) = pi_2 + pi_2_l; + LD_IM(ans) = -y; + return (ans); + } + } + + /* |y| is inf or NaN */ + if (iy >= 0x7fff0000) { + if (isinfl(y)) { /* cacos(x + i inf) = pi/2 - i inf */ + LD_IM(ans) = -y; + if (ix < 0x7fff0000) { + LD_RE(ans) = pi_2 + pi_2_l; + } else if (isinfl(x)) { + if (hx >= 0) + LD_RE(ans) = pi_4 + pi_4_l; + else + LD_RE(ans) = pi3_4 + pi3_4_l; + } else { + LD_RE(ans) = x; + } + } else { /* cacos(x + i NaN) = NaN + i NaN */ + LD_RE(ans) = y + x; + if (isinfl(x)) + LD_IM(ans) = -fabsl(x); + else + LD_IM(ans) = y; + } + return (ans); + } + + y = fabsl(y); + + if (ix >= 0x7fff0000) { /* x is inf or NaN */ + if (isinfl(x)) { /* x is INF */ + LD_IM(ans) = -fabsl(x); + if (iy >= 0x7fff0000) { + if (isinfl(y)) { + /* INDENT OFF */ + /* cacos(inf + i inf) = pi/4 - i inf */ + /* cacos(-inf+ i inf) =3pi/4 - i inf */ + /* INDENT ON */ + if (hx >= 0) + LD_RE(ans) = pi_4 + pi_4_l; + else + LD_RE(ans) = pi3_4 + pi3_4_l; + } else + /* INDENT OFF */ + /* cacos(inf + i NaN) = NaN - i inf */ + /* INDENT ON */ + LD_RE(ans) = y + y; + } else { + /* INDENT OFF */ + /* cacos(inf + iy ) = 0 - i inf */ + /* cacos(-inf+ iy ) = pi - i inf */ + /* INDENT ON */ + if (hx >= 0) + LD_RE(ans) = zero; + 
else + LD_RE(ans) = pi + pi_l; + } + } else { /* x is NaN */ + /* INDENT OFF */ + /* + * cacos(NaN + i inf) = NaN - i inf + * cacos(NaN + i y ) = NaN + i NaN + * cacos(NaN + i NaN) = NaN + i NaN + */ + /* INDENT ON */ + LD_RE(ans) = x + y; + if (iy >= 0x7fff0000) { + LD_IM(ans) = -y; + } else { + LD_IM(ans) = x; + } + } + if (hy < 0) + LD_IM(ans) = -LD_IM(ans); + return (ans); + } + + if (y == zero) { /* region 1: y=0 */ + if (ix < 0x3fff0000) { /* |x| < 1 */ + LD_RE(ans) = acosl(x); + LD_IM(ans) = zero; + } else { + LD_RE(ans) = zero; + x = fabsl(x); + if (ix >= ip1) /* i386 ? 2**65 : 2**114 */ + LD_IM(ans) = ln2 + logl(x); + else if (ix >= 0x3fff8000) /* x > Acrossover */ + LD_IM(ans) = logl(x + sqrtl((x - one) * (x + + one))); + else { + xm1 = x - one; + LD_IM(ans) = log1pl(xm1 + sqrtl(xm1 * (x + + one))); + } + } + } else if (y <= E * fabsl(fabsl(x) - one)) { + /* region 2: y < tiny*||x|-1| */ + if (ix < 0x3fff0000) { /* x < 1 */ + LD_RE(ans) = acosl(x); + x = fabsl(x); + LD_IM(ans) = y / sqrtl((one + x) * (one - x)); + } else if (ix >= ip1) { /* i386 ? 2**65 : 2**114 */ + if (hx >= 0) + LD_RE(ans) = y / x; + else { + if (ix >= ip1 + 0x00040000) + LD_RE(ans) = pi + pi_l; + else { + t = pi_l + y / x; + LD_RE(ans) = pi + t; + } + } + LD_IM(ans) = ln2 + logl(fabsl(x)); + } else { + x = fabsl(x); + t = sqrtl((x - one) * (x + one)); + LD_RE(ans) = (hx >= 0)? y / t : pi - (y / t - pi_l); + if (ix >= 0x3fff8000) /* x > Acrossover */ + LD_IM(ans) = logl(x + t); + else + LD_IM(ans) = log1pl(t - (one - x)); + } + } else if (y < Foursqrtu) { /* region 3 */ + t = sqrtl(y); + LD_RE(ans) = (hx >= 0)? t : pi + pi_l; + LD_IM(ans) = t; + } else if (E * y - one >= fabsl(x)) { /* region 4 */ + LD_RE(ans) = pi_2 + pi_2_l; + LD_IM(ans) = ln2 + logl(y); + } else if (ix >= 0x5ffb0000 || iy >= 0x5ffb0000) { + /* region 5: x+1 and y are both (>= sqrt(max)/8) i.e. 
2**8188 */ + t = x / y; + LD_RE(ans) = atan2l(y, x); + LD_IM(ans) = ln2 + logl(y) + half * log1pl(t * t); + } else if (fabsl(x) < Foursqrtu) { + /* region 6: x is very small, < 4sqrt(min) */ + LD_RE(ans) = pi_2 + pi_2_l; + A = sqrtl(one + y * y); + if (iy >= 0x3fff8000) /* if y > Acrossover */ + LD_IM(ans) = logl(y + A); + else + LD_IM(ans) = half * log1pl((y + y) * (y + A)); + } else { /* safe region */ + t = fabsl(x); + y2 = y * y; + xp1 = t + one; + xm1 = t - one; + R = sqrtl(xp1 * xp1 + y2); + S = sqrtl(xm1 * xm1 + y2); + A = half * (R + S); + B = t / A; + + if (B <= Bcrossover) + LD_RE(ans) = (hx >= 0)? acosl(B) : acosl(-B); + else { /* use atan and an accurate approx to a-x */ + Apx = A + t; + if (t <= one) + LD_RE(ans) = atan2l(sqrtl(half * Apx * (y2 / + (R + xp1) + (S - xm1))), x); + else + LD_RE(ans) = atan2l((y * sqrtl(half * (Apx / + (R + xp1) + Apx / (S + xm1)))), x); + } + if (A <= Acrossover) { + /* use log1p and an accurate approx to A-1 */ + if (ix < 0x3fff0000) + Am1 = half * (y2 / (R + xp1) + y2 / (S - xm1)); + else + Am1 = half * (y2 / (R + xp1) + (S + xm1)); + LD_IM(ans) = log1pl(Am1 + sqrtl(Am1 * (A + one))); + } else { + LD_IM(ans) = logl(A + sqrtl(A * A - one)); + } + } + + if (hy >= 0) + LD_IM(ans) = -LD_IM(ans); + + return (ans); +} diff --git a/usr/src/lib/libm/common/complex/carg.c b/usr/src/lib/libm/common/complex/carg.c new file mode 100644 index 0000000000..a79bca005b --- /dev/null +++ b/usr/src/lib/libm/common/complex/carg.c @@ -0,0 +1,53 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak carg = __carg + +#include "libm_synonyms.h" +#include <math.h> /* atan2 */ +#include "complex_wrapper.h" + +static const double + pi = 3.14159265358979311600e+00, + pi_lo = 1.22464679914735320717e-16; + +double +carg(dcomplex z) { + int ix, iy; + + ix = ((int *)&(D_RE(z)))[HIWORD]; + iy = ((int *)&(D_IM(z)))[HIWORD]; + if ((((ix | iy) & ~0x80000000) | ((int *)&(D_RE(z)))[LOWORD] | + ((int *)&(D_IM(z)))[LOWORD]) == 0) { + /* x and y are both zero */ + if (ix == 0) + return (D_IM(z)); + return ((iy == 0)? pi + pi_lo : -pi - pi_lo); + } + return (atan2(D_IM(z), D_RE(z))); +} diff --git a/usr/src/lib/libm/common/complex/cargf.c b/usr/src/lib/libm/common/complex/cargf.c new file mode 100644 index 0000000000..30e25aafac --- /dev/null +++ b/usr/src/lib/libm/common/complex/cargf.c @@ -0,0 +1,38 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak cargf = __cargf + +#include "libm.h" /* atan2f */ +#include "complex_wrapper.h" + +float +cargf(fcomplex z) { + return (atan2f(F_IM(z), F_RE(z))); +} diff --git a/usr/src/lib/libm/common/complex/cargl.c b/usr/src/lib/libm/common/complex/cargl.c new file mode 100644 index 0000000000..e3338da415 --- /dev/null +++ b/usr/src/lib/libm/common/complex/cargl.c @@ -0,0 +1,38 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma weak cargl = __cargl + +#include "libm.h" +#include "complex_wrapper.h" + +long double +cargl(ldcomplex z) { + return (atan2l(LD_IM(z), LD_RE(z))); +} diff --git a/usr/src/lib/libm/common/complex/casin.c b/usr/src/lib/libm/common/complex/casin.c new file mode 100644 index 0000000000..5fdbb63dc2 --- /dev/null +++ b/usr/src/lib/libm/common/complex/casin.c @@ -0,0 +1,379 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak casin = __casin + +/* INDENT OFF */ +/* + * dcomplex casin(dcomplex z); + * + * Alogrithm + * (based on T.E.Hull, Thomas F. 
Fairgrieve and Ping Tak Peter Tang's
 * paper "Implementing the Complex Arcsine and Arccosine Functions Using
 * Exception Handling", ACM TOMS, Vol 23, pp 299-335)
 *
 * The principal value of complex inverse sine function casin(z),
 * where z = x+iy, can be defined by
 *
 *	casin(z) = asin(B) + i sign(y) log (A + sqrt(A*A-1)),
 *
 * where the log function is the natural log, and
 *
 *	A = (1/2) sqrt((x+1)^2 + y^2) + (1/2) sqrt((x-1)^2 + y^2)
 *
 *	B = (1/2) sqrt((x+1)^2 + y^2) - (1/2) sqrt((x-1)^2 + y^2).
 *
 * The branch cuts are on the real line from -inf to -1 and from 1 to inf.
 * The real and imaginary parts are based on Abramowitz and Stegun
 * [Handbook of Mathematical Functions, 1972].  The sign of the imaginary
 * part is chosen to be the one generally considered the principal value
 * of this function.
 *
 * Notes:1. A is the average of the distances from z to the points (1,0)
 *	    and (-1,0) in the complex z-plane, and in particular A>=1.
 *	 2. B is in [-1,1], and A*B = x.
+ * + * Special notes: if casin( x, y) = ( u, v), then + * casin(-x, y) = (-u, v), + * casin( x,-y) = ( u,-v), + * in general, we have casin(conj(z)) = conj(casin(z)) + * casin(-z) = -casin(z) + * casin(z) = pi/2 - cacos(z) + * + * EXCEPTION CASES (conform to ISO/IEC 9899:1999(E)): + * casin( 0 + i 0 ) = 0 + i 0 + * casin( 0 + i NaN ) = 0 + i NaN + * casin( x + i inf ) = 0 + i inf for finite x + * casin( x + i NaN ) = NaN + i NaN with invalid for finite x != 0 + * casin(inf + iy ) = pi/2 + i inf finite y + * casin(inf + i inf) = pi/4 + i inf + * casin(inf + i NaN) = NaN + i inf + * casin(NaN + i y ) = NaN + i NaN for finite y + * casin(NaN + i inf) = NaN + i inf + * casin(NaN + i NaN) = NaN + i NaN + * + * Special Regions (better formula for accuracy and for avoiding spurious + * overflow or underflow) (all x and y are assumed nonnegative): + * case 1: y = 0 + * case 2: tiny y relative to x-1: y <= ulp(0.5)*|x-1| + * case 3: tiny y: y < 4 sqrt(u), where u = minimum normal number + * case 4: huge y relative to x+1: y >= (1+x)/ulp(0.5) + * case 5: huge x and y: x and y >= sqrt(M)/8, where M = maximum normal number + * case 6: tiny x: x < 4 sqrt(u) + * -------- + * case 1 & 2. y=0 or y/|x-1| is tiny. We have + * ____________ _____________ + * / 2 2 / y 2 + * / (x+-1) + y = |x+-1| / 1 + (------) + * \/ \/ |x+-1| + * + * 1 y 2 + * ~ |x+-1| ( 1 + --- (------) ) + * 2 |x+-1| + * + * 2 + * y + * = |x+-1| + --------. + * 2|x+-1| + * + * Consequently, it is not difficult to see that + * 2 + * y + * [ 1 + ------------ , if x < 1, + * [ 2(1+x)(1-x) + * [ + * [ + * [ x, if x = 1 (y = 0), + * [ + * A ~= [ 2 + * [ x * y + * [ x + ------------ , if x > 1 + * [ 2(1+x)(x-1) + * + * and hence + * ______ 2 + * / 2 y y + * A + \/ A - 1 ~ 1 + ---------------- + -----------, if x < 1, + * sqrt((x+1)(1-x)) 2(x+1)(1-x) + * + * + * ~ x + sqrt((x-1)*(x+1)), if x >= 1. 
+ * + * 2 + * y + * [ x(1 - ------------), if x < 1, + * [ 2(1+x)(1-x) + * B = x/A ~ [ + * [ 1, if x = 1, + * [ + * [ 2 + * [ y + * [ 1 - ------------ , if x > 1, + * [ 2(1+x)(1-x) + * Thus + * [ asin(x) + i y/sqrt((x-1)*(x+1)), if x < 1 + * casin(x+i*y)=[ + * [ pi/2 + i log(x+sqrt(x*x-1)), if x >= 1 + * + * case 3. y < 4 sqrt(u), where u = minimum normal x. + * After case 1 and 2, this will only occurs when x=1. When x=1, we have + * A = (sqrt(4+y*y)+y)/2 ~ 1 + y/2 + y^2/8 + ... + * and + * B = 1/A = 1 - y/2 + y^2/8 + ... + * Since + * asin(x) = pi/2-2*asin(sqrt((1-x)/2)) + * asin(x) = x + x^3/6 + x^5*3/40 + x^7*15/336 + ... + * we have, for the real part asin(B), + * asin(1-y/2) ~ pi/2 - 2 asin(sqrt(y/4)) + * ~ pi/2 - sqrt(y) + * For the imaginary part, + * log(A+sqrt(A*A-1)) ~ log(1+y/2+sqrt(2*y/2)) + * = log(1+y/2+sqrt(y)) + * = (y/2+sqrt(y)) - (y/2+sqrt(y))^2/2 + ... + * ~ sqrt(y) - y*(sqrt(y)+y/2)/2 + * ~ sqrt(y) + * + * case 4. y >= (x+1)ulp(0.5). In this case, A ~ y and B ~ x/y. Thus + * real part = asin(B) ~ x/y (be careful, x/y may underflow) + * and + * imag part = log(y+sqrt(y*y-one)) + * + * + * case 5. Both x and y are large: x and y > sqrt(M)/8, where M = maximum x + * In this case, + * A ~ sqrt(x*x+y*y) + * B ~ x/sqrt(x*x+y*y). + * Thus + * real part = asin(B) = atan(x/y), + * imag part = log(A+sqrt(A*A-1)) ~ log(2A) + * = log(2) + 0.5*log(x*x+y*y) + * = log(2) + log(y) + 0.5*log(1+(x/y)^2) + * + * case 6. x < 4 sqrt(u). In this case, we have + * A ~ sqrt(1+y*y), B = x/sqrt(1+y*y). 
+ * Since B is tiny, we have + * real part = asin(B) ~ B = x/sqrt(1+y*y) + * imag part = log(A+sqrt(A*A-1)) = log (A+sqrt(y*y)) + * = log(y+sqrt(1+y*y)) + * = 0.5*log(y^2+2ysqrt(1+y^2)+1+y^2) + * = 0.5*log(1+2y(y+sqrt(1+y^2))); + * = 0.5*log1p(2y(y+A)); + * + * casin(z) = asin(B) + i sign(y) log (A + sqrt(A*A-1)), + */ +/* INDENT ON */ + +#include "libm.h" /* asin/atan/fabs/log/log1p/sqrt */ +#include "complex_wrapper.h" + +/* INDENT OFF */ +static const double + zero = 0.0, + one = 1.0, + E = 1.11022302462515654042e-16, /* 2**-53 */ + ln2 = 6.93147180559945286227e-01, + pi_2 = 1.570796326794896558e+00, + pi_2_l = 6.123233995736765886e-17, + pi_4 = 7.85398163397448278999e-01, + Foursqrtu = 5.96667258496016539463e-154, /* 2**(-509) */ + Acrossover = 1.5, + Bcrossover = 0.6417, + half = 0.5; +/* INDENT ON */ + +dcomplex +casin(dcomplex z) { + double x, y, t, R, S, A, Am1, B, y2, xm1, xp1, Apx; + int ix, iy, hx, hy; + unsigned lx, ly; + dcomplex ans; + + x = D_RE(z); + y = D_IM(z); + hx = HI_WORD(x); + lx = LO_WORD(x); + hy = HI_WORD(y); + ly = LO_WORD(y); + ix = hx & 0x7fffffff; + iy = hy & 0x7fffffff; + x = fabs(x); + y = fabs(y); + + /* special cases */ + + /* x is inf or NaN */ + if (ix >= 0x7ff00000) { /* x is inf or NaN */ + if (ISINF(ix, lx)) { /* x is INF */ + D_IM(ans) = x; + if (iy >= 0x7ff00000) { + if (ISINF(iy, ly)) + /* casin(inf + i inf) = pi/4 + i inf */ + D_RE(ans) = pi_4; + else /* casin(inf + i NaN) = NaN + i inf */ + D_RE(ans) = y + y; + } else /* casin(inf + iy) = pi/2 + i inf */ + D_RE(ans) = pi_2; + } else { /* x is NaN */ + if (iy >= 0x7ff00000) { + /* INDENT OFF */ + /* + * casin(NaN + i inf) = NaN + i inf + * casin(NaN + i NaN) = NaN + i NaN + */ + /* INDENT ON */ + D_IM(ans) = y + y; + D_RE(ans) = x + x; + } else { + /* casin(NaN + i y ) = NaN + i NaN */ + D_IM(ans) = D_RE(ans) = x + y; + } + } + if (hx < 0) + D_RE(ans) = -D_RE(ans); + if (hy < 0) + D_IM(ans) = -D_IM(ans); + return (ans); + } + + /* casin(+0 + i 0 ) = 0 + i 0. 
*/ + if ((ix | lx | iy | ly) == 0) + return (z); + + if (iy >= 0x7ff00000) { /* y is inf or NaN */ + if (ISINF(iy, ly)) { /* casin(x + i inf) = 0 + i inf */ + D_IM(ans) = y; + D_RE(ans) = zero; + } else { /* casin(x + i NaN) = NaN + i NaN */ + D_IM(ans) = x + y; + if ((ix | lx) == 0) + D_RE(ans) = x; + else + D_RE(ans) = y; + } + if (hx < 0) + D_RE(ans) = -D_RE(ans); + if (hy < 0) + D_IM(ans) = -D_IM(ans); + return (ans); + } + + if ((iy | ly) == 0) { /* region 1: y=0 */ + if (ix < 0x3ff00000) { /* |x| < 1 */ + D_RE(ans) = asin(x); + D_IM(ans) = zero; + } else { + D_RE(ans) = pi_2; + if (ix >= 0x43500000) /* |x| >= 2**54 */ + D_IM(ans) = ln2 + log(x); + else if (ix >= 0x3ff80000) /* x > Acrossover */ + D_IM(ans) = log(x + sqrt((x - one) * (x + + one))); + else { + xm1 = x - one; + D_IM(ans) = log1p(xm1 + sqrt(xm1 * (x + one))); + } + } + } else if (y <= E * fabs(x - one)) { /* region 2: y < tiny*|x-1| */ + if (ix < 0x3ff00000) { /* x < 1 */ + D_RE(ans) = asin(x); + D_IM(ans) = y / sqrt((one + x) * (one - x)); + } else { + D_RE(ans) = pi_2; + if (ix >= 0x43500000) { /* |x| >= 2**54 */ + D_IM(ans) = ln2 + log(x); + } else if (ix >= 0x3ff80000) /* x > Acrossover */ + D_IM(ans) = log(x + sqrt((x - one) * (x + + one))); + else + D_IM(ans) = log1p((x - one) + sqrt((x - one) * + (x + one))); + } + } else if (y < Foursqrtu) { /* region 3 */ + t = sqrt(y); + D_RE(ans) = pi_2 - (t - pi_2_l); + D_IM(ans) = t; + } else if (E * y - one >= x) { /* region 4 */ + D_RE(ans) = x / y; /* need to fix underflow cases */ + D_IM(ans) = ln2 + log(y); + } else if (ix >= 0x5fc00000 || iy >= 0x5fc00000) { /* x,y>2**509 */ + /* region 5: x+1 or y is very large (>= sqrt(max)/8) */ + t = x / y; + D_RE(ans) = atan(t); + D_IM(ans) = ln2 + log(y) + half * log1p(t * t); + } else if (x < Foursqrtu) { + /* region 6: x is very small, < 4sqrt(min) */ + A = sqrt(one + y * y); + D_RE(ans) = x / A; /* may underflow */ + if (iy >= 0x3ff80000) /* if y > Acrossover */ + D_IM(ans) = log(y + A); + else + 
D_IM(ans) = half * log1p((y + y) * (y + A)); + } else { /* safe region */ + y2 = y * y; + xp1 = x + one; + xm1 = x - one; + R = sqrt(xp1 * xp1 + y2); + S = sqrt(xm1 * xm1 + y2); + A = half * (R + S); + B = x / A; + + if (B <= Bcrossover) + D_RE(ans) = asin(B); + else { /* use atan and an accurate approx to a-x */ + Apx = A + x; + if (x <= one) + D_RE(ans) = atan(x / sqrt(half * Apx * (y2 / + (R + xp1) + (S - xm1)))); + else + D_RE(ans) = atan(x / (y * sqrt(half * (Apx / + (R + xp1) + Apx / (S + xm1))))); + } + if (A <= Acrossover) { + /* use log1p and an accurate approx to A-1 */ + if (x < one) + Am1 = half * (y2 / (R + xp1) + y2 / (S - xm1)); + else + Am1 = half * (y2 / (R + xp1) + (S + xm1)); + D_IM(ans) = log1p(Am1 + sqrt(Am1 * (A + one))); + } else { + D_IM(ans) = log(A + sqrt(A * A - one)); + } + } + + if (hx < 0) + D_RE(ans) = -D_RE(ans); + if (hy < 0) + D_IM(ans) = -D_IM(ans); + + return (ans); +} diff --git a/usr/src/lib/libm/common/complex/casinf.c b/usr/src/lib/libm/common/complex/casinf.c new file mode 100644 index 0000000000..7346c0ae7e --- /dev/null +++ b/usr/src/lib/libm/common/complex/casinf.c @@ -0,0 +1,46 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak casinf = __casinf + +#include "libm.h" +#include "complex_wrapper.h" + +fcomplex +casinf(fcomplex z) { + dcomplex dz, dans; + fcomplex ans; + + D_RE(dz) = (double) (F_RE(z)); + D_IM(dz) = (double) (F_IM(z)); + dans = casin(dz); + F_RE(ans) = (float) (D_RE(dans)); + F_IM(ans) = (float) (D_IM(dans)); + return (ans); +} diff --git a/usr/src/lib/libm/common/complex/casinh.c b/usr/src/lib/libm/common/complex/casinh.c new file mode 100644 index 0000000000..8fac57ed50 --- /dev/null +++ b/usr/src/lib/libm/common/complex/casinh.c @@ -0,0 +1,52 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma weak casinh = __casinh + +/* INDENT OFF */ +/* + * dcomplex casinh(dcomplex z); + * casinh z = -i casin iz . + */ +/* INDENT ON */ + +#include "libm.h" +#include "complex_wrapper.h" + +dcomplex +casinh(dcomplex z) { + dcomplex w, r, ans; + + D_RE(w) = -D_IM(z); + D_IM(w) = D_RE(z); + r = casin(w); + D_RE(ans) = D_IM(r); + D_IM(ans) = -D_RE(r); + return (ans); +} diff --git a/usr/src/lib/libm/common/complex/casinhf.c b/usr/src/lib/libm/common/complex/casinhf.c new file mode 100644 index 0000000000..3db35cf257 --- /dev/null +++ b/usr/src/lib/libm/common/complex/casinhf.c @@ -0,0 +1,45 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma weak casinhf = __casinhf + +#include "libm.h" +#include "complex_wrapper.h" + +fcomplex +casinhf(fcomplex z) { + fcomplex w, r, ans; + + F_RE(w) = -F_IM(z); + F_IM(w) = F_RE(z); + r = casinf(w); + F_RE(ans) = F_IM(r); + F_IM(ans) = -F_RE(r); + return (ans); +} diff --git a/usr/src/lib/libm/common/complex/casinhl.c b/usr/src/lib/libm/common/complex/casinhl.c new file mode 100644 index 0000000000..65c1f87801 --- /dev/null +++ b/usr/src/lib/libm/common/complex/casinhl.c @@ -0,0 +1,45 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma weak casinhl = __casinhl + +#include "libm.h" +#include "complex_wrapper.h" + +ldcomplex +casinhl(ldcomplex z) { + ldcomplex w, r, ans; + + LD_RE(w) = -LD_IM(z); + LD_IM(w) = LD_RE(z); + r = casinl(w); + LD_RE(ans) = LD_IM(r); + LD_IM(ans) = -LD_RE(r); + return (ans); +} diff --git a/usr/src/lib/libm/common/complex/casinl.c b/usr/src/lib/libm/common/complex/casinl.c new file mode 100644 index 0000000000..8fe5e6c959 --- /dev/null +++ b/usr/src/lib/libm/common/complex/casinl.c @@ -0,0 +1,232 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma weak casinl = __casinl + +#include "libm.h" /* asinl/atanl/fabsl/isinfl/log1pl/logl/sqrtl */ +#include "complex_wrapper.h" +#include "longdouble.h" + +/* INDENT OFF */ +static const long double +zero = 0.0L, +one = 1.0L, +Acrossover = 1.5L, +Bcrossover = 0.6417L, +half = 0.5L, +ln2 = 6.931471805599453094172321214581765680755e-0001L, +Foursqrtu = 7.3344154702193886624856495681939326638255e-2466L, /* 2**-8189 */ +#if defined(__x86) +E = 5.4210108624275221700372640043497085571289e-20L, /* 2**-64 */ +pi_4 = 0.7853981633974483095739921312272713294078130L, +pi_4_l = 4.1668714592604391641479322342670193036704898e-20L, +pi_2 = 1.5707963267948966191479842624545426588156260L, +pi_2_l = 8.3337429185208783282958644685340386073409796e-20L; + +#else +E = 9.6296497219361792652798897129246365926905e-35L, /* 2**-113 */ +pi_4 = 0.7853981633974483096156608458198756993697670L, +pi_4_l = 2.1679525325309452561992610065108379921905808e-35L, +pi_2 = 1.5707963267948966192313216916397513987395340L, +pi_2_l = 4.3359050650618905123985220130216759843811616e-35L; + +#endif +/* INDENT ON */ + +#if defined(__x86) +static const int ip1 = 0x40400000; /* 2**65 */ +#else +static const int ip1 = 0x40710000; /* 2**114 */ +#endif + +ldcomplex +casinl(ldcomplex z) { + long double x, y, t, R, S, A, Am1, B, y2, xm1, xp1, Apx; + int ix, iy, hx, hy; + ldcomplex ans; + + x = LD_RE(z); + y = LD_IM(z); + hx = HI_XWORD(x); + hy = HI_XWORD(y); + ix = hx & 0x7fffffff; + iy = hy & 0x7fffffff; + x = fabsl(x); + y = fabsl(y); + + /* special cases */ + + /* x is inf or NaN */ + if (ix >= 0x7fff0000) { /* x is inf or NaN */ + if (isinfl(x)) { /* x is INF */ + LD_IM(ans) = x; + if (iy >= 0x7fff0000) { + if (isinfl(y)) + /* casin(inf + i inf) = pi/4 + i inf */ + LD_RE(ans) = pi_4 + pi_4_l; + else /* casin(inf + i NaN) = NaN + i inf */ + LD_RE(ans) = y + y; + } else /* casin(inf + iy) = pi/2 + i inf */ + LD_RE(ans) = pi_2 + pi_2_l; + } else { /* x is NaN */ + if (iy >= 0x7fff0000) { + /* INDENT OFF */ + /* 
+ * casin(NaN + i inf) = NaN + i inf + * casin(NaN + i NaN) = NaN + i NaN + */ + /* INDENT ON */ + LD_IM(ans) = y + y; + LD_RE(ans) = x + x; + } else { + /* INDENT OFF */ + /* casin(NaN + i y ) = NaN + i NaN */ + /* INDENT ON */ + LD_IM(ans) = LD_RE(ans) = x + y; + } + } + if (hx < 0) + LD_RE(ans) = -LD_RE(ans); + if (hy < 0) + LD_IM(ans) = -LD_IM(ans); + return (ans); + } + + /* casin(+0 + i 0) = 0 + i 0. */ + if (x == zero && y == zero) + return (z); + + if (iy >= 0x7fff0000) { /* y is inf or NaN */ + if (isinfl(y)) { /* casin(x + i inf) = 0 + i inf */ + LD_IM(ans) = y; + LD_RE(ans) = zero; + } else { /* casin(x + i NaN) = NaN + i NaN */ + LD_IM(ans) = x + y; + if (x == zero) + LD_RE(ans) = x; + else + LD_RE(ans) = y; + } + if (hx < 0) + LD_RE(ans) = -LD_RE(ans); + if (hy < 0) + LD_IM(ans) = -LD_IM(ans); + return (ans); + } + + if (y == zero) { /* region 1: y=0 */ + if (ix < 0x3fff0000) { /* |x| < 1 */ + LD_RE(ans) = asinl(x); + LD_IM(ans) = zero; + } else { + LD_RE(ans) = pi_2 + pi_2_l; + if (ix >= ip1) /* |x| >= i386 ? 2**65 : 2**114 */ + LD_IM(ans) = ln2 + logl(x); + else if (ix >= 0x3fff8000) /* x > Acrossover */ + LD_IM(ans) = logl(x + sqrtl((x - one) * (x + + one))); + else { + xm1 = x - one; + LD_IM(ans) = log1pl(xm1 + sqrtl(xm1 * (x + + one))); + } + } + } else if (y <= E * fabsl(x - one)) { /* region 2: y < tiny*|x-1| */ + if (ix < 0x3fff0000) { /* x < 1 */ + LD_RE(ans) = asinl(x); + LD_IM(ans) = y / sqrtl((one + x) * (one - x)); + } else { + LD_RE(ans) = pi_2 + pi_2_l; + if (ix >= ip1) /* i386 ? 
2**65 : 2**114 */ + LD_IM(ans) = ln2 + logl(x); + else if (ix >= 0x3fff8000) /* x > Acrossover */ + LD_IM(ans) = logl(x + sqrtl((x - one) * (x + + one))); + else + LD_IM(ans) = log1pl((x - one) + sqrtl((x - + one) * (x + one))); + } + } else if (y < Foursqrtu) { /* region 3 */ + t = sqrtl(y); + LD_RE(ans) = pi_2 - (t - pi_2_l); + LD_IM(ans) = t; + } else if (E * y - one >= x) { /* region 4 */ + LD_RE(ans) = x / y; /* need to fix underflow cases */ + LD_IM(ans) = ln2 + logl(y); + } else if (ix >= 0x5ffb0000 || iy >= 0x5ffb0000) { + /* region 5: x+1 and y are both (>= sqrt(max)/8) i.e. 2**8188 */ + t = x / y; + LD_RE(ans) = atanl(t); + LD_IM(ans) = ln2 + logl(y) + half * log1pl(t * t); + } else if (x < Foursqrtu) { + /* region 6: x is very small, < 4sqrt(min) */ + A = sqrtl(one + y * y); + LD_RE(ans) = x / A; /* may underflow */ + if (iy >= 0x3fff8000) /* if y > Acrossover */ + LD_IM(ans) = logl(y + A); + else + LD_IM(ans) = half * log1pl((y + y) * (y + A)); + } else { /* safe region */ + y2 = y * y; + xp1 = x + one; + xm1 = x - one; + R = sqrtl(xp1 * xp1 + y2); + S = sqrtl(xm1 * xm1 + y2); + A = half * (R + S); + B = x / A; + if (B <= Bcrossover) + LD_RE(ans) = asinl(B); + else { /* use atan and an accurate approx to a-x */ + Apx = A + x; + if (x <= one) + LD_RE(ans) = atanl(x / sqrtl(half * Apx * (y2 / + (R + xp1) + (S - xm1)))); + else + LD_RE(ans) = atanl(x / (y * sqrtl(half * (Apx / + (R + xp1) + Apx / (S + xm1))))); + } + if (A <= Acrossover) { + /* use log1p and an accurate approx to A-1 */ + if (x < one) + Am1 = half * (y2 / (R + xp1) + y2 / (S - xm1)); + else + Am1 = half * (y2 / (R + xp1) + (S + xm1)); + LD_IM(ans) = log1pl(Am1 + sqrtl(Am1 * (A + one))); + } else { + LD_IM(ans) = logl(A + sqrtl(A * A - one)); + } + } + + if (hx < 0) + LD_RE(ans) = -LD_RE(ans); + if (hy < 0) + LD_IM(ans) = -LD_IM(ans); + + return (ans); +} diff --git a/usr/src/lib/libm/common/complex/catan.c b/usr/src/lib/libm/common/complex/catan.c new file mode 100644 index 
0000000000..39446a07e7 --- /dev/null +++ b/usr/src/lib/libm/common/complex/catan.c @@ -0,0 +1,292 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma weak catan = __catan + +/* INDENT OFF */ +/* + * dcomplex catan(dcomplex z); + * + * If + * z = x + iy, + * + * then + * 1 ( 2x ) 1 2 2 + * Re w = - arctan(-----------) = - ATAN2(2x, 1 - x - y ) + * 2 ( 2 2) 2 + * (1 - x - y ) + * + * ( 2 2) + * 1 (x + (y+1) ) 1 4y + * Im w = - log(------------) .= --- log [ 1 + ------------- ] + * 4 ( 2 2) 4 2 2 + * (x + (y-1) ) x + (y-1) + * + * 2 16 3 y + * = t - 2t + -- t - ..., where t = ----------------- + * 3 x*x + (y-1)*(y-1) + * + * Note that: if catan( x, y) = ( u, v), then + * catan(-x, y) = (-u, v) + * catan( x,-y) = ( u,-v) + * + * Also, catan(x,y) = -i*catanh(-y,x), or + * catanh(x,y) = i*catan(-y,x) + * So, if catanh(y,x) = (v,u), then catan(x,y) = -i*(-v,u) = (u,v), i.e., + * catan(x,y) = (u,v) + * + * EXCEPTION CASES (conform to ISO/IEC 9899:1999(E)): + * catan( 0 , 0 ) = (0 , 0 ) + * catan( NaN, 0 ) = (NaN , 0 ) + * catan( 0 , 1 ) = (0 , +inf) with divide-by-zero + * catan( inf, y ) = (pi/2 , 0 ) for finite +y + * catan( NaN, y ) = (NaN , NaN ) with invalid for finite y != 0 + * catan( x , inf ) = (pi/2 , 0 ) for finite +x + * catan( inf, inf ) = (pi/2 , 0 ) + * catan( NaN, inf ) = (NaN , 0 ) + * catan( x , NaN ) = (NaN , NaN ) with invalid for finite x + * catan( inf, NaN ) = (pi/2 , +-0 ) + */ +/* INDENT ON */ + +#include "libm.h" /* atan/atan2/fabs/log/log1p */ +#include "complex_wrapper.h" + +/* INDENT OFF */ +static const double + pi_2 = 1.570796326794896558e+00, + zero = 0.0, + half = 0.5, + two = 2.0, + ln2 = 6.931471805599453094172321214581765680755e-0001, + one = 1.0; +/* INDENT ON */ + +dcomplex +catan(dcomplex z) { + dcomplex ans; + double x, y, ax, ay, t; + int hx, hy, ix, iy; + unsigned lx, ly; + + x = D_RE(z); + y = D_IM(z); + ax = fabs(x); + ay = fabs(y); + hx = HI_WORD(x); + lx = LO_WORD(x); + hy = HI_WORD(y); + ly = LO_WORD(y); + ix = hx & 0x7fffffff; + iy = hy & 0x7fffffff; + + /* x is inf or NaN */ + if (ix >= 0x7ff00000) { + if (ISINF(ix, lx)) { + D_RE(ans) = pi_2; + D_IM(ans) = 
zero; + } else { + D_RE(ans) = x + x; + if ((iy | ly) == 0 || (ISINF(iy, ly))) + D_IM(ans) = zero; + else + D_IM(ans) = (fabs(y) - ay) / (fabs(y) - ay); + } + } else if (iy >= 0x7ff00000) { + /* y is inf or NaN */ + if (ISINF(iy, ly)) { + D_RE(ans) = pi_2; + D_IM(ans) = zero; + } else { + D_RE(ans) = (fabs(x) - ax) / (fabs(x) - ax); + D_IM(ans) = y; + } + } else if ((ix | lx) == 0) { + /* INDENT OFF */ + /* + * x = 0 + * 1 1 + * A = --- * atan2(2x, 1-x*x-y*y) = --- atan2(0,1-|y|) + * 2 2 + * + * 1 [ (y+1)*(y+1) ] 1 2 1 2y + * B = - log [ ------------ ] = - log (1+ ---) or - log(1+ ----) + * 4 [ (y-1)*(y-1) ] 2 y-1 2 1-y + */ + /* INDENT ON */ + t = one - ay; + if (((iy - 0x3ff00000) | ly) == 0) { + /* y=1: catan(0,1)=(0,+inf) with 1/0 signal */ + D_IM(ans) = ay / ax; + D_RE(ans) = zero; + } else if (iy >= 0x3ff00000) { /* y>1 */ + D_IM(ans) = half * log1p(two / (-t)); + D_RE(ans) = pi_2; + } else { /* y<1 */ + D_IM(ans) = half * log1p((ay + ay) / t); + D_RE(ans) = zero; + } + } else if (iy < 0x3e200000 || ((ix - iy) >> 20) >= 30) { + /* INDENT OFF */ + /* + * Tiny y (relative to 1+|x|) + * |y| < E*(1+|x|) + * where E=2**-29, -35, -60 for double, double extended, quad precision + * + * 1 [ x<=1: atan(x) + * A = --- * atan2(2x, 1-x*x-y*y) ~ [ 1 1+x + * 2 [ x>=1: - atan2(2,(1-x)*(-----)) + * 2 x + * + * y/x + * B ~ t*(1-2t), where t = ----------------- is tiny + * x + (y-1)*(y-1)/x + */ + /* INDENT ON */ + if (ix < 0x3ff00000) + D_RE(ans) = atan(ax); + else + D_RE(ans) = half * atan2(two, (one - ax) * (one + + one / ax)); + if ((iy | ly) == 0) { + D_IM(ans) = ay; + } else { + if (ix < 0x3e200000) + t = ay / ((ay - one) * (ay - one)); + else if (ix > 0x41c00000) + t = (ay / ax) / ax; + else + t = ay / (ax * ax + (ay - one) * (ay - one)); + D_IM(ans) = t * (one - (t + t)); + } + } else if (iy >= 0x41c00000 && ((iy - ix) >> 20) >= 30) { + /* INDENT OFF */ + /* + * Huge y relative to 1+|x| + * |y| > Einv*(1+|x|), where Einv~2**(prec/2+3), + * 1 + * A ~ --- * atan2(2x, 
-y*y) ~ pi/2 + * 2 + * y + * B ~ t*(1-2t), where t = --------------- is tiny + * (y-1)*(y-1) + */ + /* INDENT ON */ + D_RE(ans) = pi_2; + t = (ay / (ay - one)) / (ay - one); + D_IM(ans) = t * (one - (t + t)); + } else if (((iy - 0x3ff00000) | ly) == 0) { + /* INDENT OFF */ + /* + * y = 1 + * 1 1 + * A = --- * atan2(2x, -x*x) = --- atan2(2,-x) + * 2 2 + * + * 1 [x*x + 4] 1 4 [ 0.5(log2-logx) if + * B = - log [-------] = - log (1+ ---) = [ |x|<E, else 0.25* + * 4 [ x*x ] 4 x*x [ log1p((2/x)*(2/x)) + */ + /* INDENT ON */ + D_RE(ans) = half * atan2(two, -ax); + if (ix < 0x3e200000) + D_IM(ans) = half * (ln2 - log(ax)); + else { + t = two / ax; + D_IM(ans) = 0.25 * log1p(t * t); + } + } else if (ix >= 0x43900000) { + /* INDENT OFF */ + /* + * Huge x: + * when |x| > 1/E^2, + * 1 pi + * A ~ --- * atan2(2x, -x*x-y*y) ~ --- + * 2 2 + * y y/x + * B ~ t*(1-2t), where t = --------------- = (-------------- )/x + * x*x+(y-1)*(y-1) 1+((y-1)/x)^2 + */ + /* INDENT ON */ + D_RE(ans) = pi_2; + t = ((ay / ax) / (one + ((ay - one) / ax) * ((ay - one) / + ax))) / ax; + D_IM(ans) = t * (one - (t + t)); + } else if (ix < 0x38b00000) { + /* INDENT OFF */ + /* + * Tiny x: + * when |x| < E^4, (note that y != 1) + * 1 1 + * A = --- * atan2(2x, 1-x*x-y*y) ~ --- * atan2(2x,(1-y)*(1+y)) + * 2 2 + * + * 1 [(y+1)*(y+1)] 1 2 1 2y + * B = - log [-----------] = - log (1+ ---) or - log(1+ ----) + * 4 [(y-1)*(y-1)] 2 y-1 2 1-y + */ + /* INDENT ON */ + D_RE(ans) = half * atan2(ax + ax, (one - ay) * (one + ay)); + if (iy >= 0x3ff00000) + D_IM(ans) = half * log1p(two / (ay - one)); + else + D_IM(ans) = half * log1p((ay + ay) / (one - ay)); + } else { + /* INDENT OFF */ + /* + * normal x,y + * 1 + * A = --- * atan2(2x, 1-x*x-y*y) + * 2 + * + * 1 [x*x+(y+1)*(y+1)] 1 4y + * B = - log [---------------] = - log (1+ -----------------) + * 4 [x*x+(y-1)*(y-1)] 4 x*x + (y-1)*(y-1) + */ + /* INDENT ON */ + t = one - ay; + if (iy >= 0x3fe00000 && iy < 0x40000000) { + /* y close to 1 */ + D_RE(ans) = half * 
(atan2((ax + ax), (t * (one + ay) - + ax * ax))); + } else if (ix >= 0x3fe00000 && ix < 0x40000000) { + /* x close to 1 */ + D_RE(ans) = half * atan2((ax + ax), ((one - ax) * + (one + ax) - ay * ay)); + } else + D_RE(ans) = half * atan2((ax + ax), ((one - ax * ax) - + ay * ay)); + D_IM(ans) = 0.25 * log1p((4.0 * ay) / (ax * ax + t * t)); + } + if (hx < 0) + D_RE(ans) = -D_RE(ans); + if (hy < 0) + D_IM(ans) = -D_IM(ans); + return (ans); +} diff --git a/usr/src/lib/libm/common/complex/catanf.c b/usr/src/lib/libm/common/complex/catanf.c new file mode 100644 index 0000000000..32796246ad --- /dev/null +++ b/usr/src/lib/libm/common/complex/catanf.c @@ -0,0 +1,138 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma weak catanf = __catanf + +#include "libm.h" +#include "complex_wrapper.h" + +#if defined(__i386) && !defined(__amd64) +extern int __swapRP(int); +#endif + +static const float + pi_2 = 1.570796326794896558e+00F, + zero = 0.0F, + half = 0.5F, + two = 2.0F, + one = 1.0F; + +fcomplex +catanf(fcomplex z) { + fcomplex ans; + float x, y, ax, ay, t; + double dx, dy, dt; + int hx, hy, ix, iy; + + x = F_RE(z); + y = F_IM(z); + ax = fabsf(x); + ay = fabsf(y); + hx = THE_WORD(x); + hy = THE_WORD(y); + ix = hx & 0x7fffffff; + iy = hy & 0x7fffffff; + + if (ix >= 0x7f800000) { /* x is inf or NaN */ + if (ix == 0x7f800000) { + F_RE(ans) = pi_2; + F_IM(ans) = zero; + } else { + F_RE(ans) = x * x; + if (iy == 0 || iy == 0x7f800000) + F_IM(ans) = zero; + else + F_IM(ans) = (fabsf(y) - ay) / (fabsf(y) - ay); + } + } else if (iy >= 0x7f800000) { /* y is inf or NaN */ + if (iy == 0x7f800000) { + F_RE(ans) = pi_2; + F_IM(ans) = zero; + } else { + F_RE(ans) = (fabsf(x) - ax) / (fabsf(x) - ax); + F_IM(ans) = y * y; + } + } else if (ix == 0) { + /* INDENT OFF */ + /* + * x = 0 + * 1 1 + * A = --- * atan2(2x, 1-x*x-y*y) = --- atan2(0,1-|y|) + * 2 2 + * + * 1 [ (y+1)*(y+1) ] 1 2 1 2y + * B = - log [ ----------- ] = - log (1+ ---) or - log(1+ ----) + * 4 [ (y-1)*(y-1) ] 2 y-1 2 1-y + */ + /* INDENT ON */ + t = one - ay; + if (iy == 0x3f800000) { + /* y=1: catan(0,1)=(0,+inf) with 1/0 signal */ + F_IM(ans) = ay / ax; + F_RE(ans) = zero; + } else if (iy > 0x3f800000) { /* y>1 */ + F_IM(ans) = half * log1pf(two / (-t)); + F_RE(ans) = pi_2; + } else { /* y<1 */ + F_IM(ans) = half * log1pf((ay + ay) / t); + F_RE(ans) = zero; + } + } else { + /* INDENT OFF */ + /* + * use double precision x,y + * 1 + * A = --- * atan2(2x, 1-x*x-y*y) + * 2 + * + * 1 [ x*x+(y+1)*(y+1) ] 1 4y + * B = - log [ --------------- ] = - log (1+ -----------------) + * 4 [ x*x+(y-1)*(y-1) ] 4 x*x + (y-1)*(y-1) + */ + /* INDENT ON */ +#if defined(__i386) && !defined(__amd64) + int rp = __swapRP(fp_extended); 
+#endif + dx = (double)ax; + dy = (double)ay; + F_RE(ans) = (float)(0.5 * atan2(dx + dx, + 1.0 - dx * dx - dy * dy)); + dt = dy - 1.0; + F_IM(ans) = (float)(0.25 * log1p(4.0 * dy / + (dx * dx + dt * dt))); +#if defined(__i386) && !defined(__amd64) + if (rp != fp_extended) + (void) __swapRP(rp); +#endif + } + if (hx < 0) + F_RE(ans) = -F_RE(ans); + if (hy < 0) + F_IM(ans) = -F_IM(ans); + return (ans); +} diff --git a/usr/src/lib/libm/common/complex/catanh.c b/usr/src/lib/libm/common/complex/catanh.c new file mode 100644 index 0000000000..7abe2f005a --- /dev/null +++ b/usr/src/lib/libm/common/complex/catanh.c @@ -0,0 +1,57 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma weak catanh = __catanh + +/* INDENT OFF */ +/* + * z := x + iy + * catanh(z) = -i catan(iz) + * = -i catan(-y+ix) + * = (Im(catan(-y+ix)), -Re(catan(-y+ix))) + */ +/* INDENT ON */ + +#include "libm.h" +#include "complex_wrapper.h" + +dcomplex +catanh(dcomplex z) { + double x, y; + dcomplex ans, ct; + + x = D_RE(z); + y = D_IM(z); + D_RE(z) = -y; + D_IM(z) = x; + ct = catan(z); + D_RE(ans) = D_IM(ct); + D_IM(ans) = -D_RE(ct); + return (ans); +} diff --git a/usr/src/lib/libm/common/complex/catanhf.c b/usr/src/lib/libm/common/complex/catanhf.c new file mode 100644 index 0000000000..51ff2042ed --- /dev/null +++ b/usr/src/lib/libm/common/complex/catanhf.c @@ -0,0 +1,48 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma weak catanhf = __catanhf + +#include "libm.h" +#include "complex_wrapper.h" + +fcomplex +catanhf(fcomplex z) { + float x, y; + fcomplex ans, ct; + + x = F_RE(z); + y = F_IM(z); + F_RE(z) = -y; + F_IM(z) = x; + ct = catanf(z); + F_RE(ans) = F_IM(ct); + F_IM(ans) = -F_RE(ct); + return (ans); +} diff --git a/usr/src/lib/libm/common/complex/catanhl.c b/usr/src/lib/libm/common/complex/catanhl.c new file mode 100644 index 0000000000..9676c46215 --- /dev/null +++ b/usr/src/lib/libm/common/complex/catanhl.c @@ -0,0 +1,48 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma weak catanhl = __catanhl + +#include "libm.h" +#include "complex_wrapper.h" + +ldcomplex +catanhl(ldcomplex z) { + long double x, y; + ldcomplex ans, ct; + + x = LD_RE(z); + y = LD_IM(z); + LD_RE(z) = -y; + LD_IM(z) = x; + ct = catanl(z); + LD_RE(ans) = LD_IM(ct); + LD_IM(ans) = -LD_RE(ct); + return (ans); +} diff --git a/usr/src/lib/libm/common/complex/catanl.c b/usr/src/lib/libm/common/complex/catanl.c new file mode 100644 index 0000000000..b0543ed8b0 --- /dev/null +++ b/usr/src/lib/libm/common/complex/catanl.c @@ -0,0 +1,329 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma weak catanl = __catanl + +/* INDENT OFF */ +/* + * ldcomplex catanl(ldcomplex z); + * + * Atan(z) return A + Bi where, + * 1 + * A = --- * atan2(2x, 1-x*x-y*y) + * 2 + * + * 1 [ x*x + (y+1)*(y+1) ] 1 4y + * B = --- log [ ----------------- ] = - log (1+ -----------------) + * 4 [ x*x + (y-1)*(y-1) ] 4 x*x + (y-1)*(y-1) + * + * 2 16 3 y + * = t - 2t + -- t - ..., where t = ----------------- + * 3 x*x + (y-1)*(y-1) + * Proof: + * Let w = atan(z=x+yi) = A + B i. Then tan(w) = z. + * Since sin(w) = (exp(iw)-exp(-iw))/(2i), cos(w)=(exp(iw)+exp(-iw))/(2), + * Let p = exp(iw), then z = tan(w) = ((p-1/p)/(p+1/p))/i, or + * iz = (p*p-1)/(p*p+1), or, after simplification, + * p*p = (1+iz)/(1-iz) ... (1) + * LHS of (1) = exp(2iw) = exp(2i(A+Bi)) = exp(-2B)*exp(2iA) + * = exp(-2B)*(cos(2A)+i*sin(2A)) ... (2) + * 1-y+ix (1-y+ix)*(1+y+ix) 1-x*x-y*y + 2xi + * RHS of (1) = ------ = ----------------- = --------------- ... (3) + * 1+y-ix (1+y)**2 + x**2 (1+y)**2 + x**2 + * + * Comparing the real and imaginary parts of (2) and (3), we have: + * cos(2A) : 1-x*x-y*y = sin(2A) : 2x + * and hence + * tan(2A) = 2x/(1-x*x-y*y), or + * A = 0.5 * atan2(2x, 1-x*x-y*y) ... (4) + * + * For the imaginary part B, Note that |p*p| = exp(-2B), and + * |1+iz| |i-z| hypot(x,(y-1)) + * |----| = |---| = -------------- + * |1-iz| |i+z| hypot(x,(y+1)) + * Thus + * x*x + (y+1)*(y+1) + * exp(4B) = -----------------, or + * x*x + (y-1)*(y-1) + * + * 1 [x^2+(y+1)^2] 1 4y + * B = - log [-----------] = - log(1+ -------------) ... (5) + * 4 [x^2+(y-1)^2] 4 x^2+(y-1)^2 + * + * QED. 
+ * + * Note that: if catan( x, y) = ( u, v), then + * catan(-x, y) = (-u, v) + * catan( x,-y) = ( u,-v) + * + * Also, catan(x,y) = -i*catanh(-y,x), or + * catanh(x,y) = i*catan(-y,x) + * So, if catanh(y,x) = (v,u), then catan(x,y) = -i*(-v,u) = (u,v), i.e., + * catan(x,y) = (u,v) + * + * EXCEPTION CASES (conform to ISO/IEC 9899:1999(E)): + * catan( 0 , 0 ) = (0 , 0 ) + * catan( NaN, 0 ) = (NaN , 0 ) + * catan( 0 , 1 ) = (0 , +inf) with divide-by-zero + * catan( inf, y ) = (pi/2 , 0 ) for finite +y + * catan( NaN, y ) = (NaN , NaN ) with invalid for finite y != 0 + * catan( x , inf ) = (pi/2 , 0 ) for finite +x + * catan( inf, inf ) = (pi/2 , 0 ) + * catan( NaN, inf ) = (NaN , 0 ) + * catan( x , NaN ) = (NaN , NaN ) with invalid for finite x + * catan( inf, NaN ) = (pi/2 , +-0 ) + */ +/* INDENT ON */ + +#include "libm.h" /* atan2l/atanl/fabsl/isinfl/iszerol/log1pl/logl */ +#include "complex_wrapper.h" +#include "longdouble.h" + +/* INDENT OFF */ +static const long double +zero = 0.0L, +one = 1.0L, +two = 2.0L, +half = 0.5L, +ln2 = 6.931471805599453094172321214581765680755e-0001L, +pi_2 = 1.570796326794896619231321691639751442098584699687552910487472L, +#if defined(__x86) +E = 2.910383045673370361328125000000000000000e-11L, /* 2**-35 */ +Einv = 3.435973836800000000000000000000000000000e+10L; /* 2**+35 */ +#else +E = 8.673617379884035472059622406959533691406e-19L, /* 2**-60 */ +Einv = 1.152921504606846976000000000000000000000e18L; /* 2**+60 */ +#endif +/* INDENT ON */ + +ldcomplex +catanl(ldcomplex z) { + ldcomplex ans; + long double x, y, t1, ax, ay, t; + int hx, hy, ix, iy; + + x = LD_RE(z); + y = LD_IM(z); + ax = fabsl(x); + ay = fabsl(y); + hx = HI_XWORD(x); + hy = HI_XWORD(y); + ix = hx & 0x7fffffff; + iy = hy & 0x7fffffff; + + /* x is inf or NaN */ + if (ix >= 0x7fff0000) { + if (isinfl(x)) { + LD_RE(ans) = pi_2; + LD_IM(ans) = zero; + } else { + LD_RE(ans) = x + x; + if (iszerol(y) || (isinfl(y))) + LD_IM(ans) = zero; + else + LD_IM(ans) = (fabsl(y) - ay) / 
(fabsl(y) - ay); + } + } else if (iy >= 0x7fff0000) { + /* y is inf or NaN */ + if (isinfl(y)) { + LD_RE(ans) = pi_2; + LD_IM(ans) = zero; + } else { + LD_RE(ans) = (fabsl(x) - ax) / (fabsl(x) - ax); + LD_IM(ans) = y; + } + } else if (iszerol(x)) { + /* INDENT OFF */ + /* + * x = 0 + * 1 1 + * A = --- * atan2(2x, 1-x*x-y*y) = --- atan2(0,1-|y|) + * 2 2 + * + * 1 [ (y+1)*(y+1) ] 1 2 1 2y + * B = - log [ ----------- ] = - log (1+ ---) or - log(1+ ----) + * 4 [ (y-1)*(y-1) ] 2 y-1 2 1-y + */ + /* INDENT ON */ + t = one - ay; + if (ay == one) { + /* y=1: catan(0,1)=(0,+inf) with 1/0 signal */ + LD_IM(ans) = ay / ax; + LD_RE(ans) = zero; + } else if (ay > one) { /* y>1 */ + LD_IM(ans) = half * log1pl(two / (-t)); + LD_RE(ans) = pi_2; + } else { /* y<1 */ + LD_IM(ans) = half * log1pl((ay + ay) / t); + LD_RE(ans) = zero; + } + } else if (ay < E * (one + ax)) { + /* INDENT OFF */ + /* + * Tiny y (relative to 1+|x|) + * |y| < E*(1+|x|) + * where E=2**-29, -35, -60 for double, extended, quad precision + * + * 1 [x<=1: atan(x) + * A = - * atan2(2x,1-x*x-y*y) ~ [ 1 1+x + * 2 [x>=1: - atan2(2,(1-x)*(-----)) + * 2 x + * + * y/x + * B ~ t*(1-2t), where t = ----------------- is tiny + * x + (y-1)*(y-1)/x + * + * y + * (when x < 2**-60, t = ----------- ) + * (y-1)*(y-1) + */ + /* INDENT ON */ + if (ay == zero) + LD_IM(ans) = ay; + else { + t1 = ay - one; + if (ix < 0x3fc30000) + t = ay / (t1 * t1); + else if (ix > 0x403b0000) + t = (ay / ax) / ax; + else + t = ay / (ax * ax + t1 * t1); + LD_IM(ans) = t * (one - two * t); + } + if (ix < 0x3fff0000) + LD_RE(ans) = atanl(ax); + else + LD_RE(ans) = half * atan2l(two, (one - ax) * (one + + one / ax)); + + } else if (ay > Einv * (one + ax)) { + /* INDENT OFF */ + /* + * Huge y relative to 1+|x| + * |y| > Einv*(1+|x|), where Einv~2**(prec/2+3), + * 1 + * A ~ --- * atan2(2x, -y*y) ~ pi/2 + * 2 + * y + * B ~ t*(1-2t), where t = --------------- is tiny + * (y-1)*(y-1) + */ + /* INDENT ON */ + LD_RE(ans) = pi_2; + t = (ay / (ay - one)) / (ay 
- one); + LD_IM(ans) = t * (one - (t + t)); + } else if (ay == one) { + /* INDENT OFF */ + /* + * y=1 + * 1 1 + * A = - * atan2(2x, -x*x) = --- atan2(2,-x) + * 2 2 + * + * 1 [ x*x+4] 1 4 [ 0.5(log2-logx) if + * B = - log [ -----] = - log (1+ ---) = [ |x|<E, else 0.25* + * 4 [ x*x ] 4 x*x [ log1p((2/x)*(2/x)) + */ + /* INDENT ON */ + LD_RE(ans) = half * atan2l(two, -ax); + if (ax < E) + LD_IM(ans) = half * (ln2 - logl(ax)); + else { + t = two / ax; + LD_IM(ans) = 0.25L * log1pl(t * t); + } + } else if (ax > Einv * Einv) { + /* INDENT OFF */ + /* + * Huge x: + * when |x| > 1/E^2, + * 1 pi + * A ~ --- * atan2(2x, -x*x-y*y) ~ --- + * 2 2 + * y y/x + * B ~ t*(1-2t), where t = --------------- = (-------------- )/x + * x*x+(y-1)*(y-1) 1+((y-1)/x)^2 + */ + /* INDENT ON */ + LD_RE(ans) = pi_2; + t = ((ay / ax) / (one + ((ay - one) / ax) * ((ay - one) / + ax))) / ax; + LD_IM(ans) = t * (one - (t + t)); + } else if (ax < E * E * E * E) { + /* INDENT OFF */ + /* + * Tiny x: + * when |x| < E^4, (note that y != 1) + * 1 1 + * A = --- * atan2(2x, 1-x*x-y*y) ~ --- * atan2(2x,1-y*y) + * 2 2 + * + * 1 [ (y+1)*(y+1) ] 1 2 1 2y + * B = - log [ ----------- ] = - log (1+ ---) or - log(1+ ----) + * 4 [ (y-1)*(y-1) ] 2 y-1 2 1-y + */ + /* INDENT ON */ + LD_RE(ans) = half * atan2l(ax + ax, (one - ay) * (one + ay)); + if (ay > one) /* y>1 */ + LD_IM(ans) = half * log1pl(two / (ay - one)); + else /* y<1 */ + LD_IM(ans) = half * log1pl((ay + ay) / (one - ay)); + } else { + /* INDENT OFF */ + /* + * normal x,y + * 1 + * A = --- * atan2(2x, 1-x*x-y*y) + * 2 + * + * 1 [ x*x+(y+1)*(y+1) ] 1 4y + * B = - log [ --------------- ] = - log (1+ -----------------) + * 4 [ x*x+(y-1)*(y-1) ] 4 x*x + (y-1)*(y-1) + */ + /* INDENT ON */ + t = one - ay; + if (iy >= 0x3ffe0000 && iy < 0x40000000) { + /* y close to 1 */ + LD_RE(ans) = half * (atan2l((ax + ax), (t * (one + + ay) - ax * ax))); + } else if (ix >= 0x3ffe0000 && ix < 0x40000000) { + /* x close to 1 */ + LD_RE(ans) = half * atan2l((ax + ax), ((one - 
ax) * + (one + ax) - ay * ay)); + } else + LD_RE(ans) = half * atan2l((ax + ax), ((one - ax * + ax) - ay * ay)); + LD_IM(ans) = 0.25L * log1pl((4.0L * ay) / (ax * ax + t * t)); + } + if (hx < 0) + LD_RE(ans) = -LD_RE(ans); + if (hy < 0) + LD_IM(ans) = -LD_IM(ans); + return (ans); +} diff --git a/usr/src/lib/libm/common/complex/ccos.c b/usr/src/lib/libm/common/complex/ccos.c new file mode 100644 index 0000000000..c4a2edd945 --- /dev/null +++ b/usr/src/lib/libm/common/complex/ccos.c @@ -0,0 +1,55 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma weak ccos = __ccos + +/* INDENT OFF */ +/* + * dcomplex ccos(dcomplex z); + * + * z := x+iy; since ccos(iz) = cosh(z), we have + * ccos(z) = ccos((-1)*(-z)) = ccos(i*i*(-z)) + * = ccosh(i*(-z)) = ccosh(i*(-x-yi)) + * = ccosh(y-ix) + */ +/* INDENT ON */ + +#include "libm.h" +#include "complex_wrapper.h" + +dcomplex +ccos(dcomplex z) { + double x, y; + + x = D_RE(z); + y = D_IM(z); + D_RE(z) = y; + D_IM(z) = -x; + return (ccosh(z)); +} diff --git a/usr/src/lib/libm/common/complex/ccosf.c b/usr/src/lib/libm/common/complex/ccosf.c new file mode 100644 index 0000000000..a2686fc6f4 --- /dev/null +++ b/usr/src/lib/libm/common/complex/ccosf.c @@ -0,0 +1,44 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma weak ccosf = __ccosf + +#include "libm.h" +#include "complex_wrapper.h" + +fcomplex +ccosf(fcomplex z) { + float x, y; + + x = F_RE(z); + y = F_IM(z); + F_RE(z) = y; + F_IM(z) = -x; + return (ccoshf(z)); +} diff --git a/usr/src/lib/libm/common/complex/ccosh.c b/usr/src/lib/libm/common/complex/ccosh.c new file mode 100644 index 0000000000..836ab927c5 --- /dev/null +++ b/usr/src/lib/libm/common/complex/ccosh.c @@ -0,0 +1,135 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak ccosh = __ccosh + +/* INDENT OFF */ +/* + * dcomplex ccosh(dcomplex z); + * + * z -z x -x + * e + e e (cos(y)+i*sin(y)) + e (cos(-y)+i*sin(-y)) + * cosh z = -------------- = --------------------------------------------- + * 2 2 + * x -x x -x + * cos(y) ( e + e ) + i*sin(y) (e - e ) + * = -------------------------------------------- + * 2 + * + * = cos(y) cosh(x) + i sin(y) sinh(x) + * + * Implementation Note + * ------------------- + * + * |x| -|x| |x| -2|x| -2|x| -P-4 + * Note that e +- e = e ( 1 +- e ). 
If e < 2 , where + * + * P stands for the number of significant bits of the machine precision, + * |x| + * then the result will be rounded to e . Therefore, we have + * + * z + * e + * cosh z = ----- if |x| >= (P/2 + 2)*ln2 + * 2 + * + * EXCEPTION (conform to ISO/IEC 9899:1999(E)): + * ccosh(0,0)=(1,0) + * ccosh(0,inf)=(NaN,+-0) + * ccosh(0,NaN)=(NaN,+-0) + * ccosh(x,inf) = (NaN,NaN) for finite non-zero x + * ccosh(x,NaN) = (NaN,NaN) for finite non-zero x + * ccosh(inf,0) = (inf, 0) + * ccosh(inf,y) = (inf*cos(y),inf*sin(y)) for finite non-zero y + * ccosh(inf,inf) = (+-inf,NaN) + * ccosh(inf,NaN) = (+inf,NaN) + * ccosh(NaN,0) = (NaN,+-0) + * ccosh(NaN,y) = (NaN,NaN) for non-zero y + * ccosh(NaN,NaN) = (NaN,NaN) + */ +/* INDENT ON */ + +#include "libm.h" /* cosh/exp/fabs/scalbn/sinh/sincos/__k_cexp */ +#include "complex_wrapper.h" + +dcomplex +ccosh(dcomplex z) { + double t, x, y, S, C; + int hx, ix, lx, hy, iy, ly, n; + dcomplex ans; + + x = D_RE(z); + y = D_IM(z); + hx = HI_WORD(x); + lx = LO_WORD(x); + ix = hx & 0x7fffffff; + hy = HI_WORD(y); + ly = LO_WORD(y); + iy = hy & 0x7fffffff; + x = fabs(x); + y = fabs(y); + + (void) sincos(y, &S, &C); + if (ix >= 0x403c0000) { /* |x| > 28 = prec/2 (14,28,34,60) */ + if (ix >= 0x40862E42) { /* |x| > 709.78... 
~ log(2**1024) */ + if (ix >= 0x7ff00000) { /* |x| is inf or NaN */ + if ((iy | ly) == 0) { + D_RE(ans) = x; + D_IM(ans) = y; + } else if (iy >= 0x7ff00000) { + D_RE(ans) = x; + D_IM(ans) = x - y; + } else { + D_RE(ans) = C * x; + D_IM(ans) = S * x; + } + } else { + t = __k_cexp(x, &n); + /* return exp(x)=t*2**n */ + D_RE(ans) = scalbn(C * t, n - 1); + D_IM(ans) = scalbn(S * t, n - 1); + } + } else { + t = exp(x) * 0.5; + D_RE(ans) = C * t; + D_IM(ans) = S * t; + } + } else { + if ((ix | lx) == 0) { /* x = 0, return (C,0) */ + D_RE(ans) = C; + D_IM(ans) = 0.0; + } else { + D_RE(ans) = C * cosh(x); + D_IM(ans) = S * sinh(x); + } + } + if ((hx ^ hy) < 0) + D_IM(ans) = -D_IM(ans); + return (ans); +} diff --git a/usr/src/lib/libm/common/complex/ccoshf.c b/usr/src/lib/libm/common/complex/ccoshf.c new file mode 100644 index 0000000000..873cf0aa03 --- /dev/null +++ b/usr/src/lib/libm/common/complex/ccoshf.c @@ -0,0 +1,100 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma weak ccoshf = __ccoshf + +#include "libm.h" +#include "complex_wrapper.h" + +#if defined(__i386) && !defined(__amd64) +extern int __swapRP(int); +#endif + +static const float zero = 0.0F, half = 0.5F; + +fcomplex +ccoshf(fcomplex z) { + float t, x, y, S, C; + double w; + int hx, ix, hy, iy, n; + fcomplex ans; + + x = F_RE(z); + y = F_IM(z); + hx = THE_WORD(x); + ix = hx & 0x7fffffff; + hy = THE_WORD(y); + iy = hy & 0x7fffffff; + x = fabsf(x); + y = fabsf(y); + + sincosf(y, &S, &C); + if (ix >= 0x41600000) { /* |x| > 14 = prec/2 (14,28,34,60) */ + if (ix >= 0x42B171AA) { /* |x| > 88.722... ~ log(2**128) */ + if (ix >= 0x7f800000) { /* |x| is inf or NaN */ + if (iy == 0) { + F_RE(ans) = x; + F_IM(ans) = y; + } else if (iy >= 0x7f800000) { + F_RE(ans) = x; + F_IM(ans) = x - y; + } else { + F_RE(ans) = C * x; + F_IM(ans) = S * x; + } + } else { +#if defined(__i386) && !defined(__amd64) + int rp = __swapRP(fp_extended); +#endif + /* return (C, S) * exp(x) / 2 */ + w = __k_cexp((double)x, &n); + F_RE(ans) = (float)scalbn(C * w, n - 1); + F_IM(ans) = (float)scalbn(S * w, n - 1); +#if defined(__i386) && !defined(__amd64) + if (rp != fp_extended) + (void) __swapRP(rp); +#endif + } + } else { + t = expf(x) * half; + F_RE(ans) = C * t; + F_IM(ans) = S * t; + } + } else { + if (ix == 0) { /* x = 0, return (C,0) */ + F_RE(ans) = C; + F_IM(ans) = zero; + } else { + F_RE(ans) = C * coshf(x); + F_IM(ans) = S * sinhf(x); + } + } + if ((hx ^ hy) < 0) + F_IM(ans) = -F_IM(ans); + return (ans); +} diff --git a/usr/src/lib/libm/common/complex/ccoshl.c b/usr/src/lib/libm/common/complex/ccoshl.c new file mode 100644 index 0000000000..0f65741c97 --- /dev/null +++ b/usr/src/lib/libm/common/complex/ccoshl.c @@ -0,0 +1,91 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak ccoshl = __ccoshl + +#include "libm.h" /* coshl/expl/fabsl/scalbnl/sincosl/sinhl/__k_cexpl */ +#include "complex_wrapper.h" + +/* INDENT OFF */ +static const long double zero = 0.0L, half = 0.5L; +/* INDENT ON */ + +ldcomplex +ccoshl(ldcomplex z) { + long double t, x, y, S, C; + int hx, ix, hy, iy, n; + ldcomplex ans; + + x = LD_RE(z); + y = LD_IM(z); + hx = HI_XWORD(x); + ix = hx & 0x7fffffff; + hy = HI_XWORD(y); + iy = hy & 0x7fffffff; + x = fabsl(x); + y = fabsl(y); + + (void) sincosl(y, &S, &C); + if (ix >= 0x4004e000) { /* |x| > 60 = prec/2 (14,28,34,60) */ + if (ix >= 0x400C62E4) { /* |x| > 11356.52... 
~ log(2**16384) */ + if (ix >= 0x7fff0000) { /* |x| is inf or NaN */ + if (y == zero) { + LD_RE(ans) = x; + LD_IM(ans) = y; + } else if (iy >= 0x7fff0000) { + LD_RE(ans) = x; + LD_IM(ans) = x - y; + } else { + LD_RE(ans) = C * x; + LD_IM(ans) = S * x; + } + } else { + t = __k_cexpl(x, &n); + /* return exp(x)=t*2**n */ + LD_RE(ans) = scalbnl(C * t, n - 1); + LD_IM(ans) = scalbnl(S * t, n - 1); + } + } else { + t = expl(x) * half; + LD_RE(ans) = C * t; + LD_IM(ans) = S * t; + } + } else { + if (x == zero) { /* x = 0, return (C,0) */ + LD_RE(ans) = C; + LD_IM(ans) = zero; + } else { + LD_RE(ans) = C * coshl(x); + LD_IM(ans) = S * sinhl(x); + } + } + if ((hx ^ hy) < 0) + LD_IM(ans) = -LD_IM(ans); + return (ans); +} diff --git a/usr/src/lib/libm/common/complex/ccosl.c b/usr/src/lib/libm/common/complex/ccosl.c new file mode 100644 index 0000000000..8a822be99b --- /dev/null +++ b/usr/src/lib/libm/common/complex/ccosl.c @@ -0,0 +1,44 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma weak ccosl = __ccosl + +#include "libm.h" +#include "complex_wrapper.h" + +ldcomplex +ccosl(ldcomplex z) { + long double x, y; + + x = LD_RE(z); + y = LD_IM(z); + LD_RE(z) = y; + LD_IM(z) = -x; + return (ccoshl(z)); +} diff --git a/usr/src/lib/libm/common/complex/cexp.c b/usr/src/lib/libm/common/complex/cexp.c new file mode 100644 index 0000000000..9b85bc4843 --- /dev/null +++ b/usr/src/lib/libm/common/complex/cexp.c @@ -0,0 +1,116 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak cexp = __cexp + +/* INDENT OFF */ +/* + * dcomplex cexp(dcomplex z); + * + * x+iy x + * e = e (cos(y)+i*sin(y)) + * + * Over/underflow issue + * -------------------- + * exp(x) may be huge but cos(y) or sin(y) may be tiny. So we use + * function __k_cexp(x,&n) to return exp(x) = __k_cexp(x,&n)*2**n. 
+ * Thus if exp(x+iy) = A + Bi and t = __k_cexp(x,&n), then + * A = t*cos(y)*2**n, B = t*sin(y)*2**n + * + * Purge off all exceptional arguments: + * (x,0) --> (exp(x),0) for all x, include inf and NaN + * (+inf, y) --> (+inf, NaN) for inf, nan + * (-inf, y) --> (+-0, +-0) for y = inf, nan + * (x,+-inf/NaN) --> (NaN,NaN) for finite x + * For all other cases, return + * (x,y) --> exp(x)*cos(y)+i*exp(x)*sin(y)) + * + * Algorithm for out of range x and finite y + * 1. compute exp(x) in factor form (t=__k_cexp(x,&n))*2**n + * 2. compute sincos(y,&s,&c) + * 3. compute t*s+i*(t*c), then scale back to 2**n and return. + */ +/* INDENT ON */ + +#include "libm.h" /* exp/scalbn/sincos/__k_cexp */ +#include "complex_wrapper.h" + +static const double zero = 0.0; + +dcomplex +cexp(dcomplex z) { + dcomplex ans; + double x, y, t, c, s; + int n, ix, iy, hx, hy, lx, ly; + + x = D_RE(z); + y = D_IM(z); + hx = HI_WORD(x); + lx = LO_WORD(x); + hy = HI_WORD(y); + ly = LO_WORD(y); + ix = hx & 0x7fffffff; + iy = hy & 0x7fffffff; + if ((iy | ly) == 0) { /* y = 0 */ + D_RE(ans) = exp(x); + D_IM(ans) = y; + } else if (ISINF(ix, lx)) { /* x is +-inf */ + if (hx < 0) { + if (iy >= 0x7ff00000) { + D_RE(ans) = zero; + D_IM(ans) = zero; + } else { + sincos(y, &s, &c); + D_RE(ans) = zero * c; + D_IM(ans) = zero * s; + } + } else { + if (iy >= 0x7ff00000) { + D_RE(ans) = x; + D_IM(ans) = y - y; + } else { + (void) sincos(y, &s, &c); + D_RE(ans) = x * c; + D_IM(ans) = x * s; + } + } + } else { + (void) sincos(y, &s, &c); + if (ix >= 0x40862E42) { /* |x| > 709.78... 
~ log(2**1024) */ + t = __k_cexp(x, &n); + D_RE(ans) = scalbn(t * c, n); + D_IM(ans) = scalbn(t * s, n); + } else { + t = exp(x); + D_RE(ans) = t * c; + D_IM(ans) = t * s; + } + } + return (ans); +} diff --git a/usr/src/lib/libm/common/complex/cexpf.c b/usr/src/lib/libm/common/complex/cexpf.c new file mode 100644 index 0000000000..411b6d4f6e --- /dev/null +++ b/usr/src/lib/libm/common/complex/cexpf.c @@ -0,0 +1,96 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma weak cexpf = __cexpf + +#include "libm.h" +#include "complex_wrapper.h" + +#if defined(__i386) && !defined(__amd64) +extern int __swapRP(int); +#endif + +static const float zero = 0.0F; + +fcomplex +cexpf(fcomplex z) { + fcomplex ans; + float x, y, c, s; + double t; + int n, ix, iy, hx, hy; + + x = F_RE(z); + y = F_IM(z); + hx = THE_WORD(x); + hy = THE_WORD(y); + ix = hx & 0x7fffffff; + iy = hy & 0x7fffffff; + if (iy == 0) { /* y = 0 */ + F_RE(ans) = expf(x); + F_IM(ans) = y; + } else if (ix == 0x7f800000) { /* x is +-inf */ + if (hx < 0) { + if (iy >= 0x7f800000) { + F_RE(ans) = zero; + F_IM(ans) = zero; + } else { + sincosf(y, &s, &c); + F_RE(ans) = zero * c; + F_IM(ans) = zero * s; + } + } else { + if (iy >= 0x7f800000) { + F_RE(ans) = x; + F_IM(ans) = y - y; + } else { + sincosf(y, &s, &c); + F_RE(ans) = x * c; + F_IM(ans) = x * s; + } + } + } else { + sincosf(y, &s, &c); + if (ix >= 0x42B171AA) { /* |x| > 88.722... ~ log(2**128) */ +#if defined(__i386) && !defined(__amd64) + int rp = __swapRP(fp_extended); +#endif + t = __k_cexp(x, &n); + F_RE(ans) = (float)scalbn(t * (double)c, n); + F_IM(ans) = (float)scalbn(t * (double)s, n); +#if defined(__i386) && !defined(__amd64) + if (rp != fp_extended) + (void) __swapRP(rp); +#endif + } else { + t = expf(x); + F_RE(ans) = t * c; + F_IM(ans) = t * s; + } + } + return (ans); +} diff --git a/usr/src/lib/libm/common/complex/cexpl.c b/usr/src/lib/libm/common/complex/cexpl.c new file mode 100644 index 0000000000..bccd9ba415 --- /dev/null +++ b/usr/src/lib/libm/common/complex/cexpl.c @@ -0,0 +1,90 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak cexpl = __cexpl + +#include "libm.h" /* expl/isinfl/iszerol/scalbnl/sincosl */ +#include "complex_wrapper.h" + +extern int isinfl(long double); +extern int iszerol(long double); + +/* INDENT OFF */ +static const long double zero = 0.0L; +/* INDENT ON */ + +ldcomplex +cexpl(ldcomplex z) { + ldcomplex ans; + long double x, y, t, c, s; + int n, ix, iy, hx, hy; + + x = LD_RE(z); + y = LD_IM(z); + hx = HI_XWORD(x); + hy = HI_XWORD(y); + ix = hx & 0x7fffffff; + iy = hy & 0x7fffffff; + if (iszerol(y)) { /* y = 0 */ + LD_RE(ans) = expl(x); + LD_IM(ans) = y; + } else if (isinfl(x)) { /* x is +-inf */ + if (hx < 0) { + if (iy >= 0x7fff0000) { + LD_RE(ans) = zero; + LD_IM(ans) = zero; + } else { + sincosl(y, &s, &c); + LD_RE(ans) = zero * c; + LD_IM(ans) = zero * s; + } + } else { + if (iy >= 0x7fff0000) { + LD_RE(ans) = x; + LD_IM(ans) = y - y; + } else { + (void) sincosl(y, &s, &c); + LD_RE(ans) = x * c; + LD_IM(ans) = x * s; + } + } + } else { + (void) sincosl(y, &s, &c); + if (ix >= 0x400C62E4) { /* |x| > 11356.52... 
~ log(2**16384) */ + t = __k_cexpl(x, &n); + LD_RE(ans) = scalbnl(t * c, n); + LD_IM(ans) = scalbnl(t * s, n); + } else { + t = expl(x); + LD_RE(ans) = t * c; + LD_IM(ans) = t * s; + } + } + return (ans); +} diff --git a/usr/src/lib/libm/common/complex/cimag.c b/usr/src/lib/libm/common/complex/cimag.c new file mode 100644 index 0000000000..ac9edd2549 --- /dev/null +++ b/usr/src/lib/libm/common/complex/cimag.c @@ -0,0 +1,38 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak cimag = __cimag + +#include "libm.h" +#include "complex_wrapper.h" + +double +cimag(dcomplex z) { + return (D_IM(z)); +} diff --git a/usr/src/lib/libm/common/complex/cimagf.c b/usr/src/lib/libm/common/complex/cimagf.c new file mode 100644 index 0000000000..89768f9760 --- /dev/null +++ b/usr/src/lib/libm/common/complex/cimagf.c @@ -0,0 +1,38 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). 
+ * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak cimagf = __cimagf + +#include "libm.h" +#include "complex_wrapper.h" + +float +cimagf(fcomplex z) { + return (F_IM(z)); +} diff --git a/usr/src/lib/libm/common/complex/cimagl.c b/usr/src/lib/libm/common/complex/cimagl.c new file mode 100644 index 0000000000..a80d65a14a --- /dev/null +++ b/usr/src/lib/libm/common/complex/cimagl.c @@ -0,0 +1,38 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak cimagl = __cimagl + +#include "libm.h" +#include "complex_wrapper.h" + +long double +cimagl(ldcomplex z) { + return (LD_IM(z)); +} diff --git a/usr/src/lib/libm/common/complex/clog.c b/usr/src/lib/libm/common/complex/clog.c new file mode 100644 index 0000000000..eb8492e4bf --- /dev/null +++ b/usr/src/lib/libm/common/complex/clog.c @@ -0,0 +1,134 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma weak clog = __clog + +/* INDENT OFF */ +/* + * dcomplex clog(dcomplex z); + * + * log(x+iy) = log(sqrt(x^2 + y^2)) + i atan(y/x) + * + * = (1/2) log(x^2 + y^2) + i atan(y/x) + * + * Note that the arctangent ranges from -PI to +PI, thus the imaginary + * part of clog is atan2(y,x). + * + * EXCEPTION CASES (conform to ISO/IEC 9899:1999(E)): + * clog(-0 + i 0 ) = -inf + i pi + * clog( 0 + i 0 ) = -inf + i 0 + * clog( x + i inf ) = +inf + i pi/2, for finite x + * clog( x + i NaN ) = NaN + i NaN with invalid for finite x + * clog(-inf + iy )= +inf + i pi, for finite positive-signed y + * clog(+inf + iy )= +inf + i 0 , for finite positive-signed y + * clog(-inf + i inf)= inf + i 3pi/4 + * clog(+inf + i inf)= inf + i pi/4 + * clog(+-inf+ i NaN)= inf + i NaN + * clog(NaN + i y )= NaN + i NaN for finite y + * clog(NaN + i inf)= inf + i NaN + * clog(NaN + i NaN)= NaN + i NaN + */ +/* INDENT ON */ + +#include "libm_synonyms.h" +#include <math.h> /* atan2/fabs/log/log1p */ +#include "complex_wrapper.h" +#include "libm_protos.h" /* __k_clog_r */ + + +static const double half = 0.5, one = 1.0; + +dcomplex +clog(dcomplex z) { + dcomplex ans; + double x, y, t, ax, ay, w; + int n, ix, iy, hx, hy; + unsigned lx, ly; + + x = D_RE(z); + y = D_IM(z); + hx = HI_WORD(x); + lx = LO_WORD(x); + hy = HI_WORD(y); + ly = LO_WORD(y); + ix = hx & 0x7fffffff; + iy = hy & 0x7fffffff; + ay = fabs(y); + ax = fabs(x); + D_IM(ans) = carg(z); + if (ix < iy || (ix == iy && lx < ly)) { + /* swap x and y to force ax >= ay */ + t = ax; + ax = ay; + ay = t; + n = ix, ix = iy; + iy = n; + n = lx, lx = ly; + ly = n; + } + n = (ix - iy) >> 20; + if (ix >= 0x7ff00000) { /* x or y is Inf or NaN */ + if (ISINF(ix, lx)) + D_RE(ans) = ax; + else if (ISINF(iy, ly)) + D_RE(ans) = ay; + else + D_RE(ans) = ax * ay; + } else if ((iy | ly) == 0) { + D_RE(ans) = ((ix | lx) == 0)? 
-one / ax : log(ax); + } else if (((0x3fffffff - ix) ^ (ix - 0x3fe00000)) >= 0) { + /* 0.5 <= x < 2 */ + if (ix >= 0x3ff00000) { + if (((ix - 0x3ff00000) | lx) == 0) + D_RE(ans) = half * log1p(ay * ay); + else if (n >= 60) + D_RE(ans) = log(ax); + else + D_RE(ans) = half * (log1p(ay * ay + (ax - + one) * (ax + one))); + } else if (n >= 60) { + D_RE(ans) = log(ax); + } else { + D_RE(ans) = __k_clog_r(ax, ay, &w); + } + } else if (n >= 30) { + D_RE(ans) = log(ax); + } else if (ix < 0x5f300000 && iy >= 0x20b00000) { + /* 2**-500< y < x < 2**500 */ + D_RE(ans) = half * log(ax * ax + ay * ay); + } else { + t = ay / ax; + D_RE(ans) = log(ax) + half * log1p(t * t); + } + return (ans); +} diff --git a/usr/src/lib/libm/common/complex/clogf.c b/usr/src/lib/libm/common/complex/clogf.c new file mode 100644 index 0000000000..93b04cd58c --- /dev/null +++ b/usr/src/lib/libm/common/complex/clogf.c @@ -0,0 +1,82 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma weak clogf = __clogf + +#include "libm.h" +#include "complex_wrapper.h" + +#if defined(__i386) && !defined(__amd64) +extern int __swapRP(int); +#endif + +fcomplex +clogf(fcomplex z) { + fcomplex ans; + float x, y, ax, ay; + double dx, dy; + int ix, iy, hx, hy; + + x = F_RE(z); + y = F_IM(z); + hx = THE_WORD(x); + hy = THE_WORD(y); + ix = hx & 0x7fffffff; + iy = hy & 0x7fffffff; + ay = fabsf(y); + ax = fabsf(x); + F_IM(ans) = atan2f(y, x); + if (ix >= 0x7f800000 || iy >= 0x7f800000) { + /* x or y is Inf or NaN */ + if (iy == 0x7f800000) + F_RE(ans) = ay; + else if (ix == 0x7f800000) + F_RE(ans) = ax; + else + F_RE(ans) = ax + ay; + } else { +#if defined(__i386) && !defined(__amd64) + int rp = __swapRP(fp_extended); +#endif + dx = (double)ax; + dy = (double)ay; + if (ix == 0x3f800000) + F_RE(ans) = (float)(0.5 * log1p(dy * dy)); + else if (iy == 0x3f800000) + F_RE(ans) = (float)(0.5 * log1p(dx * dx)); + else if ((ix | iy) == 0) + F_RE(ans) = -1.0f / ax; + else + F_RE(ans) = (float)(0.5 * log(dx * dx + dy * dy)); +#if defined(__i386) && !defined(__amd64) + if (rp != fp_extended) + (void) __swapRP(rp); +#endif + } + return (ans); +} diff --git a/usr/src/lib/libm/common/complex/clogl.c b/usr/src/lib/libm/common/complex/clogl.c new file mode 100644 index 0000000000..87d584f8a5 --- /dev/null +++ b/usr/src/lib/libm/common/complex/clogl.c @@ -0,0 +1,105 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak clogl = __clogl + +#include "libm.h" /* atan2l/fabsl/isinfl/log1pl/logl/__k_clog_rl */ +#include "complex_wrapper.h" +#include "longdouble.h" + +#if defined(__sparc) +#define SIGP7 120 +#define HSIGP7 60 +#elif defined(__x86) +#define SIGP7 70 +#define HSIGP7 35 +#endif + +/* INDENT OFF */ +static const long double zero = 0.0L, half = 0.5L, one = 1.0L; +/* INDENT ON */ + +ldcomplex +clogl(ldcomplex z) { + ldcomplex ans; + long double x, y, t, ax, ay; + int n, ix, iy, hx, hy; + + x = LD_RE(z); + y = LD_IM(z); + hx = HI_XWORD(x); + hy = HI_XWORD(y); + ix = hx & 0x7fffffff; + iy = hy & 0x7fffffff; + ay = fabsl(y); + ax = fabsl(x); + LD_IM(ans) = atan2l(y, x); + if (ix < iy || (ix == iy && ix < 0x7fff0000 && ax < ay)) { + /* swap x and y to force ax>=ay */ + t = ax; + ax = ay; + ay = t; + n = ix, ix = iy; + iy = n; + } + n = (ix - iy) >> 16; + if (ix >= 0x7fff0000) { /* x or y is Inf or NaN */ + if (isinfl(ax)) + LD_RE(ans) = ax; + else if (isinfl(ay)) + LD_RE(ans) = ay; + else + LD_RE(ans) = ax + ay; + } else if (ay == zero) + LD_RE(ans) = logl(ax); + else if (((0x3fffffff - ix) ^ (ix - 0x3ffe0000)) >= 0) { + /* 0.5 <= x < 2 */ + if (ix >= 0x3fff0000) { + if (ax == one) + LD_RE(ans) = half * log1pl(ay * ay); + else if (n >= SIGP7) + LD_RE(ans) = logl(ax); + else + LD_RE(ans) = half * (log1pl(ay * ay + (ax - + one) * (ax + one))); + } else if (n >= SIGP7) + LD_RE(ans) = logl(ax); + else + LD_RE(ans) = __k_clog_rl(x, y, &t); + } else if (n >= HSIGP7) + LD_RE(ans) = logl(ax); + else if (ix < 0x5f3f0000 && iy >= 0x20bf0000) + /* 2**-8000 < y 
< x < 2**8000 */ + LD_RE(ans) = half * logl(ax * ax + ay * ay); + else { + t = ay / ax; + LD_RE(ans) = logl(ax) + half * log1pl(t * t); + } + return (ans); +} diff --git a/usr/src/lib/libm/common/complex/complex_wrapper.h b/usr/src/lib/libm/common/complex/complex_wrapper.h new file mode 100644 index 0000000000..b86a9846eb --- /dev/null +++ b/usr/src/lib/libm/common/complex/complex_wrapper.h @@ -0,0 +1,89 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _COMPLEX_WRAPPER_H +#define _COMPLEX_WRAPPER_H + +#pragma ident "@(#)complex_wrapper.h 1.7 06/01/31 SMI" + +#if defined(__GNUC__) +#define dcomplex double _Complex +#define fcomplex float _Complex +#define ldcomplex long double _Complex +#define D_RE(x) __real__ x +#define D_IM(x) __imag__ x +#define F_RE(x) __real__ x +#define F_IM(x) __imag__ x +#define LD_RE(x) __real__ x +#define LD_IM(x) __imag__ x + +#include <complex.h> +#else + +#define dcomplex double complex +#define fcomplex float complex +#define ldcomplex long double complex +#define _X_RE(__t, __z) ((__t *) &__z)[0] +#define _X_IM(__t, __z) ((__t *) &__z)[1] +#define D_RE(__z) _X_RE(double, __z) +#define D_IM(__z) _X_IM(double, __z) +#define F_RE(__z) _X_RE(float, __z) +#define F_IM(__z) _X_IM(float, __z) +#define LD_RE(__z) _X_RE(long double, __z) +#define LD_IM(__z) _X_IM(long double, __z) + +#include <complex.h> +#endif + +#if defined(__sparc) +#define HIWORD 0 +#define LOWORD 1 +#define HI_XWORD(x) ((unsigned *) &x)[0] +#define XFSCALE(x, n) ((unsigned *) &x)[0] += n << 16 /* signbitl(x) == 0 */ +#define CHOPPED(x) ((long double) ((double) (x))) +#elif defined(__x86) +#define HIWORD 1 +#define LOWORD 0 +#define HI_XWORD(x) ((((int *) &x)[2] << 16) | \ + (0xffff & ((unsigned *) &x)[1] >> 15)) +#define XFSCALE(x, n) ((unsigned short *) &x)[4] += n /* signbitl(x) == 0 */ +#define CHOPPED(x) ((long double) ((float) (x))) +#else +#error Unknown architecture +#endif +#define HI_WORD(x) ((int *) &x)[HIWORD] /* for double */ +#define LO_WORD(x) ((int *) &x)[LOWORD] /* for double */ +#define THE_WORD(x) ((int *) &x)[0] /* for float */ + +/* + * iy:ly must have the sign bit already cleared + */ +#define ISINF(iy, ly) (((iy - 0x7ff00000) | ly) == 0) + +#endif /* _COMPLEX_WRAPPER_H */ diff --git a/usr/src/lib/libm/common/complex/conj.c b/usr/src/lib/libm/common/complex/conj.c new file mode 100644 index 0000000000..9e3b4ea77a --- /dev/null +++ b/usr/src/lib/libm/common/complex/conj.c @@ -0,0 
+1,39 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak conj = __conj + +#include "libm.h" +#include "complex_wrapper.h" + +dcomplex +conj(dcomplex z) { + D_IM(z) = -D_IM(z); + return (z); +} diff --git a/usr/src/lib/libm/common/complex/conjf.c b/usr/src/lib/libm/common/complex/conjf.c new file mode 100644 index 0000000000..417d333a7b --- /dev/null +++ b/usr/src/lib/libm/common/complex/conjf.c @@ -0,0 +1,39 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak conjf = __conjf + +#include "libm.h" +#include "complex_wrapper.h" + +fcomplex +conjf(fcomplex z) { + F_IM(z) = -F_IM(z); + return (z); +} diff --git a/usr/src/lib/libm/common/complex/conjl.c b/usr/src/lib/libm/common/complex/conjl.c new file mode 100644 index 0000000000..cdce73d37e --- /dev/null +++ b/usr/src/lib/libm/common/complex/conjl.c @@ -0,0 +1,39 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma weak conjl = __conjl + +#include "libm.h" +#include "complex_wrapper.h" + +ldcomplex +conjl(ldcomplex z) { + LD_IM(z) = -LD_IM(z); + return (z); +} diff --git a/usr/src/lib/libm/common/complex/cpow.c b/usr/src/lib/libm/common/complex/cpow.c new file mode 100644 index 0000000000..9fed91435a --- /dev/null +++ b/usr/src/lib/libm/common/complex/cpow.c @@ -0,0 +1,337 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak cpow = __cpow + +/* INDENT OFF */ +/* + * dcomplex cpow(dcomplex z); + * + * z**w analytically equivalent to + * + * cpow(z,w) = cexp(w clog(z)) + * + * Let z = x+iy, w = u+iv. 
+ * Since + * + * log(x+iy) = log(sqrt(x^2 + y^2)) + i atan(y/x) + * = (1/2) log(x^2 + y^2) + i atan(y/x) + * + * we have + * + * (u+iv) * log(x+iy) = (u/2) log(x^2 + y^2) - v atan(y/x) (1) + * + i * [ (v/2) log(x^2 + y^2) + u atan(y/x) ] (2) + * = r + i q + * + * Therefore, + * + * z**w = e**(r+iq) = e**r * (cos(q) + i*sin(q)) + * + * Here e**r can be expressed as: sqrt(x^2 + y^2)**u * e**(-v*atan2(y,x)) + * + * Special cases (in the order of appearance): + * 1. (anything) ** 0 is 1 + * 2. (anything) ** 1 is itself + * 3. When v = 0, y = 0: + * If x is finite and negative, and u is finite, then + * x ** u = exp(u*pi i) * pow(|x|, u); + * otherwise, + * x ** u = pow(x, u); + * 4. When v = 0, x = 0 or |x| = |y| or x is inf or y is inf: + * (x + y i) ** u = r * exp(q i) + * where + * r = hypot(x,y) ** u + * q = u * atan2pi(y, x) + * + * 5. otherwise, z**w is NaN if any of x, y, u, v is a NaN or inf + * + * Note: many results of special cases are obtained in terms of + * polar coordinates. In the conversion from polar to rectangular form: + * r exp(q i) = r * cos(q) + r * sin(q) i, + * we regard r * 0 as 0 except when r is a NaN. + */ +/* INDENT ON */ + +#include "libm.h" /* atan2/exp/fabs/hypot/log/pow/scalbn */ + /* atan2pi/exp2/sincos/sincospi/__k_clog_r/__k_atan2 */ +#include "complex_wrapper.h" + +extern void sincospi(double, double *, double *); + +static const double + huge = 1e300, + tiny = 1e-300, + invln2 = 1.44269504088896338700e+00, + ln2hi = 6.93147180369123816490e-01, /* 0x3fe62e42, 0xfee00000 */ + ln2lo = 1.90821492927058770002e-10, /* 0x3dea39ef, 0x35793c76 */ + one = 1.0, + zero = 0.0; + +static const int hiinf = 0x7ff00000; +extern double atan2pi(double, double); + +/* + * Assuming |t[0]| > |t[1]| and |t[2]| > |t[3]|, the sum4fp subroutine + * computes t[0] + t[1] + t[2] + t[3] as the sum of two double fp numbers. 
+ */ +static double +sum4fp(double ta[], double *w) { + double t1, t2, t3, t4, w1, w2, t; + t1 = ta[0]; t2 = ta[1]; t3 = ta[2]; t4 = ta[3]; + /* + * Rearrange ti so that |t1| >= |t2| >= |t3| >= |t4| + */ + if (fabs(t4) > fabs(t1)) { + t = t1; t1 = t3; t3 = t; + t = t2; t2 = t4; t4 = t; + } else if (fabs(t3) > fabs(t1)) { + t = t1; t1 = t3; + if (fabs(t4) > fabs(t2)) { + t3 = t4; t4 = t2; t2 = t; + } else { + t3 = t2; t2 = t; + } + } else if (fabs(t3) > fabs(t2)) { + t = t2; t2 = t3; + if (fabs(t4) > fabs(t2)) { + t3 = t4; t4 = t; + } else + t3 = t; + } + /* summing r = t1 + t2 + t3 + t4 to w1 + w2 */ + w1 = t3 + t4; + w2 = t4 - (w1 - t3); + t = t2 + w1; + w2 += w1 - (t - t2); + w1 = t + w2; + w2 += t - w1; + t = t1 + w1; + w2 += w1 - (t - t1); + w1 = t + w2; + *w = w2 - (w1 - t); + return (w1); +} + +/* + * cpow(z, w) computes z ** w for z = x + iy and w = u + iv. + * + * Real exponents (v == 0) are first matched against the closed-form + * special cases; j records which of them still needs the general path + * (j == 1: plain real exponent, j == 2: |x| == |y| with the angle + * already computed). The general path evaluates exp(r) * (cos(q) + + * i sin(q)) with r and q accumulated as head/tail pairs of doubles. + */ +dcomplex +cpow(dcomplex z, dcomplex w) { + dcomplex ans; + double x, y, u, v, t, c, s, r, x2, y2; + double b[4], t1, t2, t3, t4, w1, w2, u1, v1, x1, y1; + int ix, iy, hx, lx, hy, ly, hv, hu, iu, iv, lu, lv; + int i, j, k; + + x = D_RE(z); + y = D_IM(z); + u = D_RE(w); + v = D_IM(w); + hx = ((int *) &x)[HIWORD]; + lx = ((int *) &x)[LOWORD]; + hy = ((int *) &y)[HIWORD]; + ly = ((int *) &y)[LOWORD]; + hu = ((int *) &u)[HIWORD]; + lu = ((int *) &u)[LOWORD]; + hv = ((int *) &v)[HIWORD]; + lv = ((int *) &v)[LOWORD]; + ix = hx & 0x7fffffff; + iy = hy & 0x7fffffff; + iu = hu & 0x7fffffff; + iv = hv & 0x7fffffff; + + j = 0; + if ((iv | lv) == 0) { /* z**(real) */ + if (((hu - 0x3ff00000) | lu) == 0) { /* z ** 1 = z */ + D_RE(ans) = x; + D_IM(ans) = y; + } else if ((iu | lu) == 0) { /* z ** 0 = 1 */ + D_RE(ans) = one; + D_IM(ans) = zero; + } else if ((iy | ly) == 0) { /* (real)**(real) */ + D_IM(ans) = zero; + if (hx < 0 && ix < hiinf && iu < hiinf) { + /* -x ** u is exp(i*pi*u)*pow(x,u) */ + r = pow(-x, u); + sincospi(u, &s, &c); + D_RE(ans) = (c == zero)? c: c * r; + D_IM(ans) = (s == zero)? 
s: s * r; + } else + D_RE(ans) = pow(x, u); + } else if (((ix | lx) == 0) || ix >= hiinf || iy >= hiinf) { + if (isnan(x) || isnan(y) || isnan(u)) + D_RE(ans) = D_IM(ans) = x + y + u; + else { + /* hypot(x,y): |y| when x == 0, else infinite */ + if ((ix | lx) == 0) + r = fabs(y); + else + r = fabs(x) + fabs(y); + /* the modulus of z ** u is hypot(x,y) ** u */ + r = pow(r, u); + t = atan2pi(y, x); + sincospi(t * u, &s, &c); + D_RE(ans) = (c == zero)? c: c * r; + D_IM(ans) = (s == zero)? s: s * r; + } + } else if (((ix - iy) | (lx - ly)) == 0) { /* |x| = |y| */ + if (hx >= 0) { + t = (hy >= 0)? 0.25 : -0.25; + sincospi(t * u, &s, &c); + } else if ((lu & 3) == 0) { + t = (hy >= 0)? 0.75 : -0.75; + sincospi(t * u, &s, &c); + } else { + r = (hy >= 0)? u : -u; + t = -0.25 * r; + w1 = r + t; + w2 = t - (w1 - r); + sincospi(w1, &t1, &t2); + sincospi(w2, &t3, &t4); + s = t1 * t4 + t3 * t2; + c = t2 * t4 - t1 * t3; + } + if (ix < 0x3fe00000) /* |x| < 1/2 */ + r = pow(fabs(x + x), u) * exp2(-0.5 * u); + else if (ix >= 0x3ff00000 || iu < 0x408ff800) + /* |x| >= 1 or |u| < 1023 */ + r = pow(fabs(x), u) * exp2(0.5 * u); + else /* special treatment */ + j = 2; + if (j == 0) { + D_RE(ans) = (c == zero)? c: c * r; + D_IM(ans) = (s == zero)? 
s: s * r; + } + } else + j = 1; + if (j == 0) + return (ans); + } + if (iu >= hiinf || iv >= hiinf || ix >= hiinf || iy >= hiinf) { + /* + * non-zero imag part(s) with inf component(s) yields NaN + */ + t = fabs(x) + fabs(y) + fabs(u) + fabs(v); + D_RE(ans) = D_IM(ans) = t - t; + } else { + k = 0; /* no scaling */ + if (iu > 0x7f000000 || iv > 0x7f000000) { + u *= .0009765625; /* scale 2**-10 to avoid overflow */ + v *= .0009765625; + k = 1; /* scale by 2**-10 */ + } + /* + * Use simulated higher precision arithmetic to compute: + * r = u * log(hypot(x, y)) - v * atan2(y, x) + * q = u * atan2(y, x) + v * log(hypot(x, y)) + */ + t1 = __k_clog_r(x, y, &t2); + t3 = __k_atan2(y, x, &t4); + x1 = t1; + y1 = t3; + u1 = u; + v1 = v; + ((int *) &u1)[LOWORD] &= 0xf8000000; + ((int *) &v1)[LOWORD] &= 0xf8000000; + ((int *) &x1)[LOWORD] &= 0xf8000000; + ((int *) &y1)[LOWORD] &= 0xf8000000; + x2 = t2 - (x1 - t1); /* log(hypot(x,y)) = x1 + x2 */ + y2 = t4 - (y1 - t3); /* atan2(y,x) = y1 + y2 */ + /* compute q = u * atan2(y, x) + v * log(hypot(x, y)) */ + if (j != 2) { + b[0] = u1 * y1; + b[1] = (u - u1) * y1 + u * y2; + if (j == 1) { /* v = 0 */ + w1 = b[0] + b[1]; + w2 = b[1] - (w1 - b[0]); + } else { + b[2] = v1 * x1; + b[3] = (v - v1) * x1 + v * x2; + w1 = sum4fp(b, &w2); + } + sincos(w1, &t1, &t2); + sincos(w2, &t3, &t4); + s = t1 * t4 + t3 * t2; + c = t2 * t4 - t1 * t3; + if (k == 1) + /* + * square (cos(q) + i sin(q)) 10 times to get + * cos(2^10 * q) + i sin(2^10 * q), undoing + * the 2**-10 scaling applied to u and v + */ + for (i = 0; i < 10; i++) { + t1 = s * c; + c = (c + s) * (c - s); + s = t1 + t1; + } + } + /* compute r = u * (t1, t2) - v * (t3, t4) */ + b[0] = u1 * x1; + b[1] = (u - u1) * x1 + u * x2; + if (j == 1) { /* v = 0 */ + w1 = b[0] + b[1]; + w2 = b[1] - (w1 - b[0]); + } else { + b[2] = -v1 * y1; + b[3] = (v1 - v) * y1 - v * y2; + w1 = sum4fp(b, &w2); + } + /* check over/underflow for exp(w1 + w2) */ + if (k && fabs(w1) < 1000.0) { + w1 *= 1024; w2 *= 1024; k = 0; + } + hx = ((int *) &w1)[HIWORD]; + lx = 
((int *) &w1)[LOWORD]; + ix = hx & 0x7fffffff; + /* compute exp(w1 + w2) */ + if (ix < 0x3c900000) /* exp(tiny < 2**-54) = 1 */ + r = one; + else if (ix >= 0x40880000) /* overflow/underflow */ + r = (hx < 0)? tiny * tiny : huge * huge; + else { /* compute exp(w1 + w2) */ + k = (int) (invln2 * w1 + ((hx >= 0)? 0.5 : -0.5)); + t1 = (double) k; + t2 = w1 - t1 * ln2hi; + t3 = w2 - t1 * ln2lo; + r = exp(t2 + t3); + } + if (c != zero) c *= r; + if (s != zero) s *= r; + if (k != 0) { + c = scalbn(c, k); + s = scalbn(s, k); + } + D_RE(ans) = c; + D_IM(ans) = s; + } + return (ans); +} diff --git a/usr/src/lib/libm/common/complex/cpowf.c b/usr/src/lib/libm/common/complex/cpowf.c new file mode 100644 index 0000000000..5c06c59757 --- /dev/null +++ b/usr/src/lib/libm/common/complex/cpowf.c @@ -0,0 +1,174 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma weak cpowf = __cpowf + +#include "libm.h" +#include "complex_wrapper.h" + +extern void sincospi(double, double *, double *); +extern void sincospif(float, float *, float *); +extern double atan2pi(double, double); +extern float atan2pif(float, float); + +#if defined(__i386) && !defined(__amd64) +extern int __swapRP(int); +#endif + +static const double + dpi = 3.1415926535897931160E0, /* Hex 2^ 1 * 1.921FB54442D18 */ + dhalf = 0.5, + dsqrt2 = 1.41421356237309514547, /* 3FF6A09E 667F3BCD */ + dinvpi = 0.3183098861837906715377675; + +static const float one = 1.0F, zero = 0.0F; + +#define hiinf 0x7f800000 + +/* + * cpowf(z, w) computes z ** w for z = x + iy and w = u + iv. + * + * Real exponents (v == 0) are first matched against the closed-form + * special cases; j is set to 1 when none of them applies. The general + * path is evaluated in double precision as + * exp(r) * (cospi(q/pi) + i sinpi(q/pi)). + */ +fcomplex +cpowf(fcomplex z, fcomplex w) { + fcomplex ans; + float x, y, u, v, t, c, s; + double dx, dy, du, dv, dt, dc, ds, dp, dq, dr; + int ix, iy, hx, hy, hv, hu, iu, iv, j; + + x = F_RE(z); + y = F_IM(z); + u = F_RE(w); + v = F_IM(w); + hx = THE_WORD(x); + hy = THE_WORD(y); + hu = THE_WORD(u); + hv = THE_WORD(v); + ix = hx & 0x7fffffff; + iy = hy & 0x7fffffff; + iu = hu & 0x7fffffff; + iv = hv & 0x7fffffff; + + j = 0; + if (iv == 0) { /* z**(real) */ + if (hu == 0x3f800000) { /* (anything) ** 1 is itself */ + F_RE(ans) = x; + F_IM(ans) = y; + } else if (iu == 0) { /* (anything) ** 0 is 1 */ + F_RE(ans) = one; + F_IM(ans) = zero; + } else if (iy == 0) { /* (real)**(real) */ + F_IM(ans) = zero; + if (hx < 0 && ix < hiinf && iu < hiinf) { + /* -x ** u is exp(i*pi*u)*pow(x,u) */ + t = powf(-x, u); + sincospif(u, &s, &c); + F_RE(ans) = (c == zero)? c: c * t; + F_IM(ans) = (s == zero)? s: s * t; + } else { + F_RE(ans) = powf(x, u); + } + } else if (ix == 0 || ix >= hiinf || iy >= hiinf) { + if (ix > hiinf || iy > hiinf || iu > hiinf) { + F_RE(ans) = F_IM(ans) = x + y + u; + } else { + /* v (zero here) is reused to hold the modulus */ + v = fabsf(y); + if (ix != 0) + v += fabsf(x); + /* the modulus of z ** u is hypot(x,y) ** u */ + v = powf(v, u); + t = atan2pif(y, x); + sincospif(t * u, &s, &c); + F_RE(ans) = (c == zero)? c: c * v; + F_IM(ans) = (s == zero)? 
s: s * v; + } + } else if (ix == iy) { /* if |x| == |y| */ +#if defined(__i386) && !defined(__amd64) + int rp = __swapRP(fp_extended); +#endif + /* + * hypot(x,y) = sqrt(2) * |x|; the sign of x is carried + * by the angle dt, so the base of pow() must be |x| + */ + dx = (double)fabsf(x); + du = (double)u; + dt = (hx >= 0)? 0.25 : 0.75; + if (hy < 0) + dt = -dt; + dr = pow(dsqrt2 * dx, du); + sincospi(dt * du, &ds, &dc); + F_RE(ans) = (float)(dr * dc); + F_IM(ans) = (float)(dr * ds); +#if defined(__i386) && !defined(__amd64) + if (rp != fp_extended) + (void) __swapRP(rp); +#endif + } else { + j = 1; + } + if (j == 0) + return (ans); + } + if (iu >= hiinf || iv >= hiinf || ix >= hiinf || iy >= hiinf) { + /* + * non-zero imaginary part(s) with inf component(s) yields NaN + */ + t = fabsf(x) + fabsf(y) + fabsf(u) + fabsf(v); + F_RE(ans) = F_IM(ans) = t - t; + } else { +#if defined(__i386) && !defined(__amd64) + int rp = __swapRP(fp_extended); +#endif + /* INDENT OFF */ + /* + * r = u*log(hypot(x,y))-v*atan2(y,x), + * q = u*atan2(y,x)+v*log(hypot(x,y)) + * or + * r = u*log(hypot(x,y))-v*pi*atan2pi(y,x), + * q/pi = u*atan2pi(y,x)+v*log(hypot(x,y))/pi + * ans = exp(r)*(cospi(q/pi) + i sinpi(q/pi)) + */ + /* INDENT ON */ + dx = (double)x; + dy = (double)y; + du = (double)u; + dv = (double)v; + if (ix > 0x3f000000 && ix < 0x40000000) /* .5 < |x| < 2 */ + dt = dhalf * log1p((dx - 1.0) * (dx + 1.0) + dy * dy); + else if (iy > 0x3f000000 && iy < 0x40000000) /* .5 < |y| < 2 */ + dt = dhalf * log1p((dy - 1.0) * (dy + 1.0) + dx * dx); + else + dt = dhalf * log(dx * dx + dy * dy); + dp = atan2pi(dy, dx); + if (iv == 0) { /* dv = 0 */ + dr = exp(du * dt); + dq = du * dp; + } else { + dr = exp(du * dt - dv * dp * dpi); + dq = du * dp + dv * dt * dinvpi; + } + sincospi(dq, &ds, &dc); + F_RE(ans) = (float)(dr * dc); + F_IM(ans) = (float)(dr * ds); +#if defined(__i386) && !defined(__amd64) + if (rp != fp_extended) + (void) __swapRP(rp); +#endif + } + return (ans); +} diff --git a/usr/src/lib/libm/common/complex/cpowl.c b/usr/src/lib/libm/common/complex/cpowl.c new file mode 100644 index 0000000000..091155d452 
--- /dev/null +++ b/usr/src/lib/libm/common/complex/cpowl.c @@ -0,0 +1,280 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma weak cpowl = __cpowl + +#include "libm.h" /* __k_clog_rl/__k_atan2l */ +/* atan2l/atan2pil/exp2l/expl/fabsl/hypotl/isinfl/logl/powl/sincosl/sincospil */ +#include "complex_wrapper.h" +#include "longdouble.h" + +#if defined(__sparc) +#define HALF(x) ((int *) &x)[3] = 0; ((int *) &x)[2] &= 0xfe000000 +#define LAST(x) ((int *) &x)[3] +#elif defined(__x86) +#define HALF(x) ((int *) &x)[0] = 0 +#define LAST(x) ((int *) &x)[0] +#endif + +/* INDENT OFF */ +static const int hiinf = 0x7fff0000; +static const long double + tiny = 1.0e-4000L, + huge = 1.0e4000L, +#if defined(__x86) + /* 43 significant bits, 21 trailing zeros */ + ln2hil = 0.693147180559890330187045037746429443359375L, + ln2lol = 5.497923018708371174712471612513436025525412068e-14L, +#else /* sparc */ + /* 0x3FF962E4 2FEFA39E F35793C7 00000000 */ + ln2hil = 0.693147180559945309417231592858066493070671489074L, + ln2lol = 5.28600110075004828645286235820646730106802446566153e-25L, +#endif + invln2 = 1.442695040888963407359924681001892137427e+0000L, + one = 1.0L, + zero = 0.0L; +/* INDENT ON */ + +/* + * Assuming |t[0]| > |t[1]| and |t[2]| > |t[3]|, sum4fpl subroutine + * compute t[0] + t[1] + t[2] + t[3] into two long double fp numbers. 
+ */ +static long double sum4fpl(long double ta[], long double *w) +{ + long double t1, t2, t3, t4, w1, w2, t; + t1 = ta[0]; t2 = ta[1]; t3 = ta[2]; t4 = ta[3]; + /* + * Rearrange ti so that |t1| >= |t2| >= |t3| >= |t4| + */ + if (fabsl(t4) > fabsl(t1)) { + t = t1; t1 = t3; t3 = t; + t = t2; t2 = t4; t4 = t; + } else if (fabsl(t3) > fabsl(t1)) { + t = t1; t1 = t3; + if (fabsl(t4) > fabsl(t2)) { + t3 = t4; t4 = t2; t2 = t; + } else { + t3 = t2; t2 = t; + } + } else if (fabsl(t3) > fabsl(t2)) { + t = t2; t2 = t3; + if (fabsl(t4) > fabsl(t2)) { + t3 = t4; t4 = t; + } else + t3 = t; + } + /* summing r = t1 + t2 + t3 + t4 to w1 + w2 */ + w1 = t3 + t4; + w2 = t4 - (w1 - t3); + t = t2 + w1; + w2 += w1 - (t - t2); + w1 = t + w2; + w2 += t - w1; + t = t1 + w1; + w2 += w1 - (t - t1); + w1 = t + w2; + *w = w2 - (w1 - t); + return (w1); +} + +ldcomplex +cpowl(ldcomplex z, ldcomplex w) { + ldcomplex ans; + long double x, y, u, v, t, c, s, r; + long double t1, t2, t3, t4, x1, x2, y1, y2, u1, v1, b[4], w1, w2; + int ix, iy, hx, hy, hv, hu, iu, iv, i, j, k; + + x = LD_RE(z); + y = LD_IM(z); + u = LD_RE(w); + v = LD_IM(w); + hx = HI_XWORD(x); + hy = HI_XWORD(y); + hu = HI_XWORD(u); + hv = HI_XWORD(v); + ix = hx & 0x7fffffff; + iy = hy & 0x7fffffff; + iu = hu & 0x7fffffff; + iv = hv & 0x7fffffff; + + j = 0; + if (v == zero) { /* z**(real) */ + if (u == one) { /* (anything) ** 1 is itself */ + LD_RE(ans) = x; + LD_IM(ans) = y; + } else if (u == zero) { /* (anything) ** 0 is 1 */ + LD_RE(ans) = one; + LD_IM(ans) = zero; + } else if (y == zero) { /* real ** real */ + LD_IM(ans) = zero; + if (hx < 0 && ix < hiinf && iu < hiinf) { + /* -x ** u is exp(i*pi*u)*pow(x,u) */ + r = powl(-x, u); + sincospil(u, &s, &c); + LD_RE(ans) = (c == zero)? c: c * r; + LD_IM(ans) = (s == zero)? 
s: s * r; + } else + LD_RE(ans) = powl(x, u); + } else if (x == zero || ix >= hiinf || iy >= hiinf) { + if (isnanl(x) || isnanl(y) || isnanl(u)) + LD_RE(ans) = LD_IM(ans) = x + y + u; + else { + if (x == zero) + r = fabsl(y); + else + r = fabsl(x) + fabsl(y); + t = atan2pil(y, x); + sincospil(t * u, &s, &c); + LD_RE(ans) = (c == zero)? c: c * r; + LD_IM(ans) = (s == zero)? s: s * r; + } + } else if (fabsl(x) == fabsl(y)) { /* |x| = |y| */ + if (hx >= 0) { + t = (hy >= 0)? 0.25L : -0.25L; + sincospil(t * u, &s, &c); + } else if ((LAST(u) & 3) == 0) { + t = (hy >= 0)? 0.75L : -0.75L; + sincospil(t * u, &s, &c); + } else { + r = (hy >= 0)? u : -u; + t = -0.25L * r; + w1 = r + t; + w2 = t - (w1 - r); + sincospil(w1, &t1, &t2); + sincospil(w2, &t3, &t4); + s = t1 * t4 + t3 * t2; + c = t2 * t4 - t1 * t3; + } + if (ix < 0x3ffe0000) /* |x| < 1/2 */ + r = powl(fabsl(x + x), u) * exp2l(-0.5L * u); + else if (ix >= 0x3fff0000 || iu < 0x400cfff8) + /* |x| >= 1 or |u| < 16383 */ + r = powl(fabsl(x), u) * exp2l(0.5L * u); + else /* special treatment */ + j = 2; + if (j == 0) { + LD_RE(ans) = (c == zero)? c: c * r; + LD_IM(ans) = (s == zero)? 
s: s * r; + } + } else + j = 1; + if (j == 0) + return (ans); + } + if (iu >= hiinf || iv >= hiinf || ix >= hiinf || iy >= hiinf) { + /* + * non-zero imag part(s) with inf component(s) yields NaN + */ + t = fabsl(x) + fabsl(y) + fabsl(u) + fabsl(v); + LD_RE(ans) = LD_IM(ans) = t - t; + } else { + k = 0; /* no scaling */ + if (iu > 0x7ffe0000 || iv > 0x7ffe0000) { + u *= 1.52587890625000000000e-05L; + v *= 1.52587890625000000000e-05L; + k = 1; /* scale u and v by 2**-16 */ + } + /* + * Use similated higher precision arithmetic to compute: + * r = u * log(hypot(x, y)) - v * atan2(y, x) + * q = u * atan2(y, x) + v * log(hypot(x, y)) + */ + + t1 = __k_clog_rl(x, y, &t2); + t3 = __k_atan2l(y, x, &t4); + x1 = t1; HALF(x1); + y1 = t3; HALF(y1); + u1 = u; HALF(u1); + v1 = v; HALF(v1); + x2 = t2 - (x1 - t1); /* log(hypot(x,y)) = x1 + x2 */ + y2 = t4 - (y1 - t3); /* atan2(y,x) = y1 + y2 */ + /* compute q = u * atan2(y, x) + v * log(hypot(x, y)) */ + if (j != 2) { + b[0] = u1 * y1; + b[1] = (u - u1) * y1 + u * y2; + if (j == 1) { /* v = 0 */ + w1 = b[0] + b[1]; + w2 = b[1] - (w1 - b[0]); + } else { + b[2] = v1 * x1; + b[3] = (v - v1) * x1 + v * x2; + w1 = sum4fpl(b, &w2); + } + sincosl(w1, &t1, &t2); + sincosl(w2, &t3, &t4); + s = t1 * t4 + t3 * t2; + c = t2 * t4 - t1 * t3; + if (k == 1) /* square j times */ + for (i = 0; i < 10; i++) { + t1 = s * c; + c = (c + s) * (c - s); + s = t1 + t1; + } + } + /* compute r = u * (t1, t2) - v * (t3, t4) */ + b[0] = u1 * x1; + b[1] = (u - u1) * x1 + u * x2; + if (j == 1) { /* v = 0 */ + w1 = b[0] + b[1]; + w2 = b[1] - (w1 - b[0]); + } else { + b[2] = -v1 * y1; + b[3] = (v1 - v) * y1 - v * y2; + w1 = sum4fpl(b, &w2); + } + /* scale back unless w1 is large enough to cause exception */ + if (k != 0 && fabsl(w1) < 20000.0L) { + w1 *= 65536.0L; w2 *= 65536.0L; + } + hx = HI_XWORD(w1); + ix = hx & 0x7fffffff; + /* compute exp(w1 + w2) */ + k = 0; + if (ix < 0x3f8c0000) /* exp(tiny < 2**-115) = 1 */ + r = one; + else if (ix >= 0x400c6760) /* 
overflow/underflow */ + r = (hx < 0)? tiny * tiny : huge * huge; + else { /* compute exp(w1 + w2) */ + k = (int) (invln2 * w1 + ((hx >= 0)? 0.5L : -0.5L)); + t1 = (long double) k; + t2 = w1 - t1 * ln2hil; + t3 = w2 - t1 * ln2lol; + r = expl(t2 + t3); + } + if (c != zero) c *= r; + if (s != zero) s *= r; + if (k != 0) { + c = scalbnl(c, k); + s = scalbnl(s, k); + } + LD_RE(ans) = c; + LD_IM(ans) = s; + } + return (ans); +} diff --git a/usr/src/lib/libm/common/complex/cproj.c b/usr/src/lib/libm/common/complex/cproj.c new file mode 100644 index 0000000000..10ed9ad20a --- /dev/null +++ b/usr/src/lib/libm/common/complex/cproj.c @@ -0,0 +1,69 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma weak cproj = __cproj + +/* INDENT OFF */ +/* + * dcomplex cproj(dcomplex z); + * + * If one of the component of z = (x,y) is an inf, then + * cproj(z) = (+inf, copysign(0,y)); + * otherwise, + * cproj(z) = z + */ +/* INDENT ON */ + +#include "libm.h" /* fabs */ +#include "complex_wrapper.h" + +static const double zero = 0.0; + +dcomplex +cproj(dcomplex z) { + double x, y; + int ix, iy, hx, hy, lx, ly; + + x = D_RE(z); + y = D_IM(z); + hx = HI_WORD(x); + lx = LO_WORD(x); + hy = HI_WORD(y); + ly = LO_WORD(y); + ix = hx & 0x7fffffff; + iy = hy & 0x7fffffff; + if (ISINF(iy, ly)) { + D_RE(z) = fabs(y); + D_IM(z) = hy >= 0 ? zero : -zero; + } else if (ISINF(ix, lx)) { + D_RE(z) = fabs(x); + D_IM(z) = hy >= 0 ? zero : -zero; + } + return (z); +} diff --git a/usr/src/lib/libm/common/complex/cprojf.c b/usr/src/lib/libm/common/complex/cprojf.c new file mode 100644 index 0000000000..53585dcde9 --- /dev/null +++ b/usr/src/lib/libm/common/complex/cprojf.c @@ -0,0 +1,58 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 
+ * Use is subject to license terms. + */ + +#pragma weak cprojf = __cprojf + +#include "libm.h" +#include "complex_wrapper.h" + +/* INDENT OFF */ +static const float zero = 0.0F; +/* INDENT ON */ + +fcomplex +cprojf(fcomplex z) { + float x, y; + int ix, iy, hx, hy; + + x = F_RE(z); + y = F_IM(z); + hx = THE_WORD(x); + hy = THE_WORD(y); + ix = hx & 0x7fffffff; + iy = hy & 0x7fffffff; + if (iy == 0x7f800000) { + F_RE(z) = fabsf(y); + F_IM(z) = hy >= 0 ? zero : -zero; + } else if (ix == 0x7f800000) { + F_RE(z) = fabsf(x); + F_IM(z) = hy >= 0 ? zero : -zero; + } + return (z); +} diff --git a/usr/src/lib/libm/common/complex/cprojl.c b/usr/src/lib/libm/common/complex/cprojl.c new file mode 100644 index 0000000000..76a88e2b96 --- /dev/null +++ b/usr/src/lib/libm/common/complex/cprojl.c @@ -0,0 +1,60 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma weak cprojl = __cprojl + +#include "libm.h" /* fabsl */ +#include "complex_wrapper.h" +#include "longdouble.h" + +/* INDENT OFF */ +static const long double zero = 0.0L; +/* INDENT ON */ + +ldcomplex +cprojl(ldcomplex z) { + long double x, y; + int hy; + + x = LD_RE(z); + y = LD_IM(z); +#if defined(__x86) + hy = ((int *) &y)[2] << 16; +#else + hy = ((int *) &y)[0]; +#endif + if (isinfl(y)) { + LD_RE(z) = fabsl(y); + LD_IM(z) = hy >= 0 ? zero : -zero; + } else if (isinfl(x)) { + LD_RE(z) = fabsl(x); + LD_IM(z) = hy >= 0 ? zero : -zero; + } + return (z); +} diff --git a/usr/src/lib/libm/common/complex/creal.c b/usr/src/lib/libm/common/complex/creal.c new file mode 100644 index 0000000000..2cc287fe36 --- /dev/null +++ b/usr/src/lib/libm/common/complex/creal.c @@ -0,0 +1,38 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma weak creal = __creal + +#include "libm.h" +#include "complex_wrapper.h" + +double +creal(dcomplex z) { + return (D_RE(z)); +} diff --git a/usr/src/lib/libm/common/complex/crealf.c b/usr/src/lib/libm/common/complex/crealf.c new file mode 100644 index 0000000000..fb5fb4cb38 --- /dev/null +++ b/usr/src/lib/libm/common/complex/crealf.c @@ -0,0 +1,38 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak crealf = __crealf + +#include "libm.h" +#include "complex_wrapper.h" + +float +crealf(fcomplex z) { + return (F_RE(z)); +} diff --git a/usr/src/lib/libm/common/complex/creall.c b/usr/src/lib/libm/common/complex/creall.c new file mode 100644 index 0000000000..0b2b2f62a7 --- /dev/null +++ b/usr/src/lib/libm/common/complex/creall.c @@ -0,0 +1,38 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak creall = __creall + +#include "libm.h" +#include "complex_wrapper.h" + +long double +creall(ldcomplex z) { + return (LD_RE(z)); +} diff --git a/usr/src/lib/libm/common/complex/csin.c b/usr/src/lib/libm/common/complex/csin.c new file mode 100644 index 0000000000..3bac36288c --- /dev/null +++ b/usr/src/lib/libm/common/complex/csin.c @@ -0,0 +1,61 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak csin = __csin + +/* INDENT OFF */ +/* + * dcomplex csin(dcomplex z); + * + * If z = x+iy, then since csin(iz) = i*csinh(z), we have + * + * csin(z) = csin((-1)*(-z)) = csin(i*i*(-z)) + * = i*csinh(i*(-z)) = i*csinh(i*(-x-yi)) + * = i*csinh(y-ix) + * = -Im(csinh(y-ix))+i*Re(csinh(y-ix)) + */ +/* INDENT ON */ + +#include "libm.h" +#include "complex_wrapper.h" + +dcomplex +csin(dcomplex z) { + double x, y; + dcomplex ans, ct; + + x = D_RE(z); + y = D_IM(z); + D_RE(z) = y; + D_IM(z) = -x; + ct = csinh(z); + D_RE(ans) = -D_IM(ct); + D_IM(ans) = D_RE(ct); + return (ans); +} diff --git a/usr/src/lib/libm/common/complex/csinf.c b/usr/src/lib/libm/common/complex/csinf.c new file mode 100644 index 0000000000..6c88d0e1d1 --- /dev/null +++ b/usr/src/lib/libm/common/complex/csinf.c @@ -0,0 +1,48 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak csinf = __csinf + +#include "libm.h" +#include "complex_wrapper.h" + +fcomplex +csinf(fcomplex z) { + float x, y; + fcomplex ans, ct; + + x = F_RE(z); + y = F_IM(z); + F_RE(z) = y; + F_IM(z) = -x; + ct = csinhf(z); + F_RE(ans) = -F_IM(ct); + F_IM(ans) = F_RE(ct); + return (ans); +} diff --git a/usr/src/lib/libm/common/complex/csinh.c b/usr/src/lib/libm/common/complex/csinh.c new file mode 100644 index 0000000000..4bca7f6ade --- /dev/null +++ b/usr/src/lib/libm/common/complex/csinh.c @@ -0,0 +1,137 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma weak csinh = __csinh + +/* INDENT OFF */ +/* + * dcomplex csinh(dcomplex z); + * + * z -z x -x + * e - e e (cos(y)+i*sin(y)) - e (cos(-y)+i*sin(-y)) + * sinh z = -------------- = --------------------------------------------- + * 2 2 + * x -x x -x + * cos(y) ( e - e ) + i*sin(y) (e + e ) + * = -------------------------------------------- + * 2 + * + * = cos(y) sinh(x) + i sin(y) cosh(x) + * + * Implementation Note + * ------------------- + * + * |x| -|x| |x| -2|x| -2|x| -P-4 + * Note that e +- e = e ( 1 +- e ). If e < 2 , where + * + * P stands for the number of significant bits of the machine precision, + * |x| + * then the result will be rounded to e . Therefore, we have + * + * z + * e + * sinh z = ----- if |x| >= (P/2 + 2)*ln2 + * 2 + * + * EXCEPTION (conform to ISO/IEC 9899:1999(E)): + * csinh(0,0)=(0,0) + * csinh(0,inf)=(+-0,NaN) + * csinh(0,NaN)=(+-0,NaN) + * csinh(x,inf) = (NaN,NaN) for finite positive x + * csinh(x,NaN) = (NaN,NaN) for finite non-zero x + * csinh(inf,0) = (inf, 0) + * csinh(inf,y) = (inf*cos(y),inf*sin(y)) for positive finite y + * csinh(inf,inf) = (+-inf,NaN) + * csinh(inf,NaN) = (+-inf,NaN) + * csinh(NaN,0) = (NaN,0) + * csinh(NaN,y) = (NaN,NaN) for non-zero y + * csinh(NaN,NaN) = (NaN,NaN) + */ +/* INDENT ON */ + +#include "libm.h" /* cosh/exp/fabs/scalbn/sinh/sincos/__k_cexp */ +#include "complex_wrapper.h" + +dcomplex +csinh(dcomplex z) { + double t, x, y, S, C; + int hx, ix, lx, hy, iy, ly, n; + dcomplex ans; + + x = D_RE(z); + y = D_IM(z); + hx = HI_WORD(x); + lx = LO_WORD(x); + ix = hx & 0x7fffffff; + hy = HI_WORD(y); + ly = LO_WORD(y); + iy = hy & 0x7fffffff; + x = fabs(x); + y = fabs(y); + + (void) sincos(y, &S, &C); + if (ix >= 0x403c0000) { /* |x| > 28 = prec/2 (14,28,34,60) */ + if (ix >= 0x40862E42) { /* |x| > 709.78... 
~ log(2**1024) */ + if (ix >= 0x7ff00000) { /* |x| is inf or NaN */ + if ((iy | ly) == 0) { + D_RE(ans) = x; + D_IM(ans) = y; + } else if (iy >= 0x7ff00000) { + D_RE(ans) = x; + D_IM(ans) = x - y; + } else { + D_RE(ans) = C * x; + D_IM(ans) = S * x; + } + } else { + /* return exp(x)=t*2**n */ + t = __k_cexp(x, &n); + D_RE(ans) = scalbn(C * t, n - 1); + D_IM(ans) = scalbn(S * t, n - 1); + } + } else { + t = exp(x) * 0.5; + D_RE(ans) = C * t; + D_IM(ans) = S * t; + } + } else { + if ((ix | lx) == 0) { /* x = 0, return (0,S) */ + D_RE(ans) = 0.0; + D_IM(ans) = S; + } else { + D_RE(ans) = C * sinh(x); + D_IM(ans) = S * cosh(x); + } + } + if (hx < 0) + D_RE(ans) = -D_RE(ans); + if (hy < 0) + D_IM(ans) = -D_IM(ans); + return (ans); +} diff --git a/usr/src/lib/libm/common/complex/csinhf.c b/usr/src/lib/libm/common/complex/csinhf.c new file mode 100644 index 0000000000..0ef3012b7c --- /dev/null +++ b/usr/src/lib/libm/common/complex/csinhf.c @@ -0,0 +1,102 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma weak csinhf = __csinhf + +#include "libm.h" +#include "complex_wrapper.h" + +#if defined(__i386) && !defined(__amd64) +extern int __swapRP(int); +#endif + +static const float zero = 0.0F, half = 0.5F; + +fcomplex +csinhf(fcomplex z) { + float x, y, S, C; + double t; + int hx, ix, hy, iy, n; + fcomplex ans; + + x = F_RE(z); + y = F_IM(z); + hx = THE_WORD(x); + ix = hx & 0x7fffffff; + hy = THE_WORD(y); + iy = hy & 0x7fffffff; + x = fabsf(x); + y = fabsf(y); + + sincosf(y, &S, &C); + if (ix >= 0x41600000) { /* |x| > 14 = prec/2 (14,28,34,60) */ + if (ix >= 0x42B171AA) { /* |x| > 88.722... ~ log(2**128) */ + if (ix >= 0x7f800000) { /* |x| is inf or NaN */ + if (iy == 0) { + F_RE(ans) = x; + F_IM(ans) = y; + } else if (iy >= 0x7f800000) { + F_RE(ans) = x; + F_IM(ans) = x - y; + } else { + F_RE(ans) = C * x; + F_IM(ans) = S * x; + } + } else { +#if defined(__i386) && !defined(__amd64) + int rp = __swapRP(fp_extended); +#endif + /* return (C, S) * exp(x) / 2 */ + t = __k_cexp((double)x, &n); + F_RE(ans) = (float)scalbn(C * t, n - 1); + F_IM(ans) = (float)scalbn(S * t, n - 1); +#if defined(__i386) && !defined(__amd64) + if (rp != fp_extended) + (void) __swapRP(rp); +#endif + } + } else { + t = expf(x) * half; + F_RE(ans) = C * t; + F_IM(ans) = S * t; + } + } else { + if (ix == 0) { /* x = 0, return (0,S) */ + F_RE(ans) = zero; + F_IM(ans) = S; + } else { + F_RE(ans) = C * sinhf(x); + F_IM(ans) = S * coshf(x); + } + } + if (hx < 0) + F_RE(ans) = -F_RE(ans); + if (hy < 0) + F_IM(ans) = -F_IM(ans); + return (ans); +} diff --git a/usr/src/lib/libm/common/complex/csinhl.c b/usr/src/lib/libm/common/complex/csinhl.c new file mode 100644 index 0000000000..1660a9c022 --- /dev/null +++ b/usr/src/lib/libm/common/complex/csinhl.c @@ -0,0 +1,93 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak csinhl = __csinhl + +#include "libm.h" /* coshl/expl/fabsl/scalbnl/sincosl/sinhl/__k_cexpl */ +#include "complex_wrapper.h" + +/* INDENT OFF */ +static const long double zero = 0.0L, half = 0.5L; +/* INDENT ON */ + +ldcomplex +csinhl(ldcomplex z) { + long double t, x, y, S, C; + int hx, ix, hy, iy, n; + ldcomplex ans; + + x = LD_RE(z); + y = LD_IM(z); + hx = HI_XWORD(x); + ix = hx & 0x7fffffff; + hy = HI_XWORD(y); + iy = hy & 0x7fffffff; + x = fabsl(x); + y = fabsl(y); + + (void) sincosl(y, &S, &C); + if (ix >= 0x4004e000) { /* |x| > 60 = prec/2 (14,28,34,60) */ + if (ix >= 0x400C62E4) { /* |x| > 11356.52... 
~ log(2**16384) */ + if (ix >= 0x7fff0000) { /* |x| is inf or NaN */ + if (y == zero) { + LD_RE(ans) = x; + LD_IM(ans) = y; + } else if (iy >= 0x7fff0000) { + LD_RE(ans) = x; + LD_IM(ans) = x - y; + } else { + LD_RE(ans) = C * x; + LD_IM(ans) = S * x; + } + } else { + /* return exp(x)=t*2**n */ + t = __k_cexpl(x, &n); + LD_RE(ans) = scalbnl(C * t, n - 1); + LD_IM(ans) = scalbnl(S * t, n - 1); + } + } else { + t = expl(x) * half; + LD_RE(ans) = C * t; + LD_IM(ans) = S * t; + } + } else { + if (x == zero) { /* x = 0, return (0,S) */ + LD_RE(ans) = zero; + LD_IM(ans) = S; + } else { + LD_RE(ans) = C * sinhl(x); + LD_IM(ans) = S * coshl(x); + } + } + if (hx < 0) + LD_RE(ans) = -LD_RE(ans); + if (hy < 0) + LD_IM(ans) = -LD_IM(ans); + return (ans); +} diff --git a/usr/src/lib/libm/common/complex/csinl.c b/usr/src/lib/libm/common/complex/csinl.c new file mode 100644 index 0000000000..d5ec9a5661 --- /dev/null +++ b/usr/src/lib/libm/common/complex/csinl.c @@ -0,0 +1,48 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma weak csinl = __csinl + +#include "libm.h" +#include "complex_wrapper.h" + +ldcomplex +csinl(ldcomplex z) { + long double x, y; + ldcomplex ans, ct; + + x = LD_RE(z); + y = LD_IM(z); + LD_RE(z) = y; + LD_IM(z) = -x; + ct = csinhl(z); + LD_RE(ans) = -LD_IM(ct); + LD_IM(ans) = LD_RE(ct); + return (ans); +} diff --git a/usr/src/lib/libm/common/complex/csqrt.c b/usr/src/lib/libm/common/complex/csqrt.c new file mode 100644 index 0000000000..1a00236677 --- /dev/null +++ b/usr/src/lib/libm/common/complex/csqrt.c @@ -0,0 +1,210 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak csqrt = __csqrt + +/* INDENT OFF */ +/* + * dcomplex csqrt(dcomplex z); + * + * 2 2 2 + * Let w=r+i*s = sqrt(x+iy). Then (r + i s) = r - s + i 2sr = x + i y. + * + * Hence x = r*r-s*s, y = 2sr. + * + * Note that x*x+y*y = (s*s+r*r)**2. Thus, we have + * ________ + * 2 2 / 2 2 + * (1) r + s = \/ x + y , + * + * 2 2 + * (2) r - s = x + * + * (3) 2sr = y. 
+ * + * Perform (1)-(2) and (1)+(2), we obtain + * + * 2 + * (4) 2 r = hypot(x,y)+x, + * + * 2 + * (5) 2*s = hypot(x,y)-x + * ________ + * / 2 2 + * where hypot(x,y) = \/ x + y . + * + * In order to avoid numerical cancellation, we use formula (4) for + * positive x, and (5) for negative x. The other component is then + * computed by formula (3). + * + * + * ALGORITHM + * ------------------ + * + * (assume x and y are of medium size, i.e., no over/underflow in squaring) + * + * If x >=0 then + * ________ + * / 2 2 + * 2 \/ x + y + x y + * r = ---------------------, s = -------; (6) + * 2 2 r + * + * (note that we choose sign(s) = sign(y) to force r >=0). + * Otherwise, + * ________ + * / 2 2 + * 2 \/ x + y - x y + * s = ---------------------, r = -------; (7) + * 2 2 s + * + * EXCEPTION: + * + * One may use the polar coordinate of a complex number to justify the + * following exception cases: + * + * EXCEPTION CASES (conform to ISO/IEC 9899:1999(E)): + * csqrt(+-0+ i 0 ) = 0 + i 0 + * csqrt( x + i inf ) = inf + i inf for all x (including NaN) + * csqrt( x + i NaN ) = NaN + i NaN with invalid for finite x + * csqrt(-inf+ iy ) = 0 + i inf for finite positive-signed y + * csqrt(+inf+ iy ) = inf + i 0 for finite positive-signed y + * csqrt(-inf+ i NaN) = NaN +-i inf + * csqrt(+inf+ i NaN) = inf + i NaN + * csqrt(NaN + i y ) = NaN + i NaN for finite y + * csqrt(NaN + i NaN) = NaN + i NaN + */ +/* INDENT ON */ + +#include "libm.h" /* fabs/sqrt */ +#include "complex_wrapper.h" + +/* INDENT OFF */ +static const double + two300 = 2.03703597633448608627e+90, + twom300 = 4.90909346529772655310e-91, + two599 = 2.07475778444049647926e+180, + twom601 = 1.20495993255144205887e-181, + two = 2.0, + zero = 0.0, + half = 0.5; +/* INDENT ON */ + +dcomplex +csqrt(dcomplex z) { + dcomplex ans; + double x, y, t, ax, ay; + int n, ix, iy, hx, hy, lx, ly; + + x = D_RE(z); + y = D_IM(z); + hx = HI_WORD(x); + lx = LO_WORD(x); + hy = HI_WORD(y); + ly = LO_WORD(y); + ix = hx & 0x7fffffff; + iy 
= hy & 0x7fffffff; + ay = fabs(y); + ax = fabs(x); + if (ix >= 0x7ff00000 || iy >= 0x7ff00000) { + /* x or y is Inf or NaN */ + if (ISINF(iy, ly)) + D_IM(ans) = D_RE(ans) = ay; + else if (ISINF(ix, lx)) { + if (hx > 0) { + D_RE(ans) = ax; + D_IM(ans) = ay * zero; + } else { + D_RE(ans) = ay * zero; + D_IM(ans) = ax; + } + } else + D_IM(ans) = D_RE(ans) = ax + ay; + } else if ((iy | ly) == 0) { /* y = 0 */ + if (hx >= 0) { + D_RE(ans) = sqrt(ax); + D_IM(ans) = zero; + } else { + D_IM(ans) = sqrt(ax); + D_RE(ans) = zero; + } + } else if (ix >= iy) { + n = (ix - iy) >> 20; + if (n >= 30) { /* x >> y or y=0 */ + t = sqrt(ax); + } else if (ix >= 0x5f300000) { /* x > 2**500 */ + ax *= twom601; + y *= twom601; + t = two300 * sqrt(ax + sqrt(ax * ax + y * y)); + } else if (iy < 0x20b00000) { /* y < 2**-500 */ + ax *= two599; + y *= two599; + t = twom300 * sqrt(ax + sqrt(ax * ax + y * y)); + } else + t = sqrt(half * (ax + sqrt(ax * ax + ay * ay))); + if (hx >= 0) { + D_RE(ans) = t; + D_IM(ans) = ay / (t + t); + } else { + D_IM(ans) = t; + D_RE(ans) = ay / (t + t); + } + } else { + n = (iy - ix) >> 20; + if (n >= 30) { /* y >> x */ + if (n >= 60) + t = sqrt(half * ay); + else if (iy >= 0x7fe00000) + t = sqrt(half * ay + half * ax); + else if (ix <= 0x00100000) + t = half * sqrt(two * (ay + ax)); + else + t = sqrt(half * (ay + ax)); + } else if (iy >= 0x5f300000) { /* y > 2**500 */ + ax *= twom601; + y *= twom601; + t = two300 * sqrt(ax + sqrt(ax * ax + y * y)); + } else if (ix < 0x20b00000) { /* x < 2**-500 */ + ax *= two599; + y *= two599; + t = twom300 * sqrt(ax + sqrt(ax * ax + y * y)); + } else + t = sqrt(half * (ax + sqrt(ax * ax + ay * ay))); + if (hx >= 0) { + D_RE(ans) = t; + D_IM(ans) = ay / (t + t); + } else { + D_IM(ans) = t; + D_RE(ans) = ay / (t + t); + } + } + if (hy < 0) + D_IM(ans) = -D_IM(ans); + return (ans); +} diff --git a/usr/src/lib/libm/common/complex/csqrtf.c b/usr/src/lib/libm/common/complex/csqrtf.c new file mode 100644 index 0000000000..b0fef11d0f 
--- /dev/null +++ b/usr/src/lib/libm/common/complex/csqrtf.c @@ -0,0 +1,93 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */
+
+#pragma weak csqrtf = __csqrtf
+
+#include "libm.h"		/* sqrt/fabsf/sqrtf */
+#include "complex_wrapper.h"
+
+/* INDENT OFF */
+static const float zero = 0.0F;
+/* INDENT ON */
+/*
+ * fcomplex csqrtf(fcomplex z);
+ *
+ * Single precision complex square root.  With ax = |x| and ay = |y| the
+ * general case evaluates, in double precision,
+ *
+ *	dt = sqrt((sqrt(ax*ax + ay*ay) + ax) / 2),	w = ay / (2 dt)
+ *
+ * and rounds both to float.  dt goes to the real part when hx >= 0 and
+ * to the imaginary part otherwise; w goes to the other part.  The
+ * imaginary part is negated at the end when hy < 0.
+ *
+ * Inf/NaN inputs and y == 0 are handled by the first two branches.
+ */
+fcomplex
+csqrtf(fcomplex z) {
+	fcomplex ans;
+	double dt, dx, dy;
+	float x, y, t, ax, ay, w;
+	int ix, iy, hx, hy;
+
+	x = F_RE(z);
+	y = F_IM(z);
+	hx = THE_WORD(x);	/* hx < 0 is used below as the sign test for x */
+	hy = THE_WORD(y);	/* hy < 0 is used below as the sign test for y */
+	ix = hx & 0x7fffffff;	/* word of x with the top bit cleared */
+	iy = hy & 0x7fffffff;	/* word of y with the top bit cleared */
+	ay = fabsf(y);
+	ax = fabsf(x);
+	if (ix >= 0x7f800000 || iy >= 0x7f800000) {
+		/* x or y is Inf or NaN */
+		if (iy == 0x7f800000)	/* y is +-inf: both parts become |y| */
+			F_IM(ans) = F_RE(ans) = ay;
+		else if (ix == 0x7f800000) {	/* x is +-inf */
+			if (hx > 0) {	/* ay * zero is NaN for NaN y */
+				F_RE(ans) = ax;
+				F_IM(ans) = ay * zero;
+			} else {
+				F_RE(ans) = ay * zero;
+				F_IM(ans) = ax;
+			}
+		} else	/* remaining case: at least one NaN, no infinity */
+			F_IM(ans) = F_RE(ans) = ax + ay;
+	} else if (iy == 0) {	/* y is +-0 */
+		if (hx >= 0) {
+			F_RE(ans) = sqrtf(ax);
+			F_IM(ans) = zero;
+		} else {
+			F_IM(ans) = sqrtf(ax);
+			F_RE(ans) = zero;
+		}
+	} else {
+		dx = (double) ax;	/* widen before squaring */
+		dy = (double) ay;
+		dt = sqrt(0.5 * (sqrt(dx * dx + dy * dy) + dx));
+		t = (float) dt;
+		w = (float) (dy / (dt + dt));
+		if (hx >= 0) {
+			F_RE(ans) = t;
+			F_IM(ans) = w;
+		} else {
+			F_IM(ans) = t;
+			F_RE(ans) = w;
+		}
+	}
+	if (hy < 0)	/* give the imaginary part the sign of y */
+		F_IM(ans) = -F_IM(ans);
+	return (ans);
+}
diff --git a/usr/src/lib/libm/common/complex/csqrtl.c b/usr/src/lib/libm/common/complex/csqrtl.c
new file mode 100644
index 0000000000..6bd8bccf4d
--- /dev/null
+++ b/usr/src/lib/libm/common/complex/csqrtl.c
@@ -0,0 +1,146 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma weak csqrtl = __csqrtl
+
+#include "libm.h"		/* fabsl/isinfl/sqrtl */
+#include "complex_wrapper.h"
+#include "longdouble.h"
+
+/* INDENT OFF */
+static const long double
+	twom9001 = 2.6854002716003034957421765100615693043656e-2710L,
+	twom4500 = 2.3174987687592429423263242862381544149252e-1355L,
+	two8999 = 9.3095991180122343502582347372163290310934e+2708L,
+	two4500 = 4.3149968987270974283777803545571722250806e+1354L,
+	zero = 0.0L,
+	half = 0.5L,
+	two = 2.0L;
+/* INDENT ON */
+/*
+ * ldcomplex csqrtl(ldcomplex z);
+ *
+ * Long double complex square root.  Works on ax = |x| and ay = |y|:
+ * one value t is computed as sqrt((ax + sqrt(ax*ax + y*y)) / 2), or a
+ * cheaper or rescaled form of it, and the other component is ay/(2t).
+ * t is stored in the real part when hx >= 0 and in the imaginary part
+ * otherwise; the imaginary part is negated at the end when hy < 0.
+ *
+ * The cut-off on the high-word gap n depends on the format: 35 when
+ * __x86 is defined (64 significant bits), 60 otherwise (113 bits).
+ * Very large or very small operands are multiplied by twom9001 or
+ * two8999 before squaring and t is rescaled by two4500 or twom4500.
+ * NOTE(review): those four constants look like 2**-9001, 2**8999,
+ * 2**4500 and 2**-4500; confirm against the literal values.
+ */
+ldcomplex
+csqrtl(ldcomplex z) {
+	ldcomplex ans;
+	long double x, y, t, ax, ay;
+	int n, ix, iy, hx, hy;
+
+	x = LD_RE(z);
+	y = LD_IM(z);
+	hx = HI_XWORD(x);	/* hx < 0 is used below as the sign test for x */
+	hy = HI_XWORD(y);	/* hy < 0 is used below as the sign test for y */
+	ix = hx & 0x7fffffff;	/* high word of x with the top bit cleared */
+	iy = hy & 0x7fffffff;	/* high word of y with the top bit cleared */
+	ay = fabsl(y);
+	ax = fabsl(x);
+	if (ix >= 0x7fff0000 || iy >= 0x7fff0000) {
+		/* x or y is Inf or NaN */
+		if (isinfl(y))	/* both parts become |y| */
+			LD_IM(ans) = LD_RE(ans) = ay;
+		else if (isinfl(x)) {
+			if (hx > 0) {	/* ay * zero is NaN for NaN y */
+				LD_RE(ans) = ax;
+				LD_IM(ans) = ay * zero;
+			} else {
+				LD_RE(ans) = ay * zero;
+				LD_IM(ans) = ax;
+			}
+		} else	/* remaining case: at least one NaN, no infinity */
+			LD_IM(ans) = LD_RE(ans) = ax + ay;
+	} else if (y == zero) {
+		if (hx >= 0) {
+			LD_RE(ans) = sqrtl(ax);
+			LD_IM(ans) = zero;
+		} else {
+			LD_IM(ans) = sqrtl(ax);
+			LD_RE(ans) = zero;
+		}
+	} else if (ix >= iy) {
+		n = (ix - iy) >> 16;	/* presumably the exponent difference */
+#if defined(__x86)	/* 64 significant bits */
+		if (n >= 35)
+#else	/* 113 significant bits */
+		if (n >= 60)
+#endif
+			t = sqrtl(ax);	/* y is ignored under the root */
+		else if (ix >= 0x5f3f0000) {	/* x > 2**8000 */
+			ax *= twom9001;	/* scale down before squaring */
+			y *= twom9001;
+			t = two4500 * sqrtl(ax + sqrtl(ax * ax + y * y));
+		} else if (iy <= 0x20bf0000) {	/* y < 2**-8000 */
+			ax *= two8999;	/* scale up before squaring */
+			y *= two8999;
+			t = twom4500 * sqrtl(ax + sqrtl(ax * ax + y * y));
+		} else
+			t = sqrtl(half * (ax + sqrtl(ax * ax + y * y)));
+
+		if (hx >= 0) {
+			LD_RE(ans) = t;
+			LD_IM(ans) = ay / (t + t);
+		} else {
+			LD_IM(ans) = t;
+			LD_RE(ans) = ay / (t + t);
+		}
+	} else {
+		n = (iy - ix) >> 16;	/* presumably the exponent difference */
+#if defined(__x86)	/* 64 significant bits */
+		if (n >= 35) {	/* } */
+#else	/* 113 significant bits */
+		if (n >= 60) {
+#endif
+			if (n >= 120)	/* ax is dropped entirely */
+				t = sqrtl(half * ay);
+			else if (iy >= 0x7ffe0000)	/* halve first, then add */
+				t = sqrtl(half * ay + half * ax);
+			else if (ix <= 0x00010000)	/* double first, halve after */
+				t = half * (sqrtl(two * (ax + ay)));
+			else
+				t = sqrtl(half * (ax + ay));
+		} else if (iy >= 0x5f3f0000) {	/* y > 2**8000 */
+			ax *= twom9001;	/* scale down before squaring */
+			y *= twom9001;
+			t = two4500 * sqrtl(ax + sqrtl(ax * ax + y * y));
+		} else if (ix <= 0x20bf0000) {
+			ax *= two8999;	/* scale up before squaring */
+			y *= two8999;
+			t = twom4500 * sqrtl(ax + sqrtl(ax * ax + y * y));
+		} else
+			t = sqrtl(half * (ax + sqrtl(ax * ax + y * y)));
+
+		if (hx >= 0) {
+			LD_RE(ans) = t;
+			LD_IM(ans) = ay / (t + t);
+		} else {
+			LD_IM(ans) = t;
+			LD_RE(ans) = ay / (t + t);
+		}
+	}
+	if (hy < 0)	/* give the imaginary part the sign of y */
+		LD_IM(ans) = -LD_IM(ans);
+	return (ans);
+}
diff --git a/usr/src/lib/libm/common/complex/ctan.c b/usr/src/lib/libm/common/complex/ctan.c
new file mode 100644
index 0000000000..c9d5765853
--- /dev/null
+++ b/usr/src/lib/libm/common/complex/ctan.c
@@ -0,0 +1,61 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma weak ctan = __ctan
+
+/* INDENT OFF */
+/*
+ * dcomplex ctan(dcomplex z);
+ *
+ * If z = x+iy, then since ctan(iz) = i*ctanh(z), we have
+ *
+ * ctan(z) = ctan((-1)*(-z)) = ctan(i*i*(-z))
+ *         = i*ctanh(i*(-z)) = i*ctanh(i*(-x-yi))
+ *         = i*ctanh(y-ix)
+ *         = -Im(ctanh(y-ix))+i*Re(ctanh(y-ix))
+ */
+/* INDENT ON */
+
+#include "libm.h"
+#include "complex_wrapper.h"
+
+/*
+ * Complex tangent, double precision.  Implements the last line of the
+ * derivation above: form w = y - ix, evaluate th = ctanh(w) and return
+ * -Im(th) + i*Re(th).
+ */
+dcomplex
+ctan(dcomplex z) {
+	dcomplex w, th, res;
+
+	D_RE(w) = D_IM(z);		/* w = y - ix */
+	D_IM(w) = -D_RE(z);
+	th = ctanh(w);
+	D_IM(res) = D_RE(th);		/* res = -Im(th) + i*Re(th) */
+	D_RE(res) = -D_IM(th);
+	return (res);
+}
diff --git a/usr/src/lib/libm/common/complex/ctanf.c b/usr/src/lib/libm/common/complex/ctanf.c
new file mode 100644
index 0000000000..fde6c0bbee
--- /dev/null
+++ b/usr/src/lib/libm/common/complex/ctanf.c
@@ -0,0 +1,48 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma weak ctanf = __ctanf
+
+#include "libm.h"
+#include "complex_wrapper.h"
+
+/*
+ * Complex tangent, single precision.
+ *
+ * For z = x + iy the argument w = y - ix is handed to ctanhf() and the
+ * result th is returned as -Im(th) + i*Re(th).
+ */
+fcomplex
+ctanf(fcomplex z) {
+	fcomplex w, th, res;
+
+	/* w = y - ix */
+	F_RE(w) = F_IM(z);
+	F_IM(w) = -F_RE(z);
+
+	th = ctanhf(w);
+
+	/* res = -Im(th) + i*Re(th) */
+	F_IM(res) = F_RE(th);
+	F_RE(res) = -F_IM(th);
+	return (res);
+}
diff --git a/usr/src/lib/libm/common/complex/ctanh.c b/usr/src/lib/libm/common/complex/ctanh.c
new file mode 100644
index 0000000000..1e5ecb833f
--- /dev/null
+++ b/usr/src/lib/libm/common/complex/ctanh.c
@@ -0,0 +1,176 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma weak ctanh = __ctanh
+
+/* INDENT OFF */
+/*
+ * dcomplex ctanh(dcomplex z);
+ *
+ *	ctanh z = (tanh x + i tan y) / (1 + i tanh(x) tan(y))
+ *		= (sinh 2x + i sin 2y) / (cosh 2x + cos 2y)
+ *
+ * For |x| >= prec/2 (14,28,34,60 for single, double, double extended, quad),
+ * we use
+ *
+ *	cosh 2x = sinh 2x = exp(2x) / 2
+ *
+ * and hence
+ *
+ *	ctanh z = 1 + i 2 sin(2y) / exp(2x);
+ *
+ * otherwise, to avoid cancellation, for |x| < prec/2,
+ *
+ *	cosh 2x + cos 2y
+ *		= 1 + (exp(2x) - 1)**2 / (2 exp(2x)) + cos(y)**2 - sin(y)**2
+ *		= (exp(2x) - 1)**2 exp(-2x) / 2 + 2 cos(y)**2
+ * and
+ *
+ *	sinh 2x = [ exp(2x) - 1 + (exp(2x) - 1) / exp(2x) ] / 2
+ *
+ * Implementation notes: let t = expm1(2x) = exp(2x) - 1, then
+ *
+ *	cosh 2x + cos 2y = [ t*t/(t+1) + 4 cos(y)**2 ] / 2;
+ *	sinh 2x          = [ t + t/(t+1) ] / 2
+ *
+ * Hence,
+ *
+ *	Re(ctanh z) = (t*t+2t) / (t*t+[4(t+1)(cos y)](cos y))
+ *	Im(ctanh z) = [4(t+1)(cos y)]*(sin y) / (t*t+[4(t+1)(cos y)](cos y))
+ *
+ * The code evaluates these on |x| and |y| and restores the signs at the
+ * end: the real part is negated when hx < 0, the imaginary part when
+ * hy < 0.
+ *
+ * EXCEPTION (conform to ISO/IEC 9899:1999(E)):
+ *	ctanh(0,0)=(0,0)
+ *	ctanh(x,inf) = (NaN,NaN) for finite x
+ *	ctanh(x,NaN) = (NaN,NaN) for finite x
+ *	ctanh(inf,y) = 1+ i*0*sin(2y) for positive-signed finite y
+ *	ctanh(inf,inf) = (1, +-0)
+ *	ctanh(inf,NaN) = (1, +-0)
+ *	ctanh(NaN,0) = (NaN,0)
+ *	ctanh(NaN,y) = (NaN,NaN) for non-zero y
+ *	ctanh(NaN,NaN) = (NaN,NaN)
+ */
+/* INDENT ON */
+
+#include "libm.h"		/* exp/expm1/fabs/sin/tanh/sincos */
+#include "complex_wrapper.h"
+
+static const double four = 4.0, two = 2.0, one = 1.0, zero = 0.0;
+
+dcomplex
+ctanh(dcomplex z) {
+	double t, r, v, u, x, y, S, C;
+	int hx, ix, lx, hy, iy, ly;
+	dcomplex ans;
+
+	x = D_RE(z);
+	y = D_IM(z);
+	hx = HI_WORD(x);	/* hx < 0 is used at the end as the sign of x */
+	lx = LO_WORD(x);
+	ix = hx & 0x7fffffff;	/* high word of x with the top bit cleared */
+	hy = HI_WORD(y);	/* hy < 0 is used at the end as the sign of y */
+	ly = LO_WORD(y);
+	iy = hy & 0x7fffffff;	/* high word of y with the top bit cleared */
+	x = fabs(x);		/* from here on x and y hold |x| and |y| */
+	y = fabs(y);
+
+	if ((iy | ly) == 0) {	/* ctanh(x,0) = (x,0) for x = 0 or NaN */
+		D_RE(ans) = tanh(x);
+		D_IM(ans) = zero;
+	} else if (iy >= 0x7ff00000) {	/* y is inf or NaN */
+		if (ix < 0x7ff00000)	/* ctanh(finite x,inf/nan) is nan */
+			D_RE(ans) = D_IM(ans) = y - y;
+		else if (((ix - 0x7ff00000) | lx) == 0) {	/* x is inf */
+			D_RE(ans) = one;
+			D_IM(ans) = zero;
+		} else {	/* x is NaN */
+			D_RE(ans) = x + y;
+			D_IM(ans) = y - y;
+		}
+	} else if (ix >= 0x403c0000) {
+		/*
+		 * |x| > 28 = prec/2 (14,28,34,60)
+		 * ctanh z ~ 1 + i 2 (sin2y)/(exp(2x))
+		 */
+		D_RE(ans) = one;
+		if (iy < 0x7fe00000)	/* S = sin(2y) */
+			S = sin(y + y);
+		else {	/* avoid forming y + y: sin(2y) = 2 sin(y) cos(y) */
+			(void) sincos(y, &S, &C);
+			S = (S + S) * C;
+		}
+		if (ix >= 0x7fe00000) {	/* |x| > max/2 */
+			if (ix >= 0x7ff00000) {	/* |x| is inf or NaN */
+				if (((ix - 0x7ff00000) | lx) != 0)
+					D_RE(ans) = D_IM(ans) = x + y;
+								/* x is NaN */
+				else
+					D_IM(ans) = zero * S;	/* x is inf */
+			} else
+				D_IM(ans) = S * exp(-x);	/* underflow */
+		} else
+			D_IM(ans) = (S + S) * exp(-(x + x));
+							/* 2 sin 2y / exp(2x) */
+	} else {
+		/* INDENT OFF */
+		/*
+		 * ctanh z = (t*t+2t) / (t*t+[4(t+1)(cos y)](cos y)) +
+		 *
+		 *   i [4(t+1)(cos y)]*(sin y) / (t*t+[4(t+1)(cos y)](cos y))
+		 */
+		/* INDENT ON */
+		(void) sincos(y, &S, &C);
+		t = expm1(x + x);
+		r = (four * C) * (t + one);	/* r = 4(t+1)(cos y) */
+		u = t * t;
+		v = one / (u + r * C);	/* reciprocal of the denominator */
+		D_RE(ans) = (u + two * t) * v;
+		D_IM(ans) = (r * S) * v;
+	}
+	if (hx < 0)	/* restore the sign of x on the real part */
+		D_RE(ans) = -D_RE(ans);
+	if (hy < 0)	/* restore the sign of y on the imaginary part */
+		D_IM(ans) = -D_IM(ans);
+	return (ans);
+}
diff --git a/usr/src/lib/libm/common/complex/ctanhf.c b/usr/src/lib/libm/common/complex/ctanhf.c
new file mode 100644
index 0000000000..ab67e2dd40
--- /dev/null
+++ b/usr/src/lib/libm/common/complex/ctanhf.c
@@ -0,0 +1,115 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and
Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma weak ctanhf = __ctanhf
+
+#include "libm.h"		/* expf/expm1f/fabsf/sincosf/sinf/tanhf */
+#include "complex_wrapper.h"
+
+/* INDENT OFF */
+static const float four = 4.0F, two = 2.0F, one = 1.0F, zero = 0.0F;
+/* INDENT ON */
+/*
+ * fcomplex ctanhf(fcomplex z);
+ *
+ * Single precision complex hyperbolic tangent.  The sign words hx and hy
+ * are captured first, then x and y are replaced by |x| and |y|; the real
+ * part is negated at the end when hx < 0 and the imaginary part when
+ * hy < 0.
+ *
+ * Branches, in order:
+ *	y == 0		(tanhf(x), 0)
+ *	y inf or NaN	NaN parts, except (1, 0) when x is inf
+ *	|x| >= 14	real part 1, imaginary part 2 sin(2y) / exp(2x)
+ *	otherwise	the quotient built from t = expm1f(2x)
+ */
+fcomplex
+ctanhf(fcomplex z) {
+	float r, u, v, t, x, y, S, C;
+	int hx, ix, hy, iy;
+	fcomplex ans;
+
+	x = F_RE(z);
+	y = F_IM(z);
+	hx = THE_WORD(x);	/* hx < 0 is used at the end as the sign of x */
+	ix = hx & 0x7fffffff;	/* word of x with the top bit cleared */
+	hy = THE_WORD(y);	/* hy < 0 is used at the end as the sign of y */
+	iy = hy & 0x7fffffff;	/* word of y with the top bit cleared */
+	x = fabsf(x);		/* from here on x and y hold |x| and |y| */
+	y = fabsf(y);
+
+	if (iy == 0) {	/* ctanh(x,0) = (x,0) for x = 0 or NaN */
+		F_RE(ans) = tanhf(x);
+		F_IM(ans) = zero;
+	} else if (iy >= 0x7f800000) {	/* y is inf or NaN */
+		if (ix < 0x7f800000)	/* ctanh(finite x,inf/nan) is nan */
+			F_RE(ans) = F_IM(ans) = y - y;
+		else if (ix == 0x7f800000) {	/* x is inf */
+			F_RE(ans) = one;
+			F_IM(ans) = zero;
+		} else {	/* x is NaN */
+			F_RE(ans) = x + y;
+			F_IM(ans) = y - y;
+		}
+	} else if (ix >= 0x41600000) {
+		/*
+		 * |x| > 14 = prec/2 (14,28,34,60)
+		 * ctanh z ~ 1 + i 2 (sin2y)/(exp(2x))
+		 */
+		F_RE(ans) = one;
+		if (iy < 0x7f000000)	/* S = sin(2y) */
+			S = sinf(y + y);
+		else {	/* avoid forming y + y: sin(2y) = 2 sin(y) cos(y) */
+			(void) sincosf(y, &S, &C);
+			S = (S + S) * C;
+		}
+		if (ix >= 0x7f000000) {	/* |x| > max/2 */
+			if (ix >= 0x7f800000) {	/* |x| is inf or NaN */
+				if (ix > 0x7f800000)	/* x is NaN */
+					F_RE(ans) = F_IM(ans) = x + y;
+				else
+					F_IM(ans) = zero * S;	/* x is inf */
+			} else
+				F_IM(ans) = S * expf(-x);	/* underflow */
+		} else
+			F_IM(ans) = (S + S) * expf(-(x + x));
+							/* 2 sin 2y / exp(2x) */
+	} else {
+		/* INDENT OFF */
+		/*
+		 * ctanh z = (t*t+2t) / (t*t+[4(t+1)(cos y)](cos y))
+		 *
+		 *   + i [4(t+1)(cos y)]*(sin y) / (t*t+[4(t+1)(cos y)](cos y))
+		 */
+		/* INDENT ON */
+		(void) sincosf(y, &S, &C);
+		t = expm1f(x + x);
+		r = (four * C) * (t + one);	/* r = 4(t+1)(cos y) */
+		u = t * t;
+		v = one / (u + r * C);	/* reciprocal of the denominator */
+		F_RE(ans) = (u + two * t) * v;
+		F_IM(ans) = (r * S) * v;
+	}
+	if (hx < 0)	/* restore the sign of x on the real part */
+		F_RE(ans) = -F_RE(ans);
+	if (hy < 0)	/* restore the sign of y on the imaginary part */
+		F_IM(ans) = -F_IM(ans);
+	return (ans);
+}
diff --git a/usr/src/lib/libm/common/complex/ctanhl.c b/usr/src/lib/libm/common/complex/ctanhl.c
new file mode 100644
index 0000000000..c19e067f3e
--- /dev/null
+++ b/usr/src/lib/libm/common/complex/ctanhl.c
@@ -0,0 +1,118 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma weak ctanhl = __ctanhl
+
+#include "libm.h"	/* expl/expm1l/fabsl/isinfl/isnanl/sincosl/sinl/tanhl */
+#include "complex_wrapper.h"
+#include "longdouble.h"
+
+/* INDENT OFF */
+static const long double four = 4.0L, two = 2.0L, one = 1.0L, zero = 0.0L;
+/* INDENT ON */
+/*
+ * ldcomplex ctanhl(ldcomplex z);
+ *
+ * Long double complex hyperbolic tangent.  The sign words hx and hy are
+ * captured first, then x and y are replaced by |x| and |y|; the real
+ * part is negated at the end when hx < 0 and the imaginary part when
+ * hy < 0.
+ *
+ * Branches, in order:
+ *	y == 0		(tanhl(x), 0)
+ *	y inf or NaN	NaN parts, except (1, 0) when x is inf
+ *	|x| >= 60	real part 1, imaginary part 2 sin(2y) / exp(2x)
+ *	otherwise	the quotient built from t = expm1l(2x)
+ */
+ldcomplex
+ctanhl(ldcomplex z) {
+	long double r, u, v, t, x, y, S, C;
+	int hx, ix, hy, iy;
+	ldcomplex ans;
+
+	x = LD_RE(z);
+	y = LD_IM(z);
+	hx = HI_XWORD(x);	/* hx < 0 is used at the end as the sign of x */
+	ix = hx & 0x7fffffff;	/* high word of x with the top bit cleared */
+	hy = HI_XWORD(y);	/* hy < 0 is used at the end as the sign of y */
+	iy = hy & 0x7fffffff;	/* high word of y with the top bit cleared */
+	x = fabsl(x);		/* from here on x and y hold |x| and |y| */
+	y = fabsl(y);
+
+	if (y == zero) {	/* ctanh(x,0) = (x,0) for x = 0 or NaN */
+		LD_RE(ans) = tanhl(x);
+		LD_IM(ans) = zero;
+	} else if (iy >= 0x7fff0000) {	/* y is inf or NaN */
+		if (ix < 0x7fff0000)	/* ctanh(finite x,inf/nan) is nan */
+			LD_RE(ans) = LD_IM(ans) = y - y;
+		else if (isinfl(x)) {	/* x is inf */
+			LD_RE(ans) = one;
+			LD_IM(ans) = zero;
+		} else {	/* x is NaN */
+			LD_RE(ans) = x + y;
+			LD_IM(ans) = y - y;
+		}
+	} else if (ix >= 0x4004e000) {
+		/* INDENT OFF */
+		/*
+		 * |x| > 60 = prec/2 (14,28,34,60)
+		 * ctanh z ~ 1 + i 2 (sin2y)/(exp(2x))
+		 */
+		/* INDENT ON */
+		LD_RE(ans) = one;
+		if (iy < 0x7ffe0000)	/* S = sin(2y) */
+			S = sinl(y + y);
+		else {	/* avoid forming y + y: sin(2y) = 2 sin(y) cos(y) */
+			(void) sincosl(y, &S, &C);
+			S = (S + S) * C;
+		}
+		if (ix >= 0x7ffe0000) {	/* |x| > max/2 */
+			if (ix >= 0x7fff0000) {	/* |x| is inf or NaN */
+				if (isnanl(x))	/* x is NaN */
+					LD_RE(ans) = LD_IM(ans) = x + y;
+				else
+					LD_IM(ans) = zero * S;	/* x is inf */
+			} else
+				LD_IM(ans) = S * expl(-x);	/* underflow */
+		} else
+			LD_IM(ans) = (S + S) * expl(-(x + x));
+							/* 2 sin 2y / exp(2x) */
+	} else {
+		/* INDENT OFF */
+		/*
+		 * ctanh z = (t*t+2t) / (t*t+[4(t+1)(cos y)](cos y))
+		 *
+		 *   + i [4(t+1)(cos y)]*(sin y) / (t*t+[4(t+1)(cos y)](cos y))
+		 */
+		/* INDENT ON */
+		sincosl(y, &S, &C);
+		t = expm1l(x + x);
+		r = (four * C) * (t + one);	/* r = 4(t+1)(cos y) */
+		u = t * t;
+		v = one / (u + r * C);	/* reciprocal of the denominator */
+		LD_RE(ans) = (u + two * t) * v;
+		LD_IM(ans) = (r * S) * v;
+	}
+	if (hx < 0)	/* restore the sign of x on the real part */
+		LD_RE(ans) = -LD_RE(ans);
+	if (hy < 0)	/* restore the sign of y on the imaginary part */
+		LD_IM(ans) = -LD_IM(ans);
+	return (ans);
+}
diff --git a/usr/src/lib/libm/common/complex/ctanl.c b/usr/src/lib/libm/common/complex/ctanl.c
new file mode 100644
index 0000000000..613cf4bd11
--- /dev/null
+++ b/usr/src/lib/libm/common/complex/ctanl.c
@@ -0,0 +1,48 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma weak ctanl = __ctanl
+
+#include "libm.h"
+#include "complex_wrapper.h"
+
+/*
+ * Complex tangent, long double precision.
+ *
+ * For z = x + iy the argument w = y - ix is handed to ctanhl() and the
+ * result th is returned as -Im(th) + i*Re(th).
+ */
+ldcomplex
+ctanl(ldcomplex z) {
+	ldcomplex w, th, res;
+
+	/* w = y - ix */
+	LD_RE(w) = LD_IM(z);
+	LD_IM(w) = -LD_RE(z);
+
+	th = ctanhl(w);
+
+	/* res = -Im(th) + i*Re(th) */
+	LD_IM(res) = LD_RE(th);
+	LD_RE(res) = -LD_IM(th);
+	return (res);
+}
diff --git a/usr/src/lib/libm/common/complex/k_atan2.c b/usr/src/lib/libm/common/complex/k_atan2.c
new file mode 100644
index 0000000000..f3fe1691c9
--- /dev/null
+++ b/usr/src/lib/libm/common/complex/k_atan2.c
@@ -0,0 +1,550 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include "libm.h"		/* __k_atan2 */
+#include "complex_wrapper.h"
+
+/*
+ * double __k_atan2(double y, double x, double *e)
+ *
+ * Compute atan2 with error terms.
+ *
+ * Important formula:
+ *                  3       5
+ *                 x       x
+ * atan(x) = x - ----- + ----- - ...	(for x <= 1)
+ *                 3       5
+ *
+ *           pi     1     1
+ *         = --- - --- + --- - ...
(for x > 1)
+ *                          3
+ *            2     x     3x
+ *
+ *   Arg(x + y i) = sign(y) * atan2(|y|, x)
+ *		   = sign(y) * atan(|y|/x)   (for x > 0)
+ *		     sign(y) * (PI - atan(|y|/|x|))   (for x < 0)
+ * Thus if x >> y (IEEE double: EXP(x) - EXP(y) >= 60):
+ *	1. (x > 0): atan2(y,x) ~ y/x
+ *	2. (x < 0): atan2(y,x) ~ sign(y) (PI - |y/x|))
+ * Otherwise if x << y:
+ *	atan2(y,x) ~ sign(y)*PI/2 - x/y
+ *
+ * __k_atan2 calls static functions mx_poly, mx_atan
+ */
+
+/*
+ * (void) mx_poly (double *z, double *a, double *e, int n)
+ * return
+ *	e = a[0] + z*(a[2] + z*(a[4] + ... z*(a[2n] + e)...))
+ * Note:
+ * 1.	e and coefficient ai are represented by two double numbers.
+ *	For e, the first one contains the leading 24 bits rounded, and the
+ *	second one contains the remaining 53 bits (total 77 bits accuracy).
+ *	For ai, the first one contains the leading 53 bits rounded, and the
+ *	second is the remaining 53 bits (total 106 bits accuracy).
+ *	In a[] the leading part of a coefficient sits at an even index and
+ *	its trailing part at the following odd index; a[1] is not read.
+ * 2.	z is an array of three doubles.
+ *	z[0] : the rounded value of Z (the intended value of z)
+ *	z[1] : the leading 24 bits of Z rounded
+ *	z[2] : the remaining 53 bits of Z
+ *	Note that z[0] = z[1]+z[2] rounded.
+ * 3.	Only e[0] is read on entry; e[0] and e[1] are both written.
+ *
+ */
+
+static void
+mx_poly(const double *z, const double *a, double *e, int n) {
+	double r, s, t, p_h, p_l, z_h, z_l, p;
+	int i;
+
+	n = n + n;	/* index of the leading part of the last coefficient */
+	p = e[0] + a[n];
+	p_l = a[n + 1];
+	p_h = (double) ((float) p);	/* p rounded to float precision */
+	p = a[n - 2] + z[0] * p;
+	z_h = z[1]; z_l = z[2];
+	p_l += e[0] - (p_h - a[n]);	/* what the rounding of p_h left out */
+
+	for (i = n - 2; i >= 2; i -= 2) {
+	/* compute p = ai + z * p */
+		t = z_h * p_h;
+		s = z[0] * p_l + p_h * z_l;
+		p_h = (double) ((float) p);	/* p rounded to float precision */
+		s += a[i + 1];	/* trailing part of the coefficient */
+		r = t - (p_h - a[i]);
+		p = a[i - 2] + z[0] * p;
+		p_l = r + s;
+	}
+	e[0] = (double)((float) p);	/* leading part of the result */
+	t = z_h * p_h;
+	s = z[0] * p_l + p_h * z_l;
+	r = t - (e[0] - a[0]);
+	e[1] = r + s;	/* trailing part of the result */
+}
+
+/*
+ * Table of constants for atan from 0.125 to 8
+ *	0.125 -- 0x3fc00000 --- (increment at bit 16)
+ *		 0x3fc10000
+ *		 0x3fc20000
+ *		 ...	...
+ *		 0x401f0000
+ *	8.000 -- 0x40200000	(total: 97)
+ * By K.C.
Ng, March 9, 1989 + */ + +static const double TBL_atan_hi[] = { +1.243549945467614382e-01, 1.320397616146387620e-01, 1.397088742891636204e-01, +1.473614810886516302e-01, 1.549967419239409727e-01, 1.626138285979485676e-01, +1.702119252854744080e-01, 1.777902289926760471e-01, 1.853479499956947607e-01, +1.928843122579746439e-01, 2.003985538258785115e-01, 2.078899272022629863e-01, +2.153576996977380476e-01, 2.228011537593945213e-01, 2.302195872768437179e-01, +2.376123138654712419e-01, 2.449786631268641435e-01, 2.596296294082575118e-01, +2.741674511196587893e-01, 2.885873618940774099e-01, 3.028848683749714166e-01, +3.170557532091470287e-01, 3.310960767041321029e-01, 3.450021772071051318e-01, +3.587706702705721895e-01, 3.723984466767542023e-01, 3.858826693980737521e-01, +3.992207695752525431e-01, 4.124104415973872673e-01, 4.254496373700422662e-01, +4.383365598579578304e-01, 4.510696559885234436e-01, 4.636476090008060935e-01, +4.883339510564055352e-01, 5.123894603107377321e-01, 5.358112379604637043e-01, +5.585993153435624414e-01, 5.807563535676704136e-01, 6.022873461349641522e-01, +6.231993299340659043e-01, 6.435011087932843710e-01, 6.632029927060932861e-01, +6.823165548747480713e-01, 7.008544078844501923e-01, 7.188299996216245269e-01, +7.362574289814280970e-01, 7.531512809621944138e-01, 7.695264804056582975e-01, +7.853981633974482790e-01, 8.156919233162234217e-01, 8.441539861131710509e-01, +8.709034570756529758e-01, 8.960553845713439269e-01, 9.197196053504168578e-01, +9.420000403794636101e-01, 9.629943306809362058e-01, 9.827937232473290541e-01, +1.001483135694234639e+00, 1.019141344266349725e+00, 1.035841253008800145e+00, +1.051650212548373764e+00, 1.066630365315743623e+00, 1.080839000541168327e+00, +1.094328907321189925e+00, 1.107148717794090409e+00, 1.130953743979160375e+00, +1.152571997215667610e+00, 1.172273881128476303e+00, 1.190289949682531656e+00, +1.206817370285252489e+00, 1.222025323210989667e+00, 1.236059489478081863e+00, +1.249045772398254428e+00, 
1.261093382252440387e+00, 1.272297395208717319e+00, +1.282740879744270757e+00, 1.292496667789785336e+00, 1.301628834009196156e+00, +1.310193935047555547e+00, 1.318242051016837113e+00, 1.325817663668032553e+00, +1.339705659598999565e+00, 1.352127380920954636e+00, 1.363300100359693845e+00, +1.373400766945015894e+00, 1.382574821490125894e+00, 1.390942827002418447e+00, +1.398605512271957618e+00, 1.405647649380269870e+00, 1.412141064608495311e+00, +1.418146998399631542e+00, 1.423717971406494032e+00, 1.428899272190732761e+00, +1.433730152484709031e+00, 1.438244794498222623e+00, 1.442473099109101931e+00, +1.446441332248135092e+00, +}; + +static const double TBL_atan_lo[] = { +-3.125324142453938311e-18, -1.276925400709959526e-17, 2.479758919089733066e-17, +5.409599147666297957e-18, 9.585415594114323829e-18, 7.784470643106252464e-18, +-3.541164079802125137e-18, 2.372599351477449041e-17, 4.180692268843078977e-18, +2.034098543938166622e-17, 3.139954287184449286e-18, 7.333160666520898500e-18, +4.738160130078732886e-19, -5.498822172446843173e-18, 1.231340452914270316e-17, +1.058231431371112987e-17, 1.069875561873445139e-17, 1.923875492461530410e-17, +8.261353575163771936e-18, -1.428369957377257085e-17, -1.101082790300136900e-17, +-1.893928924292642146e-17, -7.952610375793798701e-18, -2.293880475557830393e-17, +3.088733564861919217e-17, 1.961231150484565340e-17, 2.378822732491940868e-17, +2.246598105617042065e-17, 3.963462895355093301e-17, 2.331553074189288466e-17, +-2.494277030626540909e-17, 3.280735600183735558e-17, 2.269877745296168709e-17, +-1.137323618932958456e-17, -2.546278147285580353e-17, -4.063795683482557497e-18, +-5.455630548591626394e-18, -1.441464378193066908e-17, 2.950430737228402307e-17, +2.672403885140095079e-17, 1.583478505144428617e-17, -3.076054864429649001e-17, +6.943223671560007740e-18, -1.987626234335816123e-17, -2.147838844445698302e-17, +3.473937648299456719e-17, -2.425693465918206812e-17, -3.704991905602721293e-17, +3.061616997868383018e-17, 
-1.071456562778743077e-17, -4.841337011934916763e-17, +-2.269823590747287052e-17, 2.923876285774304890e-17, -4.057439412852767923e-17, +5.460837485846687627e-17, -3.986660595210752445e-18, 1.390331103123099845e-17, +9.438308023545392000e-17, 1.000401886936679889e-17, 3.194313981784503706e-17, +-9.650564731467513515e-17, -5.956589637160374564e-17, -1.567632251135907253e-17, +-5.490676155022364226e-18, 9.404471373566379412e-17, 7.123833804538446299e-17, +-9.159738508900378819e-17, 8.385188614028674371e-17, 7.683333629842068806e-17, +4.172467638861439118e-17, -2.979162864892849274e-17, 7.879752739459421280e-17, +-2.196203799612310905e-18, 3.242139621534960503e-17, 2.245875015034507026e-17, +-9.283188754266129476e-18, -6.830804768926660334e-17, -1.236918499824626670e-17, +8.745413734780278834e-17, -6.319394031144676258e-17, -8.824429373951136321e-17, +-2.599011860304134377e-17, 2.147674250751150961e-17, 1.093246171526936217e-16, +-3.307710355769516504e-17, -3.561490438648230100e-17, -9.843712133488842595e-17, +-2.324061182591627982e-17, -8.922630138234492386e-17, -9.573807110557223276e-17, +-8.263883782511013632e-17, 8.721870922223967507e-17, -6.457134743238754385e-17, +-4.396204466767636187e-17, -2.493019910264565554e-17, -1.105119435430315713e-16, +9.211323971545051565e-17, +}; + +/* + * mx_atan(x,err) + * Table look-up algorithm + * By K.C. Ng, March 9, 1989 + * + * Algorithm. + * + * The algorithm is based on atan(x)=atan(y)+atan((x-y)/(1+x*y)). + * We use poly1(x) to approximate atan(x) for x in [0,1/8] with + * error (relative) + * |(atan(x)-poly1(x))/x|<= 2^-83.41 + * + * and use poly2(x) to approximate atan(x) for x in [0,1/65] with + * error + * |atan(x)-poly2(x)|<= 2^-86.8 + * + * Here poly1 and poly2 are odd polynomial with the following form: + * x + x^3*(a1+x^2*(a2+...)) + * + * (0). Purge off Inf and NaN and 0 + * (1). Reduce x to positive by atan(x) = -atan(-x). + * (2). 
For x <= 1/8, use + * (2.1) if x < 2^(-prec/2), atan(x) = x with inexact flag raised + * (2.2) Otherwise + * atan(x) = poly1(x) + * (3). For x >= 8 then (prec = 78) + * (3.1) if x >= 2^prec, atan(x) = atan(inf) - pio2lo + * (3.2) if x >= 2^(prec/3), atan(x) = atan(inf) - 1/x + * (3.3) if x > 65, atan(x) = atan(inf) - poly2(1/x) + * (3.4) Otherwise, atan(x) = atan(inf) - poly1(1/x) + * + * (4). Now x is in (0.125, 8) + * Find y that match x to 4.5 bit after binary (easy). + * If iy is the high word of y, then + * single : j = (iy - 0x3e000000) >> 19 + * double : j = (iy - 0x3fc00000) >> 16 + * quad : j = (iy - 0x3ffc0000) >> 12 + * + * Let s = (x-y)/(1+x*y). Then + * atan(x) = atan(y) + poly1(s) + * = _TBL_atan_hi[j] + (_TBL_atan_lo[j] + poly2(s) ) + * + * Note. |s| <= 1.5384615385e-02 = 1/65. Maxium occurs at x = 1.03125 + * + */ + +#define P1 p[2] +#define P4 p[8] +#define P5 p[9] +#define P6 p[10] +#define P7 p[11] +#define P8 p[12] +#define P9 p[13] +static const double p[] = { + 1.0, + 0.0, + -3.33333333333333314830e-01, /* p1 = BFD55555 55555555 */ + -1.85030852238476921863e-17, /* p1_l = BC755525 9783A49C */ + 2.00000000000000011102e-01, /* p2 = 3FC99999 9999999A */ + -1.27263196576150347368e-17, /* p2_l = BC6D584B 0D874007 */ + -1.42857142857141405923e-01, /* p3 = BFC24924 9249245E */ + -1.34258204847170493327e-17, /* p3_l = BC6EF534 A112500D */ + 1.11111111110486909803e-01, /* p4 = 3FBC71C7 1C71176A */ + -9.09090907557387889470e-02, /* p5 = BFB745D1 73B47A7D */ + 7.69230541541713053189e-02, /* p6 = 3FB3B13A B1E68DE6 */ + -6.66645815401964159097e-02, /* p7 = BFB110EE 1584446A */ + 5.87081768778560317279e-02, /* p8 = 3FAE0EFF 87657733 */ + -4.90818147456113240690e-02, /* p9 = BFA92140 6A524B5C */ +}; +#define Q1 q[2] +#define Q3 q[6] +#define Q4 q[7] +#define Q5 q[8] +static const double q[] = { + 1.0, + 0.0, + -3.33333333333333314830e-01, /* q1 = BFD55555 55555555 */ + -1.85022941571278638733e-17, /* q1_l = BC7554E9 D20EFA66 */ + 1.99999999999999927836e-01, 
/* q2 = 3FC99999 99999997 */ + -1.28782564407438833398e-17, /* q2_l = BC6DB1FB 17217417 */ + -1.42857142855492280642e-01, /* q3 = BFC24924 92483C46 */ + 1.11111097130183356096e-01, /* q4 = 3FBC71C6 E06595CC */ + -9.08553303569109294013e-02, /* q5 = BFB7424B 808CDA76 */ +}; +static const double +one = 1.0, +pio2hi = 1.570796326794896558e+00, +pio2lo = 6.123233995736765886e-17; + +static double +mx_atan(double x, double *err) { + double y, z, r, s, t, w, s_h, s_l, x_h, x_l, zz[3], ee[2], z_h, + z_l, r_h, r_l, u, v; + int ix, iy, sign, j; + + ix = ((int *) &x)[HIWORD]; + sign = ix & 0x80000000; + ix ^= sign; + + /* for |x| < 1/8 */ + if (ix < 0x3fc00000) { + if (ix < 0x3f300000) { /* when |x| < 2**-12 */ + if (ix < 0x3d800000) { /* if |x| < 2**-39 */ + *err = (double) ((int) x); + return (x); + } + z = x * x; + t = x * z * (q[2] + z * (q[4] + z * q[6])); + r = x + t; + *err = t - (r - x); + return (r); + } + z = x * x; + + /* use double precision at p4 and on */ + ee[0] = z * + (P4 + z * + (P5 + z * (P6 + z * (P7 + z * (P8 + z * P9))))); + + x_h = (double) ((float) x); + z_h = (double) ((float) z); + x_l = x - x_h; + z_l = (x_h * x_h - z_h); + zz[0] = z; + zz[1] = z_h; + zz[2] = z_l + x_l * (x + x_h); + + /* + * compute (1+z*(p1+z*(p2+z*(p3+e)))) by call + * mx_poly + */ + + mx_poly(zz, p, ee, 3); + + /* finally x*(1+z*(p1+...)) */ + r = x_h * ee[0]; + t = x * ee[1] + x_l * ee[0]; + s = t + r; + *err = t - (s - r); + return (s); + } + /* for |x| >= 8.0 */ + if (ix >= 0x40200000) { /* x >= 8 */ + x = fabs(x); + if (ix >= 0x42600000) { /* x >= 2**39 */ + if (ix >= 0x44c00000) { /* x >= 2**77 */ + y = -pio2lo; + } else + y = one / x - pio2lo; + if (sign == 0) { + t = pio2hi - y; + *err = -(y - (pio2hi - t)); + } else { + t = y - pio2hi; + *err = y - (pio2hi + t); + } + return (t); + } else { + /* compute r = 1/x */ + r = one / x; + z = r * r; + if (ix < 0x40504000) { /* 8 < x < 65 */ + + /* use double precision at p4 and on */ + ee[0] = z * + (P4 + z * + (P5 + z * + (P6 
+ z * (P7 + z * (P8 + z * P9))))); + x_h = (double) ((float) x); + r_h = (double) ((float) r); + z_h = (double) ((float) z); + r_l = r * ((x_h - x) * r_h - (x_h * r_h - one)); + z_l = (r_h * r_h - z_h); + zz[0] = z; + zz[1] = z_h; + zz[2] = z_l + r_l * (r + r_h); + /* + * compute (1+z*(p1+z*(p2+z*(p3+e)))) by call + * mx_poly + */ + mx_poly(zz, p, ee, 3); + } else { /* x < 65 < 2**39 */ + /* use double precision at q3 and on */ + ee[0] = z * (Q3 + z * (Q4 + z * Q5)); + x_h = (double) ((float) x); + r_h = (double) ((float) r); + z_h = (double) ((float) z); + r_l = r * ((x_h - x) * r_h - (x_h * r_h - one)); + z_l = (r_h * r_h - z_h); + zz[0] = z; + zz[1] = z_h; + zz[2] = z_l + r_l * (r + r_h); + /* + * compute (1+z*(q1+z*(q2+e))) by call + * mx_poly + */ + mx_poly(zz, q, ee, 2); + } + /* pio2 - r*(1+...) */ + v = r_h * ee[0]; + t = pio2lo - (r * ee[1] + r_l * ee[0]); + if (sign == 0) { + s = pio2hi - v; + t -= (v - (pio2hi - s)); + } else { + s = v - pio2hi; + t = -(t - (v - (s + pio2hi))); + } + w = s + t; + *err = t - (w - s); + return (w); + } + } + /* now x is between 1/8 and 8 */ + ((int *) &x)[HIWORD] = ix; + iy = (ix + 0x00008000) & 0x7fff0000; + ((int *) &y)[HIWORD] = iy; + ((int *) &y)[LOWORD] = 0; + j = (iy - 0x3fc00000) >> 16; + + w = (x - y); + v = 1 / (one + x * y); + s = w * v; + z = s * s; + /* use double precision at q3 and on */ + ee[0] = z * (Q3 + z * (Q4 + z * Q5)); + s_h = (double) ((float) s); + z_h = (double) ((float) z); + x_h = (double) ((float) x); + t = (double) ((float) (one + x * y)); + r = -((x_h - x) * y - (x_h * y - (t - one))); + s_l = -v * (s_h * r - (w - s_h * t)); + z_l = (s_h * s_h - z_h); + zz[0] = z; + zz[1] = z_h; + zz[2] = z_l + s_l * (s + s_h); + /* compute (1+z*(q1+z*(q2+e))) by call mx_poly */ + mx_poly(zz, q, ee, 2); + v = s_h * ee[0]; + t = TBL_atan_lo[j] + (s * ee[1] + s_l * ee[0]); + u = TBL_atan_hi[j]; + s = u + v; + t += (v - (s - u)); + w = s + t; + *err = t - (w - s); + if (sign != 0) { + w = -w; + *err = -*err; + } 
+ return (w); +} + +static const double + twom768 = 6.441148769597133308e-232, /* 2^-768 */ + two768 = 1.552518092300708935e+231, /* 2^768 */ + pi = 3.1415926535897931159979634685, + pi_lo = 1.224646799147353177e-16, + pio2 = 1.570796326794896558e+00, + pio2_lo = 6.123233995736765886e-17, + pio4 = 0.78539816339744827899949, + pio4_lo = 3.061616997868382943e-17, + pi3o4 = 2.356194490192344836998, + pi3o4_lo = 9.184850993605148829195e-17; + +double +__k_atan2(double y, double x, double *w) { + double t, xh, th, t1, t2, w1, w2; + int ix, iy, hx, hy, lx, ly; + + hy = ((int *) &y)[HIWORD]; + ly = ((int *) &y)[LOWORD]; + iy = hy & ~0x80000000; + + hx = ((int *) &x)[HIWORD]; + lx = ((int *) &x)[LOWORD]; + ix = hx & ~0x80000000; + + *w = 0.0; + if (ix >= 0x7ff00000 || iy >= 0x7ff00000) { /* ignore inexact */ + if (isnan(x) || isnan(y)) + return (x * y); + else if (iy < 0x7ff00000) { + if (hx >= 0) { /* ATAN2(+-finite, +inf) is +-0 */ + *w *= y; + return (*w); + } else { /* ATAN2(+-finite, -inf) is +-pi */ + *w = copysign(pi_lo, y); + return (copysign(pi, y)); + } + } else if (ix < 0x7ff00000) { + /* ATAN2(+-inf, finite) is +-pi/2 */ + *w = (hy >= 0)? pio2_lo : -pio2_lo; + return ((hy >= 0)? pio2 : -pio2); + } else if (hx > 0) { /* ATAN2(+-INF,+INF) = +-pi/4 */ + *w = (hy >= 0)? pio4_lo : -pio4_lo; + return ((hy >= 0)? pio4 : -pio4); + } else { /* ATAN2(+-INF,-INF) = +-3pi/4 */ + *w = (hy >= 0)? pi3o4_lo : -pi3o4_lo; + return ((hy >= 0)? pi3o4 : -pi3o4); + } + } else if ((ix | lx) == 0 || (iy | ly) == 0) { + if ((iy | ly) == 0) { + if (hx >= 0) /* ATAN2(+-0, +(0 <= x <= inf)) is +-0 */ + return (y); + else { /* ATAN2(+-0, -(0 <= x <= inf)) is +-pi */ + *w = (hy >= 0)? pi_lo : -pi_lo; + return ((hy >= 0)? pi : -pi); + } + } else { /* ATAN2(+-(anything but 0 and NaN), 0) is +-pi/2 */ + *w = (hy >= 0)? pio2_lo : -pio2_lo; + return ((hy >= 0)? pio2 : -pio2); + } + } else if (iy - ix > 0x06400000) { /* |x/y| < 2 ** -100 */ + *w = (hy >= 0)? 
pio2_lo : -pio2_lo; + return ((hy >= 0)? pio2 : -pio2); + } else if (ix - iy > 0x06400000) { /* |y/x| < 2 ** -100 */ + if (hx < 0) { + *w = (hy >= 0)? pi_lo : -pi_lo; + return ((hy >= 0)? pi : -pi); + } else { + t = y / x; + th = t; + ((int *) &th)[LOWORD] &= 0xf8000000; + xh = x; + ((int *) &xh)[LOWORD] &= 0xf8000000; + t1 = (x - xh) * t + xh * (t - th); + t2 = y - xh * th; + *w = (t2 - t1) / x; + return (t); + } + } else { + if (ix >= 0x5f300000) { + x *= twom768; + y *= twom768; + } else if (ix < 0x23d00000) { + x *= two768; + y *= two768; + } + y = fabs(y); + x = fabs(x); + t = y / x; + th = t; + ((int *) &th)[LOWORD] &= 0xf8000000; + xh = x; + ((int *) &xh)[LOWORD] &= 0xf8000000; + t1 = (x - xh) * t + xh * (t - th); + t2 = y - xh * th; + w1 = mx_atan(t, &w2); + w2 += (t2 - t1) / (x + y * t); + if (hx < 0) { + t1 = pi - w1; + t2 = pi - t1; + w2 = (pi_lo - w2) - (w1 - t2); + w1 = t1; + } + *w = (hy >= 0)? w2 : -w2; + return ((hy >= 0)? w1 : -w1); + } +} diff --git a/usr/src/lib/libm/common/complex/k_atan2l.c b/usr/src/lib/libm/common/complex/k_atan2l.c new file mode 100644 index 0000000000..5cd04f6995 --- /dev/null +++ b/usr/src/lib/libm/common/complex/k_atan2l.c @@ -0,0 +1,809 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include "libm.h" /* __k_atan2l */ +#include "complex_wrapper.h" + +#if defined(__sparc) +#define HALF(x) ((int *) &x)[3] = 0; ((int *) &x)[2] &= 0xfe000000 +#elif defined(__x86) +#define HALF(x) ((int *) &x)[0] = 0 +#endif + +/* + * long double __k_atan2l(long double y, long double x, long double *e) + * + * Compute atan2l with error terms. + * + * Important formula: + * 3 5 + * x x + * atan(x) = x - ----- + ----- - ... (for x <= 1) + * 3 5 + * + * pi 1 1 + * = --- - --- + --- - ... (for x > 1) + * 3 + * 2 x 3x + * + * Arg(x + y i) = sign(y) * atan2(|y|, x) + * = sign(y) * atan(|y|/x) (for x > 0) + * sign(y) * (PI - atan(|y|/|x|)) (for x < 0) + * Thus if x >> y (IEEE double: EXP(x) - EXP(y) >= 60): + * 1. (x > 0): atan2(y,x) ~ y/x + * 2. (x < 0): atan2(y,x) ~ sign(y) (PI - |y/x|)) + * Otherwise if x << y: + * atan2(y,x) ~ sign(y)*PI/2 - x/y + * + * __k_atan2l call static functions mx_polyl, mx_atanl + */ + + +/* + * (void) mx_polyl (long double *z, long double *a, long double *e, int n) + * return + * e = a + z*(a + z*(a + ... z*(a + e)...)) + * 0 2 4 2n + * Note: + * 1. e and coefficient ai are represented by two long double numbers. + * For e, the first one contain the leading 53 bits (30 for x86 exteneded) + * and the second one contain the remaining 113 bits (64 for x86 extended). + * For ai, the first one contian the leading 53 bits (or 30 for x86) + * rounded, and the second is the remaining 113 bits (or 64 for x86). + * 2. z is an array of three doubles. 
+ * z[0] : the rounded value of Z (the intended value of z) + * z[1] : the leading 32 (or 56) bits of Z rounded + * z[2] : the remaining 113 (or 64) bits of Z + * Note that z[0] = z[1]+z[2] rounded. + * + */ + +static void +mx_polyl(const long double *z, const long double *a, long double *e, int n) { + long double r, s, t, p_h, p_l, z_h, z_l, p, w; + int i; + n = n + n; + p = e[0] + a[n]; + p_l = a[n + 1]; + w = p; HALF(w); + p_h = w; + p = a[n - 2] + z[0] * p; + z_h = z[1]; z_l = z[2]; + p_l += e[0] - (p_h - a[n]); + + for (i = n - 2; i >= 2; i -= 2) { + + /* compute p = ai + z * p */ + t = z_h * p_h; + s = z[0] * p_l + p_h * z_l; + w = p; HALF(w); + p_h = w; + s += a[i + 1]; + r = t - (p_h - a[i]); + p = a[i - 2] + z[0] * p; + p_l = r + s; + } + w = p; HALF(w); + e[0] = w; + t = z_h * p_h; + s = z[0] * p_l + p_h * z_l; + r = t - (e[0] - a[0]); + e[1] = r + s; +} + +/* + * Table of constants for atan from 0.125 to 8 + * 0.125 -- 0x3ffc0000 --- (increment at bit 12) + * 0x3ffc1000 + * 0x3ffc2000 + * ... ... 
+ * 0x4001f000 + * 8.000 -- 0x40020000 (total: 97) + */ + +static const long double TBL_atan_hil[] = { +#if defined(__sparc) +1.2435499454676143503135484916387102416568e-01L, +1.3203976161463874927468440652656953226250e-01L, +1.3970887428916364518336777673909505681607e-01L, +1.4736148108865163560980276039684551821066e-01L, +1.5499674192394098230371437493349219133371e-01L, +1.6261382859794857537364156376155780062019e-01L, +1.7021192528547440449049660709976171369543e-01L, +1.7779022899267607079662479921582468899456e-01L, +1.8534794999569476488602596122854464667261e-01L, +1.9288431225797466419705871069022730349878e-01L, +2.0039855382587851465394578503437838446153e-01L, +2.0788992720226299360533498310299432475629e-01L, +2.1535769969773804802445962716648964165745e-01L, +2.2280115375939451577103212214043255525024e-01L, +2.3021958727684373024017095967980299065551e-01L, +2.3761231386547125247388363432563777919892e-01L, +2.4497866312686415417208248121127580641959e-01L, +2.5962962940825753102994644318397190560106e-01L, +2.7416745111965879759937189834217578592444e-01L, +2.8858736189407739562361141995821834504332e-01L, +3.0288486837497140556055609450555821812277e-01L, +3.1705575320914700980901557667446732975852e-01L, +3.3109607670413209494433878775694455421259e-01L, +3.4500217720710510886768128690005168408290e-01L, +3.5877067027057222039592006392646052215363e-01L, +3.7239844667675422192365503828370182641413e-01L, +3.8588266939807377589769548460723139638186e-01L, +3.9922076957525256561471669615886476491104e-01L, +4.1241044159738730689979128966712694260920e-01L, +4.2544963737004228954226360518079233013817e-01L, +4.3833655985795780544561604921477130895882e-01L, +4.5106965598852347637563925728219344073798e-01L, +4.6364760900080611621425623146121439713344e-01L, +4.8833395105640552386716496074706484459644e-01L, +5.1238946031073770666660102058425923805558e-01L, +5.3581123796046370026908506870769144698471e-01L, +5.5859931534356243597150821640166122875873e-01L, 
+5.8075635356767039920327447500150082375122e-01L, +6.0228734613496418168212269420423291922459e-01L, +6.2319932993406593099247534906037459367793e-01L, +6.4350110879328438680280922871732260447265e-01L, +6.6320299270609325536325431023827583417226e-01L, +6.8231655487474807825642998171115298784729e-01L, +7.0085440788445017245795128178675127318623e-01L, +7.1882999962162450541701415152590469891043e-01L, +7.3625742898142813174283527108914662479274e-01L, +7.5315128096219438952473937026902888600575e-01L, +7.6952648040565826040682003598565401726598e-01L, +7.8539816339744830961566084581987569936977e-01L, +8.1569192331622341102146083874564582672284e-01L, +8.4415398611317100251784414827164746738632e-01L, +8.7090345707565295314017311259781407291650e-01L, +8.9605538457134395617480071802993779546602e-01L, +9.1971960535041681722860345482108940969311e-01L, +9.4200004037946366473793717053459362115891e-01L, +9.6299433068093620181519583599709989677298e-01L, +9.8279372324732906798571061101466603762572e-01L, +1.0014831356942347329183295953014374896343e+00L, +1.0191413442663497346383429170230636212354e+00L, +1.0358412530088001765846944703254440735476e+00L, +1.0516502125483736674598673120862999026920e+00L, +1.0666303653157435630791763474202799086015e+00L, +1.0808390005411683108871567292171997859003e+00L, +1.0943289073211899198927883146102352763033e+00L, +1.1071487177940905030170654601785370497543e+00L, +1.1309537439791604464709335155363277560026e+00L, +1.1525719972156675180401498626127514672834e+00L, +1.1722738811284763866005949441337046006865e+00L, +1.1902899496825317329277337748293182803384e+00L, +1.2068173702852525303955115800565576625682e+00L, +1.2220253232109896370417417439225704120294e+00L, +1.2360594894780819419094519711090786146210e+00L, +1.2490457723982544258299170772810900483550e+00L, +1.2610933822524404193139408812473357640124e+00L, +1.2722973952087173412961937498224805746463e+00L, +1.2827408797442707473628852511364955164072e+00L, +1.2924966677897852679030914214070816723528e+00L, 
+1.3016288340091961438047858503666855024453e+00L, +1.3101939350475556342564376891719053437537e+00L, +1.3182420510168370498593302023271363040427e+00L, +1.3258176636680324650592392104284756886164e+00L, +1.3397056595989995393283037525895557850243e+00L, +1.3521273809209546571891479413898127598774e+00L, +1.3633001003596939542892985278250991560269e+00L, +1.3734007669450158608612719264449610604836e+00L, +1.3825748214901258580599674177685685163955e+00L, +1.3909428270024183486427686943836432395486e+00L, +1.3986055122719575950126700816114282727858e+00L, +1.4056476493802697809521934019958080664406e+00L, +1.4121410646084952153676136718584890852820e+00L, +1.4181469983996314594038603039700988632607e+00L, +1.4237179714064941189018190466107297108905e+00L, +1.4288992721907326964184700745371984001389e+00L, +1.4337301524847089866404719096698873880264e+00L, +1.4382447944982225979614042479354816039669e+00L, +1.4424730991091018200252920599377291810352e+00L, +1.4464413322481351841999668424758803866109e+00L, +#elif defined(__x86) +1.243549945356789976358413696289e-01L, 1.320397615781985223293304443359e-01L, +1.397088742814958095550537109375e-01L, 1.473614810383878648281097412109e-01L, +1.549967419123277068138122558594e-01L, 1.626138285500928759574890136719e-01L, +1.702119252295233309268951416016e-01L, 1.777902289759367704391479492188e-01L, +1.853479499695822596549987792969e-01L, 1.928843122441321611404418945312e-01L, +2.003985538030974566936492919922e-01L, 2.078899272019043564796447753906e-01L, +2.153576996643096208572387695312e-01L, 2.228011537226848304271697998047e-01L, +2.302195872762240469455718994141e-01L, 2.376123138237744569778442382812e-01L, +2.449786631041206419467926025391e-01L, 2.596296293195337057113647460938e-01L, +2.741674510762095451354980468750e-01L, 2.885873618070036172866821289062e-01L, +3.028848683461546897888183593750e-01L, 3.170557531993836164474487304688e-01L, +3.310960766393691301345825195312e-01L, 3.450021771714091300964355468750e-01L, 
+3.587706702528521418571472167969e-01L, 3.723984466632828116416931152344e-01L, +3.858826693613082170486450195312e-01L, 3.992207695264369249343872070312e-01L, +4.124104415532201528549194335938e-01L, 4.254496373469009995460510253906e-01L, +4.383365598041564226150512695312e-01L, 4.510696559445932507514953613281e-01L, +4.636476089945062994956970214844e-01L, 4.883339509833604097366333007812e-01L, +5.123894601128995418548583984375e-01L, 5.358112377580255270004272460938e-01L, +5.585993151180446147918701171875e-01L, 5.807563534472137689590454101562e-01L, +6.022873460315167903900146484375e-01L, 6.231993297114968299865722656250e-01L, +6.435011087451130151748657226562e-01L, 6.632029926404356956481933593750e-01L, +6.823165547102689743041992187500e-01L, 7.008544078562408685684204101562e-01L, +7.188299994450062513351440429688e-01L, 7.362574287690222263336181640625e-01L, +7.531512808054685592651367187500e-01L, 7.695264802314341068267822265625e-01L, +7.853981633670628070831298828125e-01L, 8.156919232569634914398193359375e-01L, +8.441539860796183347702026367188e-01L, 8.709034570492804050445556640625e-01L, +8.960553845390677452087402343750e-01L, 9.197196052409708499908447265625e-01L, +9.420000403188169002532958984375e-01L, 9.629943305626511573791503906250e-01L, +9.827937232330441474914550781250e-01L, 1.001483135391026735305786132812e+00L, +1.019141343887895345687866210938e+00L, 1.035841252654790878295898437500e+00L, +1.051650212146341800689697265625e+00L, 1.066630364861339330673217773438e+00L, +1.080839000176638364791870117188e+00L, 1.094328907318413257598876953125e+00L, +1.107148717623203992843627929688e+00L, 1.130953743588179349899291992188e+00L, +1.152571997139602899551391601562e+00L, 1.172273880802094936370849609375e+00L, +1.190289949532598257064819335938e+00L, 1.206817369908094406127929687500e+00L, +1.222025323193520307540893554688e+00L, 1.236059489194303750991821289062e+00L, +1.249045772012323141098022460938e+00L, 1.261093381792306900024414062500e+00L, 
+1.272297394927591085433959960938e+00L, 1.282740879338234663009643554688e+00L, +1.292496667709201574325561523438e+00L, 1.301628833636641502380371093750e+00L, +1.310193934943526983261108398438e+00L, 1.318242050707340240478515625000e+00L, +1.325817663222551345825195312500e+00L, 1.339705659542232751846313476562e+00L, +1.352127380669116973876953125000e+00L, 1.363300099968910217285156250000e+00L, +1.373400766868144273757934570312e+00L, 1.382574821356683969497680664062e+00L, +1.390942826867103576660156250000e+00L, 1.398605511989444494247436523438e+00L, +1.405647648964077234268188476562e+00L, 1.412141064181923866271972656250e+00L, +1.418146998155862092971801757812e+00L, 1.423717970959842205047607421875e+00L, +1.428899271879345178604125976562e+00L, 1.433730152435600757598876953125e+00L, +1.438244794495403766632080078125e+00L, 1.442473099101334810256958007812e+00L, +1.446441331878304481506347656250e+00L, +#endif +}; +static const long double TBL_atan_lol[] = { +#if defined(__sparc) +1.4074869197628063802317202820414310039556e-36L, +-4.9596961594739925555730439437999675295505e-36L, +8.9527745625194648873931213446361849472788e-36L, +1.1880437423207895718180765843544965589427e-35L, +-2.7810278112045145378425375128234365381448e-37L, +1.4797220377023800327295536234315147262387e-36L, +-4.2169561400548198732870384801849639863829e-36L, +7.2431229666913484649930323656316023494680e-36L, +-2.1573430089839170299895679353790663182462e-36L, +-9.9515745405126723554452367298128605186305e-36L, +-3.9065558992324838181617569730397882363067e-36L, +5.5260292271793726813211980664661124518807e-36L, +8.8415722215914321807682254318036452043689e-36L, +-8.1767728791586179254193323628285599800711e-36L, +-1.3344123034656142243797113823028330070762e-36L, +-4.4927331207813382908930733924681325892188e-36L, +4.4945511471812490393201824336762495687730e-36L, +-1.6688081504279223555776724459648440567274e-35L, +1.5629757586107955769461086568937329684113e-35L, +-2.2389835563308078552507970385331510848109e-35L, 
+-4.8312321745547311551870450671182151367050e-36L, +-1.4336172352905832876958926610980698844309e-35L, +-8.7440181998899932802989174170960593316080e-36L, +5.9284636008529837445780360785464550143016e-36L, +-2.2376651248436241276061055295043514993630e-35L, +6.0745837599336105414280310756677442136480e-36L, +1.5372187110451949677792344762029967023093e-35L, +2.0976068056751156241657121582478790247159e-35L, +-5.5623956405495438060726862202622807523700e-36L, +1.9697366707832471841858411934897351901523e-35L, +2.1070311964479488509034733639424887543697e-35L, +-2.3027356362982001602256518510854229844561e-35L, +4.8950964225733349266861843522029764772843e-36L, +-7.2380143477794458213872723050820253166391e-36L, +1.6365648865703614031637443396049568858105e-35L, +-3.9885811958234530793729129919803234197399e-35L, +4.1587722120912613510417783923227421336929e-35L, +3.8347421454556472153684687377337135027394e-35L, +-9.2251178933638721723515896465489002497864e-36L, +1.4094619690455989526175736741854656192178e-36L, +3.3568857805472235270612851425810803679451e-35L, +3.9090991055522552395018106803232118803401e-35L, +5.2956416979654208140521862707297033857956e-36L, +-5.0960846819945514367847063923662507136721e-36L, +-4.4959014425277615858329680393918315204998e-35L, +3.8039226544551634266566857615962609653834e-35L, +-4.4056522872895512108308642196611689657618e-36L, +1.6025024192482161076223807753425619076948e-36L, +2.1679525325309452561992610065108380635264e-35L, +1.9844038013515422125715362925736754104066e-35L, +3.9139619471799746834505227353568432457241e-35L, +2.1113443807975453505518453436799561854730e-35L, +3.1558557277444692755039816944392770185432e-35L, +1.6295044520355461408265585619500238335614e-35L, +-3.5087245209270305856151230356171213582305e-35L, +2.9041041864282855679591055270946117300088e-35L, +-2.3128843453818356590931995209806627233282e-35L, +-7.7124923181471578439967973820714857839953e-35L, +2.7539027829886922429092063590445808781462e-35L, 
+-9.4500899453181308951084545990839335972452e-35L, +-7.3061755302032092337594946001641651543473e-35L, +-4.1736144813953752193952770157406952602798e-35L, +3.4369948356256407045344855262863733571105e-35L, +-6.3790243492298090907302084924276831116460e-35L, +-9.6842943816353261291004127866079538980649e-36L, +4.8746757539138870909275958326700072821615e-35L, +-8.7533886477084190884511601368582548254655e-35L, +1.4284743992327918892692551138086727754845e-35L, +5.7262776211073389542565625693479173445042e-35L, +-3.2254883148780411245594822270747948565684e-35L, +7.8853548190609877325965525252380833808405e-35L, +8.4081736739037194097515038365370730251333e-35L, +7.4722870357563683815078242981933587273670e-35L, +7.9977202825793435289434813600890494256112e-36L, +-8.0577840773362139054848492346292673645405e-35L, +1.4217746753670583065490040209048757624336e-35L, +1.2232486914221205004109743560319090913328e-35L, +8.9696055070830036447361957217943988339065e-35L, +-3.1480394435081884410686066739846269858951e-35L, +-5.0927146040715345013240642517608928352977e-35L, +-5.7431997715924136568133859432702789493569e-35L, +-4.3920451405083770279099766080476485439987e-35L, +9.1106753984907715563018666776308759323326e-35L, +-3.7032569014272841009512400773061537538358e-35L, +8.8167419429746714276909825405131416764489e-35L, +-3.8389341696028352503752312861740895209678e-36L, +-3.3462959341960891546340895508017603408404e-35L, +-3.9212626776786074383916188498955828634947e-35L, +-7.8340397396377867255864494568594088378648e-35L, +7.4681018632456986520600640340627309824469e-35L, +8.9110918618956918451135594876165314884113e-35L, +3.9418160632271890530431797145664308529115e-35L, +-4.1048114088580104820193435638327617443913e-35L, +-2.3165419451582153326383944756220900454330e-35L, +-1.8428312581525319409399330203703211113843e-35L, +7.1477316546709482345411712017906842769961e-35L, +2.9914501578435874662153637707016094237004e-35L, +#elif defined(__x86) +1.108243739551347953496477557317e-11L, 
3.644022694535396219063202730280e-11L, +7.667835628314065801595065768845e-12L, 5.026377078169301918590803009109e-11L, +1.161327548990211907411719105561e-11L, 4.785569941615255008968280209991e-11L, +5.595107356360146549819920947848e-11L, 1.673930035747684999707469623769e-11L, +2.611250523102718193166964451527e-11L, 1.384250305661681615897729354721e-11L, +2.278105796029649304219088055497e-11L, 3.586371256902077123693302823191e-13L, +3.342842716722085763523965049902e-11L, 3.670968534386232233574504707347e-11L, +6.196832945990602657404893210974e-13L, 4.169679549603939604438777470618e-11L, +2.274351222528987867221331091414e-11L, 8.872382531858169709022188891298e-11L, +4.344925246387385146717580155420e-11L, 8.707377833692929105196832265348e-11L, +2.881671577173773513055821329154e-11L, 9.763393361566846205717315422347e-12L, +6.476296480975626822569454546857e-11L, 3.569597877124574002505169001136e-11L, +1.772007853877284712958549977698e-11L, 1.347141028196192304932683248872e-11L, +3.676555884905046507598141175404e-11L, 4.881564068032948912761478588710e-11L, +4.416715404487185607337693704681e-11L, 2.314128999621257979016734983553e-11L, +5.380138283056477968352133002913e-11L, 4.393022562414389595406841771063e-11L, +6.299816718559209976839402028537e-12L, 7.304511413053165996581483735843e-11L, +1.978381648117426221467592544212e-10L, 2.024381732686578226139414070989e-10L, +2.255178211796380992141612703464e-10L, 1.204566302442290648452508620986e-10L, +1.034473912921080457667329099995e-10L, 2.225691010059030834353745950874e-10L, +4.817137162794350606107263804151e-11L, 6.565755971506095086327587326326e-11L, +1.644791039522307629611529931429e-10L, 2.820930388953087163050126809014e-11L, +1.766182540818701085571546539514e-10L, 2.124059054092171070266466628320e-10L, +1.567258302596026515190288816001e-10L, 1.742241535800378094231540188685e-10L, +3.038550253253096300737572104929e-11L, 5.925991958164150280814584656688e-11L, +3.355266774764151155289750652594e-11L, 
2.637254809561744853531409402995e-11L, +3.227621096606048365493782702458e-11L, 1.094459672377587282585894259882e-10L, +6.064676448464127209709358607166e-11L, 1.182850444360454453720999258140e-10L, +1.428492049425553288966601449688e-11L, 3.032079976125434624889374125094e-10L, +3.784543889504767060855636487744e-10L, 3.540092982887960328254439790467e-10L, +4.020318667701700464612998296302e-10L, 4.544042324059585739827798668654e-10L, +3.645299460952866120296998202703e-10L, 2.776662293911361485235212513020e-12L, +1.708865101734375304910370400700e-10L, 3.909810965716415233488278047493e-10L, +7.606461848875826105025137974947e-11L, 3.263814502297453347587046149712e-10L, +1.499334758629144388918183376012e-10L, 3.771581242675818925565576303133e-10L, +1.746932950084818923507049088298e-11L, 2.837781909176306820465786987027e-10L, +3.859312847318946163435901230778e-10L, 4.601335192895268187473357720101e-10L, +2.811262558622337888849804940684e-10L, 4.060360843532416964489955306249e-10L, +8.058369357752989796958168458531e-11L, 3.725546414244147566166855921414e-10L, +1.040286509953292907344053122733e-10L, 3.094968093808145773271362531155e-10L, +4.454811192340438979284756311844e-10L, 5.676678748199027602705574110388e-11L, +2.518376833121948163898128509842e-10L, 3.907837370041422778250991189943e-10L, +7.687158710333735613246114865100e-11L, 1.334418885622867537060685125566e-10L, +1.353147719826124443836432060856e-10L, 2.825131007652335581739282335732e-10L, +4.161925466840049254333079881002e-10L, 4.265713490956410156084891599630e-10L, +2.437693664320585461575989523716e-10L, 4.466519138542116247357297503086e-10L, +3.113875178143440979746983590908e-10L, 4.910822904159495654488736486097e-11L, +2.818831329324169810481585538618e-12L, 7.767009768334052125229252512543e-12L, +3.698307026936191862258804165254e-10L, +#endif +}; + +/* + * mx_atanl(x, err) + * Table look-up algorithm + * By K.C. Ng, March 9, 1989 + * + * Algorithm. 
+ * + * The algorithm is based on atan(x)=atan(y)+atan((x-y)/(1+x*y)). + * We use poly1(x) to approximate atan(x) for x in [0,1/8] with + * error (relative) + * |(atan(x)-poly1(x))/x|<= 2^-140 + * + * and use poly2(x) to approximate atan(x) for x in [0,1/65] with + * error + * |atan(x)-poly2(x)|<= 2^-143.7 + * + * Here poly1 and poly2 are odd polynomials of the following form: + * x + x^3*(a1+x^2*(a2+...)) + * + * (0). Filter out Inf, NaN and 0. + * (1). Reduce x to positive by atan(x) = -atan(-x). + * (2). For x <= 1/8, use + * (2.1) if x < 2^(-prec/2), atan(x) = x with inexact flag raised + * (2.2) Otherwise + * atan(x) = poly1(x) + * (3). For x >= 8 (prec = 78) + * (3.1) if x >= 2^prec, atan(x) = atan(inf) - pio2_lo + * (3.2) if x >= 2^(prec/3), atan(x) = atan(inf) - 1/x + * (3.3) if x > 65, atan(x) = atan(inf) - poly2(1/x) + * (3.4) Otherwise, atan(x) = atan(inf) - poly1(1/x) + * + * (4). Now x is in (0.125, 8) + * Find y that matches x to 4.5 bits after the binary point (easy). + * If iy is the high word of y, then + * single : j = (iy - 0x3e000000) >> 19 + * double : j = (iy - 0x3fc00000) >> 16 + * quad : j = (iy - 0x3ffc0000) >> 12 + * + * Let s = (x-y)/(1+x*y). Then + * atan(x) = atan(y) + poly2(s) + * = TBL_atan_hil[j] + (TBL_atan_lol[j] + poly2(s) ) + * + * Note. |s| <= 1.5384615385e-02 = 1/65.
Maxium occurs at x = 1.03125 + * + */ + +/* + * p[0] - p[16] for atan(x) = + * x + x^3*(p1+x^2*(p2+...)) + * + * pe[] holds the leading coefficients 1, p1, p2, p3, each stored as a + * (leading part, trailing part) pair whose sum is the coefficient; it is + * the table handed to mx_polyl(). p[] holds all 17 coefficients as + * single long double values. + */ +static const long double pe[] = { + 1.0L, + 0.0L, +#if defined(__sparc) + -0.33333333333333332870740406406184774823L, + -4.62592926927148558508441072595508240609e-18L, + 0.19999999999999999722444243843710864894L, + 2.77555756156289124602047010782090464486e-18L, + -0.14285714285714285615158658515611023176L, + -9.91270557700756738621231719241800559409e-19L, +#elif defined(__x86) + -0.33333333325572311878204345703125L, + -7.76102145512898763020833333192787755766644373e-11L, + 0.19999999995343387126922607421875L, + 4.65661287307739257812498949613909375938538636e-11L, + -0.142857142840512096881866455078125L, + -1.66307602609906877787419703858463013035681375e-11L, +#endif +}; + +static const long double p[] = { /* p[0] - p[16] */ + 1.0L, + -3.33333333333333333333333333333333333319278775586e-0001L, + 1.99999999999999999999999999999999894961390937601e-0001L, + -1.42857142857142857142857142856866970385846301312e-0001L, + 1.11111111111111111111111110742899094415954427738e-0001L, + -9.09090909090909090909087972707015549231951421806e-0002L, + 7.69230769230769230767699003016385628597359717046e-0002L, + -6.66666666666666666113842763495291228025226575259e-0002L, + 5.88235294117646915706902204947653640091126695962e-0002L, + -5.26315789473657016886225044679594035524579379810e-0002L, + 4.76190476186633969331771169790375592681525481267e-0002L, + -4.34782608290146274616081389793141896576997370161e-0002L, + 3.99999968161267722260103962788865225205057218988e-0002L, + -3.70368536844778256320786172745225703228683638328e-0002L, + 3.44752320396524479494062858284036892703898522150e-0002L, + -3.20491216046653214683721787776813360591233428081e-0002L, + 2.67632651033434456758550618122802167256870856514e-0002L, +}; + +/* + * qe[]: 1 and the first two coefficients of the shorter polynomial + * (about -1/3 and 1/5), each stored as a (leading part, trailing part) + * pair in the same layout as pe[]. + */ +static const long double qe[] = { + 1.0L, + 0.0L, +#if defined(__sparc) + -0.33333333333333332870740406406184774823486804962158203125L, +
-4.625929269271485585069345465471207312531868714634217630e-18L, + 0.19999999999999999722444243843710864894092082977294921875L, + 2.7755575615628864268260553912956813621977220359134667560e-18L, +#elif defined(__x86) + -0.33333333325572311878204345703125L, + -7.76102145512898763020833333042135150927893e-11L, + 0.19999999995343387126922607421875L, + 4.656612873077392578124507576697622106863058e-11L, +#endif +}; + +static const long double q[] = { /* q[0] - q[8]; q[i] multiplies x^(2i+3) */ + -3.33333333333333333333333333333333333304213515094e-0001L, + 1.99999999999999999999999999999995075766976221077e-0001L, + -1.42857142857142857142857142570379604317921113079e-0001L, + 1.11111111111111111111102923861900979127978214077e-0001L, + -9.09090909090909089586854075816999506863320031460e-0002L, + 7.69230769230756334929213246003824644696974730368e-0002L, + -6.66666666589192433974402013508912138168133579856e-0002L, + 5.88235013696778007696800252045588307023299350858e-0002L, + -5.25754959898164576495303840687699583228444695685e-0002L, +}; + +static const long double +two8700 = 9.140338438955067659002088492701e+2618L, /* 2^8700 */ +twom8700 = 1.094051392821643668051436593760e-2619L, /* 2^-8700 */ +one = 1.0L, +zero = 0.0L, +pi = 3.1415926535897932384626433832795028841971693993751L, +pio2 = 1.57079632679489661923132169163975144209858469968755L, +pio4 = 0.785398163397448309615660845819875721049292349843776L, +pi3o4 = 2.356194490192344928846982537459627163147877049531329L, /* each *_lo below is returned through the correction pointer next to its constant; presumably the part lost when the constant is rounded to long double */ +#if defined(__sparc) +pi_lo = 8.67181013012378102479704402604335196876232e-35L, +pio2_lo = 4.33590506506189051239852201302167598438116e-35L, +pio4_lo = 2.16795253253094525619926100651083799219058e-35L, +pi3o4_lo = 6.50385759759283576859778301953251397657174e-35L; +#elif defined(__x86) +pi_lo = -5.01655761266833202355732708e-20L, +pio2_lo = -2.50827880633416601177866354e-20L, +pio4_lo = -1.25413940316708300588933177e-20L, +pi3o4_lo = -9.18342907192877118770525931e-20L; +#endif +/* + * mx_atanl(x, err) + * + * Returns an approximation to atan(x) and stores a correction in *err, + * so that the return value plus *err approximates atan(x) more closely + * than the return value alone. Most paths finish the same way: the + * result is formed as w = s + t and *err = t - (w - s) keeps the part + * of t that did not fit into w. + * + * Inf and NaN get no special treatment here; __k_atan2l() below + * returns early for them and only passes the finite quotient |y/x|. + */ +static long double +mx_atanl(long double x, long double
*err) { + long double y, z, r, s, t, w, s_h, s_l, x_h, x_l, zz[3], ee[2], z_h, + z_l, r_h, r_l, u, v; + int ix, iy, hx, i, j; + float fx; + + hx = HI_XWORD(x); + ix = hx & (~0x80000000); /* high word of |x| */ + + /* for |x| < 1/8 */ + if (ix < 0x3ffc0000) { + if (ix < 0x3ff30000) { /* when |x| < 2**-12 */ + if (ix < 0x3fc60000) { /* if |x| < 2**-57 (2**-prec/2) */ + *err = (long double) ((int) x); /* (int) x is 0 here; presumably the conversion is what raises inexact */ + return (x); + } + z = x * x; + t = q[8]; + for (i = 7; i >= 0; i--) t = q[i] + z * t; + t *= x * z; + r = x + t; + *err = t - (r - x); + return (r); + } + z = x * x; + + /* use long double precision at p4 and on */ + t = p[16]; + for (i = 15; i >= 4; i--) t = p[i] + z * t; + ee[0] = z * t; + + x_h = x; HALF(x_h); + z_h = z; HALF(z_h); + x_l = x - x_h; + z_l = (x_h * x_h - z_h); + zz[0] = z; + zz[1] = z_h; + zz[2] = z_l + x_l * (x + x_h); + + /* compute (1+z*(p1+z*(p2+z*(p3+e)))) */ + + mx_polyl(zz, pe, ee, 3); + + /* finally x*(1+z*(p1+...)) */ + r = x_h * ee[0]; + t = x * ee[1] + x_l * ee[0]; + s = t + r; + *err = t - (s - r); + return (s); + } + /* for |x| >= 8.0 */ + if (ix >= 0x40020000) { /* x >= 8 */ + x = fabsl(x); + if (ix >= 0x402e0000) { /* x >= 2**47 */ + if (ix >= 0x408b0000) { /* x >= 2**140 */ + y = -pio2_lo; + } else + y = one / x - pio2_lo; /* atan(x) ~ (pio2 + pio2_lo) - 1/x */ + if (hx >= 0) { + t = pio2 - y; + *err = -(y - (pio2 - t)); + } else { + t = y - pio2; + *err = y - (pio2 + t); + } + return (t); + } else { + /* compute r = 1/x */ + r = one / x; + z = r * r; + x_h = x; HALF(x_h); + r_h = r; HALF(r_h); + z_h = z; HALF(z_h); + r_l = r * ((x_h - x) * r_h - (x_h * r_h - one)); + z_l = (r_h * r_h - z_h); + zz[0] = z; + zz[1] = z_h; + zz[2] = z_l + r_l * (r + r_h); + if (ix < 0x40050400) { /* 8 <= x < 65 */ + /* use long double precision at p4 and on */ + t = p[16]; + for (i = 15; i >= 4; i--) t = p[i] + z * t; + ee[0] = z * t; + /* compute (1+z*(p1+z*(p2+z*(p3+e)))) */ + mx_polyl(zz, pe, ee, 3); + } else { /* 65 <= x < 2**47 */ + /* use long double at q3 and on */ + t = q[8]; + for (i = 7; i >= 2; i--) t = q[i] + z * t; +
ee[0] = z * t; + /* compute (1+z*(q1+z*(q2+e))) */ + mx_polyl(zz, qe, ee, 2); + } + /* pio2 - r*(1+...) */ + v = r_h * ee[0]; + t = pio2_lo - (r * ee[1] + r_l * ee[0]); + if (hx >= 0) { + s = pio2 - v; + t -= (v - (pio2 - s)); + } else { + s = v - pio2; + t = -(t - (v - (s + pio2))); + } + w = s + t; + *err = t - (w - s); + return (w); + } + } + /* now x is between 1/8 and 8 */ + iy = (ix + 0x00000800) & 0x7ffff000; /* round the high word of |x| to 4 fraction bits */ + j = (iy - 0x3ffc0000) >> 12; /* table index; 0 corresponds to 1/8 */ + ((int *) &fx)[0] = 0x3e000000 + (j << 19); /* the same value written as a float bit pattern (0x3e000000 is 1/8) */ + y = (long double) fx; + x = fabsl(x); + + w = (x - y); + v = 1.0L / (one + x * y); + s = w * v; + z = s * s; + /* use long double precision at q3 and on */ + t = q[8]; + for (i = 7; i >= 2; i--) t = q[i] + z * t; + ee[0] = z * t; + s_h = s; HALF(s_h); + z_h = z; HALF(z_h); + x_h = x; HALF(x_h); + t = one + x * y; HALF(t); + r = -((x_h - x) * y - (x_h * y - (t - one))); + s_l = -v * (s_h * r - (w - s_h * t)); + z_l = (s_h * s_h - z_h); + zz[0] = z; + zz[1] = z_h; + zz[2] = z_l + s_l * (s + s_h); + /* compute (1+z*(q1+z*(q2+e))) by calling mx_polyl */ + mx_polyl(zz, qe, ee, 2); + v = s_h * ee[0]; + t = TBL_atan_lol[j] + (s * ee[1] + s_l * ee[0]); + u = TBL_atan_hil[j]; + s = u + v; + t += (v - (s - u)); + w = s + t; + *err = t - (w - s); + if (hx < 0) { + w = -w; + *err = -*err; + } + return (w); +} +/* + * __k_atan2l(y, x, w) + * + * Returns an approximation to atan2(y, x) and stores a correction in + * *w, so that the return value plus *w approximates atan2(y, x) more + * closely than the return value alone. *w is first set to 0 and then + * overwritten on every path that has a correction. + * + * The cases are tested in this order: + * 1. x or y is NaN or infinite; + * 2. x or y is zero; + * 3. the high words of |y| and |x| differ by more than 0x00640000 + * (a factor of about 2**100), where the result is +-pi/2, +-pi + * or the quotient y/x itself; + * 4. otherwise atan(|y/x|) from mx_atanl(), corrected for the + * rounding error of the quotient and moved to the right quadrant. + */ +long double +__k_atan2l(long double y, long double x, long double *w) { + long double t, xh, th, t1, t2, w1, w2; + int ix, iy, hx, hy; + + hy = HI_XWORD(y); + hx = HI_XWORD(x); + iy = hy & ~0x80000000; + ix = hx & ~0x80000000; + + *w = 0.0; + if (ix >= 0x7fff0000 || iy >= 0x7fff0000) { /* ignore inexact */ + if (isnanl(x) || isnanl(y)) + return (x * y); + else if (iy < 0x7fff0000) { + if (hx >= 0) { /* ATAN2(+-finite, +inf) is +-0 */ + *w *= y; /* 0 * y: a zero carrying the sign of y */ + return (*w); + } else { /* ATAN2(+-finite, -inf) is +-pi */ + *w = copysignl(pi_lo, y); + return (copysignl(pi, y)); + } + } else if (ix < 0x7fff0000) { + /* ATAN2(+-inf, finite) is +-pi/2 */ + *w = (hy >= 0)?
pio2_lo : -pio2_lo; + return ((hy >= 0)? pio2 : -pio2); + } else if (hx > 0) { /* ATAN2(+-INF,+INF) = +-pi/4 */ + *w = (hy >= 0)? pio4_lo : -pio4_lo; + return ((hy >= 0)? pio4 : -pio4); + } else { /* ATAN2(+-INF,-INF) = +-3pi/4 */ + *w = (hy >= 0)? pi3o4_lo : -pi3o4_lo; + return ((hy >= 0)? pi3o4 : -pi3o4); + } + } else if (x == zero || y == zero) { + if (y == zero) { + if (hx >= 0) /* ATAN2(+-0, +(0 <= x <= inf)) is +-0 */ + return (y); + else { /* ATAN2(+-0, -(0 <= x <= inf)) is +-pi */ + *w = (hy >= 0)? pi_lo : -pi_lo; + return ((hy >= 0)? pi : -pi); + } + } else { /* ATAN2(+-(anything but 0 and NaN), 0) is +-pi/2 */ + *w = (hy >= 0)? pio2_lo : -pio2_lo; + return ((hy >= 0)? pio2 : -pio2); + } + } else if (iy - ix > 0x00640000) { /* |x/y| < 2 ** -100 */ + *w = (hy >= 0)? pio2_lo : -pio2_lo; + return ((hy >= 0)? pio2 : -pio2); + } else if (ix - iy > 0x00640000) { /* |y/x| < 2 ** -100 */ + if (hx < 0) { + *w = (hy >= 0)? pi_lo : -pi_lo; + return ((hy >= 0)? pi : -pi); + } else { + t = y / x; + th = t; HALF(th); + xh = x; HALF(xh); + t1 = (x - xh) * t + xh * (t - th); + t2 = y - xh * th; + *w = (t2 - t1) / x; /* t2 - t1 is algebraically y - x*t, so *w approximates the rounding error of t = y/x */ + return (t); + } + } else { + if (ix >= 0x5fff3000) { /* large |x|: scale both operands by 2^-8700; the quotient y/x is unchanged */ + x *= twom8700; + y *= twom8700; + } else if (ix < 0x203d0000) { /* small |x|: scale both operands by 2^8700 */ + x *= two8700; + y *= two8700; + } + y = fabsl(y); + x = fabsl(x); + t = y / x; + th = t; HALF(th); + xh = x; HALF(xh); + t1 = (x - xh) * t + xh * (t - th); + t2 = y - xh * th; + w1 = mx_atanl(t, &w2); + w2 += (t2 - t1) / (x + y * t); /* error of t carried through atan: (y - x*t) / (x*(1 + t*t)), with x*t written as y */ + if (hx < 0) { /* x < 0: the angle is pi - atan(|y/x|) */ + t1 = pi - w1; + t2 = pi - t1; + w2 = (pi_lo - w2) - (w1 - t2); /* w1 - t2 is what the rounding of pi - w1 dropped */ + w1 = t1; + } + *w = (hy >= 0)? w2 : -w2; + return ((hy >= 0)?
w1 : -w1); + } +} diff --git a/usr/src/lib/libm/common/complex/k_cexp.c b/usr/src/lib/libm/common/complex/k_cexp.c new file mode 100644 index 0000000000..0befa68fc1 --- /dev/null +++ b/usr/src/lib/libm/common/complex/k_cexp.c @@ -0,0 +1,180 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* INDENT OFF */ +/* + * double __k_cexp(double x, int *n); + * Returns the exponential of x in the form of 2**n * y, y=__k_cexp(x,&n). + * + * Method + * 1. Argument reduction: + * Reduce x to an r so that |r| <= 0.5*ln2 ~ 0.34658. + * Given x, find r and integer k such that + * + * x = k*ln2 + r, |r| <= 0.5*ln2. + * + * Here r will be represented as r = hi-lo for better + * accuracy. + * + * 2. Approximation of exp(r) by a special rational function on + * the interval [0,0.34658]: + * Write + * R(r**2) = r*(exp(r)+1)/(exp(r)-1) = 2 + r*r/6 - r**4/360 + ... + * We use a special Remez algorithm on [0,0.34658] to generate + * a polynomial of degree 5 to approximate R.
The maximum error + * of this polynomial approximation is bounded by 2**-59. In + * other words, + * R(z) ~ 2.0 + P1*z + P2*z**2 + P3*z**3 + P4*z**4 + P5*z**5 + * (where z=r*r, and the values of P1 to P5 are listed below) + * and + * | 5 | -59 + * | 2.0+P1*z+...+P5*z - R(z) | <= 2 + * | | + * The computation of exp(r) thus becomes + * 2*r + * exp(r) = 1 + ------- + * R - r + * r*R1(r) + * = 1 + r + ----------- (for better accuracy) + * 2 - R1(r) + * where + * 2 4 10 + * R1(r) = r - (P1*r + P2*r + ... + P5*r ). + * + * 3. Return n = k and __k_cexp = exp(r). + * + * Special cases: + * exp(INF) is INF, exp(NaN) is NaN; + * exp(-INF) is 0, and + * for finite argument, only exp(0)=1 is exact. + * + * Range and Accuracy: + * When |x| is really big, say |x| > 50000, the accuracy + * is not important because the ultimate result will over or under + * flow. So we will simply replace n = 50000 and r = 0.0. For + * moderate size x, according to an error analysis, the error is + * always less than 1 ulp (unit in the last place). + * + * Constants: + * The hexadecimal values are the intended ones for the following + * constants. The decimal values may be used, provided that the + * compiler will convert from decimal to binary accurately enough + * to produce the hexadecimal values shown. 
+ */ +/* INDENT ON */ + +#include "libm.h" /* __k_cexp */ +#include "complex_wrapper.h" /* HI_WORD/LO_WORD */ + +/* INDENT OFF */ +static const double +one = 1.0, +two128 = 3.40282366920938463463e+38, /* 2^128 */ +halF[2] = { /* +-0.5, indexed by the sign bit of x */ + 0.5, -0.5, +}, +ln2HI[2] = { /* high part of ln2; entry [1] is the negated value */ + 6.93147180369123816490e-01, /* 0x3fe62e42, 0xfee00000 */ + -6.93147180369123816490e-01, /* 0xbfe62e42, 0xfee00000 */ +}, +ln2LO[2] = { /* low part of ln2; entry [1] is the negated value */ + 1.90821492927058770002e-10, /* 0x3dea39ef, 0x35793c76 */ + -1.90821492927058770002e-10, /* 0xbdea39ef, 0x35793c76 */ +}, +invln2 = 1.44269504088896338700e+00, /* 0x3ff71547, 0x652b82fe */ +P1 = 1.66666666666666019037e-01, /* 0x3FC55555, 0x5555553E */ +P2 = -2.77777777770155933842e-03, /* 0xBF66C16C, 0x16BEBD93 */ +P3 = 6.61375632143793436117e-05, /* 0x3F11566A, 0xAF25DE2C */ +P4 = -1.65339022054652515390e-06, /* 0xBEBBBD41, 0xC5D26BF1 */ +P5 = 4.13813679705723846039e-08; /* 0x3E663769, 0x72BEA4D0 */ +/* INDENT ON */ +/* + * __k_cexp(x, n) + * + * Returns y and stores *n such that exp(x) is approximately + * y * 2**(*n). The split between y and *n depends on the reduced + * exponent k: + * k == 0 y = exp(x), *n = 0 + * 0 < k <= 128 2**k is folded into y, *n = 0 + * k > 128 y is multiplied by 2**128, *n = k - 128 + * k < 0 y = exp(r) unscaled, *n = k + * For 50000 < |x| < inf, y is 1 and *n is +-50000. For NaN and + * +-inf, *n is set to 1 and y is NaN, inf or 0. + */ +double +__k_cexp(double x, int *n) { + double hi = 0.0L, lo = 0.0L, c, t; + int k, xsb; + unsigned hx, lx; + + hx = HI_WORD(x); /* high word of x */ + lx = LO_WORD(x); /* low word of x */ + xsb = (hx >> 31) & 1; /* sign bit of x */ + hx &= 0x7fffffff; /* high word of |x| */ + + /* filter out huge and non-finite arguments */ + if (hx >= 0x40e86a00) { /* if |x| > 50000 */ + if (hx >= 0x7ff00000) { + *n = 1; + if (((hx & 0xfffff) | lx) != 0) + return (x + x); /* NaN */ + else + return ((xsb == 0) ? x : 0.0); + /* exp(+-inf)={inf,0} */ + } + *n = (xsb == 0) ?
50000 : -50000; + return (one + ln2LO[1] * ln2LO[1]); /* generate inexact */ + } + + *n = 0; + /* argument reduction */ + if (hx > 0x3fd62e42) { /* if |x| > 0.5 ln2 */ + if (hx < 0x3FF0A2B2) { /* and |x| < 1.5 ln2 */ + hi = x - ln2HI[xsb]; + lo = ln2LO[xsb]; + k = 1 - xsb - xsb; /* +1 for x > 0, -1 for x < 0 */ + } else { + k = (int) (invln2 * x + halF[xsb]); /* x/ln2 rounded to nearest */ + t = k; + hi = x - t * ln2HI[0]; + /* t*ln2HI is exact for t<2**20 */ + lo = t * ln2LO[0]; + } + x = hi - lo; + *n = k; + } else if (hx < 0x3e300000) { /* when |x|<2**-28 */ + return (one + x); + } else + k = 0; + + /* x is now in primary range */ + t = x * x; + c = x - t * (P1 + t * (P2 + t * (P3 + t * (P4 + t * P5)))); /* R1(x) of the header comment */ + if (k == 0) + return (one - ((x * c) / (c - 2.0) - x)); + else { + t = one - ((lo - (x * c) / (2.0 - c)) - hi); + if (k > 128) { + t *= two128; + *n = k - 128; + } else if (k > 0) { + HI_WORD(t) += (k << 20); /* add k to the exponent field, i.e. t *= 2**k */ + *n = 0; + } + return (t); /* for k < 0, t is unscaled and *n is still k */ + } +} diff --git a/usr/src/lib/libm/common/complex/k_cexpl.c b/usr/src/lib/libm/common/complex/k_cexpl.c new file mode 100644 index 0000000000..5db611d812 --- /dev/null +++ b/usr/src/lib/libm/common/complex/k_cexpl.c @@ -0,0 +1,283 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* INDENT OFF */ +/* + * long double __k_cexpl(long double x, int *n); + * Returns the exponential of x in the form of 2**n * y, y=__k_cexpl(x,&n). + * + * 1. Argument Reduction: given the input x, find r and integer k + * and j such that + * x = (32k+j)*(ln2/32) + r, |r| <= (1/64)*ln2 . + * + * 2. expl(x) = 2^k * (2^(j/32) + 2^(j/32)*expm1(r)) + * Note: + * a. expm1(r) = (2r)/(2-R), + * R = r - r^2*(t1 + t2*r^2 + t3*r^4 + t4*r^6 + t5*r^8) + * b. 2^(j/32) is represented as + * exp2_32_hi[j]+exp2_32_lo[j] + * where + * exp2_32_hi[j] = 2^(j/32) rounded + * exp2_32_lo[j] = 2^(j/32) - exp2_32_hi[j]. + * + * Special cases: + * expl(INF) is INF, expl(NaN) is NaN; + * expl(-INF)= 0; + * for finite argument, only expl(0)=1 is exact. + * + * Accuracy: + * according to an error analysis, the error is always less than + * an ulp (unit in the last place). + * + * Misc. info. + * When |x| is really big, say |x| > 200000, the accuracy + * is not important because the ultimate result will over or under + * flow. So we will simply replace n = +-200000 and r = 0.0. For + * moderate size x, according to an error analysis, the error is + * always less than 1 ulp (unit in the last place). + * + * Constants: + * Only decimal values are given. We assume that the compiler will convert + * from decimal to binary accurately enough to produce the correct + * hexadecimal values.
+ */ +/* INDENT ON */ + +#include "libm.h" /* __k_cexpl */ +#include "complex_wrapper.h" /* HI_XWORD */ + +/* INDENT OFF */ +/* ln2/32 = 0.0216608493924982909192885037955680177523593791987579766912713 */ +#if defined(__x86) +static const long double + /* 43 significant bits, 21 trailing zeros */ +ln2_32hi = 2.166084939249657281834515742957592010498046875e-2L, +ln2_32lo = 1.7181009433463659920976473789104487579766912713e-15L; +static const long double exp2_32_hi[] = { /* exp2_32[j] = 2^(j/32) */ + 1.0000000000000000000000000e+00L, + 1.0218971486541166782081522e+00L, + 1.0442737824274138402382006e+00L, + 1.0671404006768236181297224e+00L, + 1.0905077326652576591003302e+00L, + 1.1143867425958925362894369e+00L, + 1.1387886347566916536971221e+00L, + 1.1637248587775775137938619e+00L, + 1.1892071150027210666875674e+00L, + 1.2152473599804688780476325e+00L, + 1.2418578120734840485256747e+00L, + 1.2690509571917332224885722e+00L, + 1.2968395546510096659215822e+00L, + 1.3252366431597412945939118e+00L, + 1.3542555469368927282668852e+00L, + 1.3839098819638319548151403e+00L, + 1.4142135623730950487637881e+00L, + 1.4451808069770466200253470e+00L, + 1.4768261459394993113155431e+00L, + 1.5091644275934227397133885e+00L, + 1.5422108254079408235859630e+00L, + 1.5759808451078864864006862e+00L, + 1.6104903319492543080837174e+00L, + 1.6457554781539648445110730e+00L, + 1.6817928305074290860378350e+00L, + 1.7186192981224779156032914e+00L, + 1.7562521603732994831094730e+00L, + 1.7947090750031071864148413e+00L, + 1.8340080864093424633989166e+00L, + 1.8741676341102999013002103e+00L, + 1.9152065613971472938202589e+00L, + 1.9571441241754002689657438e+00L, +}; +static const long double exp2_32_lo[] = { + 0.0000000000000000000000000e+00L, + 2.6327965667180882569382524e-20L, + 8.3765863521895191129661899e-20L, + 3.9798705777454504249209575e-20L, + 1.0668046596651558640993042e-19L, + 1.9376009847285360448117114e-20L, + 6.7081819456112953751277576e-21L, + 1.9711680502629186462729727e-20L, + 
2.9932584438449523689104569e-20L, + 6.8887754153039109411061914e-20L, + 6.8002718741225378942847820e-20L, + 6.5846917376975403439742349e-20L, + 1.2171958727511372194876001e-20L, + 3.5625253228704087115438260e-20L, + 3.1129551559077560956309179e-20L, + 5.7519192396164779846216492e-20L, + 3.7900651177865141593101239e-20L, + 1.1659262405698741798080115e-20L, + 7.1364385105284695967172478e-20L, + 5.2631003710812203588788949e-20L, + 2.6328853788732632868460580e-20L, + 5.4583950085438242788190141e-20L, + 9.5803254376938269960718656e-20L, + 7.6837733983874245823512279e-21L, + 2.4415965910835093824202087e-20L, + 2.6052966871016580981769728e-20L, + 2.6876456344632553875309579e-21L, + 1.2861930155613700201703279e-20L, + 8.8166633394037485606572294e-20L, + 2.9788615389580190940837037e-20L, + 5.2352341619805098677422139e-20L, + 5.2578463064010463732242363e-20L, +}; +#else /* sparc */ +static const long double + /* 0x3FF962E4 2FEFA39E F35793C7 00000000 */ +ln2_32hi = 2.166084939249829091928849858592451515688e-2L, +ln2_32lo = 5.209643502595475652782654157501186731779e-27L; +static const long double exp2_32_hi[] = { /* exp2_32[j] = 2^(j/32) */ + 1.000000000000000000000000000000000000000e+0000L, + 1.021897148654116678234480134783299439782e+0000L, + 1.044273782427413840321966478739929008785e+0000L, + 1.067140400676823618169521120992809162607e+0000L, + 1.090507732665257659207010655760707978993e+0000L, + 1.114386742595892536308812956919603067800e+0000L, + 1.138788634756691653703830283841511254720e+0000L, + 1.163724858777577513813573599092185312343e+0000L, + 1.189207115002721066717499970560475915293e+0000L, + 1.215247359980468878116520251338798457624e+0000L, + 1.241857812073484048593677468726595605511e+0000L, + 1.269050957191733222554419081032338004715e+0000L, + 1.296839554651009665933754117792451159835e+0000L, + 1.325236643159741294629537095498721674113e+0000L, + 1.354255546936892728298014740140702804343e+0000L, + 1.383909881963831954872659527265192818002e+0000L, + 
1.414213562373095048801688724209698078570e+0000L, + 1.445180806977046620037006241471670905678e+0000L, + 1.476826145939499311386907480374049923924e+0000L, + 1.509164427593422739766019551033193531420e+0000L, + 1.542210825407940823612291862090734841307e+0000L, + 1.575980845107886486455270160181905008906e+0000L, + 1.610490331949254308179520667357400583459e+0000L, + 1.645755478153964844518756724725822445667e+0000L, + 1.681792830507429086062250952466429790080e+0000L, + 1.718619298122477915629344376456312504516e+0000L, + 1.756252160373299483112160619375313221294e+0000L, + 1.794709075003107186427703242127781814354e+0000L, + 1.834008086409342463487083189588288856077e+0000L, + 1.874167634110299901329998949954446534439e+0000L, + 1.915206561397147293872611270295830887850e+0000L, + 1.957144124175400269018322251626871491190e+0000L, +}; + +static const long double exp2_32_lo[] = { + +0.000000000000000000000000000000000000000e+0000L, + +1.805067874203309547455733330545737864651e-0035L, + -9.374520292280427421957567419730832143843e-0035L, + -1.596968447292758770712909630231499971233e-0035L, + +9.112493410125022978511686101672486662119e-0035L, + -6.504228206978548287230374775259388710985e-0035L, + -8.148468844525851137325691767488155323605e-0035L, + -5.066214576721800313372330745142903350963e-0035L, + -1.359830974688816973749875638245919118924e-0035L, + +9.497427635563196470307710566433246597109e-0035L, + -3.283170523176998601615065965333915261932e-0036L, + -5.017235709387190410290186530458428950862e-0035L, + -2.391474797689109171622834301602640139258e-0035L, + -8.350571357633908815298890737944083853080e-0036L, + +7.036756889073265042421737190671876440729e-0035L, + -5.182484853064646457536893018566956189817e-0035L, + +9.422242548621832065692116736394064879758e-0035L, + -3.967500825398862309167306130216418281103e-0035L, + +7.143528991563300614523273615092767243521e-0035L, + +1.159871252867985124246517834100444327747e-0035L, + +4.696933478358115495309739213201874466685e-0035L, + 
-3.386513175995004710799241984999819165197e-0035L, + -8.587318774298247068868655935103874453522e-0035L, + -9.605951548749350503185499362246069088835e-0035L, + +9.609733932128012784507558697141785813655e-0035L, + +6.378397921440028439244761449780848545957e-0035L, + +7.792430785695864249456461125169277701177e-0035L, + +7.361337767588456524131930836633932195088e-0035L, + -6.472995147913347230035214575612170525266e-0035L, + +8.587474417953698694278798062295229624207e-0035L, + +2.371815422825174835691651228302690977951e-0035L, + -3.026891682096118773004597373421900314256e-0037L, +}; +#endif + +static const long double + one = 1.0L, + two = 2.0L, + ln2_64 = 1.083042469624914545964425189778400898568e-2L, /* ln2/64 */ + invln2_32 = 4.616624130844682903551758979206054839765e+1L; /* 32/ln2 */ + +/* rational approximation coeffs for [-(ln2)/64,(ln2)/64] */ +static const long double + t1 = 1.666666666666666666666666666660876387437e-1L, + t2 = -2.777777777777777777777707812093173478756e-3L, + t3 = 6.613756613756613482074280932874221202424e-5L, + t4 = -1.653439153392139954169609822742235851120e-6L, + t5 = 4.175314851769539751387852116610973796053e-8L; +/* INDENT ON */ +/* + * __k_cexpl(x, n) + * + * Returns y and stores *n such that expl(x) is approximately + * y * 2**(*n). With k the power of two found by the argument + * reduction: + * 0 < k <= 240 2**k is applied to y, *n = 0 + * k > 240 only 2**240 is applied to y, *n = k - 240 + * k <= 0 y is left unscaled, *n = k + * For |x| > 200000, y is 1 and *n is +-200000. For NaN, +-inf and + * |x| < 2**-60 the function returns early with *n = 0. + */ +long double +__k_cexpl(long double x, int *n) { + int hx, ix, j, k; + long double t, r; + + *n = 0; + hx = HI_XWORD(x); + ix = hx & 0x7fffffff; + if (hx >= 0x7fff0000) + return (x + x); /* NaN or +inf */ + if (((unsigned) hx) >= 0xffff0000) + return (-one / x); /* NaN or -inf */ + if (ix < 0x3fc30000) + return (one + x); /* |x|<2^-60 */ + if (hx > 0) { + if (hx > 0x401086a0) { /* x > 200000 */ + *n = 200000; + return (one); + } + k = (int) (invln2_32 * (x + ln2_64)); /* adding ln2/64 before truncation rounds x*32/ln2 to nearest */ + } else { + if (ix > 0x401086a0) { /* x < -200000 */ + *n = -200000; + return (one); + } + k = (int) (invln2_32 * (x - ln2_64)); + } + j = k & 0x1f; /* index into the 2^(j/32) tables */ + *n = k >> 5; + t = (long double) k; + x = (x - t * ln2_32hi) - t * ln2_32lo; /* reduced argument r */ + t = x * x; + r = (x - t * (t1 + t * (t2 + t * (t3 + t * (t4 + t * t5))))) - two; /* R - 2 */ + x = exp2_32_hi[j] - ((exp2_32_hi[j] * (x + x)) / r -
exp2_32_lo[j]); + k >>= 5; /* k now equals *n */ + if (k > 240) { + XFSCALE(x, 240); /* XFSCALE is defined elsewhere; from its use here it presumably multiplies x by 2**240 */ + *n -= 240; + } else if (k > 0) { + XFSCALE(x, k); + *n = 0; + } + return (x); +} diff --git a/usr/src/lib/libm/common/complex/k_clog_r.c b/usr/src/lib/libm/common/complex/k_clog_r.c new file mode 100644 index 0000000000..6726da456d --- /dev/null +++ b/usr/src/lib/libm/common/complex/k_clog_r.c @@ -0,0 +1,412 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include "libm.h" /* __k_clog_r */ +#include "complex_wrapper.h" + +/* INDENT OFF */ +/* + * double __k_clog_r(double x, double y, double *e); + * + * Compute real part of complex natural logarithm of x+iy in extra precision + * + * __k_clog_r returns log(hypot(x, y)) with a correction term e. + * + * Accuracy: 70 bits + * + * Method. + * Let Z = x*x + y*y. Z can be normalized as Z = 2^N * z, 1 <= z < 2.
+ * We further break down z into 1 + zk + zh + zt, where + * zk = K*(2^-7) matches z to 7.5 significant bits, 0 <= K <= 2^7-1 + * zh = (z-zk) rounded to 24 bits + * zt = (z-zk-zh) rounded. + * + * Let s = (z - (1+zk)) / (z + (1+zk)) = (zh+zt) / (2(1+zk)+zh+zt), then + * + * log(Z) = N*log2 + log(z) = N*log2 + log(1+zk) + log(z/(1+zk)) + * = N*log2 + log(1+zk) + log((1+s)/(1-s)) + * = N*log2 + log(1+zk) + 2s + (2s)^3/12 + (2s)^5/80 + ... + * + * Note 1. For IEEE double precision, a seven degree odd polynomial + * 2s + P1*(2s)^3 + P2*(2s)^5 + P3*(2s)^7 + * is generated by a special Remez algorithm to + * approximate log((1+s)/(1-s)) accurate to 72 bits. + * Note 2. 2s can be computed accurately as s2h+s2t by + * r = 2/((zh+zt)+2(1+zk)) + * s2 = r*(zh+zt) + * s2h = s2 rounded to float; v = 0.5*s2h; + * s2t = r*((((zh-s2h*(1+zk))-v*zh)+zt)-v*zt) + */ +/* INDENT ON */ + +static const double +zero = 0.0, +half = 0.5, +two = 2.0, +two120 = 1.32922799578491587290e+36, /* 2^120 */ +ln2_h = 6.93147180369123816490e-01, /* 3fe62e42 fee00000 */ +ln2_t = 1.90821492927058770002e-10, /* 3dea39ef 35793c76 */ +P1 = .083333333333333351554108717377986202224765262191125, +P2 = .01249999999819227552330700574633767185896464873834375, +P3 = .0022321938458645656605471559987512516234702284287265625; + +/* +* TBL_log1k[2k] + TBL_log1k[2k+1] = log(1+k*2^-7) for k = 0, ..., 2^7 - 1, +* where TBL_log1k[2k] * 2^40 is an integer +*/ + +static const double TBL_log1k[] = { +0.00000000000000000000e+00, 0.00000000000000000000e+00, +7.78214044203195953742e-03, 2.29894100462035112076e-14, +1.55041865355087793432e-02, 4.56474807636434698847e-13, +2.31670592811497044750e-02, 3.84673753843363762372e-13, +3.07716586667083902285e-02, 4.52981425779092882775e-14, +3.83188643018002039753e-02, 3.36395218465265063278e-13, +4.58095360309016541578e-02, 3.92549008891706208826e-13, +5.32445145181554835290e-02, 6.56799336898521766515e-13, +6.06246218158048577607e-02,
6.29984819938331143924e-13, +6.79506619080711971037e-02, 4.36552290856295281946e-13, +7.52234212368421140127e-02, 7.45411685916941618656e-13, +8.24436692109884461388e-02, 8.61451293608781447223e-14, +8.96121586893059429713e-02, 3.81189648692113819551e-13, +9.67296264579999842681e-02, 5.51128027471986918274e-13, +1.03796793680885457434e-01, 7.58107392301637643358e-13, +1.10814366339582193177e-01, 7.07921017612766061755e-13, +1.17783035655520507134e-01, 8.62947404296943765415e-13, +1.24703478500123310369e-01, 8.33925494898414856118e-13, +1.31576357788617315236e-01, 1.01957352237084734958e-13, +1.38402322858382831328e-01, 7.36304357708705134617e-13, +1.45182009843665582594e-01, 8.32314688404647202319e-13, +1.51916042025732167531e-01, 1.09807540998552379211e-13, +1.58605030175749561749e-01, 8.89022343972466269900e-13, +1.65249572894936136436e-01, 3.71026439894104998399e-13, +1.71850256926518341061e-01, 1.40881279371111350341e-13, +1.78407657472234859597e-01, 5.83437522462346671423e-13, +1.84922338493379356805e-01, 6.32635858668445232946e-13, +1.91394852999110298697e-01, 5.19155912393432989209e-13, +1.97825743329303804785e-01, 6.16075577558872326221e-13, +2.04215541428311553318e-01, 3.79338185766902218086e-13, +2.10564769106895255391e-01, 4.54382278998146218219e-13, +2.16873938300523150247e-01, 9.12093724991498410553e-14, +2.23143551314024080057e-01, 1.85675709597960106615e-13, +2.29374101064422575291e-01, 4.23254700234549300166e-13, +2.35566071311950508971e-01, 8.16400106820959292914e-13, +2.41719936886511277407e-01, 6.33890736899755317832e-13, +2.47836163904139539227e-01, 4.41717553713155466566e-13, +2.53915209980732470285e-01, 2.30973852175869394892e-13, +2.59957524436686071567e-01, 2.39995404842117353465e-13, +2.65963548496984003577e-01, 1.53937761744554075681e-13, +2.71933715483100968413e-01, 5.40790418614551497411e-13, +2.77868451003087102436e-01, 3.69203750820800887027e-13, +2.83768173129828937817e-01, 8.15660529536291275782e-13, +2.89633292582948342897e-01, 
9.43339818951269030846e-14, +2.95464212893421063200e-01, 4.14813187042585679830e-13, +3.01261330577290209476e-01, 8.71571536970835103739e-13, +3.07025035294827830512e-01, 8.40315630479242455758e-14, +3.12755710003330023028e-01, 5.66865358290073900922e-13, +3.18453731118097493891e-01, 4.37121919574291444278e-13, +3.24119468653407238889e-01, 8.04737201185162774515e-13, +3.29753286371669673827e-01, 7.98307987877335024112e-13, +3.35355541920762334485e-01, 3.75495772572598557174e-13, +3.40926586970454081893e-01, 1.39128412121975659358e-13, +3.46466767346100823488e-01, 1.07757430375726404546e-13, +3.51976423156884266064e-01, 2.93918591876480007730e-13, +3.57455888921322184615e-01, 4.81589611172320539489e-13, +3.62905493689140712377e-01, 2.27740761140395561986e-13, +3.68325561158599157352e-01, 1.08495696229679121506e-13, +3.73716409792905324139e-01, 6.78756682315870616582e-13, +3.79078352934811846353e-01, 1.57612037739694350287e-13, +3.84411698910298582632e-01, 3.34571026954408237380e-14, +3.89716751139530970249e-01, 4.94243121138567024911e-13, +3.94993808240542421117e-01, 3.26556988969071456956e-13, +4.00243164126550254878e-01, 4.62452051668403792833e-13, +4.05465108107819105498e-01, 3.45276479520397708744e-13, +4.10659924984429380856e-01, 8.39005077851830734139e-13, +4.15827895143593195826e-01, 1.17769787513692141889e-13, +4.20969294643327884842e-01, 8.01751287156832458079e-13, +4.26084395310681429692e-01, 2.18633432932159103190e-13, +4.31173464818130014464e-01, 2.41326394913331314894e-13, +4.36236766774527495727e-01, 3.90574622098307022265e-13, +4.41274560804231441580e-01, 6.43787909737320689684e-13, +4.46287102628048160113e-01, 3.71351419195920213229e-13, +4.51274644138720759656e-01, 7.37825488412103968058e-13, +4.56237433480964682531e-01, 6.22911850193784704748e-13, +4.61175715121498797089e-01, 6.71369279138460114513e-13, +4.66089729924533457961e-01, 6.57665976858006147528e-14, +4.70979715218163619284e-01, 6.27393263311115598424e-13, +4.75845904869856894948e-01, 
1.07019317621142549209e-13, +4.80688529345570714213e-01, 1.81193463664411114729e-13, +4.85507815781602403149e-01, 9.84046527823262695501e-14, +4.90303988044615834951e-01, 5.78003198945402769376e-13, +4.95077266797125048470e-01, 7.26466128212511528295e-13, +4.99827869555701909121e-01, 7.47420700205478712293e-13, +5.04556010751912253909e-01, 4.83033149495532022300e-13, +5.09261901789614057634e-01, 1.93889170049107088943e-13, +5.13945751101346104406e-01, 8.88212395185718544720e-13, +5.18607764207445143256e-01, 6.00488896640545761201e-13, +5.23248143764249107335e-01, 2.98729182044413286731e-13, +5.27867089620485785417e-01, 3.56599696633478298092e-13, +5.32464798869114019908e-01, 3.57823965912763837621e-13, +5.37041465896436420735e-01, 4.47233831757482468946e-13, +5.41597282432121573947e-01, 6.22797629172251525649e-13, +5.46132437597407260910e-01, 7.28389472720657362987e-13, +5.50647117952394182794e-01, 2.68096466152116723636e-13, +5.55141507539701706264e-01, 7.99886451312335479470e-13, +5.59615787935399566777e-01, 2.31194938380053776320e-14, +5.64070138284478161950e-01, 3.24804121719935740729e-13, +5.68504735351780254859e-01, 8.88457219261483317716e-13, +5.72919753561109246220e-01, 6.76262872317054154667e-13, +5.77315365034337446559e-01, 4.86157758891509033842e-13, +5.81691739634152327199e-01, 4.70155322075549811780e-13, +5.86049045003164792433e-01, 4.13416470738355643357e-13, +5.90387446602107957006e-01, 6.84176364159146659095e-14, +5.94707107746216934174e-01, 4.75855340044306376333e-13, +5.99008189645246602595e-01, 8.36796786747576938145e-13, +6.03290851438032404985e-01, 5.18573553063418286042e-14, +6.07555250224322662689e-01, 2.19132812293400917731e-13, +6.11801541105705837253e-01, 2.87066276408616768331e-13, +6.16029877214714360889e-01, 7.99658758518543977451e-13, +6.20240409751204424538e-01, 6.53104313776336534177e-13, +6.24433288011459808331e-01, 4.33692711555820529733e-13, +6.28608659421843185555e-01, 5.30952189118357790115e-13, +6.32766669570628437214e-01, 
4.09392332186786656392e-13, +6.36907462236194987781e-01, 8.74243839148582888557e-13, +6.41031179420679109171e-01, 2.52181884568428814231e-13, +6.45137961372711288277e-01, 8.73413388168702670246e-13, +6.49227946624705509748e-01, 4.04309142530119209805e-13, +6.53301272011958644725e-01, 7.86994033233553225797e-13, +6.57358072708120744210e-01, 2.39285932153437645135e-13, +6.61398482245203922503e-01, 1.61085757539324585156e-13, +6.65422632544505177066e-01, 5.85271884362515112697e-13, +6.69430653942072240170e-01, 5.57027128793880294600e-13, +6.73422675211440946441e-01, 7.25773856816637653180e-13, +6.77398823590920073912e-01, 8.86066898134949155668e-13, +6.81359224807238206267e-01, 6.64862680714687006264e-13, +6.85304003098281100392e-01, 6.38316151706465171657e-13, +6.89233281238557538018e-01, 2.51442307283760746611e-13, +}; + +/* + * Compute N*log2 + log(1+zk+zh+zt) in extra precision + * + * N is the binary exponent and K the index into TBL_log1k, so that + * 1+zk = 1 + K*2^-7; zh is the leading part of the remainder and *zt + * holds its tail on entry. Returns the leading part of the logarithm + * and overwrites *zt with the correction term, so that the return + * value plus *zt approximates the result to extra precision. + */ +static double k_log_NKz(int N, int K, double zh, double *zt) +{ + double y, r, w, s2, s2h, s2t, t, zk, v, P; + + /* zk = 1 + K*2^-7: K goes into the top 7 fraction bits */ + ((int *)&zk)[HIWORD] = 0x3ff00000 + (K << 13); + ((int *)&zk)[LOWORD] = 0; + t = zh + (*zt); + r = two / (t + two * zk); + s2h = s2 = r * t; + /* s2h = s2 chopped to 24 significant bits */ + ((int *)&s2h)[LOWORD] &= 0xe0000000; + v = half * s2h; + w = s2 * s2; + s2t = r * ((((zh - s2h * zk) - v * zh) + (*zt)) - v * (*zt)); + P = s2t + (w * s2) * ((P1 + w * P2) + (w * w) * P3); + P += N * ln2_t + TBL_log1k[K + K + 1]; + t = N*ln2_h + TBL_log1k[K+K]; + y = t + (P + s2h); + /* what the rounded sum y dropped becomes the new tail */ + P -= ((y - t) - s2h); + *zt = P; + return (y); +} + +/* + * Return the leading part of log(hypot(x, y)) and store a correction + * term in *er. Only the magnitudes of x and y are used. + * + * Special cases, as coded below: + * x or y infinite returns +Inf + * x or y NaN (neither infinite) returns x+y + * x = y = 0 returns -1/0, i.e. -Inf + * larger magnitude 1, smaller < 2^-35 returns y*y/2 + */ +double +__k_clog_r(double x, double y, double *er) +{ + double t1, t2, t3, t4, tk, z, wh, w, zh, zk; + int n, k, ix, iy, iz, nx, ny, nz, i, j; + unsigned lx, ly; + + ix = (((int *)&x)[HIWORD]) & ~0x80000000; + lx = ((unsigned *)&x)[LOWORD]; + iy = (((int *)&y)[HIWORD]) & ~0x80000000; + ly = ((unsigned *)&y)[LOWORD]; + y = fabs(y); x = fabs(x); + if (ix < iy || (ix == iy && lx < ly)) { /* force x >= y */ + tk = x; x = y; y = tk; + n = ix; ix = iy; iy = n; + n = lx; lx = ly; ly = n; + } + *er = zero; + 
nx = ix >> 20; ny = iy >> 20; + if (nx >= 0x7ff) { /* x or y is Inf or NaN */ + if (ISINF(ix, lx)) + return (x); + else if (ISINF(iy, ly)) + return (y); + else + return (x+y); + } +/* + * for tiny y (double y < 2^-35, extended y < 2^-46, quad y < 2^-70): + * log(sqrt(1+y^2)) = (y^2)/2 - (y^4)/8 + ... ~= (y^2)/2 + */ + if ((((ix - 0x3ff00000) | lx) == 0) && ny < (0x3ff - 35)) { + t2 = y * y; + if (ny >= 565) { /* compute er = tail of t2 */ + ((int *)&wh)[HIWORD] = iy; + ((unsigned *)&wh)[LOWORD] = ly & 0xf8000000; + *er = half * ((y - wh) * (y + wh) - (t2 - wh * wh)); + } + return (half * t2); + } +/* + * x or y is subnormal or zero + */ + if (nx == 0) { + if ((ix | lx) == 0) + return (-1.0 / x); + else { + x *= two120; + y *= two120; + ix = ((int *)&x)[HIWORD]; + lx = ((unsigned *)&x)[LOWORD]; + iy = ((int *)&y)[HIWORD]; + ly = ((unsigned *)&y)[LOWORD]; + nx = (ix >> 20) - 120; + ny = (iy >> 20) - 120; + /* guard subnormal flush to 0 */ + if ((ix | lx) == 0) + return (-1.0 / x); + } + } else if (ny == 0) { /* y subnormal, scale it */ + y *= two120; + iy = ((int *)&y)[HIWORD]; + ly = ((unsigned *)&y)[LOWORD]; + ny = (iy >> 20) - 120; + } + n = nx - ny; +/* + * return log(x) when y is zero or x >> y so that + * log(x) ~ log(sqrt(x*x+y*y)) to 27 extra bits + * (n > 62 for double, 78 for i386 extended, 122 for quad) + */ + if (n > 62 || (iy | ly) == 0) { + i = (0x000fffff & ix) | 0x3ff00000; /* normalize x */ + ((int *)&x)[HIWORD] = i; + i += 0x1000; + ((int *)&zk)[HIWORD] = i & 0xffffe000; + ((int *)&zk)[LOWORD] = 0; /* zk matches 7.5 bits of x */ + z = x - zk; + zh = (double)((float)z); + i >>= 13; + k = i & 0x7f; /* index of zk */ + n = nx - 0x3ff; + *er = z - zh; + if (i >> 17) { /* if zk = 2.0, adjust scaling */ + n += 1; + zh *= 0.5; *er *= 0.5; + } + w = k_log_NKz(n, k, zh, er); + } else { +/* + * compute z = x*x + y*y + */ + ix = (ix & 0xfffff) | 0x3ff00000; + iy = (iy & 0xfffff) | (0x3ff00000 - (n << 20)); + ((int *)&x)[HIWORD] = ix; ((int *)&y)[HIWORD] = iy; 
+ t1 = x * x; t2 = y * y; + /* + * wh = x rounded to 26 significant bits. j can reach 32, so the + * shift is done as unsigned: the carry then wraps the low word + * to 0 instead of overflowing a signed int. + */ + j = ((lx >> 26) + 1) >> 1; + ((int *)&wh)[HIWORD] = ix + (j >> 5); + ((unsigned *)&wh)[LOWORD] = ((unsigned)j << 27); + z = t1+t2; +/* + * higher precision simulation x*x = t1 + t3, y*y = t2 + t4 + */ + tk = wh - x; + t3 = tk * tk - (two * wh * tk - (wh * wh - t1)); + /* same 26-bit rounding for y */ + j = ((ly >> 26) + 1) >> 1; + ((int *)&wh)[HIWORD] = iy + (j >> 5); + ((unsigned *)&wh)[LOWORD] = ((unsigned)j << 27); + tk = wh - y; + t4 = tk * tk - (two * wh * tk - (wh * wh - t2)); +/* + * find zk matches z to 7.5 bits + */ + nx -= 0x3ff; + iz = ((int *)&z)[HIWORD] + 0x1000; + k = (iz >> 13) & 0x7f; + nz = (iz >> 20) - 0x3ff; + ((int *)&zk)[HIWORD] = iz & 0xffffe000; + ((int *)&zk)[LOWORD] = 0; +/* + * order t1,t2,t3,t4 according to their size + */ + if (t2 >= fabs(t3)) { + if (fabs(t3) < fabs(t4)) { + wh = t3; t3 = t4; t4 = wh; + } + } else { + wh = t2; t2 = t3; t3 = wh; + } +/* + * higher precision simulation: x * x + y * y = t1 + t2 + t3 + t4 + * = zk (7 bits) + zh (24 bits) + *er (tail) and call k_log_NKz + */ + tk = t1 - zk; + zh = ((tk + t2) + t3) + t4; + ((int *)&zh)[LOWORD] &= 0xe0000000; + w = fabs(zh); + if (w >= fabs(t2)) + *er = (((tk - zh) + t2) + t3) + t4; + else { + if (n == 0) { + wh = half * zk; + wh = (t1 - wh) - (wh - t2); + } else + wh = tk + t2; + if (w >= fabs(t3)) + *er = ((wh - zh) + t3) + t4; + else { + z = t3; + t3 += t4; + t4 -= t3 - z; + if (w >= fabs(t3)) + *er = ((wh - zh) + t3) + t4; + else + *er = ((wh + t3) - zh) + t4; + } + } + /* rescale the remainder to match zk = 1 + k*2^-7 */ + if (nz == 3) {zh *= 0.125; *er *= 0.125; } + if (nz == 2) {zh *= 0.25; *er *= 0.25; } + if (nz == 1) {zh *= half; *er *= half; } + nz += nx + nx; + /* log(hypot(x, y)) = log(x*x + y*y) / 2 */ + w = half * k_log_NKz(nz, k, zh, er); + *er *= half; + } + return (w); +} diff --git a/usr/src/lib/libm/common/complex/k_clog_rl.c b/usr/src/lib/libm/common/complex/k_clog_rl.c new file mode 100644 index 0000000000..53eaa88ce4 --- /dev/null +++ b/usr/src/lib/libm/common/complex/k_clog_rl.c @@ -0,0 +1,645 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the 
terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include "libm.h" /* __k_clog_rl */ +#include "complex_wrapper.h" +#include "longdouble.h" + +/* INDENT OFF */ +/* + * long double __k_clog_rl(long double x, long double y, long double *e); + * + * Compute real part of complex natural logarithm of x+iy in extra precision + * + * __k_clog_rl returns log(hypot(x, y)) with a correction term e. + * + * Accuracy: quad 140 bits, intel extended 91 bits. + * + * Method. + * Assume X > Y >= 0 . Let X = 2**nx * x, Y = 2**nx * y, where 1 <= x < 2. + * Let Z = X*X + Y*Y. Then Z = 2**(nx+nx) * z, where z = x*x + y*y. + * Note that z < 8. + * Let Z = x*x + y*y. Z can be normalized as Z = 2**N * z, 1 <= z < 2. + * We further break down z into 1 + zk + zh + zt, where + * zk = K*(2**-7) matches z to 7.5 significant bits, 0 <= K <= 2**7-1 + * zh = (z-zk) rounded to half of the current significant bits + * zt = (z-zk-zh) rounded. 
+ * + * Let s = (z - (1+zk)) / (z + (1+zk)) = (zh+zt) / (2(1+zk)+zh+zt), then + * + * log(Z) = N*log2 + log(z) = N*log2 + log(1+zk) + log(z/(1+zk)) + * = N*log2 + log(1+zk) + log((1+s)/(1-s)) + * = N*log2 + log(1+zk) + 2s + (1/12)*(2s)**3 + (1/80)*(2s)**5 + ... + * + * + * Note 1. For long double (up to IEEE quad precision), a fifteen degree + * odd polynomial + * 2s + P1*(2s)^3 + P2*(2s)^5 + P3*(2s)^7 + ... + P7*(2s)^15 + * is generated by a special remez algorithm to + * approx log((1+s)/(1-s)) accurate to 145 bits. + * Note 2. 2s can be computed accurately as s2h+s2t by + * r = 2/((zh+zt)+2(1+zk)) + * s2 = r*(zh+zt) + * s2h = s2 rounded to double; v = 0.5*s2h; + * s2t = r*((((zh-s2h*(1+zk))-v*zh)+zt)-v*zt) + */ +/* INDENT ON */ + +static const long double +zero = 0.0L, +half = 0.5L, +two = 2.0L, +two240 = 1.7668470647783843295832975007429185158274839e+72L, /* 2^240 */ + +/* first 48 bits of ln2 */ +ln2_h = 0.693147180559943620892227045260369777679443359375L, +ln2_t = 1.68852500507619780679039605677498525525412068e-15L, +P1 = .083333333333333333333333333333333333341023785768375L, +P2 = .01249999999999999999999999999999679085402075766159375L, +P3 = .002232142857142857142857143310092047621284490564671875L, +P4 = .00043402777777777777774746781319264872413156956512109375L, +P5 = .0000887784090909101756336594019277185263940665468935546875L, +P6 = .000018780048055589639895360927834628371268354778446533203125L, +P7 = .000004069227854328982921366736003458838031087153635406494140625L; + +/* + * T[2k, 2k+1] = log(1+k*2**-7) for k = 0, ..., 2**7 - 1, + * with T[2k] * 2^48 is an int + */ + +static const long double TBL_log1k[] = { +0.0000000000000000000000000000000000000000e+00L, +0.0000000000000000000000000000000000000000e+00L, +7.7821404420532758194894995540380477905273e-03L, +1.6731279734005070987158875984584325351222e-15L, +1.5504186535963526694104075431823730468750e-02L, +1.7274567499706106231054091184928671990316e-15L, 
+2.3167059281533397552266251295804977416992e-02L, +9.8067653290966648493916241687661877474892e-16L, +3.0771658666751022792595904320478439331055e-02L, +2.6655784323032762937247606420524589813624e-15L, +3.8318864302134159061097307130694389343262e-02L, +2.4401326580179931029010027013316092332340e-15L, +4.5809536031292452662455616518855094909668e-02L, +1.7505042236510958082472042641283104263139e-15L, +5.3244514518809182845870964229106903076172e-02L, +3.1000199992295574218738634002122149891138e-15L, +6.0624621816433688081815489567816257476807e-02L, +1.1544987906424726040058093958345197512800e-15L, +6.7950661908504628172522643581032752990723e-02L, +3.1212220426341915966610439115772728417386e-15L, +7.5223421237584631171557703055441379547119e-02L, +2.8945270476369282210350897509258766743153e-15L, +8.2443669211073711267090402543544769287109e-02L, +8.8000106966612476303662698634483335676886e-16L, +8.9612158689686083334891009144484996795654e-02L, +1.0492850604602339995319895311151740799226e-15L, +9.6729626458550654888313147239387035369873e-02L, +4.5740725790924807640164516707244620870662e-16L, +1.0379679368164218544734467286616563796997e-01L, +1.3793787171308978090503366050174239822054e-15L, +1.1081436634028918319927470292896032333374e-01L, +9.3099553146639425160476473362380086036919e-16L, +1.1778303565638026384476688690483570098877e-01L, +3.1906940272225656860040797111813146690890e-15L, +1.2470347850095464536934741772711277008057e-01L, +2.5904940590976537504984110469214193890052e-15L, +1.3157635778871679121948545798659324645996e-01L, +2.4813692306707028899159917911012100567219e-15L, +1.3840232285911824305912887211889028549194e-01L, +8.9262619700148275890190121571708972000380e-16L, +1.4518200984449691759436973370611667633057e-01L, +9.7968756533003444764719201050911636480025e-16L, +1.5191604202583874894116888754069805145264e-01L, +3.2261306345373561864598749471119213018106e-15L, +1.5860503017663774016909883357584476470947e-01L, +8.4392427234104999681053621980394827998735e-16L, 
+1.6524957289530561865831259638071060180664e-01L, +1.5442172988528965297119225948270579746101e-15L, +1.7185025692665689689420105423778295516968e-01L, +2.3254458978918173643097657009894831132739e-15L, +1.7840765747281750464026117697358131408691e-01L, +7.9247913906453736065426776912520942036896e-16L, +1.8492233849401173984006163664162158966064e-01L, +2.5282384195601762803134514624610774126020e-16L, +1.9139485299962899489401024766266345977783e-01L, +4.5971528855989864541366920731297729269228e-16L, +1.9782574332991842425144568551331758499146e-01L, +1.4561111263856836438840838027526567191527e-15L, +2.0421554142868814096800633706152439117432e-01L, +2.7505358140491347148810394262840919337709e-15L, +2.1056476910734645002776233013719320297241e-01L, +3.1876417904825951583107481283088861928977e-15L, +2.1687393830061196808856038842350244522095e-01L, +2.3915305291373208450532580201045871599499e-15L, +2.2314355131420882116799475625157356262207e-01L, +9.3459830033405826094075253077304795996257e-16L, +2.2937410106484534821902343537658452987671e-01L, +4.8177245728966955534167425511952551974164e-16L, +2.3556607131276408040321257431060075759888e-01L, +2.8286743756446304426525380844720043381780e-15L, +2.4171993688714366044223424978554248809814e-01L, +1.5077020732661279714120052415509585052975e-15L, +2.4783616390458007572306087240576744079590e-01L, +1.1810575418933407573072030113600980623171e-15L, +2.5391520998096339667426946107298135757446e-01L, +4.7463053836833625309891834934881898560705e-17L, +2.5995752443692410338371701072901487350464e-01L, +1.9635883624838132961710716735786266795913e-15L, +2.6596354849713677026556979399174451828003e-01L, +1.1710735561325457988709887923652142233351e-15L, +2.7193371548364098089223261922597885131836e-01L, +7.7793943687530702031066421537496360004376e-16L, +2.7786845100345303194444568362087011337280e-01L, +3.2742419043493025311197092322146237692165e-15L, +2.8376817313064250924981024581938982009888e-01L, +2.0890970909765308649465619266075677112425e-15L, 
+2.8963329258304071345264674164354801177979e-01L, +1.9634262463138821209582240742801727823629e-15L, +2.9546421289383317798638017848134040832520e-01L, +2.6984003017275736237868564402005801750600e-15L, +3.0126133057816062432721082586795091629028e-01L, +1.1566856647123658045763670687640673680383e-15L, +3.0702503529490954292668902780860662460327e-01L, +2.3191484355127267712770857311812090801833e-15L, +3.1275571000389490450288576539605855941772e-01L, +1.9838833607942922604727420618882220398852e-15L, +3.1845373111853447767316538374871015548706e-01L, +1.3813708182984188944010814590398164268227e-16L, +3.2411946865421015218089451082050800323486e-01L, +1.8239097762496144793489474731253815376404e-15L, +3.2975328637246548169059678912162780761719e-01L, +2.5001238260227991620033344720809714552230e-15L, +3.3535554192113536942088103387504816055298e-01L, +2.4608362985459391180385214539620341910962e-15L, +3.4092658697059263772644044365733861923218e-01L, +5.7257864875612301758921090406373771458003e-16L, +3.4646676734620740489845047704875469207764e-01L, +1.1760200117113770182586341947822306069951e-15L, +3.5197642315717558858523261733353137969971e-01L, +2.5960702148389259075462896448369304790506e-15L, +3.5745588892180180096147523727267980575562e-01L, +1.9732645342528682246686790561260072184839e-15L, +3.6290549368936808605212718248367309570312e-01L, +3.6708569716349381675043725477739939978160e-16L, +3.6832556115870573876236448995769023895264e-01L, +1.9142858656640927085879445412821643247628e-15L, +3.7371640979358389245135185774415731430054e-01L, +1.8836966497497166619234389157276681281343e-16L, +3.7907835293496816575498087331652641296387e-01L, +1.2926358724723144934459175417385013725801e-15L, +3.8441169891033055705520382616668939590454e-01L, +1.4826795862363146014726140088145939341729e-15L, +3.8971675114002479745067830663174390792847e-01L, +4.1591978529737177695912258866565331189698e-16L, +3.9499380824086571806219581048935651779175e-01L, +3.2600441982258756252505182317625310732365e-15L, 
+4.0024316412701210765590076334774494171143e-01L, +5.9927342433864738622836851475469574662703e-16L, +4.0546510810816371872533636633306741714478e-01L, +6.6325267674913128171942721503283748008372e-16L, +4.1065992498526782128465129062533378601074e-01L, +5.6464965491255048900165082436455718077885e-16L, +4.1582789514371043537721561733633279800415e-01L, +5.3023611327561856950735176370587227509442e-16L, +4.2096929464412724541944044176489114761353e-01L, +2.3907094267197419048248363335257046791153e-15L, +4.2608439531089814522601955104619264602661e-01L, +1.9178985253285492839728700574592375309985e-15L, +4.3117346481836804628073878120630979537964e-01L, +3.2945784336977492852031005044499611665595e-15L, +4.3623676677491474151793227065354585647583e-01L, +3.3288311090524075754441878570852962903891e-15L, +4.4127456080487448275562201160937547683716e-01L, +7.4673387443005192574852544613692268411229e-16L, +4.4628710262841764233598951250314712524414e-01L, +1.8691966006681165218815050615460959199251e-15L, +4.5127464413945617138779198285192251205444e-01L, +2.4137569004002270899666314791611479063976e-15L, +4.5623743348158640742440184112638235092163e-01L, +1.1869564036970375473975162509216610120281e-15L, +4.6117571512216670726047595962882041931152e-01L, +3.4591075239659690349392915732654828400811e-15L, +4.6608972992459740680715185590088367462158e-01L, +1.8177514673916038857252366108673570603067e-15L, +4.7097971521878889689105562865734100341797e-01L, +2.1156558422273990182479555421331461933366e-15L, +4.7584590486996347635795245878398418426514e-01L, +4.3790725712752039722791012358345927696967e-16L, +4.8068852934575190261057286988943815231323e-01L, +5.0660455855585733988956280680891477171499e-18L, +4.8550781578169832641833636444061994552612e-01L, +2.4813834547127501689550526444948043590905e-15L, +4.9030398804519137456736643798649311065674e-01L, +2.4635829797216592537498738468934647345741e-15L, +4.9507726679784980206022737547755241394043e-01L, +1.7125377372093652812514167461480115600063e-15L, 
+4.9982786955644797899367404170334339141846e-01L, +1.3508276573735437007500942002018098437396e-15L, +5.0455601075239187025545106735080480575562e-01L, +3.4168028574643873701242268618467347998876e-15L, +5.0926190178980590417268103919923305511475e-01L, +2.0426313938800290907697638200502614622891e-15L, +5.1394575110223428282552049495279788970947e-01L, +3.3975485593321419703400672813719873194659e-17L, +5.1860776420804555186805373523384332656860e-01L, +8.0284923261130955371987633083003284697416e-17L, +5.2324814376454753528378205373883247375488e-01L, +3.0123302517119603836788558832352723470118e-16L, +5.2786708962084105678513878956437110900879e-01L, +1.3283287534282139298545497336570406582397e-15L, +5.3246479886946929127589100971817970275879e-01L, +2.5525980327137419625398485590148417041921e-15L, +5.3704146589688050994482182431966066360474e-01L, +3.1446219074198341716354190061340477078626e-15L, +5.4159728243274329884116014000028371810913e-01L, +1.0727353821639001503808606766770295812627e-15L, +5.4613243759813556721383065450936555862427e-01L, +8.3168566554721843605240702438699163825794e-17L, +5.5064711795266063631970610003918409347534e-01L, +1.6429402420791657293666192255419538448840e-15L, +5.5514150754050106684189813677221536636353e-01L, +5.2587358222274368868380660194332415847228e-16L, +5.5961578793542088305912329815328121185303e-01L, +1.8032117652023735453816330571171114110385e-15L, +5.6407013828480145889443519990891218185425e-01L, +1.5071769490901812785299634348367857600711e-15L, +5.6850473535266843327917740680277347564697e-01L, +2.7879956135806418878792935692629147550413e-16L, +5.7291975356178426181941176764667034149170e-01L, +1.2472733449589795907271346997596471822345e-15L, +5.7731536503482061561953742057085037231445e-01L, +2.9886985746409486460291929160223207644146e-15L, +5.8169173963462128540413687005639076232910e-01L, +1.1971164738836689815783808674399742176950e-15L, +5.8604904500357690722012193873524665832520e-01L, +1.3016839974975520776911897855504474452726e-15L, 
+5.9038744660217545856539800297468900680542e-01L, +9.1607651870514890975077236127894522134392e-16L, +5.9470710774668944509357970673590898513794e-01L, +3.3444207638397932963480545729233567201211e-15L, +5.9900818964608149030937056522816419601440e-01L, +1.9090722294592334873060460706130642200729e-15L, +6.0329085143808214297678205184638500213623e-01L, +2.1193638031348149256035110177854940281795e-15L, +6.0755525022453937822319858241826295852661e-01L, +2.4172778865703728624133665395876418941354e-15L, +6.1180154110599005434778518974781036376953e-01L, +2.8491821045766810044199163148675291775782e-15L, +6.1602987721551372146677749697118997573853e-01L, +2.9818078843122551067455400545109858745295e-16L, +6.2024040975185457114093878772109746932983e-01L, +2.9577105558448461493874424529516311623184e-15L, +6.2443328801189323939979658462107181549072e-01L, +2.6164274215943360130441858075903119505815e-16L, +6.2860865942237253989333112258464097976685e-01L, +1.5978509770831895426601797458058854400463e-15L, +6.3276666957103699928666173946112394332886e-01L, +8.3025912472904245581515990140161946934461e-16L, +6.3690746223706895534633076749742031097412e-01L, +2.7627416365968377888021629180796328536455e-16L, +6.4103117942092779912854894064366817474365e-01L, +3.4919270523937617243719652995048419893186e-15L, +6.4513796137358170312836591619998216629028e-01L, +2.9985368625799347497396478978681548584217e-15L, +6.4922794662510696639401430729776620864868e-01L, +2.8524968256626075449136225882322854909611e-15L, +6.5330127201274379444839723873883485794067e-01L, +1.8443102186424720390266302263929355424008e-15L, +6.5735807270835877602621621917933225631714e-01L, +1.2541156738040666039091970075936624723645e-15L, +6.6139848224536379461824253667145967483521e-01L, +1.2136419933020381912633127333149145382797e-15L, +6.6542263254508782210905337706208229064941e-01L, +2.6268410392329445778904988886114643307320e-15L, +6.6943065394262646350398426875472068786621e-01L, +2.8037949010021747828222575923191438798877e-15L, 
+6.7342267521216570003161905333399772644043e-01L, +1.0202663413354670195383104149875619397268e-15L, +6.7739882359180469961756898555904626846313e-01L, +1.4411921136244383020300914304078010801275e-15L, +6.8135922480790256372529256623238325119019e-01L, +5.0522277899333570619054540068138110661023e-16L, +6.8530400309891703614084690343588590621948e-01L, +2.3804032011755313470802014258958896193599e-15L, +6.8923328123880622797514661215245723724365e-01L, +2.7523497677256621466659891416404053623832e-15L, +}; + +/* + * Compute N*log2 + log(1+zk+zh+zt) in extra precision + * + * N is the binary exponent and K the index into TBL_log1k, so that + * 1+zk = 1 + K*2**-7; zh is the leading part of the remainder and *zt + * holds its tail on entry. Returns the leading part of the logarithm + * and overwrites *zt with the correction term, so that the return + * value plus *zt approximates the result to extra precision. + */ +static long double k_log_NKzl(int N, int K, long double zh, long double *zt) +{ + long double y, r, w, s2, s2h, s2t, t, zk, v, P; + double dzk; + +#if !defined(__x86) + unsigned lx, ly; + int j; +#endif + + /* dzk = 1 + K*2**-7, built as a double and then widened */ + ((int *)&dzk)[HIWORD] = 0x3ff00000 + (K << 13); + ((int *)&dzk)[LOWORD] = 0; + t = zh + (*zt); + zk = (long double) dzk; + r = two / (t + two * zk); + s2h = s2 = r * t; +/* split s2 into correctly rounded half */ + +#if defined(__x86) + ((unsigned *)&s2h)[0] = 0; /* 32 bits chopped */ +#else + + lx = ((unsigned *)&s2h)[2]; /* 56 bits rounded */ + j = ((lx >> 24) + 1) >> 1; + /* j can reach 128: shift as unsigned so the carry wraps to 0 */ + ((unsigned *)&s2h)[2] = ((unsigned)j << 25); + lx = ((unsigned *)&s2h)[1]; + ly = lx + (j >> 7); + ((unsigned *)&s2h)[1] = ly; + ((unsigned *)&s2h)[0] += (ly == 0 && lx != 0); + ((unsigned *)&s2h)[3] = 0; +#endif + + v = half * s2h; + w = s2 * s2; + s2t = r * ((((zh - s2h * zk) - v * zh) + (*zt)) - v * (*zt)); + P = s2t + (w * s2) * ((P1 + w * P2) + (w * w) * ((P3 + w * P4) + + (w * w) * (P5 + w * P6 + (w * w) * P7))); + P += N * ln2_t + TBL_log1k[K + K + 1]; + t = N*ln2_h + TBL_log1k[K+K]; + y = t + (P + s2h); + /* what the rounded sum y dropped becomes the new tail */ + P -= ((y - t) - s2h); + *zt = P; + return (y); +} + +/* + * Long double version: return the leading part of log(hypot(x, y)) and + * store a correction term in *er. Only the magnitudes of x and y are + * used. + * + * Special cases, as coded below: + * x or y infinite returns +Inf + * x or y NaN (neither infinite) returns x+y + * x = y = 0 returns -1/0, i.e. -Inf + * larger magnitude 1 and the smaller below 2^-46 (x86) or + * 2^-70 (otherwise) returns y*y/2 + */ +long double +__k_clog_rl(long double x, long double y, long double *er) +{ + long double t1, t2, t3, t4, tk, z, wh, w, zh, zk; + int n, k, ix, iy, iz, nx, ny, nz, i; + double dk; + +#if !defined(__x86) + int j; + unsigned lx, ly; +#endif + + ix = HI_XWORD(x) & ~0x80000000; + iy = 
HI_XWORD(y) & ~0x80000000; + y = fabsl(y); x = fabsl(x); + if (ix < iy || (ix < 0x7fff0000 && ix == iy && x < y)) { + /* force x >= y */ + tk = x; x = y; y = tk; + n = ix; ix = iy; iy = n; + } + *er = zero; + nx = ix >> 16; ny = iy >> 16; + if (nx >= 0x7fff) { /* x or y is Inf or NaN */ + if (isinfl(x)) + return (x); + else if (isinfl(y)) + return (y); + else + return (x+y); + } +/* + * for tiny y:(double y < 2^-35, extended y < 2^-46, quad y < 2^-70) + * + * log(sqrt(1 + y**2)) = y**2 / 2 - y**4 / 8 + ... ~= y**2 / 2 + */ +#if defined(__x86) + if (x == 1.0L && ny < (0x3fff - 46)) { +#else + if (x == 1.0L && ny < (0x3fff - 70)) { +#endif + + t2 = y * y; + if (ny >= 8305) { /* compute er = tail of t2 */ + /* + * NOTE(review): y can be far below the double range here, + * in which case (double)y presumably underflows and the + * tail degenerates to 0 - confirm this is acceptable. + */ + dk = (double) y; + +#if defined(__x86) + ((unsigned *)&dk)[LOWORD] &= 0xfffe0000; +#endif + + wh = (long double) dk; + *er = half * ((y - wh) * (y + wh) - (t2 - wh * wh)); + } + return (half * t2); + } +/* + * x or y is subnormal or zero + */ + if (nx == 0) { + if (x == 0.0L) + return (-1.0L / x); + else { + x *= two240; + y *= two240; + ix = HI_XWORD(x); + iy = HI_XWORD(y); + nx = (ix >> 16) - 240; + ny = (iy >> 16) - 240; + /* guard subnormal flush to 0 */ + if (x == 0.0L) + return (-1.0L / x); + } + } else if (ny == 0) { /* y subnormal, scale it */ + y *= two240; + iy = HI_XWORD(y); + ny = (iy >> 16) - 240; + } + n = nx - ny; +/* + * When y is zero or when x >> y, i.e., n > 62, 78, 122 for DBLE, + * EXTENDED, QUAD respectively, + * log(x) = log(sqrt(x * x + y * y)) to 27 extra bits. 
+ */ + +#if defined(__x86) + if (n > 78 || y == 0.0L) { +#else + if (n > 122 || y == 0.0L) { +#endif + + XFSCALE(x, (0x3fff - (ix >> 16))); + i = ((ix & 0xffff) + 0x100) >> 9; /* 7.5 bits of x */ + zk = 1.0L + ((long double) i) * 0.0078125L; + z = x - zk; + dk = (double)z; + +#if defined(__x86) + ((unsigned *)&dk)[LOWORD] &= 0xfffe0000; +#endif + + zh = (long double)dk; + k = i & 0x7f; /* index of zk */ + n = nx - 0x3fff; + *er = z - zh; + if (i == 0x80) { /* if zk = 2.0, adjust scaling */ + n += 1; + zh *= 0.5L; *er *= 0.5L; + } + w = k_log_NKzl(n, k, zh, er); + } else { +/* + * compute z = x*x + y*y + */ + XFSCALE(x, (0x3fff - (ix >> 16))); + XFSCALE(y, (0x3fff - n - (iy >> 16))); + ix = (ix & 0xffff) | 0x3fff0000; + iy = (iy & 0xffff) | (0x3fff0000 - (n << 16)); + nx -= 0x3fff; + t1 = x * x; t2 = y * y; + wh = x; + +/* split x into correctly rounded half */ +#if defined(__x86) + ((unsigned *)&wh)[0] = 0; /* 32 bits chopped */ +#else + lx = ((unsigned *)&wh)[2]; /* 56 rounded */ + j = ((lx >> 24) + 1) >> 1; + /* j can reach 128: shift as unsigned so the carry wraps to 0 */ + ((unsigned *)&wh)[2] = ((unsigned)j << 25); + lx = ((unsigned *)&wh)[1]; + ly = lx + (j >> 7); + ((unsigned *)&wh)[1] = ly; + ((unsigned *)&wh)[0] += (ly == 0 && lx != 0); + ((unsigned *)&wh)[3] = 0; +#endif + + z = t1+t2; +/* + * higher precision simulation x*x = t1 + t3, y*y = t2 + t4 + */ + tk = wh - x; + t3 = tk * tk - (two * wh * tk - (wh * wh - t1)); + wh = y; + +/* split y into correctly rounded half */ +#if defined(__x86) + ((unsigned *)&wh)[0] = 0; /* 32 bits chopped */ +#else + ly = ((unsigned *)&wh)[2]; /* 56 bits rounded */ + j = ((ly >> 24) + 1) >> 1; + ((unsigned *)&wh)[2] = ((unsigned)j << 25); + lx = ((unsigned *)&wh)[1]; + ly = lx + (j >> 7); + ((unsigned *)&wh)[1] = ly; + ((unsigned *)&wh)[0] += (ly == 0 && lx != 0); + ((unsigned *)&wh)[3] = 0; +#endif + + tk = wh - y; + t4 = tk * tk - (two * wh * tk - (wh * wh - t2)); +/* + * find zk matches z to 7.5 bits + */ + iz = HI_XWORD(z); + k = ((iz & 0xffff) + 0x100) >> 9; /* 7.5 bits of z */ + nz = (iz >> 16) - 
0x3fff + (k >> 7); + k &= 0x7f; + zk = 1.0L + ((long double) k) * 0.0078125L; + if (nz == 1) zk += zk; + else if (nz == 2) zk *= 4.0L; + else if (nz == 3) zk *= 8.0L; +/* + * order t1, t2, t3, t4 according to their size + */ + if (t2 >= fabsl(t3)) { + if (fabsl(t3) < fabsl(t4)) { + wh = t3; t3 = t4; t4 = wh; + } + } else { + wh = t2; t2 = t3; t3 = wh; + } +/* + * higher precision simulation: x * x + y * y = t1 + t2 + t3 + t4 + * = zk (7 bits) + zh (half precision) + *er (tail) and call k_log_NKzl + */ + tk = t1 - zk; + zh = ((tk + t2) + t3) + t4; + +/* split zh into correctly rounded half */ +#if defined(__x86) + ((unsigned *)&zh)[0] = 0; +#else + ly = ((unsigned *)&zh)[2]; + j = ((ly >> 24) + 1) >> 1; + ((unsigned *)&zh)[2] = ((unsigned)j << 25); + lx = ((unsigned *)&zh)[1]; + ly = lx + (j >> 7); + ((unsigned *)&zh)[1] = ly; + ((unsigned *)&zh)[0] += (ly == 0 && lx != 0); + ((unsigned *)&zh)[3] = 0; +#endif + + w = fabsl(zh); + if (w >= fabsl(t2)) { + *er = (((tk - zh) + t2) + t3) + t4; + } else { + if (n == 0) { + wh = half * zk; + wh = (t1 - wh) - (wh - t2); + } else + wh = tk + t2; + if (w >= fabsl(t3)) + *er = ((wh - zh) + t3) + t4; + else { + z = t3; + t3 += t4; + t4 -= t3 - z; + if (w >= fabsl(t3)) + *er = ((wh - zh) + t3) + t4; + else + *er = ((wh + t3) - zh) + t4; + } + } + /* rescale the remainder to match zk = 1 + k*2**-7 */ + if (nz == 3) { + zh *= 0.125L; *er *= 0.125L; + } else if (nz == 2) { + zh *= 0.25L; *er *= 0.25L; + } else if (nz == 1) { + zh *= half; *er *= half; + } + nz += nx + nx; + /* log(hypot(x, y)) = log(x*x + y*y) / 2 */ + w = half * k_log_NKzl(nz, k, zh, er); + *er *= half; + } + return (w); +} diff --git a/usr/src/lib/libm/common/llib-lm b/usr/src/lib/libm/common/llib-lm new file mode 100644 index 0000000000..6a6613076b --- /dev/null +++ b/usr/src/lib/libm/common/llib-lm @@ -0,0 +1,47 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* LINTLIBRARY */ +/* PROTOLIB1 */ + +#undef __PRAGMA_REDEFINE_EXTNAME +#include <math.h> +#if defined(_STDC_C99) +#undef isnan +extern int isnan(double); +extern int isnand(double); /* LSARC/2003/670 */ +typedef union _h_val { + unsigned long _i[2]; + double _d; +} _h_val; +extern const _h_val __huge_val; +#endif +#include <fenv.h> +#include <complex.h> +#undef clog +extern double complex clog(double complex); diff --git a/usr/src/lib/libm/common/m9x/__fenv_amd64.il b/usr/src/lib/libm/common/m9x/__fenv_amd64.il new file mode 100644 index 0000000000..f28fe47661 --- /dev/null +++ b/usr/src/lib/libm/common/m9x/__fenv_amd64.il @@ -0,0 +1,349 @@ +/ +/ CDDL HEADER START +/ +/ The contents of this file are subject to the terms of the +/ Common Development and Distribution License (the "License"). +/ You may not use this file except in compliance with the License. +/ +/ You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +/ or http://www.opensolaris.org/os/licensing. +/ See the License for the specific language governing permissions +/ and limitations under the License. 
+/ +/ When distributing Covered Code, include this CDDL HEADER in each +/ file and include the License file at usr/src/OPENSOLARIS.LICENSE. +/ If applicable, add the following below this CDDL HEADER, with the +/ fields enclosed by brackets "[]" replaced with your own identifying +/ information: Portions Copyright [yyyy] [name of copyright owner] +/ +/ CDDL HEADER END +/ +/ Copyright 2011 Nexenta Systems, Inc. All rights reserved. +/ +/ Copyright 2006 Sun Microsystems, Inc. All rights reserved. +/ Use is subject to license terms. +/ + .inline __fenv_getcwsw,1 + fstsw (%rdi) + fstcw 2(%rdi) + .end + + .inline __fenv_setcwsw,1 + movw (%rdi),%dx + movw 2(%rdi),%cx + subq $32,%rsp + fstenv (%rsp) + movw %cx,(%rsp) + movw %dx,4(%rsp) + fldenv (%rsp) + fwait + addq $32,%rsp + .end + + .inline __fenv_getmxcsr,1 + stmxcsr (%rdi) + .end + + .inline __fenv_setmxcsr,1 + ldmxcsr (%rdi) + .end + + .inline f2xm1,1 + fldt (%rsp) + f2xm1 + .end + + .inline fyl2x,2 + fldt (%rsp) + fldt 16(%rsp) + fyl2x + .end + + .inline fptan,1 + fldt (%rsp) + fptan + fstpt (%rsp) + .end + + .inline fpatan,2 + fldt (%rsp) + fldt 16(%rsp) + fpatan + .end + + .inline fxtract,1 + fldt (%rsp) + fxtract + .end + + .inline fprem1,2 + fldt (%rsp) + fldt 16(%rsp) + fprem1 + fstp %st(1) + .end + + .inline fprem,2 + fldt (%rsp) + fldt 16(%rsp) + fprem + fstp %st(1) + .end + + .inline fyl2xp1,2 + fldt (%rsp) + fldt 16(%rsp) + fyl2xp1 + .end + + .inline fsqrt,1 + fldt (%rsp) + fsqrt + .end + + .inline fsincos,1 + fldt (%rsp) + fsincos + .end + + .inline frndint,1 + fldt (%rsp) + frndint + .end + + .inline fscale,2 + fldt (%rsp) + fldt 16(%rsp) + fscale + fstp %st(1) + .end + + .inline fsin,1 + fldt (%rsp) + fsin + .end + + .inline fcos,1 + fldt (%rsp) + fcos + .end + + .inline sse_cmpeqss,3 + movss (%rdi),%xmm0 + cmpeqss (%rsi),%xmm0 + movss %xmm0,(%rdx) + .end + + .inline sse_cmpltss,3 + movss (%rdi),%xmm0 + cmpltss (%rsi),%xmm0 + movss %xmm0,(%rdx) + .end + + .inline sse_cmpless,3 + movss (%rdi),%xmm0 + 
cmpless (%rsi),%xmm0 + movss %xmm0,(%rdx) + .end + + .inline sse_cmpunordss,3 + movss (%rdi),%xmm0 + cmpunordss (%rsi),%xmm0 + movss %xmm0,(%rdx) + .end + + .inline sse_minss,3 + movss (%rdi),%xmm0 + minss (%rsi),%xmm0 + movss %xmm0,(%rdx) + .end + + .inline sse_maxss,3 + movss (%rdi),%xmm0 + maxss (%rsi),%xmm0 + movss %xmm0,(%rdx) + .end + + .inline sse_addss,3 + movss (%rdi),%xmm0 + addss (%rsi),%xmm0 + movss %xmm0,(%rdx) + .end + + .inline sse_subss,3 + movss (%rdi),%xmm0 + subss (%rsi),%xmm0 + movss %xmm0,(%rdx) + .end + + .inline sse_mulss,3 + movss (%rdi),%xmm0 + mulss (%rsi),%xmm0 + movss %xmm0,(%rdx) + .end + + .inline sse_divss,3 + movss (%rdi),%xmm0 + divss (%rsi),%xmm0 + movss %xmm0,(%rdx) + .end + + .inline sse_sqrtss,2 + sqrtss (%rdi),%xmm0 + movss %xmm0,(%rsi) + .end + + .inline sse_ucomiss,2 + movss (%rdi),%xmm0 + ucomiss (%rsi),%xmm0 + .end + + .inline sse_comiss,2 + movss (%rdi),%xmm0 + comiss (%rsi),%xmm0 + .end + + .inline sse_cvtss2sd,2 + cvtss2sd (%rdi),%xmm0 + movsd %xmm0,(%rsi) + .end + + .inline sse_cvtsi2ss,2 + cvtsi2ss (%rdi),%xmm0 + movss %xmm0,(%rsi) + .end + + .inline sse_cvttss2si,2 + cvttss2si (%rdi),%ecx + movl %ecx,(%rsi) + .end + + .inline sse_cvtss2si,2 + cvtss2si (%rdi),%ecx + movl %ecx,(%rsi) + .end + + .inline sse_cvtsi2ssq,2 + cvtsi2ssq (%rdi),%xmm0 + movss %xmm0,(%rsi) + .end + + .inline sse_cvttss2siq,2 + cvttss2siq (%rdi),%rcx + movq %rcx,(%rsi) + .end + + .inline sse_cvtss2siq,2 + cvtss2siq (%rdi),%rcx + movq %rcx,(%rsi) + .end + + .inline sse_cmpeqsd,3 + movsd (%rdi),%xmm0 + cmpeqsd (%rsi),%xmm0 + movsd %xmm0,(%rdx) + .end + + .inline sse_cmpltsd,3 + movsd (%rdi),%xmm0 + cmpltsd (%rsi),%xmm0 + movsd %xmm0,(%rdx) + .end + + .inline sse_cmplesd,3 + movsd (%rdi),%xmm0 + cmplesd (%rsi),%xmm0 + movsd %xmm0,(%rdx) + .end + + .inline sse_cmpunordsd,3 + movsd (%rdi),%xmm0 + cmpunordsd (%rsi),%xmm0 + movsd %xmm0,(%rdx) + .end + + .inline sse_minsd,3 + movsd (%rdi),%xmm0 + minsd (%rsi),%xmm0 + movsd %xmm0,(%rdx) + .end + + .inline 
sse_maxsd,3 + movsd (%rdi),%xmm0 + maxsd (%rsi),%xmm0 + movsd %xmm0,(%rdx) + .end + + .inline sse_addsd,3 + movsd (%rdi),%xmm0 + addsd (%rsi),%xmm0 + movsd %xmm0,(%rdx) + .end + + .inline sse_subsd,3 + movsd (%rdi),%xmm0 + subsd (%rsi),%xmm0 + movsd %xmm0,(%rdx) + .end + + .inline sse_mulsd,3 + movsd (%rdi),%xmm0 + mulsd (%rsi),%xmm0 + movsd %xmm0,(%rdx) + .end + + .inline sse_divsd,3 + movsd (%rdi),%xmm0 + divsd (%rsi),%xmm0 + movsd %xmm0,(%rdx) + .end + + .inline sse_sqrtsd,2 + sqrtsd (%rdi),%xmm0 + movsd %xmm0,(%rsi) + .end + + .inline sse_ucomisd,2 + movsd (%rdi),%xmm0 + ucomisd (%rsi),%xmm0 + .end + + .inline sse_comisd,2 + movsd (%rdi),%xmm0 + comisd (%rsi),%xmm0 + .end + + .inline sse_cvtsd2ss,2 + cvtsd2ss (%rdi),%xmm0 + movss %xmm0,(%rsi) + .end + + .inline sse_cvtsi2sd,2 + cvtsi2sd (%rdi),%xmm0 + movsd %xmm0,(%rsi) + .end + + .inline sse_cvttsd2si,2 + cvttsd2si (%rdi),%ecx + movl %ecx,(%rsi) + .end + + .inline sse_cvtsd2si,2 + cvtsd2si (%rdi),%ecx + movl %ecx,(%rsi) + .end + + .inline sse_cvtsi2sdq,2 + cvtsi2sdq (%rdi),%xmm0 + movsd %xmm0,(%rsi) + .end + + .inline sse_cvttsd2siq,2 + cvttsd2siq (%rdi),%rcx + movq %rcx,(%rsi) + .end + + .inline sse_cvtsd2siq,2 + cvtsd2siq (%rdi),%rcx + movq %rcx,(%rsi) + .end diff --git a/usr/src/lib/libm/common/m9x/__fenv_i386.il b/usr/src/lib/libm/common/m9x/__fenv_i386.il new file mode 100644 index 0000000000..aabc3e6f99 --- /dev/null +++ b/usr/src/lib/libm/common/m9x/__fenv_i386.il @@ -0,0 +1,411 @@ +/ +/ CDDL HEADER START +/ +/ The contents of this file are subject to the terms of the +/ Common Development and Distribution License (the "License"). +/ You may not use this file except in compliance with the License. +/ +/ You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +/ or http://www.opensolaris.org/os/licensing. +/ See the License for the specific language governing permissions +/ and limitations under the License. 
+/ +/ When distributing Covered Code, include this CDDL HEADER in each +/ file and include the License file at usr/src/OPENSOLARIS.LICENSE. +/ If applicable, add the following below this CDDL HEADER, with the +/ fields enclosed by brackets "[]" replaced with your own identifying +/ information: Portions Copyright [yyyy] [name of copyright owner] +/ +/ CDDL HEADER END +/ +/ Copyright 2011 Nexenta Systems, Inc. All rights reserved. +/ +/ Copyright 2006 Sun Microsystems, Inc. All rights reserved. +/ Use is subject to license terms. +/ + .inline __fenv_getcwsw,1 + movl (%esp),%eax + fstsw (%eax) + fstcw 2(%eax) + .end + + .inline __fenv_setcwsw,1 + movl (%esp),%eax + movw (%eax),%dx + movw 2(%eax),%cx + subl $28,%esp + fstenv (%esp) + movw %cx,(%esp) + movw %dx,4(%esp) + fldenv (%esp) + fwait + addl $28,%esp + .end + + .inline __fenv_getmxcsr,1 + movl (%esp),%eax + stmxcsr (%eax) + .end + + .inline __fenv_setmxcsr,1 + movl (%esp),%eax + ldmxcsr (%eax) + .end + + .inline f2xm1,1 + fldt (%esp) + f2xm1 + .end + + .inline fyl2x,2 + fldt (%esp) + fldt 12(%esp) + fyl2x + .end + + .inline fptan,1 + fldt (%esp) + fptan + fstpt (%esp) + .end + + .inline fpatan,2 + fldt (%esp) + fldt 12(%esp) + fpatan + .end + + .inline fxtract,1 + fldt (%esp) + fxtract + .end + + .inline fprem1,2 + fldt (%esp) + fldt 12(%esp) + fprem1 + fstp %st(1) + .end + + .inline fprem,2 + fldt (%esp) + fldt 12(%esp) + fprem + fstp %st(1) + .end + + .inline fyl2xp1,2 + fldt (%esp) + fldt 12(%esp) + fyl2xp1 + .end + + .inline fsqrt,1 + fldt (%esp) + fsqrt + .end + + .inline fsincos,1 + fldt (%esp) + fsincos + .end + + .inline frndint,1 + fldt (%esp) + frndint + .end + + .inline fscale,2 + fldt (%esp) + fldt 12(%esp) + fscale + fstp %st(1) + .end + + .inline fsin,1 + fldt (%esp) + fsin + .end + + .inline fcos,1 + fldt (%esp) + fcos + .end + + .inline sse_cmpeqss,3 + movl (%esp),%eax + movl 4(%esp),%edx + movl 8(%esp),%ecx + movss (%eax),%xmm0 + cmpeqss (%edx),%xmm0 + movss %xmm0,(%ecx) + .end + + .inline 
sse_cmpltss,3 + movl (%esp),%eax + movl 4(%esp),%edx + movl 8(%esp),%ecx + movss (%eax),%xmm0 + cmpltss (%edx),%xmm0 + movss %xmm0,(%ecx) + .end + + .inline sse_cmpless,3 + movl (%esp),%eax + movl 4(%esp),%edx + movl 8(%esp),%ecx + movss (%eax),%xmm0 + cmpless (%edx),%xmm0 + movss %xmm0,(%ecx) + .end + + .inline sse_cmpunordss,3 + movl (%esp),%eax + movl 4(%esp),%edx + movl 8(%esp),%ecx + movss (%eax),%xmm0 + cmpunordss (%edx),%xmm0 + movss %xmm0,(%ecx) + .end + + .inline sse_minss,3 + movl (%esp),%eax + movl 4(%esp),%edx + movl 8(%esp),%ecx + movss (%eax),%xmm0 + minss (%edx),%xmm0 + movss %xmm0,(%ecx) + .end + + .inline sse_maxss,3 + movl (%esp),%eax + movl 4(%esp),%edx + movl 8(%esp),%ecx + movss (%eax),%xmm0 + maxss (%edx),%xmm0 + movss %xmm0,(%ecx) + .end + + .inline sse_addss,3 + movl (%esp),%eax + movl 4(%esp),%edx + movl 8(%esp),%ecx + movss (%eax),%xmm0 + addss (%edx),%xmm0 + movss %xmm0,(%ecx) + .end + + .inline sse_subss,3 + movl (%esp),%eax + movl 4(%esp),%edx + movl 8(%esp),%ecx + movss (%eax),%xmm0 + subss (%edx),%xmm0 + movss %xmm0,(%ecx) + .end + + .inline sse_mulss,3 + movl (%esp),%eax + movl 4(%esp),%edx + movl 8(%esp),%ecx + movss (%eax),%xmm0 + mulss (%edx),%xmm0 + movss %xmm0,(%ecx) + .end + + .inline sse_divss,3 + movl (%esp),%eax + movl 4(%esp),%edx + movl 8(%esp),%ecx + movss (%eax),%xmm0 + divss (%edx),%xmm0 + movss %xmm0,(%ecx) + .end + + .inline sse_sqrtss,2 + movl (%esp),%eax + movl 4(%esp),%edx + sqrtss (%eax),%xmm0 + movss %xmm0,(%edx) + .end + + .inline sse_ucomiss,2 + movl (%esp),%eax + movl 4(%esp),%edx + movss (%eax),%xmm0 + ucomiss (%edx),%xmm0 + .end + + .inline sse_comiss,2 + movl (%esp),%eax + movl 4(%esp),%edx + movss (%eax),%xmm0 + comiss (%edx),%xmm0 + .end + + .inline sse_cvtss2sd,2 + movl (%esp),%eax + movl 4(%esp),%edx + cvtss2sd (%eax),%xmm0 + movsd %xmm0,(%edx) + .end + + .inline sse_cvtsi2ss,2 + movl (%esp),%eax + movl 4(%esp),%edx + cvtsi2ss (%eax),%xmm0 + movss %xmm0,(%edx) + .end + + .inline sse_cvttss2si,2 + movl 
(%esp),%eax + movl 4(%esp),%edx + cvttss2si (%eax),%ecx + movl %ecx,(%edx) + .end + + .inline sse_cvtss2si,2 + movl (%esp),%eax + movl 4(%esp),%edx + cvtss2si (%eax),%ecx + movl %ecx,(%edx) + .end + + .inline sse_cmpeqsd,3 + movl (%esp),%eax + movl 4(%esp),%edx + movl 8(%esp),%ecx + movsd (%eax),%xmm0 + cmpeqsd (%edx),%xmm0 + movsd %xmm0,(%ecx) + .end + + .inline sse_cmpltsd,3 + movl (%esp),%eax + movl 4(%esp),%edx + movl 8(%esp),%ecx + movsd (%eax),%xmm0 + cmpltsd (%edx),%xmm0 + movsd %xmm0,(%ecx) + .end + + .inline sse_cmplesd,3 + movl (%esp),%eax + movl 4(%esp),%edx + movl 8(%esp),%ecx + movsd (%eax),%xmm0 + cmplesd (%edx),%xmm0 + movsd %xmm0,(%ecx) + .end + + .inline sse_cmpunordsd,3 + movl (%esp),%eax + movl 4(%esp),%edx + movl 8(%esp),%ecx + movsd (%eax),%xmm0 + cmpunordsd (%edx),%xmm0 + movsd %xmm0,(%ecx) + .end + + .inline sse_minsd,3 + movl (%esp),%eax + movl 4(%esp),%edx + movl 8(%esp),%ecx + movsd (%eax),%xmm0 + minsd (%edx),%xmm0 + movsd %xmm0,(%ecx) + .end + + .inline sse_maxsd,3 + movl (%esp),%eax + movl 4(%esp),%edx + movl 8(%esp),%ecx + movsd (%eax),%xmm0 + maxsd (%edx),%xmm0 + movsd %xmm0,(%ecx) + .end + + .inline sse_addsd,3 + movl (%esp),%eax + movl 4(%esp),%edx + movl 8(%esp),%ecx + movsd (%eax),%xmm0 + addsd (%edx),%xmm0 + movsd %xmm0,(%ecx) + .end + + .inline sse_subsd,3 + movl (%esp),%eax + movl 4(%esp),%edx + movl 8(%esp),%ecx + movsd (%eax),%xmm0 + subsd (%edx),%xmm0 + movsd %xmm0,(%ecx) + .end + + .inline sse_mulsd,3 + movl (%esp),%eax + movl 4(%esp),%edx + movl 8(%esp),%ecx + movsd (%eax),%xmm0 + mulsd (%edx),%xmm0 + movsd %xmm0,(%ecx) + .end + + .inline sse_divsd,3 + movl (%esp),%eax + movl 4(%esp),%edx + movl 8(%esp),%ecx + movsd (%eax),%xmm0 + divsd (%edx),%xmm0 + movsd %xmm0,(%ecx) + .end + + .inline sse_sqrtsd,2 + movl (%esp),%eax + movl 4(%esp),%edx + sqrtsd (%eax),%xmm0 + movsd %xmm0,(%edx) + .end + + .inline sse_ucomisd,2 + movl (%esp),%eax + movl 4(%esp),%edx + movsd (%eax),%xmm0 + ucomisd (%edx),%xmm0 + .end + + .inline 
sse_comisd,2 + movl (%esp),%eax + movl 4(%esp),%edx + movsd (%eax),%xmm0 + comisd (%edx),%xmm0 + .end + + .inline sse_cvtsd2ss,2 + movl (%esp),%eax + movl 4(%esp),%edx + cvtsd2ss (%eax),%xmm0 + movss %xmm0,(%edx) + .end + + .inline sse_cvtsi2sd,2 + movl (%esp),%eax + movl 4(%esp),%edx + cvtsi2sd (%eax),%xmm0 + movsd %xmm0,(%edx) + .end + + .inline sse_cvttsd2si,2 + movl (%esp),%eax + movl 4(%esp),%edx + cvttsd2si (%eax),%ecx + movl %ecx,(%edx) + .end + + .inline sse_cvtsd2si,2 + movl (%esp),%eax + movl 4(%esp),%edx + cvtsd2si (%eax),%ecx + movl %ecx,(%edx) + .end diff --git a/usr/src/lib/libm/common/m9x/__fenv_sparc.il b/usr/src/lib/libm/common/m9x/__fenv_sparc.il new file mode 100644 index 0000000000..fd27dcf647 --- /dev/null +++ b/usr/src/lib/libm/common/m9x/__fenv_sparc.il @@ -0,0 +1,41 @@ +! +! CDDL HEADER START +! +! The contents of this file are subject to the terms of the +! Common Development and Distribution License (the "License"). +! You may not use this file except in compliance with the License. +! +! You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +! or http://www.opensolaris.org/os/licensing. +! See the License for the specific language governing permissions +! and limitations under the License. +! +! When distributing Covered Code, include this CDDL HEADER in each +! file and include the License file at usr/src/OPENSOLARIS.LICENSE. +! If applicable, add the following below this CDDL HEADER, with the +! fields enclosed by brackets "[]" replaced with your own identifying +! information: Portions Copyright [yyyy] [name of copyright owner] +! +! CDDL HEADER END +! +! Copyright 2011 Nexenta Systems, Inc. All rights reserved. +! +! Copyright 2006 Sun Microsystems, Inc. All rights reserved. +! Use is subject to license terms. +! +! 
+ .inline __fenv_getfsr,1 + st %fsr,[%o0] + .end + + .inline __fenv_setfsr,1 + ld [%o0],%fsr + .end + + .inline __fenv_getfsr32,1 + st %fsr,[%o0] + .end + + .inline __fenv_setfsr32,1 + ld [%o0],%fsr + .end diff --git a/usr/src/lib/libm/common/m9x/__fenv_sparcv9.il b/usr/src/lib/libm/common/m9x/__fenv_sparcv9.il new file mode 100644 index 0000000000..b460587a0a --- /dev/null +++ b/usr/src/lib/libm/common/m9x/__fenv_sparcv9.il @@ -0,0 +1,41 @@ +! +! CDDL HEADER START +! +! The contents of this file are subject to the terms of the +! Common Development and Distribution License (the "License"). +! You may not use this file except in compliance with the License. +! +! You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +! or http://www.opensolaris.org/os/licensing. +! See the License for the specific language governing permissions +! and limitations under the License. +! +! When distributing Covered Code, include this CDDL HEADER in each +! file and include the License file at usr/src/OPENSOLARIS.LICENSE. +! If applicable, add the following below this CDDL HEADER, with the +! fields enclosed by brackets "[]" replaced with your own identifying +! information: Portions Copyright [yyyy] [name of copyright owner] +! +! CDDL HEADER END +! +! Copyright 2011 Nexenta Systems, Inc. All rights reserved. +! +! Copyright 2006 Sun Microsystems, Inc. All rights reserved. +! Use is subject to license terms. +! +! 
+ .inline __fenv_getfsr,1 + stx %fsr,[%o0] + .end + + .inline __fenv_setfsr,1 + ldx [%o0],%fsr + .end + + .inline __fenv_getfsr32,1 + st %fsr,[%o0] + .end + + .inline __fenv_setfsr32,1 + ld [%o0],%fsr + .end diff --git a/usr/src/lib/libm/common/m9x/__fex_hdlr.c b/usr/src/lib/libm/common/m9x/__fex_hdlr.c new file mode 100644 index 0000000000..737c53ff20 --- /dev/null +++ b/usr/src/lib/libm/common/m9x/__fex_hdlr.c @@ -0,0 +1,849 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#include "fenv_synonyms.h" +#undef lint +#include <signal.h> +#include <siginfo.h> +#include <ucontext.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <thread.h> +#include <math.h> +#if defined(__SUNPRO_C) +#include <sunmath.h> +#endif +#include <fenv.h> +#include "fex_handler.h" +#include "fenv_inlines.h" + +#if defined(__sparc) && !defined(__sparcv9) +#include <sys/procfs.h> +#endif + +/* 2.x signal.h doesn't declare sigemptyset or sigismember + if they're #defined (see sys/signal.h) */ +extern int sigemptyset(sigset_t *); +extern int sigismember(const sigset_t *, int); + +/* external globals */ +void (*__mt_fex_sync)() = NULL; /* for synchronization with libmtsk */ +#pragma weak __mt_fex_sync + +#ifdef LIBM_MT_FEX_SYNC +void (*__libm_mt_fex_sync)() = NULL; /* new, improved version of above */ +#pragma weak __libm_mt_fex_sync +#endif + +/* private variables */ +static fex_handler_t main_handlers; +static int handlers_initialized = 0; +static thread_key_t handlers_key; +static mutex_t handlers_key_lock = DEFAULTMUTEX; + +static struct sigaction oact = { 0, SIG_DFL }; +static mutex_t hdlr_lock = DEFAULTMUTEX; +static int hdlr_installed = 0; + +/* private const data */ +static const int te_bit[FEX_NUM_EXC] = { + 1 << fp_trap_inexact, + 1 << fp_trap_division, + 1 << fp_trap_underflow, + 1 << fp_trap_overflow, + 1 << fp_trap_invalid, + 1 << fp_trap_invalid, + 1 << fp_trap_invalid, + 1 << fp_trap_invalid, + 1 << fp_trap_invalid, + 1 << fp_trap_invalid, + 1 << fp_trap_invalid, + 1 << fp_trap_invalid +}; + +/* +* Return the traps to be enabled given the current handling modes +* and flags +*/ +static int +__fex_te_needed(struct fex_handler_data *thr_handlers, unsigned long fsr) +{ + int i, ex, te; + + /* set traps for handling modes */ + te = 0; + for (i = 0; i < FEX_NUM_EXC; i++) + if (thr_handlers[i].__mode != FEX_NONSTOP) + te |= te_bit[i]; + + /* add traps for retrospective diagnostics */ + if (fex_get_log()) { + ex = 
(int)__fenv_get_ex(fsr); + if (!(ex & FE_INEXACT)) + te |= (1 << fp_trap_inexact); + if (!(ex & FE_UNDERFLOW)) + te |= (1 << fp_trap_underflow); + if (!(ex & FE_OVERFLOW)) + te |= (1 << fp_trap_overflow); + if (!(ex & FE_DIVBYZERO)) + te |= (1 << fp_trap_division); + if (!(ex & FE_INVALID)) + te |= (1 << fp_trap_invalid); + } + + return te; +} + +/* +* The following function synchronizes with libmtsk (SPARC only, for now) +*/ +static void +__fex_sync_with_libmtsk(int begin, int master) +{ + static fenv_t master_env; + static int env_initialized = 0; + static mutex_t env_lock = DEFAULTMUTEX; + + if (begin) { + mutex_lock(&env_lock); + if (master) { + (void) fegetenv(&master_env); + env_initialized = 1; + } + else if (env_initialized) + (void) fesetenv(&master_env); + mutex_unlock(&env_lock); + } + else if (master && fex_get_log()) + __fex_update_te(); +} + +#ifdef LIBM_MT_FEX_SYNC +/* +* The following function may be used for synchronization with any +* internal project that manages multiple threads +*/ +enum __libm_mt_fex_sync_actions { + __libm_mt_fex_start_master = 0, + __libm_mt_fex_start_slave, + __libm_mt_fex_finish_master, + __libm_mt_fex_finish_slave +}; + +struct __libm_mt_fex_sync_data { + fenv_t master_env; + int initialized; + mutex_t lock; +}; + +static void +__fex_sync_with_threads(enum __libm_mt_fex_sync_actions action, + struct __libm_mt_fex_sync_data *thr_env) +{ + switch (action) { + case __libm_mt_fex_start_master: + mutex_lock(&thr_env->lock); + (void) fegetenv(&thr_env->master_env); + thr_env->initialized = 1; + mutex_unlock(&thr_env->lock); + break; + + case __libm_mt_fex_start_slave: + mutex_lock(&thr_env->lock); + if (thr_env->initialized) + (void) fesetenv(&thr_env->master_env); + mutex_unlock(&thr_env->lock); + break; + + case __libm_mt_fex_finish_master: +#if defined(__x86) + __fex_update_te(); +#else + if (fex_get_log()) + __fex_update_te(); +#endif + break; + + case __libm_mt_fex_finish_slave: +#if defined(__x86) + /* clear traps, making 
all accrued flags visible in status word */ + { + unsigned long fsr; + __fenv_getfsr(&fsr); + __fenv_set_te(fsr, 0); + __fenv_setfsr(&fsr); + } +#endif + break; + } +} +#endif + +#if defined(__sparc) + +/* +* Code for setting or clearing interval mode on US-III and above. +* This is embedded as data so we don't have to mark the library +* as a v8plusb/v9b object. (I could have just used one entry and +* modified the second word to set the bits I want, but that would +* have required another mutex.) +*/ +static const unsigned int siam[][2] = { + { 0x81c3e008, 0x81b01020 }, /* retl, siam 0 */ + { 0x81c3e008, 0x81b01024 }, /* retl, siam 4 */ + { 0x81c3e008, 0x81b01025 }, /* retl, siam 5 */ + { 0x81c3e008, 0x81b01026 }, /* retl, siam 6 */ + { 0x81c3e008, 0x81b01027 } /* retl, siam 7 */ +}; + +/* +* If a handling mode is in effect, apply it; otherwise invoke the +* saved handler +*/ +static void +__fex_hdlr(int sig, siginfo_t *sip, ucontext_t *uap) +{ + struct fex_handler_data *thr_handlers; + struct sigaction act; + void (*handler)(), (*siamp)(); + int mode, i; + enum fex_exception e; + fex_info_t info; + unsigned long fsr, tmpfsr, addr; + unsigned int gsr; + + /* determine which exception occurred */ + switch (sip->si_code) { + case FPE_FLTDIV: + e = fex_division; + break; + case FPE_FLTOVF: + e = fex_overflow; + break; + case FPE_FLTUND: + e = fex_underflow; + break; + case FPE_FLTRES: + e = fex_inexact; + break; + case FPE_FLTINV: + if ((int)(e = __fex_get_invalid_type(sip, uap)) < 0) + goto not_ieee; + break; + default: + /* not an IEEE exception */ + goto not_ieee; + } + + /* get the handling mode */ + mode = FEX_NOHANDLER; + handler = oact.sa_handler; /* for log; just looking, no need to lock */ + thr_handlers = __fex_get_thr_handlers(); + if (thr_handlers && thr_handlers[(int)e].__mode != FEX_NOHANDLER) { + mode = thr_handlers[(int)e].__mode; + handler = thr_handlers[(int)e].__handler; + } + + /* make an entry in the log of retro. diag. 
if need be */ + i = ((int)uap->uc_mcontext.fpregs.fpu_fsr >> 5) & 0x1f; + __fex_mklog(uap, (char *)sip->si_addr, i, e, mode, (void *)handler); + + /* handle the exception based on the mode */ + if (mode == FEX_NOHANDLER) + goto not_ieee; + else if (mode == FEX_ABORT) + abort(); + else if (mode == FEX_SIGNAL) { + handler(sig, sip, uap); + return; + } + + /* custom or nonstop mode; disable traps and clear flags */ + __fenv_getfsr(&fsr); + __fenv_set_te(fsr, 0); + __fenv_set_ex(fsr, 0); + + /* if interval mode was set, clear it, then substitute the + interval rounding direction and clear ns mode in the fsr */ +#ifdef __sparcv9 + gsr = uap->uc_mcontext.asrs[3]; +#else + gsr = 0; + if (uap->uc_mcontext.xrs.xrs_id == XRS_ID) + gsr = (*(unsigned long long*)((prxregset_t*)uap->uc_mcontext. + xrs.xrs_ptr)->pr_un.pr_v8p.pr_filler); +#endif + gsr = (gsr >> 25) & 7; + if (gsr & 4) { + siamp = (void (*)()) siam[0]; + siamp(); + tmpfsr = fsr; + fsr = (fsr & ~0xc0400000ul) | ((gsr & 3) << 30); + } + __fenv_setfsr(&fsr); + + /* decode the operation */ + __fex_get_op(sip, uap, &info); + + /* if a custom mode handler is installed, invoke it */ + if (mode == FEX_CUSTOM) { + /* if we got here from feraiseexcept, pass dummy info */ + addr = (unsigned long)sip->si_addr; + if (addr >= (unsigned long)feraiseexcept && + addr < (unsigned long)fetestexcept) { + info.op = fex_other; + info.op1.type = info.op2.type = info.res.type = + fex_nodata; + } + + /* restore interval mode if it was set, and put the original + rounding direction and ns mode back in the fsr */ + if (gsr & 4) { + __fenv_setfsr(&tmpfsr); + siamp = (void (*)()) siam[1 + (gsr & 3)]; + siamp(); + } + + handler(1 << (int)e, &info); + + /* restore modes in case the user's handler changed them */ + if (gsr & 4) { + siamp = (void (*)()) siam[0]; + siamp(); + } + __fenv_setfsr(&fsr); + } + + /* stuff the result */ + __fex_st_result(sip, uap, &info); + + /* "or" in any exception flags and update traps */ + fsr = 
uap->uc_mcontext.fpregs.fpu_fsr; + fsr |= ((info.flags & 0x1f) << 5); + i = __fex_te_needed(thr_handlers, fsr); + __fenv_set_te(fsr, i); + uap->uc_mcontext.fpregs.fpu_fsr = fsr; + return; + +not_ieee: + /* revert to the saved handler (if any) */ + mutex_lock(&hdlr_lock); + act = oact; + mutex_unlock(&hdlr_lock); + switch ((unsigned long)act.sa_handler) { + case (unsigned long)SIG_DFL: + /* simulate trap with no handler installed */ + sigaction(SIGFPE, &act, NULL); + kill(getpid(), SIGFPE); + break; +#if !defined(__lint) + case (unsigned long)SIG_IGN: + break; +#endif + default: + act.sa_handler(sig, sip, uap); + } +} + +#elif defined(__x86) + +#if defined(__amd64) +#define test_sse_hw 1 +#else +extern int _sse_hw; +#define test_sse_hw _sse_hw +#endif + +#if !defined(REG_PC) +#define REG_PC EIP +#endif + +/* +* If a handling mode is in effect, apply it; otherwise invoke the +* saved handler +*/ +static void +__fex_hdlr(int sig, siginfo_t *sip, ucontext_t *uap) +{ + struct fex_handler_data *thr_handlers; + struct sigaction act; + void (*handler)() = NULL, (*simd_handler[4])(); + int mode, simd_mode[4], i, len, accrued, *ap; + unsigned int cwsw, oldcwsw, mxcsr, oldmxcsr; + enum fex_exception e, simd_e[4]; + fex_info_t info, simd_info[4]; + unsigned long addr; + siginfo_t osip = *sip; + sseinst_t inst; + + /* check for an exception caused by an SSE instruction */ + if (!(uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.status & 0x80)) { + len = __fex_parse_sse(uap, &inst); + if (len == 0) + goto not_ieee; + + /* disable all traps and clear flags */ + __fenv_getcwsw(&oldcwsw); + cwsw = (oldcwsw & ~0x3f) | 0x003f0000; + __fenv_setcwsw(&cwsw); + __fenv_getmxcsr(&oldmxcsr); + mxcsr = (oldmxcsr & ~0x3f) | 0x1f80; + __fenv_setmxcsr(&mxcsr); + + if ((int)inst.op & SIMD) { + __fex_get_simd_op(uap, &inst, simd_e, simd_info); + + thr_handlers = __fex_get_thr_handlers(); + addr = (unsigned long)uap->uc_mcontext.gregs[REG_PC]; + accrued = uap->uc_mcontext.fpregs.fp_reg_set. 
+ fpchip_state.mxcsr; + + e = (enum fex_exception)-1; + mode = FEX_NONSTOP; + for (i = 0; i < 4; i++) { + if ((int)simd_e[i] < 0) + continue; + + e = simd_e[i]; + simd_mode[i] = FEX_NOHANDLER; + simd_handler[i] = oact.sa_handler; + if (thr_handlers && + thr_handlers[(int)e].__mode != + FEX_NOHANDLER) { + simd_mode[i] = + thr_handlers[(int)e].__mode; + simd_handler[i] = + thr_handlers[(int)e].__handler; + } + accrued &= ~te_bit[(int)e]; + switch (simd_mode[i]) { + case FEX_ABORT: + mode = FEX_ABORT; + break; + case FEX_SIGNAL: + if (mode != FEX_ABORT) + mode = FEX_SIGNAL; + handler = simd_handler[i]; + break; + case FEX_NOHANDLER: + if (mode != FEX_ABORT && mode != + FEX_SIGNAL) + mode = FEX_NOHANDLER; + break; + } + } + if (e == (enum fex_exception)-1) { + __fenv_setcwsw(&oldcwsw); + __fenv_setmxcsr(&oldmxcsr); + goto not_ieee; + } + accrued |= uap->uc_mcontext.fpregs.fp_reg_set. + fpchip_state.status; + ap = __fex_accrued(); + accrued |= *ap; + accrued &= 0x3d; + + for (i = 0; i < 4; i++) { + if ((int)simd_e[i] < 0) + continue; + + __fex_mklog(uap, (char *)addr, accrued, + simd_e[i], simd_mode[i], + (void *)simd_handler[i]); + } + + if (mode == FEX_NOHANDLER) { + __fenv_setcwsw(&oldcwsw); + __fenv_setmxcsr(&oldmxcsr); + goto not_ieee; + } else if (mode == FEX_ABORT) { + abort(); + } else if (mode == FEX_SIGNAL) { + __fenv_setcwsw(&oldcwsw); + __fenv_setmxcsr(&oldmxcsr); + handler(sig, &osip, uap); + return; + } + + *ap = 0; + for (i = 0; i < 4; i++) { + if ((int)simd_e[i] < 0) + continue; + + if (simd_mode[i] == FEX_CUSTOM) { + handler(1 << (int)simd_e[i], + &simd_info[i]); + __fenv_setcwsw(&cwsw); + __fenv_setmxcsr(&mxcsr); + } + } + + __fex_st_simd_result(uap, &inst, simd_e, simd_info); + for (i = 0; i < 4; i++) { + if ((int)simd_e[i] < 0) + continue; + + accrued |= simd_info[i].flags; + } + + if ((int)inst.op & INTREG) { + /* set MMX mode */ +#if defined(__amd64) + uap->uc_mcontext.fpregs.fp_reg_set. 
+ fpchip_state.sw &= ~0x3800; + uap->uc_mcontext.fpregs.fp_reg_set. + fpchip_state.fctw = 0; +#else + uap->uc_mcontext.fpregs.fp_reg_set. + fpchip_state.state[1] &= ~0x3800; + uap->uc_mcontext.fpregs.fp_reg_set. + fpchip_state.state[2] = 0; +#endif + } + } else { + e = __fex_get_sse_op(uap, &inst, &info); + if ((int)e < 0) { + __fenv_setcwsw(&oldcwsw); + __fenv_setmxcsr(&oldmxcsr); + goto not_ieee; + } + + mode = FEX_NOHANDLER; + handler = oact.sa_handler; + thr_handlers = __fex_get_thr_handlers(); + if (thr_handlers && thr_handlers[(int)e].__mode != + FEX_NOHANDLER) { + mode = thr_handlers[(int)e].__mode; + handler = thr_handlers[(int)e].__handler; + } + + addr = (unsigned long)uap->uc_mcontext.gregs[REG_PC]; + accrued = uap->uc_mcontext.fpregs.fp_reg_set. + fpchip_state.mxcsr & ~te_bit[(int)e]; + accrued |= uap->uc_mcontext.fpregs.fp_reg_set. + fpchip_state.status; + ap = __fex_accrued(); + accrued |= *ap; + accrued &= 0x3d; + __fex_mklog(uap, (char *)addr, accrued, e, mode, + (void *)handler); + + if (mode == FEX_NOHANDLER) { + __fenv_setcwsw(&oldcwsw); + __fenv_setmxcsr(&oldmxcsr); + goto not_ieee; + } else if (mode == FEX_ABORT) { + abort(); + } else if (mode == FEX_SIGNAL) { + __fenv_setcwsw(&oldcwsw); + __fenv_setmxcsr(&oldmxcsr); + handler(sig, &osip, uap); + return; + } else if (mode == FEX_CUSTOM) { + *ap = 0; + if (addr >= (unsigned long)feraiseexcept && + addr < (unsigned long)fetestexcept) { + info.op = fex_other; + info.op1.type = info.op2.type = + info.res.type = fex_nodata; + } + handler(1 << (int)e, &info); + __fenv_setcwsw(&cwsw); + __fenv_setmxcsr(&mxcsr); + } + + __fex_st_sse_result(uap, &inst, e, &info); + accrued |= info.flags; + +#if defined(__amd64) + /* + * In 64-bit mode, the 32-bit convert-to-integer + * instructions zero the upper 32 bits of the + * destination. (We do this here and not in + * __fex_st_sse_result because __fex_st_sse_result + * can be called from __fex_st_simd_result, too.) 
+ */ + if (inst.op == cvtss2si || inst.op == cvttss2si || + inst.op == cvtsd2si || inst.op == cvttsd2si) + inst.op1->i[1] = 0; +#endif + } + + /* advance the pc past the SSE instruction */ + uap->uc_mcontext.gregs[REG_PC] += len; + goto update_state; + } + + /* determine which exception occurred */ + __fex_get_x86_exc(sip, uap); + switch (sip->si_code) { + case FPE_FLTDIV: + e = fex_division; + break; + case FPE_FLTOVF: + e = fex_overflow; + break; + case FPE_FLTUND: + e = fex_underflow; + break; + case FPE_FLTRES: + e = fex_inexact; + break; + case FPE_FLTINV: + if ((int)(e = __fex_get_invalid_type(sip, uap)) < 0) + goto not_ieee; + break; + default: + /* not an IEEE exception */ + goto not_ieee; + } + + /* get the handling mode */ + mode = FEX_NOHANDLER; + handler = oact.sa_handler; /* for log; just looking, no need to lock */ + thr_handlers = __fex_get_thr_handlers(); + if (thr_handlers && thr_handlers[(int)e].__mode != FEX_NOHANDLER) { + mode = thr_handlers[(int)e].__mode; + handler = thr_handlers[(int)e].__handler; + } + + /* make an entry in the log of retro. diag. if need be */ +#if defined(__amd64) + addr = (unsigned long)uap->uc_mcontext.fpregs.fp_reg_set. + fpchip_state.rip; +#else + addr = (unsigned long)uap->uc_mcontext.fpregs.fp_reg_set. + fpchip_state.state[3]; +#endif + accrued = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.status & + ~te_bit[(int)e]; + if (test_sse_hw) + accrued |= uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state. 
+ mxcsr; + ap = __fex_accrued(); + accrued |= *ap; + accrued &= 0x3d; + __fex_mklog(uap, (char *)addr, accrued, e, mode, (void *)handler); + + /* handle the exception based on the mode */ + if (mode == FEX_NOHANDLER) + goto not_ieee; + else if (mode == FEX_ABORT) + abort(); + else if (mode == FEX_SIGNAL) { + handler(sig, &osip, uap); + return; + } + + /* disable all traps and clear flags */ + __fenv_getcwsw(&cwsw); + cwsw = (cwsw & ~0x3f) | 0x003f0000; + __fenv_setcwsw(&cwsw); + if (test_sse_hw) { + __fenv_getmxcsr(&mxcsr); + mxcsr = (mxcsr & ~0x3f) | 0x1f80; + __fenv_setmxcsr(&mxcsr); + } + *ap = 0; + + /* decode the operation */ + __fex_get_op(sip, uap, &info); + + /* if a custom mode handler is installed, invoke it */ + if (mode == FEX_CUSTOM) { + /* if we got here from feraiseexcept, pass dummy info */ + if (addr >= (unsigned long)feraiseexcept && + addr < (unsigned long)fetestexcept) { + info.op = fex_other; + info.op1.type = info.op2.type = info.res.type = + fex_nodata; + } + + handler(1 << (int)e, &info); + + /* restore modes in case the user's handler changed them */ + __fenv_setcwsw(&cwsw); + if (test_sse_hw) + __fenv_setmxcsr(&mxcsr); + } + + /* stuff the result */ + __fex_st_result(sip, uap, &info); + accrued |= info.flags; + +update_state: + accrued &= 0x3d; + i = __fex_te_needed(thr_handlers, accrued); + *ap = accrued & i; +#if defined(__amd64) + uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.sw &= ~0x3d; + uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.sw |= (accrued & ~i); + uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.cw |= 0x3d; + uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.cw &= ~i; +#else + uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[1] &= ~0x3d; + uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[1] |= + (accrued & ~i); + uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[0] |= 0x3d; + uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[0] &= ~i; +#endif + if (test_sse_hw) { + 
uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.mxcsr &= ~0x3d; + uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.mxcsr |= + 0x1e80 | (accrued & ~i); + uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.mxcsr &= + ~(i << 7); + } + return; + +not_ieee: + /* revert to the saved handler (if any) */ + mutex_lock(&hdlr_lock); + act = oact; + mutex_unlock(&hdlr_lock); + switch ((unsigned long)act.sa_handler) { + case (unsigned long)SIG_DFL: + /* simulate trap with no handler installed */ + sigaction(SIGFPE, &act, NULL); + kill(getpid(), SIGFPE); + break; +#if !defined(__lint) + case (unsigned long)SIG_IGN: + break; +#endif + default: + act.sa_handler(sig, &osip, uap); + } +} + +#else +#error Unknown architecture +#endif + +/* +* Return a pointer to the thread-specific handler data, and +* initialize it if necessary +*/ +struct fex_handler_data * +__fex_get_thr_handlers() +{ + struct fex_handler_data *ptr; + unsigned long fsr; + int i, te; + + if (thr_main()) { + if (!handlers_initialized) { + /* initialize to FEX_NOHANDLER if trap is enabled, + FEX_NONSTOP if trap is disabled */ + __fenv_getfsr(&fsr); + te = (int)__fenv_get_te(fsr); + for (i = 0; i < FEX_NUM_EXC; i++) + main_handlers[i].__mode = + ((te & te_bit[i])? FEX_NOHANDLER : FEX_NONSTOP); + handlers_initialized = 1; + } + return main_handlers; + } + else { + ptr = NULL; + mutex_lock(&handlers_key_lock); + if (thr_getspecific(handlers_key, (void **)&ptr) != 0 && + thr_keycreate(&handlers_key, free) != 0) { + mutex_unlock(&handlers_key_lock); + return NULL; + } + mutex_unlock(&handlers_key_lock); + if (!ptr) { + if ((ptr = (struct fex_handler_data *) + malloc(sizeof(fex_handler_t))) == NULL) { + return NULL; + } + if (thr_setspecific(handlers_key, (void *)ptr) != 0) { + (void)free(ptr); + return NULL; + } + /* initialize to FEX_NOHANDLER if trap is enabled, + FEX_NONSTOP if trap is disabled */ + __fenv_getfsr(&fsr); + te = (int)__fenv_get_te(fsr); + for (i = 0; i < FEX_NUM_EXC; i++) + ptr[i].__mode = ((te & te_bit[i])? 
FEX_NOHANDLER : FEX_NONSTOP); + } + return ptr; + } +} + +/* +* Update the trap enable bits according to the selected modes +*/ +void +__fex_update_te() +{ + struct fex_handler_data *thr_handlers; + struct sigaction act, tmpact; + sigset_t blocked; + unsigned long fsr; + int te; + + /* determine which traps are needed */ + thr_handlers = __fex_get_thr_handlers(); + __fenv_getfsr(&fsr); + te = __fex_te_needed(thr_handlers, fsr); + + /* install __fex_hdlr as necessary */ + if (!hdlr_installed && te) { + act.sa_handler = __fex_hdlr; + sigemptyset(&act.sa_mask); + act.sa_flags = SA_SIGINFO; + sigaction(SIGFPE, &act, &tmpact); + if (tmpact.sa_handler != __fex_hdlr) + { + mutex_lock(&hdlr_lock); + oact = tmpact; + mutex_unlock(&hdlr_lock); + } + hdlr_installed = 1; + } + + /* set the new trap enable bits (only if SIGFPE is not blocked) */ + if (sigprocmask(0, NULL, &blocked) == 0 && + !sigismember(&blocked, SIGFPE)) { + __fenv_set_te(fsr, te); + __fenv_setfsr(&fsr); + } + + /* synchronize with libmtsk */ + __mt_fex_sync = __fex_sync_with_libmtsk; + +#ifdef LIBM_MT_FEX_SYNC + /* synchronize with other projects */ + __libm_mt_fex_sync = __fex_sync_with_threads; +#endif +} diff --git a/usr/src/lib/libm/common/m9x/__fex_i386.c b/usr/src/lib/libm/common/m9x/__fex_i386.c new file mode 100644 index 0000000000..190c5a042a --- /dev/null +++ b/usr/src/lib/libm/common/m9x/__fex_i386.c @@ -0,0 +1,1693 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include "fenv_synonyms.h" +#include <stdio.h> +#include <unistd.h> +#include <stdlib.h> +#include <string.h> +#include <signal.h> +#include <siginfo.h> +#include <ucontext.h> +#include <thread.h> +#include <math.h> +#if defined(__SUNPRO_C) +#include <sunmath.h> +#endif +#include <fenv.h> +#include "fex_handler.h" +#include "fenv_inlines.h" + +#if defined(__amd64) +#define test_sse_hw 1 +#else +/* + * The following variable lives in libc on Solaris 10, where it + * gets set to a nonzero value at startup time on systems with SSE. 
+ */
+int _sse_hw = 0;
+#pragma weak _sse_hw
+/* parenthesized so the expansion stays one operand wherever it is used */
+#define test_sse_hw (&_sse_hw && _sse_hw)
+#endif
+
+/* accrued-flag word handed out when thr_main() is true */
+static int accrued = 0;
+/* thread-specific-data key for the heap-allocated word of other threads */
+static thread_key_t accrued_key;
+static mutex_t accrued_key_lock = DEFAULTMUTEX;
+
+/*
+ * Return a pointer to the calling thread's accrued exception flag word.
+ *
+ * When thr_main() is true this is the static `accrued'.  Otherwise the
+ * word is looked up under accrued_key; on first use it is malloc'ed,
+ * zeroed, and registered with thr_setspecific (the key is created with
+ * free() as its destructor).
+ *
+ * Returns NULL if the key can be neither read nor created, if malloc
+ * fails, or if thr_setspecific fails.  Callers must check for NULL.
+ */
+int *
+__fex_accrued(void)
+{
+	int *p = NULL;
+
+	if (thr_main())
+		return &accrued;
+
+	/*
+	 * A failed lookup is retried as a key creation; give up only
+	 * when both the lookup and the creation fail.
+	 */
+	mutex_lock(&accrued_key_lock);
+	if (thr_getspecific(accrued_key, (void **)&p) != 0 &&
+	    thr_keycreate(&accrued_key, free) != 0) {
+		mutex_unlock(&accrued_key_lock);
+		return NULL;
+	}
+	mutex_unlock(&accrued_key_lock);
+
+	if (p == NULL) {
+		/* first use on this thread: allocate and register the word */
+		if ((p = malloc(sizeof (*p))) == NULL)
+			return NULL;
+		if (thr_setspecific(accrued_key, p) != 0) {
+			free(p);
+			return NULL;
+		}
+		*p = 0;
+	}
+	return p;
+}
+
+/*
+ * Build the combined status value in *fsr from the control/status word
+ * pair read by __fenv_getcwsw, the low exception flags of mxcsr (when
+ * SSE is present) and the per-thread accrued flags, then flip bits
+ * 0x003f0000 before storing the result.
+ *
+ * If the per-thread accrued word is unavailable (__fex_accrued()
+ * returned NULL) it is treated as holding no flags.
+ */
+void
+__fenv_getfsr(unsigned long *fsr)
+{
+	unsigned int	cwsw, mxcsr;
+	int		*ap;
+
+	__fenv_getcwsw(&cwsw);
+	/* clear reserved bits for no particularly good reason */
+	cwsw &= ~0xe0c00000u;
+	if (test_sse_hw) {
+		/* pick up exception flags (excluding denormal operand
+		   flag) from mxcsr */
+		__fenv_getmxcsr(&mxcsr);
+		cwsw |= (mxcsr & 0x3d);
+	}
+	/* fold in the saved flags; skip them rather than crash on NULL */
+	ap = __fex_accrued();
+	if (ap != NULL)
+		cwsw |= *ap;
+	*fsr = cwsw ^ 0x003f0000u;
+}
+
+/*
+ * Install the state described by *fsr: stash the flag bits selected by
+ * __fenv_get_te() in the per-thread accrued word, strip them from the
+ * value, flip bits 0x003f0000, mirror the rounding, mask and flag bits
+ * into mxcsr when SSE is present, and finally load the control/status
+ * word pair.
+ *
+ * If the per-thread accrued word is unavailable (__fex_accrued()
+ * returned NULL) the selected flags are still stripped from the value
+ * that is loaded, but they are not saved anywhere.
+ */
+void
+__fenv_setfsr(const unsigned long *fsr)
+{
+	unsigned int	cwsw, mxcsr;
+	int		te;
+	int		*ap;
+
+	/* save accrued exception flags corresponding to enabled exceptions */
+	cwsw = (unsigned int)*fsr;
+	te = __fenv_get_te(cwsw);
+	ap = __fex_accrued();
+	if (ap != NULL)
+		*ap = cwsw & te;
+	cwsw = (cwsw & ~te) ^ 0x003f0000;
+	if (test_sse_hw) {
+		/* propagate rounding direction, masks, and exception flags
+		   (excluding denormal operand mask and flag) to mxcsr */
+		__fenv_getmxcsr(&mxcsr);
+		mxcsr = (mxcsr & ~0x7ebd) | ((cwsw >> 13) & 0x6000) |
+		    ((cwsw >> 9) & 0x1e80) | (cwsw & 0x3d);
+		__fenv_setmxcsr(&mxcsr);
+	}
+	__fenv_setcwsw(&cwsw);
+}
+
+/* Offsets into the fp environment save area (assumes 32-bit protected mode) */
+#define CW 0 /* control word */
+#define SW 1 /* status word */
+#define TW 2 /* tag word */
+#define IP 3 /* instruction pointer */
+#define OP 4 /* opcode */
+#define EA 5 /*
operand address */ + +/* macro for accessing fp registers in the save area */ +#if defined(__amd64) +#define fpreg(u,x) *(long double *)(10*(x)+(char*)&(u)->uc_mcontext.fpregs.fp_reg_set.fpchip_state.st) +#else +#define fpreg(u,x) *(long double *)(10*(x)+(char*)&(u)->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[7]) +#endif + +/* +* Fix sip->si_code; the Solaris x86 kernel can get it wrong +*/ +void +__fex_get_x86_exc(siginfo_t *sip, ucontext_t *uap) +{ + unsigned sw, cw; + + sw = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.status; +#if defined(__amd64) + cw = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.cw; +#else + cw = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[CW]; +#endif + if ((sw & FE_INVALID) && !(cw & (1 << fp_trap_invalid))) + /* store 0 for stack fault, FPE_FLTINV for IEEE invalid op */ + sip->si_code = ((sw & 0x40)? 0 : FPE_FLTINV); + else if ((sw & FE_DIVBYZERO) && !(cw & (1 << fp_trap_division))) + sip->si_code = FPE_FLTDIV; + else if ((sw & FE_OVERFLOW) && !(cw & (1 << fp_trap_overflow))) + sip->si_code = FPE_FLTOVF; + else if ((sw & FE_UNDERFLOW) && !(cw & (1 << fp_trap_underflow))) + sip->si_code = FPE_FLTUND; + else if ((sw & FE_INEXACT) && !(cw & (1 << fp_trap_inexact))) + sip->si_code = FPE_FLTRES; + else + sip->si_code = 0; +} + +static enum fp_class_type +my_fp_classf(float *x) +{ + int i = *(int*)x & ~0x80000000; + + if (i < 0x7f800000) { + if (i < 0x00800000) + return ((i == 0)? fp_zero : fp_subnormal); + return fp_normal; + } + else if (i == 0x7f800000) + return fp_infinity; + else if (i & 0x400000) + return fp_quiet; + else + return fp_signaling; +} + +static enum fp_class_type +my_fp_class(double *x) +{ + int i = *(1+(int*)x) & ~0x80000000; + + if (i < 0x7ff00000) { + if (i < 0x00100000) + return (((i | *(int*)x) == 0)? 
fp_zero : fp_subnormal); + return fp_normal; + } + else if (i == 0x7ff00000 && *(int*)x == 0) + return fp_infinity; + else if (i & 0x80000) + return fp_quiet; + else + return fp_signaling; +} + +static enum fp_class_type +my_fp_classl(long double *x) +{ + int i = *(2+(int*)x) & 0x7fff; + + if (i < 0x7fff) { + if (i < 1) { + if (*(1+(int*)x) < 0) return fp_normal; /* pseudo-denormal */ + return (((*(1+(int*)x) | *(int*)x) == 0)? + fp_zero : fp_subnormal); + } + return ((*(1+(int*)x) < 0)? fp_normal : + (enum fp_class_type) -1); /* unsupported format */ + } + else if (*(1+(int*)x) == 0x80000000 && *(int*)x == 0) + return fp_infinity; + else if (*(1+(unsigned*)x) >= 0xc0000000) + return fp_quiet; + else if (*(1+(int*)x) < 0) + return fp_signaling; + else + return (enum fp_class_type) -1; /* unsupported format */ +} + +/* +* Determine which type of invalid operation exception occurred +*/ +enum fex_exception +__fex_get_invalid_type(siginfo_t *sip, ucontext_t *uap) +{ + unsigned op; + unsigned long ea; + enum fp_class_type t1, t2; + + /* get the opcode and data address */ +#if defined(__amd64) + op = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.fop >> 16; + ea = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.rdp; +#else + op = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[OP] >> 16; + ea = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[EA]; +#endif + + /* if the instruction is fld, the source must be snan (it can't be + an unsupported format, since fldt doesn't raise any exceptions) */ + switch (op & 0x7f8) { + case 0x100: + case 0x140: + case 0x180: + case 0x500: + case 0x540: + case 0x580: + return fex_inv_snan; + } + + /* otherwise st is one of the operands; see if it's snan */ + t1 = my_fp_classl(&fpreg(uap, 0)); + if (t1 == fp_signaling) + return fex_inv_snan; + else if (t1 == (enum fp_class_type) -1) + return (enum fex_exception) -1; + + /* determine the class of the second operand if there is one */ + t2 = fp_normal; + switch (op & 0x7e0) { + 
case 0x600: + case 0x620: + case 0x640: + case 0x660: + case 0x680: + case 0x6a0: + /* short memory operand */ + if (!ea) + return (enum fex_exception) -1; + if (*(short *)ea == 0) + t2 = fp_zero; + break; + + case 0x200: + case 0x220: + case 0x240: + case 0x260: + case 0x280: + case 0x2a0: + /* int memory operand */ + if (!ea) + return (enum fex_exception) -1; + if (*(int *)ea == 0) + t2 = fp_zero; + break; + + case 0x000: + case 0x020: + case 0x040: + case 0x060: + case 0x080: + case 0x0a0: + /* single precision memory operand */ + if (!ea) + return (enum fex_exception) -1; + t2 = my_fp_classf((float *)ea); + break; + + case 0x400: + case 0x420: + case 0x440: + case 0x460: + case 0x480: + case 0x4a0: + /* double precision memory operand */ + if (!ea) + return (enum fex_exception) -1; + t2 = my_fp_class((double *)ea); + break; + + case 0x0c0: + case 0x0e0: + case 0x3e0: + case 0x4c0: + case 0x4e0: + case 0x5e0: + case 0x6c0: + case 0x6e0: + case 0x7e0: + /* register operand determined by opcode */ + switch (op & 0x7f8) { + case 0x3e0: + case 0x3f8: + case 0x5f0: + case 0x5f8: + case 0x7e0: + case 0x7f8: + /* weed out nonexistent opcodes */ + break; + + default: + t2 = my_fp_classl(&fpreg(uap, op & 7)); + } + break; + + case 0x1e0: + case 0x2e0: + /* special forms */ + switch (op) { + case 0x1f1: /* fyl2x */ + case 0x1f3: /* fpatan */ + case 0x1f5: /* fprem1 */ + case 0x1f8: /* fprem */ + case 0x1f9: /* fyl2xp1 */ + case 0x1fd: /* fscale */ + case 0x2e9: /* fucompp */ + t2 = my_fp_classl(&fpreg(uap, 1)); + break; + } + break; + } + + /* see if the second op is snan */ + if (t2 == fp_signaling) + return fex_inv_snan; + else if (t2 == (enum fp_class_type) -1) + return (enum fex_exception) -1; + + /* determine the type of operation */ + switch (op & 0x7f8) { + case 0x000: + case 0x020: + case 0x028: + case 0x040: + case 0x060: + case 0x068: + case 0x080: + case 0x0a0: + case 0x0a8: + case 0x0c0: + case 0x0e0: + case 0x0e8: + case 0x400: + case 0x420: + case 0x428: + 
case 0x440: + case 0x460: + case 0x468: + case 0x480: + case 0x4a0: + case 0x4a8: + case 0x4c0: + case 0x4e0: + case 0x4e8: + case 0x6c0: + case 0x6e0: + case 0x6e8: + /* fadd, fsub, fsubr */ + if (t1 == fp_infinity && t2 == fp_infinity) + return fex_inv_isi; + break; + + case 0x008: + case 0x048: + case 0x088: + case 0x0c8: + case 0x208: + case 0x248: + case 0x288: + case 0x408: + case 0x448: + case 0x488: + case 0x4c8: + case 0x608: + case 0x648: + case 0x688: + case 0x6c8: + /* fmul */ + if ((t1 == fp_zero && t2 == fp_infinity) || (t2 == fp_zero && + t1 == fp_infinity)) + return fex_inv_zmi; + break; + + case 0x030: + case 0x038: + case 0x070: + case 0x078: + case 0x0b0: + case 0x0b8: + case 0x0f0: + case 0x0f8: + case 0x230: + case 0x238: + case 0x270: + case 0x278: + case 0x2b0: + case 0x2b8: + case 0x430: + case 0x438: + case 0x470: + case 0x478: + case 0x4b0: + case 0x4b8: + case 0x4f0: + case 0x4f8: + case 0x630: + case 0x638: + case 0x670: + case 0x678: + case 0x6b0: + case 0x6b8: + case 0x6f0: + case 0x6f8: + /* fdiv */ + if (t1 == fp_zero && t2 == fp_zero) + return fex_inv_zdz; + else if (t1 == fp_infinity && t2 == fp_infinity) + return fex_inv_idi; + break; + + case 0x1f0: + case 0x1f8: + /* fsqrt, other special ops */ + return fex_inv_sqrt; + + case 0x010: + case 0x018: + case 0x050: + case 0x058: + case 0x090: + case 0x098: + case 0x0d0: + case 0x0d8: + case 0x210: + case 0x218: + case 0x250: + case 0x258: + case 0x290: + case 0x298: + case 0x2e8: + case 0x3f0: + case 0x410: + case 0x418: + case 0x450: + case 0x458: + case 0x490: + case 0x498: + case 0x4d0: + case 0x4d8: + case 0x5e0: + case 0x5e8: + case 0x610: + case 0x618: + case 0x650: + case 0x658: + case 0x690: + case 0x698: + case 0x6d0: + case 0x6d8: + case 0x7f0: + /* fcom */ + if (t1 == fp_quiet || t2 == fp_quiet) + return fex_inv_cmp; + break; + + case 0x1e0: + /* ftst */ + if (op == 0x1e4 && t1 == fp_quiet) + return fex_inv_cmp; + break; + + case 0x310: + case 0x318: + case 0x350: + case 
0x358: + case 0x390: + case 0x398: + case 0x710: + case 0x718: + case 0x730: + case 0x738: + case 0x750: + case 0x758: + case 0x770: + case 0x778: + case 0x790: + case 0x798: + case 0x7b0: + case 0x7b8: + /* fist, fbst */ + return fex_inv_int; + } + + return (enum fex_exception) -1; +} + +/* scale factors for exponent unwrapping */ +static const long double + two12288 = 1.139165225263043370845938579315932009e+3699l, /* 2^12288 */ + twom12288 = 8.778357852076208839765066529179033145e-3700l, /* 2^-12288 */ + twom12288mulp = 8.778357852076208839289190796475222545e-3700l; + /* (")*(1-2^-64) */ + +/* inline templates */ +extern long double f2xm1(long double); +extern long double fyl2x(long double, long double); +extern long double fptan(long double); +extern long double fpatan(long double, long double); +extern long double fxtract(long double); +extern long double fprem1(long double, long double); +extern long double fprem(long double, long double); +extern long double fyl2xp1(long double, long double); +extern long double fsqrt(long double); +extern long double fsincos(long double); +extern long double frndint(long double); +extern long double fscale(long double, long double); +extern long double fsin(long double); +extern long double fcos(long double); + +/* +* Get the operands, generate the default untrapped result with +* exceptions, and set a code indicating the type of operation +*/ +void +__fex_get_op(siginfo_t *sip, ucontext_t *uap, fex_info_t *info) +{ + fex_numeric_t t; + long double op2v, x; + unsigned int cwsw, ex, sw, op; + unsigned long ea; + volatile int c; + + /* get the exception type, status word, opcode, and data address */ + ex = sip->si_code; + sw = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.status; +#if defined(__amd64) + op = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.fop >> 16; + ea = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.rdp; +#else + op = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[OP] >> 16; + ea = 
uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[EA]; +#endif + + /* initialize res to the default untrapped result and ex to the + corresponding flags (assume trapping is disabled and flags + are clear) */ + + /* single operand instructions */ + info->op = fex_cnvt; + info->op2.type = fex_nodata; + switch (op & 0x7f8) { + /* load instructions */ + case 0x100: + case 0x140: + case 0x180: + if (!ea) { + info->op = fex_other; + info->op1.type = info->op2.type = info->res.type = fex_nodata; + info->flags = 0; + return; + } + info->op1.type = fex_float; + info->op1.val.f = *(float *)ea; + info->res.type = fex_ldouble; + info->res.val.q = (long double) info->op1.val.f; + goto done; + + case 0x500: + case 0x540: + case 0x580: + if (!ea) { + info->op = fex_other; + info->op1.type = info->op2.type = info->res.type = fex_nodata; + info->flags = 0; + return; + } + info->op1.type = fex_double; + info->op1.val.d = *(double *)ea; + info->res.type = fex_ldouble; + info->res.val.q = (long double) info->op1.val.d; + goto done; + + /* store instructions */ + case 0x110: + case 0x118: + case 0x150: + case 0x158: + case 0x190: + case 0x198: + info->res.type = fex_float; + if (ex == FPE_FLTRES && (op & 8) != 0) { + /* inexact, stack popped */ + if (!ea) { + info->op = fex_other; + info->op1.type = info->op2.type = info->res.type = fex_nodata; + info->flags = 0; + return; + } + info->op1.type = fex_nodata; + info->res.val.f = *(float *)ea; + info->flags = FE_INEXACT; + return; + } + info->op1.type = fex_ldouble; + info->op1.val.q = fpreg(uap, 0); + info->res.val.f = (float) info->op1.val.q; + goto done; + + case 0x310: + case 0x318: + case 0x350: + case 0x358: + case 0x390: + case 0x398: + info->res.type = fex_int; + if (ex == FPE_FLTRES && (op & 8) != 0) { + /* inexact, stack popped */ + if (!ea) { + info->op = fex_other; + info->op1.type = info->op2.type = info->res.type = fex_nodata; + info->flags = 0; + return; + } + info->op1.type = fex_nodata; + info->res.val.i = *(int *)ea; 
+ info->flags = FE_INEXACT; + return; + } + info->op1.type = fex_ldouble; + info->op1.val.q = fpreg(uap, 0); + info->res.val.i = (int) info->op1.val.q; + goto done; + + case 0x510: + case 0x518: + case 0x550: + case 0x558: + case 0x590: + case 0x598: + info->res.type = fex_double; + if (ex == FPE_FLTRES && (op & 8) != 0) { + /* inexact, stack popped */ + if (!ea) { + info->op = fex_other; + info->op1.type = info->op2.type = info->res.type = fex_nodata; + info->flags = 0; + return; + } + info->op1.type = fex_nodata; + info->res.val.d = *(double *)ea; + info->flags = FE_INEXACT; + return; + } + info->op1.type = fex_ldouble; + info->op1.val.q = fpreg(uap, 0); + info->res.val.d = (double) info->op1.val.q; + goto done; + + case 0x710: + case 0x718: + case 0x750: + case 0x758: + case 0x790: + case 0x798: + info->res.type = fex_int; + if (ex == FPE_FLTRES && (op & 8) != 0) { + /* inexact, stack popped */ + if (!ea) { + info->op = fex_other; + info->op1.type = info->op2.type = info->res.type = fex_nodata; + info->flags = 0; + return; + } + info->op1.type = fex_nodata; + info->res.val.i = *(short *)ea; + info->flags = FE_INEXACT; + return; + } + info->op1.type = fex_ldouble; + info->op1.val.q = fpreg(uap, 0); + info->res.val.i = (short) info->op1.val.q; + goto done; + + case 0x730: + case 0x770: + case 0x7b0: + /* fbstp; don't bother */ + info->op = fex_other; + info->op1.type = info->res.type = fex_nodata; + info->flags = 0; + return; + + case 0x738: + case 0x778: + case 0x7b8: + info->res.type = fex_llong; + if (ex == FPE_FLTRES) { + /* inexact, stack popped */ + if (!ea) { + info->op = fex_other; + info->op1.type = info->op2.type = info->res.type = fex_nodata; + info->flags = 0; + return; + } + info->op1.type = fex_nodata; + info->res.val.l = *(long long *)ea; + info->flags = FE_INEXACT; + return; + } + info->op1.type = fex_ldouble; + info->op1.val.q = fpreg(uap, 0); + info->res.val.l = (long long) info->op1.val.q; + goto done; + } + + /* all other ops (except compares) 
have destinations on the stack + so overflow, underflow, and inexact will stomp their operands */ + if (ex == FPE_FLTOVF || ex == FPE_FLTUND || ex == FPE_FLTRES) { + /* find the trapped result */ + info->op1.type = info->op2.type = fex_nodata; + info->res.type = fex_ldouble; + switch (op & 0x7f8) { + case 0x1f0: + /* fptan pushes 1.0 afterward, so result is in st(1) */ + info->res.val.q = ((op == 0x1f2)? fpreg(uap, 1) : + fpreg(uap, 0)); + break; + + case 0x4c0: + case 0x4c8: + case 0x4e0: + case 0x4e8: + case 0x4f0: + case 0x4f8: + info->res.val.q = fpreg(uap, op & 7); + break; + + case 0x6c0: + case 0x6c8: + case 0x6e0: + case 0x6e8: + case 0x6f0: + case 0x6f8: + /* stack was popped afterward */ + info->res.val.q = fpreg(uap, (op - 1) & 7); + break; + + default: + info->res.val.q = fpreg(uap, 0); + } + + /* reconstruct default untrapped result */ + if (ex == FPE_FLTOVF) { + /* generate an overflow with the sign of the result */ + x = two12288; + *(4+(short*)&x) |= (*(4+(short*)&info->res.val.q) & 0x8000); + info->res.val.q = x * two12288; + info->flags = FE_OVERFLOW | FE_INEXACT; + __fenv_getcwsw(&cwsw); + cwsw &= ~FE_ALL_EXCEPT; + __fenv_setcwsw(&cwsw); + } + else if (ex == FPE_FLTUND) { + /* undo the scaling; we can't distinguish a chopped result + from an exact one without futzing around to trap all in- + exact exceptions so as to keep the flag clear, so we just + punt */ + if (sw & 0x200) /* result was rounded up */ + info->res.val.q = (info->res.val.q * twom12288) * twom12288mulp; + else + info->res.val.q = (info->res.val.q * twom12288) * twom12288; + __fenv_getcwsw(&cwsw); + info->flags = (cwsw & FE_INEXACT) | FE_UNDERFLOW; + cwsw &= ~FE_ALL_EXCEPT; + __fenv_setcwsw(&cwsw); + } + else + info->flags = FE_INEXACT; + + /* determine the operation code */ + switch (op) { + case 0x1f0: /* f2xm1 */ + case 0x1f1: /* fyl2x */ + case 0x1f2: /* fptan */ + case 0x1f3: /* fpatan */ + case 0x1f5: /* fprem1 */ + case 0x1f8: /* fprem */ + case 0x1f9: /* fyl2xp1 */ + case 
0x1fb: /* fsincos */ + case 0x1fc: /* frndint */ + case 0x1fd: /* fscale */ + case 0x1fe: /* fsin */ + case 0x1ff: /* fcos */ + info->op = fex_other; + return; + + case 0x1fa: /* fsqrt */ + info->op = fex_sqrt; + return; + } + + info->op = fex_other; + switch (op & 0x7c0) { + case 0x000: + case 0x040: + case 0x080: + case 0x0c0: + case 0x200: + case 0x240: + case 0x280: + case 0x400: + case 0x440: + case 0x480: + case 0x4c0: + case 0x600: + case 0x640: + case 0x680: + case 0x6c0: + switch (op & 0x38) { + case 0x00: + info->op = fex_add; + break; + + case 0x08: + info->op = fex_mul; + break; + + case 0x20: + case 0x28: + info->op = fex_sub; + break; + + case 0x30: + case 0x38: + info->op = fex_div; + break; + } + } + return; + } + + /* for other exceptions, the operands are preserved, so we can + just emulate the operation with traps disabled */ + + /* one operand is always in st */ + info->op1.type = fex_ldouble; + info->op1.val.q = fpreg(uap, 0); + + /* oddball instructions */ + info->op = fex_other; + switch (op) { + case 0x1e4: /* ftst */ + info->op = fex_cmp; + info->op2.type = fex_ldouble; + info->op2.val.q = 0.0l; + info->res.type = fex_nodata; + c = (info->op1.val.q < info->op2.val.q); + goto done; + + case 0x1f0: /* f2xm1 */ + info->res.type = fex_ldouble; + info->res.val.q = f2xm1(info->op1.val.q); + goto done; + + case 0x1f1: /* fyl2x */ + info->op2.type = fex_ldouble; + info->op2.val.q = fpreg(uap, 1); + info->res.type = fex_ldouble; + info->res.val.q = fyl2x(info->op1.val.q, info->op2.val.q); + goto done; + + case 0x1f2: /* fptan */ + info->res.type = fex_ldouble; + info->res.val.q = fptan(info->op1.val.q); + goto done; + + case 0x1f3: /* fpatan */ + info->op2.type = fex_ldouble; + info->op2.val.q = fpreg(uap, 1); + info->res.type = fex_ldouble; + info->res.val.q = fpatan(info->op1.val.q, info->op2.val.q); + goto done; + + case 0x1f4: /* fxtract */ + info->res.type = fex_ldouble; + info->res.val.q = fxtract(info->op1.val.q); + goto done; + + case 0x1f5: 
/* fprem1 */ + info->op2.type = fex_ldouble; + info->op2.val.q = fpreg(uap, 1); + info->res.type = fex_ldouble; + info->res.val.q = fprem1(info->op1.val.q, info->op2.val.q); + goto done; + + case 0x1f8: /* fprem */ + info->op2.type = fex_ldouble; + info->op2.val.q = fpreg(uap, 1); + info->res.type = fex_ldouble; + info->res.val.q = fprem(info->op1.val.q, info->op2.val.q); + goto done; + + case 0x1f9: /* fyl2xp1 */ + info->op2.type = fex_ldouble; + info->op2.val.q = fpreg(uap, 1); + info->res.type = fex_ldouble; + info->res.val.q = fyl2xp1(info->op1.val.q, info->op2.val.q); + goto done; + + case 0x1fa: /* fsqrt */ + info->op = fex_sqrt; + info->res.type = fex_ldouble; + info->res.val.q = fsqrt(info->op1.val.q); + goto done; + + case 0x1fb: /* fsincos */ + info->res.type = fex_ldouble; + info->res.val.q = fsincos(info->op1.val.q); + goto done; + + case 0x1fc: /* frndint */ + info->res.type = fex_ldouble; + info->res.val.q = frndint(info->op1.val.q); + goto done; + + case 0x1fd: /* fscale */ + info->op2.type = fex_ldouble; + info->op2.val.q = fpreg(uap, 1); + info->res.type = fex_ldouble; + info->res.val.q = fscale(info->op1.val.q, info->op2.val.q); + goto done; + + case 0x1fe: /* fsin */ + info->res.type = fex_ldouble; + info->res.val.q = fsin(info->op1.val.q); + goto done; + + case 0x1ff: /* fcos */ + info->res.type = fex_ldouble; + info->res.val.q = fcos(info->op1.val.q); + goto done; + + case 0x2e9: /* fucompp */ + info->op = fex_cmp; + info->op2.type = fex_ldouble; + info->op2.val.q = fpreg(uap, 1); + info->res.type = fex_nodata; + c = (info->op1.val.q == info->op2.val.q); + goto done; + } + + /* fucom[p], fcomi[p], fucomi[p] */ + switch (op & 0x7f8) { + case 0x3e8: + case 0x5e0: + case 0x5e8: + case 0x7e8: /* unordered compares */ + info->op = fex_cmp; + info->op2.type = fex_ldouble; + info->op2.val.q = fpreg(uap, op & 7); + info->res.type = fex_nodata; + c = (info->op1.val.q == info->op2.val.q); + goto done; + + case 0x3f0: + case 0x7f0: /* ordered compares */ 
+ info->op = fex_cmp; + info->op2.type = fex_ldouble; + info->op2.val.q = fpreg(uap, op & 7); + info->res.type = fex_nodata; + c = (info->op1.val.q < info->op2.val.q); + goto done; + } + + /* all other instructions come in groups of the form + fadd, fmul, fcom, fcomp, fsub, fsubr, fdiv, fdivr */ + + /* get the second operand */ + switch (op & 0x7c0) { + case 0x000: + case 0x040: + case 0x080: + if (!ea) { + info->op = fex_other; + info->op1.type = info->op2.type = info->res.type = fex_nodata; + info->flags = 0; + return; + } + info->op2.type = fex_float; + info->op2.val.f = *(float *)ea; + op2v = (long double) info->op2.val.f; + break; + + case 0x0c0: + info->op2.type = fex_ldouble; + op2v = info->op2.val.q = fpreg(uap, op & 7); + break; + + case 0x200: + case 0x240: + case 0x280: + if (!ea) { + info->op = fex_other; + info->op1.type = info->op2.type = info->res.type = fex_nodata; + info->flags = 0; + return; + } + info->op2.type = fex_int; + info->op2.val.i = *(int *)ea; + op2v = (long double) info->op2.val.i; + break; + + case 0x400: + case 0x440: + case 0x480: + if (!ea) { + info->op = fex_other; + info->op1.type = info->op2.type = info->res.type = fex_nodata; + info->flags = 0; + return; + } + info->op2.type = fex_double; + info->op2.val.d = *(double *)ea; + op2v = (long double) info->op2.val.d; + break; + + case 0x4c0: + case 0x6c0: + info->op2.type = fex_ldouble; + info->op2.val.q = fpreg(uap, op & 7); + t = info->op1; + info->op1 = info->op2; + info->op2 = t; + op2v = info->op2.val.q; + break; + + case 0x600: + case 0x640: + case 0x680: + if (!ea) { + info->op = fex_other; + info->op1.type = info->op2.type = info->res.type = fex_nodata; + info->flags = 0; + return; + } + info->op2.type = fex_int; + info->op2.val.i = *(short *)ea; + op2v = (long double) info->op2.val.i; + break; + + default: + info->op = fex_other; + info->op1.type = info->op2.type = info->res.type = fex_nodata; + info->flags = 0; + return; + } + + /* distinguish different operations in the 
/*
 * Pop the saved x87 stack held in the signal context.
 *
 * The saved registers st(1)..st(7) are copied down one slot, the two
 * tag-word bits at the old top-of-stack position are set, and the TOP
 * field of the saved status word (mask 0x3800) is advanced by one.
 * `top' holds twice the TOP index, i.e. the bit offset used for the
 * two-bit tag shift.
 */
static void pop(ucontext_t *uap)
{
	unsigned top;

	fpreg(uap, 0) = fpreg(uap, 1);
	fpreg(uap, 1) = fpreg(uap, 2);
	fpreg(uap, 2) = fpreg(uap, 3);
	fpreg(uap, 3) = fpreg(uap, 4);
	fpreg(uap, 4) = fpreg(uap, 5);
	fpreg(uap, 5) = fpreg(uap, 6);
	fpreg(uap, 6) = fpreg(uap, 7);
#if defined(__amd64)
	/*
	 * NOTE(review): this treats fctw as a 16-bit, two-bits-per-register
	 * tag word, exactly like the 32-bit branch below.  If the amd64
	 * save area keeps the abridged (one bit per register) FXSAVE tag
	 * byte, this update is not correct -- confirm against the
	 * fpchip_state definition.
	 */
	top = (uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.sw >> 10)
		& 0xe;
	uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.fctw |= (3 << top);
	top = (top + 2) & 0xe;
	uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.sw =
		(uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.sw & ~0x3800)
		| (top << 10);
#else
	top = (uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[SW] >> 10)
		& 0xe;
	uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[TW] |= (3 << top);
	top = (top + 2) & 0xe;
	uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[SW] =
		(uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[SW] & ~0x3800)
		| (top << 10);
#endif
}

/*
 * Push x onto the saved x87 stack held in the signal context.
 *
 * The saved registers st(0)..st(6) are copied up one slot (the old
 * st(7) is discarded), x becomes the new st(0), TOP in the saved
 * status word is decremented by one, and the two tag-word bits at the
 * new top-of-stack position are cleared.
 */
static void push(long double x, ucontext_t *uap)
{
	unsigned top;

	fpreg(uap, 7) = fpreg(uap, 6);
	fpreg(uap, 6) = fpreg(uap, 5);
	fpreg(uap, 5) = fpreg(uap, 4);
	fpreg(uap, 4) = fpreg(uap, 3);
	fpreg(uap, 3) = fpreg(uap, 2);
	fpreg(uap, 2) = fpreg(uap, 1);
	fpreg(uap, 1) = fpreg(uap, 0);
	fpreg(uap, 0) = x;
#if defined(__amd64)
	/* NOTE(review): same fctw layout assumption as in pop() -- confirm */
	top = (uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.sw >> 10)
		& 0xe;
	top = (top - 2) & 0xe;
	uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.fctw &= ~(3 << top);
	uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.sw =
		(uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.sw & ~0x3800)
		| (top << 10);
#else
	top = (uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[SW] >> 10)
		& 0xe;
	top = (top - 2) & 0xe;
	uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[TW] &= ~(3 << top);
	uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[SW] =
		(uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[SW] & ~0x3800)
		| (top << 10);
#endif
}

/*
 * Scale factors for exponent wrapping.  Each factor is applied twice
 * in __fex_st_result, so a single-precision store is scaled by 2^-192
 * on overflow (fov) or 2^192 on underflow (fun), and a double-precision
 * store by 2^-1536 (dov) or 2^1536 (dun).
 */
static const float
	fun = 7.922816251e+28f,	/* 2^96 */
	fov = 1.262177448e-29f;	/* 2^-96 */
static const double
	dun = 1.552518092300708935e+231,	/* 2^768 */
	dov = 6.441148769597133308e-232;	/* 2^-768 */

/*
 * Store the specified result; if no result is given but the exception
 * is underflow or overflow, use the default trapped result.
 *
 * sip	supplies the exception code (si_code).
 * uap	saved user context; its x87 register stack, status/tag words,
 *	flags register and, for store instructions, the memory operand
 *	the saved data pointer refers to are updated in place.
 * info	info->res is the result to deliver; fex_nodata means none was
 *	supplied.
 *
 * The function dispatches on the saved x87 opcode: compares only get
 * their condition codes set to "unordered", stores write through the
 * saved effective address, and everything else writes a stack register.
 */
void
__fex_st_result(siginfo_t *sip, ucontext_t *uap, fex_info_t *info)
{
	fex_numeric_t r;
	unsigned long ex, op, ea, stack;

	/* get the exception type, opcode, and data address */
	ex = sip->si_code;
#if defined(__amd64)
	op = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.fop >> 16;
	ea = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.rdp; /*???*/
#else
	op = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[OP] >> 16;
	ea = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[EA];
#endif

	/* if the instruction is a compare, set the condition codes
	   to unordered and update the stack */
	switch (op & 0x7f8) {
	case 0x010:
	case 0x050:
	case 0x090:
	case 0x0d0:
	case 0x210:
	case 0x250:
	case 0x290:
	case 0x410:
	case 0x450:
	case 0x490:
	case 0x4d0:
	case 0x5e0:
	case 0x610:
	case 0x650:
	case 0x690:
		/* f[u]com */
		/* 0x4500 sets status-word bits 14, 10 and 8 (C3, C2, C0) */
#if defined(__amd64)
		uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.sw |= 0x4500;
#else
		uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[SW] |= 0x4500;
#endif
		return;

	case 0x018:
	case 0x058:
	case 0x098:
	case 0x0d8:
	case 0x218:
	case 0x258:
	case 0x298:
	case 0x418:
	case 0x458:
	case 0x498:
	case 0x4d8:
	case 0x5e8:
	case 0x618:
	case 0x658:
	case 0x698:
	case 0x6d0:
		/* f[u]comp */
#if defined(__amd64)
		uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.sw |= 0x4500;
#else
		uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[SW] |= 0x4500;
#endif
		pop(uap);
		return;

	case 0x2e8:
	case 0x6d8:
		/* f[u]compp */
#if defined(__amd64)
		uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.sw |= 0x4500;
#else
		uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[SW] |= 0x4500;
#endif
		pop(uap);
		pop(uap);
		return;

	case 0x1e0:
		if (op == 0x1e4) { /* ftst */
#if defined(__amd64)
			uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.sw |= 0x4500;
#else
			uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[SW] |= 0x4500;
#endif
			return;
		}
		/* other 0x1e0-group opcodes are handled further below */
		break;

	case 0x3e8:
	case 0x3f0:
		/* f[u]comi */
		/* these report through the saved flags register instead */
#if defined(__amd64)
		uap->uc_mcontext.gregs[REG_PS] |= 0x45;
#else
		uap->uc_mcontext.gregs[EFL] |= 0x45;
#endif
		return;

	case 0x7e8:
	case 0x7f0:
		/* f[u]comip */
#if defined(__amd64)
		uap->uc_mcontext.gregs[REG_PS] |= 0x45;
#else
		uap->uc_mcontext.gregs[EFL] |= 0x45;
#endif
		pop(uap);
		return;
	}

	/* if there is no result available and the exception is overflow
	   or underflow, use the wrapped result */
	r = info->res;
	if (r.type == fex_nodata) {
		if (ex == FPE_FLTOVF || ex == FPE_FLTUND) {
			/* for store instructions, do the scaling and store */
			switch (op & 0x7f8) {
			case 0x110:
			case 0x118:
			case 0x150:
			case 0x158:
			case 0x190:
			case 0x198:
				/* single precision store; no address, nothing to do */
				if (!ea)
					return;
				if (ex == FPE_FLTOVF)
					*(float *)ea = (fpreg(uap, 0) * fov) * fov;
				else
					*(float *)ea = (fpreg(uap, 0) * fun) * fun;
				/* bit 3 of the opcode selects the popping form */
				if ((op & 8) != 0)
					pop(uap);
				break;

			case 0x510:
			case 0x518:
			case 0x550:
			case 0x558:
			case 0x590:
			case 0x598:
				/* double precision store */
				if (!ea)
					return;
				if (ex == FPE_FLTOVF)
					*(double *)ea = (fpreg(uap, 0) * dov) * dov;
				else
					*(double *)ea = (fpreg(uap, 0) * dun) * dun;
				if ((op & 8) != 0)
					pop(uap);
				break;
			}
		}
#ifdef DEBUG
		else if (ex != FPE_FLTRES)
			printf("No result supplied, stack may be hosed\n");
#endif
		return;
	}

	/* otherwise convert the supplied result to the correct type,
	   put it in the destination, and update the stack as need be */

	/* store instructions */
	switch (op & 0x7f8) {
	case 0x110:
	case 0x118:
	case 0x150:
	case 0x158:
	case 0x190:
	case 0x198:
		/* destination is a float in memory */
		if (!ea)
			return;
		switch (r.type) {
		case fex_int:
			*(float *)ea = (float) r.val.i;
			break;

		case fex_llong:
			*(float *)ea = (float) r.val.l;
			break;

		case fex_float:
			*(float *)ea = r.val.f;
			break;

		case fex_double:
			*(float *)ea = (float) r.val.d;
			break;

		case fex_ldouble:
			*(float *)ea = (float) r.val.q;
			break;

		default:
			break;
		}
		/* pop only for the popping form, and not on inexact */
		if (ex != FPE_FLTRES && (op & 8) != 0)
			pop(uap);
		return;

	case 0x310:
	case 0x318:
	case 0x350:
	case 0x358:
	case 0x390:
	case 0x398:
		/* destination is an int in memory */
		if (!ea)
			return;
		switch (r.type) {
		case fex_int:
			*(int *)ea = r.val.i;
			break;

		case fex_llong:
			*(int *)ea = (int) r.val.l;
			break;

		case fex_float:
			*(int *)ea = (int) r.val.f;
			break;

		case fex_double:
			*(int *)ea = (int) r.val.d;
			break;

		case fex_ldouble:
			*(int *)ea = (int) r.val.q;
			break;

		default:
			break;
		}
		if (ex != FPE_FLTRES && (op & 8) != 0)
			pop(uap);
		return;

	case 0x510:
	case 0x518:
	case 0x550:
	case 0x558:
	case 0x590:
	case 0x598:
		/* destination is a double in memory */
		if (!ea)
			return;
		switch (r.type) {
		case fex_int:
			*(double *)ea = (double) r.val.i;
			break;

		case fex_llong:
			*(double *)ea = (double) r.val.l;
			break;

		case fex_float:
			*(double *)ea = (double) r.val.f;
			break;

		case fex_double:
			*(double *)ea = r.val.d;
			break;

		case fex_ldouble:
			*(double *)ea = (double) r.val.q;
			break;

		default:
			break;
		}
		if (ex != FPE_FLTRES && (op & 8) != 0)
			pop(uap);
		return;

	case 0x710:
	case 0x718:
	case 0x750:
	case 0x758:
	case 0x790:
	case 0x798:
		/* destination is a short in memory */
		if (!ea)
			return;
		switch (r.type) {
		case fex_int:
			*(short *)ea = (short) r.val.i;
			break;

		case fex_llong:
			*(short *)ea = (short) r.val.l;
			break;

		case fex_float:
			*(short *)ea = (short) r.val.f;
			break;

		case fex_double:
			*(short *)ea = (short) r.val.d;
			break;

		case fex_ldouble:
			*(short *)ea = (short) r.val.q;
			break;

		default:
			break;
		}
		if (ex != FPE_FLTRES && (op & 8) != 0)
			pop(uap);
		return;

	case 0x730:
	case 0x770:
	case 0x7b0:
		/* fbstp; don't bother */
		/* nothing is written to memory; only the stack is adjusted */
		if (ea && ex != FPE_FLTRES)
			pop(uap);
		return;

	case 0x738:
	case 0x778:
	case 0x7b8:
		/* destination is a long long in memory; always a popping form */
		if (!ea)
			return;
		switch (r.type) {
		case fex_int:
			*(long long *)ea = (long long) r.val.i;
			break;

		case fex_llong:
			*(long long *)ea = r.val.l;
			break;

		case fex_float:
			*(long long *)ea = (long long) r.val.f;
			break;

		case fex_double:
			*(long long *)ea = (long long) r.val.d;
			break;

		case fex_ldouble:
			*(long long *)ea = (long long) r.val.q;
			break;

		default:
			break;
		}
		if (ex != FPE_FLTRES)
			pop(uap);
		return;
	}

	/* for all other instructions, the result goes into a register */
	/* widen the local copy of the result to long double first */
	switch (r.type) {
	case fex_int:
		r.val.q = (long double) r.val.i;
		break;

	case fex_llong:
		r.val.q = (long double) r.val.l;
		break;

	case fex_float:
		r.val.q = (long double) r.val.f;
		break;

	case fex_double:
		r.val.q = (long double) r.val.d;
		break;

	default:
		break;
	}

	/* for load instructions, push the result onto the stack */
	switch (op & 0x7f8) {
	case 0x100:
	case 0x140:
	case 0x180:
	case 0x500:
	case 0x540:
	case 0x580:
		if (ea)
			push(r.val.q, uap);
		return;
	}

	/* for all other instructions, if the exception is overflow,
	   underflow, or inexact, the stack has already been updated */
	stack = (ex == FPE_FLTOVF || ex == FPE_FLTUND || ex == FPE_FLTRES);
	switch (op & 0x7f8) {
	case 0x1f0: /* oddballs */
		switch (op) {
		case 0x1f1: /* fyl2x */
		case 0x1f3: /* fpatan */
		case 0x1f9: /* fyl2xp1 */
			/* pop the stack, leaving the result in st */
			if (!stack)
				pop(uap);
			fpreg(uap, 0) = r.val.q;
			return;

		case 0x1f2: /* fptan */
			/* fptan pushes 1.0 afterward */
			if (stack)
				fpreg(uap, 1) = r.val.q;
			else {
				fpreg(uap, 0) = r.val.q;
				push(1.0L, uap);
			}
			return;

		case 0x1f4: /* fxtract */
		case 0x1fb: /* fsincos */
			/* leave the supplied result in st */
			if (stack)
				fpreg(uap, 0) = r.val.q;
			else {
				fpreg(uap, 0) = 0.0; /* punt */
				push(r.val.q, uap);
			}
			return;
		}

		/* all others leave the stack alone and the result in st */
		fpreg(uap, 0) = r.val.q;
		return;

	case 0x4c0:
	case 0x4c8:
	case 0x4e0:
	case 0x4e8:
	case 0x4f0:
	case 0x4f8:
		/* destination is st(i), i taken from the low opcode bits */
		fpreg(uap, op & 7) = r.val.q;
		return;

	case 0x6c0:
	case 0x6c8:
	case 0x6e0:
	case 0x6e8:
	case 0x6f0:
	case 0x6f8:
		/* stack is popped afterward */
		/* if already popped, the destination has moved down one slot */
		if (stack)
			fpreg(uap, (op - 1) & 7) = r.val.q;
		else {
			fpreg(uap, op & 7) = r.val.q;
			pop(uap);
		}
		return;

	default:
		fpreg(uap, 0) = r.val.q;
		return;
	}
}
contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#if defined(__sparc) +#include "fenv_synonyms.h" +#include <stdio.h> +#include <unistd.h> +#include <string.h> +#include <signal.h> +#include <siginfo.h> +#include <thread.h> +#include <ucontext.h> +#include <math.h> +#if defined(__SUNPRO_C) +#include <sunmath.h> +#endif +#include <fenv.h> + +#include "fenv_inlines.h" +#include "libm_inlines.h" + +#ifdef __sparcv9 + +#define FPreg(X) &uap->uc_mcontext.fpregs.fpu_fr.fpu_regs[X] + +#define FPREG(X) &uap->uc_mcontext.fpregs.fpu_fr.fpu_dregs[(X>>1)| \ + ((X&1)<<4)] + +#else + +#include <sys/procfs.h> + +#define FPxreg(X) &((prxregset_t*)uap->uc_mcontext.xrs.xrs_ptr)->pr_un.pr_v8p.pr_xfr.pr_regs[X] + +#define FPreg(X) &uap->uc_mcontext.fpregs.fpu_fr.fpu_regs[X] + +#define FPREG(X) ((X & 1)? 
/*
 * Classify a quad-precision value without depending on libsunmath.
 *
 * The value is examined as four 32-bit words.  The first word, with
 * its top (sign) bit masked off, is treated as the most significant
 * word: its upper 16 bits are compared against the all-ones and
 * all-zeros exponent patterns, and bit 0x8000 of it distinguishes
 * quiet from signaling NaNs.
 */
static enum fp_class_type
my_fp_classl(long double *a)
{
	int msw = *(int*)a & ~0x80000000;

	if (msw >= 0x7fff0000) {
		/* maximum exponent: infinity if every fraction bit is zero */
		if (((msw & 0xffff) | *(1+(int*)a) | *(2+(int*)a) | *(3+(int*)a)) == 0)
			return fp_infinity;
		else if (msw & 0x8000)
			return fp_quiet;
		else
			return fp_signaling;
	} else if (msw < 0x10000) {
		/* zero exponent: zero if every remaining bit is zero */
		if ((msw | *(1+(int*)a) | *(2+(int*)a) | *(3+(int*)a)) == 0)
			return fp_zero;
		else
			return fp_subnormal;
	} else
		return fp_normal;
}

/*
 * Determine which type of invalid operation exception occurred.
 *
 * The faulting instruction is taken from the saved floating point
 * queue in uap and its source operands are classified.  Returns one
 * of the fex_inv_* codes, or (enum fex_exception) -1 when the
 * instruction is not one of the recognized forms.  sip is not used.
 */
enum fex_exception
__fex_get_invalid_type(siginfo_t *sip, ucontext_t *uap)
{
	unsigned instr, opf, rs1, rs2;
	enum fp_class_type t1, t2;

	/* parse the instruction which caused the exception */
	instr = uap->uc_mcontext.fpregs.fpu_q->FQu.fpq.fpq_instr;
	opf = (instr >> 5) & 0x1ff;
	rs1 = (instr >> 14) & 0x1f;
	rs2 = instr & 0x1f;

	/* determine the classes of the operands */
	/* the low two bits of opf select the operand precision */
	switch (opf & 3) {
	case 1: /* single */
		t1 = fp_classf(*(float*)FPreg(rs1));
		t2 = fp_classf(*(float*)FPreg(rs2));
		break;

	case 2: /* double */
		t1 = fp_class(*(double*)FPREG(rs1));
		t2 = fp_class(*(double*)FPREG(rs2));
		break;

	case 3: /* quad */
		t1 = my_fp_classl((long double*)FPREG(rs1));
		t2 = my_fp_classl((long double*)FPREG(rs2));
		break;

	default: /* integer operands never cause an invalid operation */
		return (enum fex_exception) -1;
	}

	/* if rs2 is snan, return immediately */
	if (t2 == fp_signaling)
		return fex_inv_snan;

	/* determine the type of operation */
	switch ((instr >> 19) & 0x183f) {
	case 0x1034: /* add, subtract, multiply, divide, square root, convert */
		/* mask off the precision bits to get the operation */
		switch (opf & 0x1fc) {
		case 0x40:
		case 0x44: /* add or subtract */
			if (t1 == fp_signaling)
				return fex_inv_snan;
			else
				return fex_inv_isi;

		case 0x48:
		case 0x68:
		case 0x6c: /* multiply */
			if (t1 == fp_signaling)
				return fex_inv_snan;
			else
				return fex_inv_zmi;

		case 0x4c: /* divide */
			if (t1 == fp_signaling)
				return fex_inv_snan;
			else if (t1 == fp_zero)
				return fex_inv_zdz;
			else
				return fex_inv_idi;

		case 0x28: /* square root */
			return fex_inv_sqrt;

		case 0x80:
		case 0xd0: /* convert to integer */
			return fex_inv_int;
		}
		break;

	case 0x1035: /* compare */
		if (t1 == fp_signaling)
			return fex_inv_snan;
		else
			return fex_inv_cmp;
	}

	return (enum fex_exception) -1;
}

/* quad precision square root routines, defined outside this file */
#ifdef __sparcv9
extern void _Qp_sqrt(long double *, const long double *);
#else
extern long double _Q_sqrt(long double);
#endif

/*
 * Get the operands, generate the default untrapped result with
 * exceptions, and set a code indicating the type of operation.
 *
 * On return info->op, info->op1, info->op2 and info->res describe the
 * faulting instruction and info->flags holds the exception flags that
 * recomputing the operation left in the FSR; those flags are cleared
 * again before returning.  sip is not used.
 */
void
__fex_get_op(siginfo_t *sip, ucontext_t *uap, fex_info_t *info)
{
	unsigned long fsr;
	unsigned instr, opf, rs1, rs2;
	/* volatile so the comparisons below are really carried out */
	volatile int c;

	/* parse the instruction which caused the exception */
	instr = uap->uc_mcontext.fpregs.fpu_q->FQu.fpq.fpq_instr;
	opf = (instr >> 5) & 0x1ff;
	rs1 = (instr >> 14) & 0x1f;
	rs2 = instr & 0x1f;

	/* get the operands */
	switch (opf & 3) {
	case 0: /* integer */
		/* integer-source forms have a single operand, in rs2 */
		info->op1.type = fex_nodata;
		if (opf & 0x40) {
			info->op2.type = fex_int;
			info->op2.val.i = *(int*)FPreg(rs2);
		}
		else {
			info->op2.type = fex_llong;
			info->op2.val.l = *(long long*)FPREG(rs2);
		}
		break;

	case 1: /* single */
		info->op1.type = info->op2.type = fex_float;
		info->op1.val.f = *(float*)FPreg(rs1);
		info->op2.val.f = *(float*)FPreg(rs2);
		break;

	case 2: /* double */
		info->op1.type = info->op2.type = fex_double;
		info->op1.val.d = *(double*)FPREG(rs1);
		info->op2.val.d = *(double*)FPREG(rs2);
		break;

	case 3: /* quad */
		info->op1.type = info->op2.type = fex_ldouble;
		info->op1.val.q = *(long double*)FPREG(rs1);
		info->op2.val.q = *(long double*)FPREG(rs2);
		break;
	}

	/* initialize res to the default untrapped result and ex to the
	   corresponding flags (assume trapping is disabled and flags
	   are clear) */
	info->op = fex_other;
	info->res.type = fex_nodata;
	switch ((instr >> 19) & 0x183f) {
	case 0x1035: /* compare */
		info->op = fex_cmp;
		/* the value of c is never read; only the flags matter */
		switch (opf) {
		case 0x51: /* compare single */
			c = (info->op1.val.f == info->op2.val.f);
			break;

		case 0x52: /* compare double */
			c = (info->op1.val.d == info->op2.val.d);
			break;

		case 0x53: /* compare quad */
			c = (info->op1.val.q == info->op2.val.q);
			break;

		case 0x55: /* compare single with exception */
			c = (info->op1.val.f < info->op2.val.f);
			break;

		case 0x56: /* compare double with exception */
			c = (info->op1.val.d < info->op2.val.d);
			break;

		case 0x57: /* compare quad with exception */
			c = (info->op1.val.q < info->op2.val.q);
			break;
		}
		break;

	case 0x1034: /* add, subtract, multiply, divide, square root, convert */
		switch (opf) {
		case 0x41: /* add single */
			info->op = fex_add;
			info->res.type = fex_float;
			info->res.val.f = info->op1.val.f + info->op2.val.f;
			break;

		case 0x42: /* add double */
			info->op = fex_add;
			info->res.type = fex_double;
			info->res.val.d = info->op1.val.d + info->op2.val.d;
			break;

		case 0x43: /* add quad */
			info->op = fex_add;
			info->res.type = fex_ldouble;
			info->res.val.q = info->op1.val.q + info->op2.val.q;
			break;

		case 0x45: /* subtract single */
			info->op = fex_sub;
			info->res.type = fex_float;
			info->res.val.f = info->op1.val.f - info->op2.val.f;
			break;

		case 0x46: /* subtract double */
			info->op = fex_sub;
			info->res.type = fex_double;
			info->res.val.d = info->op1.val.d - info->op2.val.d;
			break;

		case 0x47: /* subtract quad */
			info->op = fex_sub;
			info->res.type = fex_ldouble;
			info->res.val.q = info->op1.val.q - info->op2.val.q;
			break;

		case 0x49: /* multiply single */
			info->op = fex_mul;
			info->res.type = fex_float;
			info->res.val.f = info->op1.val.f * info->op2.val.f;
			break;

		case 0x4a: /* multiply double */
			info->op = fex_mul;
			info->res.type = fex_double;
			info->res.val.d = info->op1.val.d * info->op2.val.d;
			break;

		case 0x4b: /* multiply quad */
			info->op = fex_mul;
			info->res.type = fex_ldouble;
			info->res.val.q = info->op1.val.q * info->op2.val.q;
			break;

		case 0x69: /* fsmuld */
			/* single operands, double result */
			info->op = fex_mul;
			info->res.type = fex_double;
			info->res.val.d = (double)info->op1.val.f * (double)info->op2.val.f;
			break;

		case 0x6e: /* fdmulq */
			/* double operands, quad result */
			info->op = fex_mul;
			info->res.type = fex_ldouble;
			info->res.val.q = (long double)info->op1.val.d *
				(long double)info->op2.val.d;
			break;

		case 0x4d: /* divide single */
			info->op = fex_div;
			info->res.type = fex_float;
			info->res.val.f = info->op1.val.f / info->op2.val.f;
			break;

		case 0x4e: /* divide double */
			info->op = fex_div;
			info->res.type = fex_double;
			info->res.val.d = info->op1.val.d / info->op2.val.d;
			break;

		case 0x4f: /* divide quad */
			info->op = fex_div;
			info->res.type = fex_ldouble;
			info->res.val.q = info->op1.val.q / info->op2.val.q;
			break;

		case 0x29: /* square root single */
			/* unary: the operand read from rs2 is reported as op1 */
			info->op = fex_sqrt;
			info->op1 = info->op2;
			info->op2.type = fex_nodata;
			info->res.type = fex_float;
			info->res.val.f = sqrtf(info->op1.val.f);
			break;

		case 0x2a: /* square root double */
			info->op = fex_sqrt;
			info->op1 = info->op2;
			info->op2.type = fex_nodata;
			info->res.type = fex_double;
			info->res.val.d = sqrt(info->op1.val.d);
			break;

		case 0x2b: /* square root quad */
			info->op = fex_sqrt;
			info->op1 = info->op2;
			info->op2.type = fex_nodata;
			info->res.type = fex_ldouble;
#ifdef __sparcv9
			_Qp_sqrt(&info->res.val.q, &info->op1.val.q);
#else
			info->res.val.q = _Q_sqrt(info->op1.val.q);
#endif
			break;

		default: /* conversions */
			/* unary: the operand read from rs2 is reported as op1 */
			info->op = fex_cnvt;
			info->op1 = info->op2;
			info->op2.type = fex_nodata;
			switch (opf) {
			case 0xd1: /* convert single to int */
				info->res.type = fex_int;
				info->res.val.i = (int) info->op1.val.f;
				break;

			case 0xd2: /* convert double to int */
				info->res.type = fex_int;
				info->res.val.i = (int) info->op1.val.d;
				break;

			case 0xd3: /* convert quad to int */
				info->res.type = fex_int;
				info->res.val.i = (int) info->op1.val.q;
				break;

			case 0x81: /* convert single to long long */
				info->res.type = fex_llong;
				info->res.val.l = (long long) info->op1.val.f;
				break;

			case 0x82: /* convert double to long long */
				info->res.type = fex_llong;
				info->res.val.l = (long long) info->op1.val.d;
				break;

			case 0x83: /* convert quad to long long */
				info->res.type = fex_llong;
				info->res.val.l = (long long) info->op1.val.q;
				break;

			case 0xc4: /* convert int to single */
				info->res.type = fex_float;
				info->res.val.f = (float) info->op1.val.i;
				break;

			case 0x84: /* convert long long to single */
				info->res.type = fex_float;
				info->res.val.f = (float) info->op1.val.l;
				break;

			case 0x88: /* convert long long to double */
				info->res.type = fex_double;
				info->res.val.d = (double) info->op1.val.l;
				break;

			case 0xc6: /* convert double to single */
				info->res.type = fex_float;
				info->res.val.f = (float) info->op1.val.d;
				break;

			case 0xc7: /* convert quad to single */
				info->res.type = fex_float;
				info->res.val.f = (float) info->op1.val.q;
				break;

			case 0xc9: /* convert single to double */
				info->res.type = fex_double;
				info->res.val.d = (double) info->op1.val.f;
				break;

			case 0xcb: /* convert quad to double */
				info->res.type = fex_double;
				info->res.val.d = (double) info->op1.val.q;
				break;

			case 0xcd: /* convert single to quad */
				info->res.type = fex_ldouble;
				info->res.val.q = (long double) info->op1.val.f;
				break;

			case 0xce: /* convert double to quad */
				info->res.type = fex_ldouble;
				info->res.val.q = (long double) info->op1.val.d;
				break;
			}
		}
		break;
	}
	/* collect the flags raised above, then clear them in the FSR */
	__fenv_getfsr(&fsr);
	info->flags = (int)__fenv_get_ex(fsr);
	__fenv_set_ex(fsr, 0);
	__fenv_setfsr(&fsr);
}
the specified result; if no result is given but the exception +* is underflow or overflow, supply the default trapped result +*/ +void +__fex_st_result(siginfo_t *sip, ucontext_t *uap, fex_info_t *info) +{ + unsigned instr, opf, rs1, rs2, rd; + long double qscl; + double dscl; + float fscl; + + /* parse the instruction which caused the exception */ + instr = uap->uc_mcontext.fpregs.fpu_q->FQu.fpq.fpq_instr; + opf = (instr >> 5) & 0x1ff; + rs1 = (instr >> 14) & 0x1f; + rs2 = instr & 0x1f; + rd = (instr >> 25) & 0x1f; + + /* if the instruction is a compare, just set fcc to unordered */ + if (((instr >> 19) & 0x183f) == 0x1035) { + if (rd == 0) + uap->uc_mcontext.fpregs.fpu_fsr |= 0xc00; + else { +#ifdef __sparcv9 + uap->uc_mcontext.fpregs.fpu_fsr |= (3l << ((rd << 1) + 30)); +#else + ((prxregset_t*)uap->uc_mcontext.xrs.xrs_ptr)->pr_un.pr_v8p.pr_xfsr |= (3 << ((rd - 1) << 1)); +#endif + } + return; + } + + /* if there is no result available, try to generate the untrapped + default */ + if (info->res.type == fex_nodata) { + /* set scale factors for exponent wrapping */ + switch (sip->si_code) { + case FPE_FLTOVF: + fscl = 1.262177448e-29f; /* 2^-96 */ + dscl = 6.441148769597133308e-232; /* 2^-768 */ + qscl = 8.778357852076208839765066529179033145e-3700l;/* 2^-12288 */ + break; + + case FPE_FLTUND: + fscl = 7.922816251e+28f; /* 2^96 */ + dscl = 1.552518092300708935e+231; /* 2^768 */ + qscl = 1.139165225263043370845938579315932009e+3699l;/* 2^12288 */ + break; + + default: + /* user may have blown away the default result by mistake, + so try to regenerate it */ + (void) __fex_get_op(sip, uap, info); + if (info->res.type != fex_nodata) + goto stuff; + /* couldn't do it */ + return; + } + + /* get the operands */ + switch (opf & 3) { + case 1: /* single */ + info->op1.val.f = *(float*)FPreg(rs1); + info->op2.val.f = *(float*)FPreg(rs2); + break; + + case 2: /* double */ + info->op1.val.d = *(double*)FPREG(rs1); + info->op2.val.d = *(double*)FPREG(rs2); + break; + + case 3: 
/* quad */ + info->op1.val.q = *(long double*)FPREG(rs1); + info->op2.val.q = *(long double*)FPREG(rs2); + break; + } + + /* generate the wrapped result */ + switch (opf) { + case 0x41: /* add single */ + info->res.type = fex_float; + info->res.val.f = fscl * (fscl * info->op1.val.f + + fscl * info->op2.val.f); + break; + + case 0x42: /* add double */ + info->res.type = fex_double; + info->res.val.d = dscl * (dscl * info->op1.val.d + + dscl * info->op2.val.d); + break; + + case 0x43: /* add quad */ + info->res.type = fex_ldouble; + info->res.val.q = qscl * (qscl * info->op1.val.q + + qscl * info->op2.val.q); + break; + + case 0x45: /* subtract single */ + info->res.type = fex_float; + info->res.val.f = fscl * (fscl * info->op1.val.f - + fscl * info->op2.val.f); + break; + + case 0x46: /* subtract double */ + info->res.type = fex_double; + info->res.val.d = dscl * (dscl * info->op1.val.d - + dscl * info->op2.val.d); + break; + + case 0x47: /* subtract quad */ + info->res.type = fex_ldouble; + info->res.val.q = qscl * (qscl * info->op1.val.q - + qscl * info->op2.val.q); + break; + + case 0x49: /* multiply single */ + info->res.type = fex_float; + info->res.val.f = (fscl * info->op1.val.f) * + (fscl * info->op2.val.f); + break; + + case 0x4a: /* multiply double */ + info->res.type = fex_double; + info->res.val.d = (dscl * info->op1.val.d) * + (dscl * info->op2.val.d); + break; + + case 0x4b: /* multiply quad */ + info->res.type = fex_ldouble; + info->res.val.q = (qscl * info->op1.val.q) * + (qscl * info->op2.val.q); + break; + + case 0x4d: /* divide single */ + info->res.type = fex_float; + info->res.val.f = (fscl * info->op1.val.f) / + (info->op2.val.f / fscl); + break; + + case 0x4e: /* divide double */ + info->res.type = fex_double; + info->res.val.d = (dscl * info->op1.val.d) / + (info->op2.val.d / dscl); + break; + + case 0x4f: /* divide quad */ + info->res.type = fex_ldouble; + info->res.val.q = (qscl * info->op1.val.q) / + (info->op2.val.q / qscl); + break; + + 
case 0xc6: /* convert double to single */ + info->res.type = fex_float; + info->res.val.f = (float) (fscl * (fscl * info->op1.val.d)); + break; + + case 0xc7: /* convert quad to single */ + info->res.type = fex_float; + info->res.val.f = (float) (fscl * (fscl * info->op1.val.q)); + break; + + case 0xcb: /* convert quad to double */ + info->res.type = fex_double; + info->res.val.d = (double) (dscl * (dscl * info->op1.val.q)); + break; + } + + if (info->res.type == fex_nodata) + /* couldn't do it */ + return; + } + +stuff: + /* stick the result in the destination */ + if (opf & 0x80) { /* conversion */ + if (opf & 0x10) { /* result is an int */ + switch (info->res.type) { + case fex_llong: + info->res.val.i = (int) info->res.val.l; + break; + + case fex_float: + info->res.val.i = (int) info->res.val.f; + break; + + case fex_double: + info->res.val.i = (int) info->res.val.d; + break; + + case fex_ldouble: + info->res.val.i = (int) info->res.val.q; + break; + + default: + break; + } + *(int*)FPreg(rd) = info->res.val.i; + return; + } + + switch (opf & 0xc) { + case 0: /* result is long long */ + switch (info->res.type) { + case fex_int: + info->res.val.l = (long long) info->res.val.i; + break; + + case fex_float: + info->res.val.l = (long long) info->res.val.f; + break; + + case fex_double: + info->res.val.l = (long long) info->res.val.d; + break; + + case fex_ldouble: + info->res.val.l = (long long) info->res.val.q; + break; + + default: + break; + } + *(long long*)FPREG(rd) = info->res.val.l; + break; + + case 0x4: /* result is float */ + switch (info->res.type) { + case fex_int: + info->res.val.f = (float) info->res.val.i; + break; + + case fex_llong: + info->res.val.f = (float) info->res.val.l; + break; + + case fex_double: + info->res.val.f = (float) info->res.val.d; + break; + + case fex_ldouble: + info->res.val.f = (float) info->res.val.q; + break; + + default: + break; + } + *(float*)FPreg(rd) = info->res.val.f; + break; + + case 0x8: /* result is double */ + 
switch (info->res.type) { + case fex_int: + info->res.val.d = (double) info->res.val.i; + break; + + case fex_llong: + info->res.val.d = (double) info->res.val.l; + break; + + case fex_float: + info->res.val.d = (double) info->res.val.f; + break; + + case fex_ldouble: + info->res.val.d = (double) info->res.val.q; + break; + + default: + break; + } + *(double*)FPREG(rd) = info->res.val.d; + break; + + case 0xc: /* result is long double */ + switch (info->res.type) { + case fex_int: + info->res.val.q = (long double) info->res.val.i; + break; + + case fex_llong: + info->res.val.q = (long double) info->res.val.l; + break; + + case fex_float: + info->res.val.q = (long double) info->res.val.f; + break; + + case fex_double: + info->res.val.q = (long double) info->res.val.d; + break; + + default: + break; + } + *(long double*)FPREG(rd) = info->res.val.q; + break; + } + return; + } + + if ((opf & 0xf0) == 0x60) { /* fsmuld, fdmulq */ + switch (opf & 0xc0) { + case 0x8: /* result is double */ + switch (info->res.type) { + case fex_int: + info->res.val.d = (double) info->res.val.i; + break; + + case fex_llong: + info->res.val.d = (double) info->res.val.l; + break; + + case fex_float: + info->res.val.d = (double) info->res.val.f; + break; + + case fex_ldouble: + info->res.val.d = (double) info->res.val.q; + break; + + default: + break; + } + *(double*)FPREG(rd) = info->res.val.d; + break; + + case 0xc: /* result is long double */ + switch (info->res.type) { + case fex_int: + info->res.val.q = (long double) info->res.val.i; + break; + + case fex_llong: + info->res.val.q = (long double) info->res.val.l; + break; + + case fex_float: + info->res.val.q = (long double) info->res.val.f; + break; + + case fex_double: + info->res.val.q = (long double) info->res.val.d; + break; + + default: + break; + } + *(long double*)FPREG(rd) = info->res.val.q; + break; + } + return; + } + + switch (opf & 3) { /* other arithmetic op */ + case 1: /* result is float */ + switch (info->res.type) { + 
case fex_int: + info->res.val.f = (float) info->res.val.i; + break; + + case fex_llong: + info->res.val.f = (float) info->res.val.l; + break; + + case fex_double: + info->res.val.f = (float) info->res.val.d; + break; + + case fex_ldouble: + info->res.val.f = (float) info->res.val.q; + break; + + default: + break; + } + *(float*)FPreg(rd) = info->res.val.f; + break; + + case 2: /* result is double */ + switch (info->res.type) { + case fex_int: + info->res.val.d = (double) info->res.val.i; + break; + + case fex_llong: + info->res.val.d = (double) info->res.val.l; + break; + + case fex_float: + info->res.val.d = (double) info->res.val.f; + break; + + case fex_ldouble: + info->res.val.d = (double) info->res.val.q; + break; + + default: + break; + } + *(double*)FPREG(rd) = info->res.val.d; + break; + + case 3: /* result is long double */ + switch (info->res.type) { + case fex_int: + info->res.val.q = (long double) info->res.val.i; + break; + + case fex_llong: + info->res.val.q = (long double) info->res.val.l; + break; + + case fex_float: + info->res.val.q = (long double) info->res.val.f; + break; + + case fex_double: + info->res.val.q = (long double) info->res.val.d; + break; + + default: + break; + } + *(long double*)FPREG(rd) = info->res.val.q; + break; + } +} +#endif /* defined(__sparc) */ diff --git a/usr/src/lib/libm/common/m9x/__fex_sse.c b/usr/src/lib/libm/common/m9x/__fex_sse.c new file mode 100644 index 0000000000..01d08e956c --- /dev/null +++ b/usr/src/lib/libm/common/m9x/__fex_sse.c @@ -0,0 +1,1608 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include "fenv_synonyms.h" +#include <ucontext.h> +#include <fenv.h> +#if defined(__SUNPRO_C) +#include <sunmath.h> +#else +#include <sys/ieeefp.h> +#endif +#include "fex_handler.h" +#include "fenv_inlines.h" + +#if !defined(REG_PC) +#define REG_PC EIP +#endif + +#if !defined(REG_PS) +#define REG_PS EFL +#endif + +#ifdef __amd64 +#define regno(X) ((X < 4)? REG_RAX - X : \ + ((X > 4)? REG_RAX + 1 - X : REG_RSP)) +#else +#define regno(X) (EAX - X) +#endif + +/* + * Support for SSE instructions + */ + +/* + * Decode an SSE instruction. Fill in *inst and return the length of the + * instruction in bytes. Return 0 if the instruction is not recognized. 
+ */ +int +__fex_parse_sse(ucontext_t *uap, sseinst_t *inst) +{ + unsigned char *ip; + char *addr; + int i, dbl, simd, rex, modrm, sib, r; + + i = 0; + ip = (unsigned char *)uap->uc_mcontext.gregs[REG_PC]; + + /* look for pseudo-prefixes */ + dbl = 0; + simd = SIMD; + if (ip[i] == 0xF3) { + simd = 0; + i++; + } else if (ip[i] == 0x66) { + dbl = DOUBLE; + i++; + } else if (ip[i] == 0xF2) { + dbl = DOUBLE; + simd = 0; + i++; + } + + /* look for AMD64 REX prefix */ + rex = 0; + if (ip[i] >= 0x40 && ip[i] <= 0x4F) { + rex = ip[i]; + i++; + } + + /* parse opcode */ + if (ip[i++] != 0x0F) + return 0; + switch (ip[i++]) { + case 0x2A: + inst->op = (int)cvtsi2ss + simd + dbl; + if (!simd) + inst->op = (int)inst->op + (rex & 8); + break; + + case 0x2C: + inst->op = (int)cvttss2si + simd + dbl; + if (!simd) + inst->op = (int)inst->op + (rex & 8); + break; + + case 0x2D: + inst->op = (int)cvtss2si + simd + dbl; + if (!simd) + inst->op = (int)inst->op + (rex & 8); + break; + + case 0x2E: + /* oddball: scalar instruction in a SIMD opcode group */ + if (!simd) + return 0; + inst->op = (int)ucomiss + dbl; + break; + + case 0x2F: + /* oddball: scalar instruction in a SIMD opcode group */ + if (!simd) + return 0; + inst->op = (int)comiss + dbl; + break; + + case 0x51: + inst->op = (int)sqrtss + simd + dbl; + break; + + case 0x58: + inst->op = (int)addss + simd + dbl; + break; + + case 0x59: + inst->op = (int)mulss + simd + dbl; + break; + + case 0x5A: + inst->op = (int)cvtss2sd + simd + dbl; + break; + + case 0x5B: + if (dbl) { + if (simd) + inst->op = cvtps2dq; + else + return 0; + } else { + inst->op = (simd)? 
cvtdq2ps : cvttps2dq; + } + break; + + case 0x5C: + inst->op = (int)subss + simd + dbl; + break; + + case 0x5D: + inst->op = (int)minss + simd + dbl; + break; + + case 0x5E: + inst->op = (int)divss + simd + dbl; + break; + + case 0x5F: + inst->op = (int)maxss + simd + dbl; + break; + + case 0xC2: + inst->op = (int)cmpss + simd + dbl; + break; + + case 0xE6: + if (simd) { + if (dbl) + inst->op = cvttpd2dq; + else + return 0; + } else { + inst->op = (dbl)? cvtpd2dq : cvtdq2pd; + } + break; + + default: + return 0; + } + + /* locate operands */ + modrm = ip[i++]; + + if (inst->op == cvtss2si || inst->op == cvttss2si || + inst->op == cvtsd2si || inst->op == cvttsd2si || + inst->op == cvtss2siq || inst->op == cvttss2siq || + inst->op == cvtsd2siq || inst->op == cvttsd2siq) { + /* op1 is a gp register */ + r = ((rex & 4) << 1) | ((modrm >> 3) & 7); + inst->op1 = (sseoperand_t *)&uap->uc_mcontext.gregs[regno(r)]; + } else if (inst->op == cvtps2pi || inst->op == cvttps2pi || + inst->op == cvtpd2pi || inst->op == cvttpd2pi) { + /* op1 is a mmx register */ +#ifdef __amd64 + inst->op1 = (sseoperand_t *)&uap->uc_mcontext.fpregs.fp_reg_set. + fpchip_state.st[(modrm >> 3) & 7]; +#else + inst->op1 = (sseoperand_t *)(10 * ((modrm >> 3) & 7) + + (char *)&uap->uc_mcontext.fpregs.fp_reg_set. + fpchip_state.state[7]); +#endif + } else { + /* op1 is a xmm register */ + r = ((rex & 4) << 1) | ((modrm >> 3) & 7); + inst->op1 = (sseoperand_t *)&uap->uc_mcontext.fpregs. + fp_reg_set.fpchip_state.xmm[r]; + } + + if ((modrm >> 6) == 3) { + if (inst->op == cvtsi2ss || inst->op == cvtsi2sd || + inst->op == cvtsi2ssq || inst->op == cvtsi2sdq) { + /* op2 is a gp register */ + r = ((rex & 1) << 3) | (modrm & 7); + inst->op2 = (sseoperand_t *)&uap->uc_mcontext. + gregs[regno(r)]; + } else if (inst->op == cvtpi2ps || inst->op == cvtpi2pd) { + /* op2 is a mmx register */ +#ifdef __amd64 + inst->op2 = (sseoperand_t *)&uap->uc_mcontext.fpregs. 
+ fp_reg_set.fpchip_state.st[modrm & 7]; +#else + inst->op2 = (sseoperand_t *)(10 * (modrm & 7) + + (char *)&uap->uc_mcontext.fpregs.fp_reg_set. + fpchip_state.state[7]); +#endif + } else { + /* op2 is a xmm register */ + r = ((rex & 1) << 3) | (modrm & 7); + inst->op2 = (sseoperand_t *)&uap->uc_mcontext.fpregs. + fp_reg_set.fpchip_state.xmm[r]; + } + } else if ((modrm & 0xc7) == 0x05) { +#ifdef __amd64 + /* address of next instruction + offset */ + r = i + 4; + if (inst->op == cmpss || inst->op == cmpps || + inst->op == cmpsd || inst->op == cmppd) + r++; + inst->op2 = (sseoperand_t *)(ip + r + *(int *)(ip + i)); +#else + /* absolute address */ + inst->op2 = (sseoperand_t *)(*(int *)(ip + i)); +#endif + i += 4; + } else { + /* complex address */ + if ((modrm & 7) == 4) { + /* parse sib byte */ + sib = ip[i++]; + if ((sib & 7) == 5 && (modrm >> 6) == 0) { + /* start with absolute address */ + addr = (char *)(uintptr_t)(*(int *)(ip + i)); + i += 4; + } else { + /* start with base */ + r = ((rex & 1) << 3) | (sib & 7); + addr = (char *)uap->uc_mcontext.gregs[regno(r)]; + } + r = ((rex & 2) << 2) | ((sib >> 3) & 7); + if (r != 4) { + /* add scaled index */ + addr += uap->uc_mcontext.gregs[regno(r)] + << (sib >> 6); + } + } else { + r = ((rex & 1) << 3) | (modrm & 7); + addr = (char *)uap->uc_mcontext.gregs[regno(r)]; + } + + /* add displacement, if any */ + if ((modrm >> 6) == 1) { + addr += (char)ip[i++]; + } else if ((modrm >> 6) == 2) { + addr += *(int *)(ip + i); + i += 4; + } + inst->op2 = (sseoperand_t *)addr; + } + + if (inst->op == cmpss || inst->op == cmpps || inst->op == cmpsd || + inst->op == cmppd) { + /* get the immediate operand */ + inst->imm = ip[i++]; + } + + return i; +} + +static enum fp_class_type +my_fp_classf(float *x) +{ + int i = *(int *)x & ~0x80000000; + + if (i < 0x7f800000) { + if (i < 0x00800000) + return ((i == 0)? 
fp_zero : fp_subnormal); + return fp_normal; + } + else if (i == 0x7f800000) + return fp_infinity; + else if (i & 0x400000) + return fp_quiet; + else + return fp_signaling; +} + +static enum fp_class_type +my_fp_class(double *x) +{ + int i = *(1+(int *)x) & ~0x80000000; + + if (i < 0x7ff00000) { + if (i < 0x00100000) + return (((i | *(int *)x) == 0)? fp_zero : fp_subnormal); + return fp_normal; + } + else if (i == 0x7ff00000 && *(int *)x == 0) + return fp_infinity; + else if (i & 0x80000) + return fp_quiet; + else + return fp_signaling; +} + +/* + * Inspect a scalar SSE instruction that incurred an invalid operation + * exception to determine which type of exception it was. + */ +static enum fex_exception +__fex_get_sse_invalid_type(sseinst_t *inst) +{ + enum fp_class_type t1, t2; + + /* check op2 for signaling nan */ + t2 = ((int)inst->op & DOUBLE)? my_fp_class(&inst->op2->d[0]) : + my_fp_classf(&inst->op2->f[0]); + if (t2 == fp_signaling) + return fex_inv_snan; + + /* eliminate all single-operand instructions */ + switch (inst->op) { + case cvtsd2ss: + case cvtss2sd: + /* hmm, this shouldn't have happened */ + return (enum fex_exception) -1; + + case sqrtss: + case sqrtsd: + return fex_inv_sqrt; + + case cvtss2si: + case cvtsd2si: + case cvttss2si: + case cvttsd2si: + case cvtss2siq: + case cvtsd2siq: + case cvttss2siq: + case cvttsd2siq: + return fex_inv_int; + default: + break; + } + + /* check op1 for signaling nan */ + t1 = ((int)inst->op & DOUBLE)? 
my_fp_class(&inst->op1->d[0]) : + my_fp_classf(&inst->op1->f[0]); + if (t1 == fp_signaling) + return fex_inv_snan; + + /* check two-operand instructions for other cases */ + switch (inst->op) { + case cmpss: + case cmpsd: + case minss: + case minsd: + case maxss: + case maxsd: + case comiss: + case comisd: + return fex_inv_cmp; + + case addss: + case addsd: + case subss: + case subsd: + if (t1 == fp_infinity && t2 == fp_infinity) + return fex_inv_isi; + break; + + case mulss: + case mulsd: + if ((t1 == fp_zero && t2 == fp_infinity) || + (t2 == fp_zero && t1 == fp_infinity)) + return fex_inv_zmi; + break; + + case divss: + case divsd: + if (t1 == fp_zero && t2 == fp_zero) + return fex_inv_zdz; + if (t1 == fp_infinity && t2 == fp_infinity) + return fex_inv_idi; + default: + break; + } + + return (enum fex_exception)-1; +} + +/* inline templates */ +extern void sse_cmpeqss(float *, float *, int *); +extern void sse_cmpltss(float *, float *, int *); +extern void sse_cmpless(float *, float *, int *); +extern void sse_cmpunordss(float *, float *, int *); +extern void sse_minss(float *, float *, float *); +extern void sse_maxss(float *, float *, float *); +extern void sse_addss(float *, float *, float *); +extern void sse_subss(float *, float *, float *); +extern void sse_mulss(float *, float *, float *); +extern void sse_divss(float *, float *, float *); +extern void sse_sqrtss(float *, float *); +extern void sse_ucomiss(float *, float *); +extern void sse_comiss(float *, float *); +extern void sse_cvtss2sd(float *, double *); +extern void sse_cvtsi2ss(int *, float *); +extern void sse_cvttss2si(float *, int *); +extern void sse_cvtss2si(float *, int *); +#ifdef __amd64 +extern void sse_cvtsi2ssq(long long *, float *); +extern void sse_cvttss2siq(float *, long long *); +extern void sse_cvtss2siq(float *, long long *); +#endif +extern void sse_cmpeqsd(double *, double *, long long *); +extern void sse_cmpltsd(double *, double *, long long *); +extern void 
sse_cmplesd(double *, double *, long long *); +extern void sse_cmpunordsd(double *, double *, long long *); +extern void sse_minsd(double *, double *, double *); +extern void sse_maxsd(double *, double *, double *); +extern void sse_addsd(double *, double *, double *); +extern void sse_subsd(double *, double *, double *); +extern void sse_mulsd(double *, double *, double *); +extern void sse_divsd(double *, double *, double *); +extern void sse_sqrtsd(double *, double *); +extern void sse_ucomisd(double *, double *); +extern void sse_comisd(double *, double *); +extern void sse_cvtsd2ss(double *, float *); +extern void sse_cvtsi2sd(int *, double *); +extern void sse_cvttsd2si(double *, int *); +extern void sse_cvtsd2si(double *, int *); +#ifdef __amd64 +extern void sse_cvtsi2sdq(long long *, double *); +extern void sse_cvttsd2siq(double *, long long *); +extern void sse_cvtsd2siq(double *, long long *); +#endif + +/* + * Fill in *info with the operands, default untrapped result, and + * flags produced by a scalar SSE instruction, and return the type + * of trapped exception (if any). On entry, the mxcsr must have + * all exceptions masked and all flags clear. The same conditions + * will hold on exit. + * + * This routine does not work if the instruction specified by *inst + * is not a scalar instruction. + */ +enum fex_exception +__fex_get_sse_op(ucontext_t *uap, sseinst_t *inst, fex_info_t *info) +{ + unsigned int e, te, mxcsr, oldmxcsr, subnorm; + + /* + * Perform the operation with traps disabled and check the + * exception flags. If the underflow trap was enabled, also + * check for an exact subnormal result. 
+ */ + __fenv_getmxcsr(&oldmxcsr); + subnorm = 0; + if ((int)inst->op & DOUBLE) { + if (inst->op == cvtsi2sd) { + info->op1.type = fex_int; + info->op1.val.i = inst->op2->i[0]; + info->op2.type = fex_nodata; + } else if (inst->op == cvtsi2sdq) { + info->op1.type = fex_llong; + info->op1.val.l = inst->op2->l[0]; + info->op2.type = fex_nodata; + } else if (inst->op == sqrtsd || inst->op == cvtsd2ss || + inst->op == cvttsd2si || inst->op == cvtsd2si || + inst->op == cvttsd2siq || inst->op == cvtsd2siq) { + info->op1.type = fex_double; + info->op1.val.d = inst->op2->d[0]; + info->op2.type = fex_nodata; + } else { + info->op1.type = fex_double; + info->op1.val.d = inst->op1->d[0]; + info->op2.type = fex_double; + info->op2.val.d = inst->op2->d[0]; + } + info->res.type = fex_double; + switch (inst->op) { + case cmpsd: + info->op = fex_cmp; + info->res.type = fex_llong; + switch (inst->imm & 3) { + case 0: + sse_cmpeqsd(&info->op1.val.d, &info->op2.val.d, + &info->res.val.l); + break; + + case 1: + sse_cmpltsd(&info->op1.val.d, &info->op2.val.d, + &info->res.val.l); + break; + + case 2: + sse_cmplesd(&info->op1.val.d, &info->op2.val.d, + &info->res.val.l); + break; + + case 3: + sse_cmpunordsd(&info->op1.val.d, + &info->op2.val.d, &info->res.val.l); + } + if (inst->imm & 4) + info->res.val.l ^= 0xffffffffffffffffull; + break; + + case minsd: + info->op = fex_other; + sse_minsd(&info->op1.val.d, &info->op2.val.d, + &info->res.val.d); + break; + + case maxsd: + info->op = fex_other; + sse_maxsd(&info->op1.val.d, &info->op2.val.d, + &info->res.val.d); + break; + + case addsd: + info->op = fex_add; + sse_addsd(&info->op1.val.d, &info->op2.val.d, + &info->res.val.d); + if (my_fp_class(&info->res.val.d) == fp_subnormal) + subnorm = 1; + break; + + case subsd: + info->op = fex_sub; + sse_subsd(&info->op1.val.d, &info->op2.val.d, + &info->res.val.d); + if (my_fp_class(&info->res.val.d) == fp_subnormal) + subnorm = 1; + break; + + case mulsd: + info->op = fex_mul; + 
sse_mulsd(&info->op1.val.d, &info->op2.val.d, + &info->res.val.d); + if (my_fp_class(&info->res.val.d) == fp_subnormal) + subnorm = 1; + break; + + case divsd: + info->op = fex_div; + sse_divsd(&info->op1.val.d, &info->op2.val.d, + &info->res.val.d); + if (my_fp_class(&info->res.val.d) == fp_subnormal) + subnorm = 1; + break; + + case sqrtsd: + info->op = fex_sqrt; + sse_sqrtsd(&info->op1.val.d, &info->res.val.d); + break; + + case cvtsd2ss: + info->op = fex_cnvt; + info->res.type = fex_float; + sse_cvtsd2ss(&info->op1.val.d, &info->res.val.f); + if (my_fp_classf(&info->res.val.f) == fp_subnormal) + subnorm = 1; + break; + + case cvtsi2sd: + info->op = fex_cnvt; + sse_cvtsi2sd(&info->op1.val.i, &info->res.val.d); + break; + + case cvttsd2si: + info->op = fex_cnvt; + info->res.type = fex_int; + sse_cvttsd2si(&info->op1.val.d, &info->res.val.i); + break; + + case cvtsd2si: + info->op = fex_cnvt; + info->res.type = fex_int; + sse_cvtsd2si(&info->op1.val.d, &info->res.val.i); + break; + +#ifdef __amd64 + case cvtsi2sdq: + info->op = fex_cnvt; + sse_cvtsi2sdq(&info->op1.val.l, &info->res.val.d); + break; + + case cvttsd2siq: + info->op = fex_cnvt; + info->res.type = fex_llong; + sse_cvttsd2siq(&info->op1.val.d, &info->res.val.l); + break; + + case cvtsd2siq: + info->op = fex_cnvt; + info->res.type = fex_llong; + sse_cvtsd2siq(&info->op1.val.d, &info->res.val.l); + break; +#endif + + case ucomisd: + info->op = fex_cmp; + info->res.type = fex_nodata; + sse_ucomisd(&info->op1.val.d, &info->op2.val.d); + break; + + case comisd: + info->op = fex_cmp; + info->res.type = fex_nodata; + sse_comisd(&info->op1.val.d, &info->op2.val.d); + break; + default: + break; + } + } else { + if (inst->op == cvtsi2ss) { + info->op1.type = fex_int; + info->op1.val.i = inst->op2->i[0]; + info->op2.type = fex_nodata; + } else if (inst->op == cvtsi2ssq) { + info->op1.type = fex_llong; + info->op1.val.l = inst->op2->l[0]; + info->op2.type = fex_nodata; + } else if (inst->op == sqrtss || inst->op 
== cvtss2sd || + inst->op == cvttss2si || inst->op == cvtss2si || + inst->op == cvttss2siq || inst->op == cvtss2siq) { + info->op1.type = fex_float; + info->op1.val.f = inst->op2->f[0]; + info->op2.type = fex_nodata; + } else { + info->op1.type = fex_float; + info->op1.val.f = inst->op1->f[0]; + info->op2.type = fex_float; + info->op2.val.f = inst->op2->f[0]; + } + info->res.type = fex_float; + switch (inst->op) { + case cmpss: + info->op = fex_cmp; + info->res.type = fex_int; + switch (inst->imm & 3) { + case 0: + sse_cmpeqss(&info->op1.val.f, &info->op2.val.f, + &info->res.val.i); + break; + + case 1: + sse_cmpltss(&info->op1.val.f, &info->op2.val.f, + &info->res.val.i); + break; + + case 2: + sse_cmpless(&info->op1.val.f, &info->op2.val.f, + &info->res.val.i); + break; + + case 3: + sse_cmpunordss(&info->op1.val.f, + &info->op2.val.f, &info->res.val.i); + } + if (inst->imm & 4) + info->res.val.i ^= 0xffffffffu; + break; + + case minss: + info->op = fex_other; + sse_minss(&info->op1.val.f, &info->op2.val.f, + &info->res.val.f); + break; + + case maxss: + info->op = fex_other; + sse_maxss(&info->op1.val.f, &info->op2.val.f, + &info->res.val.f); + break; + + case addss: + info->op = fex_add; + sse_addss(&info->op1.val.f, &info->op2.val.f, + &info->res.val.f); + if (my_fp_classf(&info->res.val.f) == fp_subnormal) + subnorm = 1; + break; + + case subss: + info->op = fex_sub; + sse_subss(&info->op1.val.f, &info->op2.val.f, + &info->res.val.f); + if (my_fp_classf(&info->res.val.f) == fp_subnormal) + subnorm = 1; + break; + + case mulss: + info->op = fex_mul; + sse_mulss(&info->op1.val.f, &info->op2.val.f, + &info->res.val.f); + if (my_fp_classf(&info->res.val.f) == fp_subnormal) + subnorm = 1; + break; + + case divss: + info->op = fex_div; + sse_divss(&info->op1.val.f, &info->op2.val.f, + &info->res.val.f); + if (my_fp_classf(&info->res.val.f) == fp_subnormal) + subnorm = 1; + break; + + case sqrtss: + info->op = fex_sqrt; + sse_sqrtss(&info->op1.val.f, 
&info->res.val.f); + break; + + case cvtss2sd: + info->op = fex_cnvt; + info->res.type = fex_double; + sse_cvtss2sd(&info->op1.val.f, &info->res.val.d); + break; + + case cvtsi2ss: + info->op = fex_cnvt; + sse_cvtsi2ss(&info->op1.val.i, &info->res.val.f); + break; + + case cvttss2si: + info->op = fex_cnvt; + info->res.type = fex_int; + sse_cvttss2si(&info->op1.val.f, &info->res.val.i); + break; + + case cvtss2si: + info->op = fex_cnvt; + info->res.type = fex_int; + sse_cvtss2si(&info->op1.val.f, &info->res.val.i); + break; + +#ifdef __amd64 + case cvtsi2ssq: + info->op = fex_cnvt; + sse_cvtsi2ssq(&info->op1.val.l, &info->res.val.f); + break; + + case cvttss2siq: + info->op = fex_cnvt; + info->res.type = fex_llong; + sse_cvttss2siq(&info->op1.val.f, &info->res.val.l); + break; + + case cvtss2siq: + info->op = fex_cnvt; + info->res.type = fex_llong; + sse_cvtss2siq(&info->op1.val.f, &info->res.val.l); + break; +#endif + + case ucomiss: + info->op = fex_cmp; + info->res.type = fex_nodata; + sse_ucomiss(&info->op1.val.f, &info->op2.val.f); + break; + + case comiss: + info->op = fex_cmp; + info->res.type = fex_nodata; + sse_comiss(&info->op1.val.f, &info->op2.val.f); + break; + default: + break; + } + } + __fenv_getmxcsr(&mxcsr); + info->flags = mxcsr & 0x3d; + __fenv_setmxcsr(&oldmxcsr); + + /* determine which exception would have been trapped */ + te = ~(uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.mxcsr + >> 7) & 0x3d; + e = mxcsr & te; + if (e & FE_INVALID) + return __fex_get_sse_invalid_type(inst); + if (e & FE_DIVBYZERO) + return fex_division; + if (e & FE_OVERFLOW) + return fex_overflow; + if ((e & FE_UNDERFLOW) || (subnorm && (te & FE_UNDERFLOW))) + return fex_underflow; + if (e & FE_INEXACT) + return fex_inexact; + return (enum fex_exception)-1; +} + +/* + * Emulate a SIMD SSE instruction to determine which exceptions occur + * in each part. 
For i = 0, 1, 2, and 3, set e[i] to indicate the + * trapped exception that would occur if the i-th part of the SIMD + * instruction were executed in isolation; set e[i] to -1 if no + * trapped exception would occur in this part. Also fill in info[i] + * with the corresponding operands, default untrapped result, and + * flags. + * + * This routine does not work if the instruction specified by *inst + * is not a SIMD instruction. + */ +void +__fex_get_simd_op(ucontext_t *uap, sseinst_t *inst, enum fex_exception *e, + fex_info_t *info) +{ + sseinst_t dummy; + int i; + + e[0] = e[1] = e[2] = e[3] = -1; + + /* perform each part of the SIMD operation */ + switch (inst->op) { + case cmpps: + dummy.op = cmpss; + dummy.imm = inst->imm; + for (i = 0; i < 4; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; + e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); + } + break; + + case minps: + dummy.op = minss; + for (i = 0; i < 4; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; + e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); + } + break; + + case maxps: + dummy.op = maxss; + for (i = 0; i < 4; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; + e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); + } + break; + + case addps: + dummy.op = addss; + for (i = 0; i < 4; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; + e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); + } + break; + + case subps: + dummy.op = subss; + for (i = 0; i < 4; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; + e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); + } + break; + + case mulps: + dummy.op = mulss; + for (i = 0; i < 4; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; + e[i] = 
__fex_get_sse_op(uap, &dummy, &info[i]); + } + break; + + case divps: + dummy.op = divss; + for (i = 0; i < 4; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; + e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); + } + break; + + case sqrtps: + dummy.op = sqrtss; + for (i = 0; i < 4; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; + e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); + } + break; + + case cvtdq2ps: + dummy.op = cvtsi2ss; + for (i = 0; i < 4; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->i[i]; + e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); + } + break; + + case cvttps2dq: + dummy.op = cvttss2si; + for (i = 0; i < 4; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; + e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); + } + break; + + case cvtps2dq: + dummy.op = cvtss2si; + for (i = 0; i < 4; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; + e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); + } + break; + + case cvtpi2ps: + dummy.op = cvtsi2ss; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->i[i]; + e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); + } + break; + + case cvttps2pi: + dummy.op = cvttss2si; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; + e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); + } + break; + + case cvtps2pi: + dummy.op = cvtss2si; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; + e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); + } + break; + + case cmppd: + dummy.op = cmpsd; + dummy.imm = inst->imm; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t 
*)&inst->op1->d[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; + e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); + } + break; + + case minpd: + dummy.op = minsd; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; + e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); + } + break; + + case maxpd: + dummy.op = maxsd; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; + e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); + } + break; + + case addpd: + dummy.op = addsd; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; + e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); + } + break; + + case subpd: + dummy.op = subsd; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; + e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); + } + break; + + case mulpd: + dummy.op = mulsd; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; + e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); + } + break; + + case divpd: + dummy.op = divsd; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; + e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); + } + break; + + case sqrtpd: + dummy.op = sqrtsd; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; + e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); + } + break; + + case cvtpi2pd: + case cvtdq2pd: + dummy.op = cvtsi2sd; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->i[i]; + e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); + } + break; + + case cvttpd2pi: + case cvttpd2dq: + dummy.op = 
cvttsd2si; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; + e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); + } + break; + + case cvtpd2pi: + case cvtpd2dq: + dummy.op = cvtsd2si; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; + e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); + } + break; + + case cvtps2pd: + dummy.op = cvtss2sd; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; + e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); + } + break; + + case cvtpd2ps: + dummy.op = cvtsd2ss; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; + e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); + } + default: + break; + } +} + +/* + * Store the result value from *info in the destination of the scalar + * SSE instruction specified by *inst. If no result is given but the + * exception is underflow or overflow, supply the default trapped result. + * + * This routine does not work if the instruction specified by *inst + * is not a scalar instruction. 
+ */ +void +__fex_st_sse_result(ucontext_t *uap, sseinst_t *inst, enum fex_exception e, + fex_info_t *info) +{ + int i = 0; + long long l = 0L;; + float f = 0.0, fscl; + double d = 0.0L, dscl; + + /* for compares that write eflags, just set the flags + to indicate "unordered" */ + if (inst->op == ucomiss || inst->op == comiss || + inst->op == ucomisd || inst->op == comisd) { + uap->uc_mcontext.gregs[REG_PS] |= 0x45; + return; + } + + /* if info doesn't specify a result value, try to generate + the default trapped result */ + if (info->res.type == fex_nodata) { + /* set scale factors for exponent wrapping */ + switch (e) { + case fex_overflow: + fscl = 1.262177448e-29f; /* 2^-96 */ + dscl = 6.441148769597133308e-232; /* 2^-768 */ + break; + + case fex_underflow: + fscl = 7.922816251e+28f; /* 2^96 */ + dscl = 1.552518092300708935e+231; /* 2^768 */ + break; + + default: + (void) __fex_get_sse_op(uap, inst, info); + if (info->res.type == fex_nodata) + return; + goto stuff; + } + + /* generate the wrapped result */ + if (inst->op == cvtsd2ss) { + info->op1.type = fex_double; + info->op1.val.d = inst->op2->d[0]; + info->op2.type = fex_nodata; + info->res.type = fex_float; + info->res.val.f = (float)(fscl * (fscl * + info->op1.val.d)); + } else if ((int)inst->op & DOUBLE) { + info->op1.type = fex_double; + info->op1.val.d = inst->op1->d[0]; + info->op2.type = fex_double; + info->op2.val.d = inst->op2->d[0]; + info->res.type = fex_double; + switch (inst->op) { + case addsd: + info->res.val.d = dscl * (dscl * + info->op1.val.d + dscl * info->op2.val.d); + break; + + case subsd: + info->res.val.d = dscl * (dscl * + info->op1.val.d - dscl * info->op2.val.d); + break; + + case mulsd: + info->res.val.d = (dscl * info->op1.val.d) * + (dscl * info->op2.val.d); + break; + + case divsd: + info->res.val.d = (dscl * info->op1.val.d) / + (info->op2.val.d / dscl); + break; + + default: + return; + } + } else { + info->op1.type = fex_float; + info->op1.val.f = inst->op1->f[0]; + 
info->op2.type = fex_float; + info->op2.val.f = inst->op2->f[0]; + info->res.type = fex_float; + switch (inst->op) { + case addss: + info->res.val.f = fscl * (fscl * + info->op1.val.f + fscl * info->op2.val.f); + break; + + case subss: + info->res.val.f = fscl * (fscl * + info->op1.val.f - fscl * info->op2.val.f); + break; + + case mulss: + info->res.val.f = (fscl * info->op1.val.f) * + (fscl * info->op2.val.f); + break; + + case divss: + info->res.val.f = (fscl * info->op1.val.f) / + (info->op2.val.f / fscl); + break; + + default: + return; + } + } + } + + /* put the result in the destination */ +stuff: + if (inst->op == cmpss || inst->op == cvttss2si || inst->op == cvtss2si + || inst->op == cvttsd2si || inst->op == cvtsd2si) { + switch (info->res.type) { + case fex_int: + i = info->res.val.i; + break; + + case fex_llong: + i = info->res.val.l; + break; + + case fex_float: + i = info->res.val.f; + break; + + case fex_double: + i = info->res.val.d; + break; + + case fex_ldouble: + i = info->res.val.q; + break; + + default: + break; + } + inst->op1->i[0] = i; + } else if (inst->op == cmpsd || inst->op == cvttss2siq || + inst->op == cvtss2siq || inst->op == cvttsd2siq || + inst->op == cvtsd2siq) { + switch (info->res.type) { + case fex_int: + l = info->res.val.i; + break; + + case fex_llong: + l = info->res.val.l; + break; + + case fex_float: + l = info->res.val.f; + break; + + case fex_double: + l = info->res.val.d; + break; + + case fex_ldouble: + l = info->res.val.q; + break; + + default: + break; + } + inst->op1->l[0] = l; + } else if ((((int)inst->op & DOUBLE) && inst->op != cvtsd2ss) || + inst->op == cvtss2sd) { + switch (info->res.type) { + case fex_int: + d = info->res.val.i; + break; + + case fex_llong: + d = info->res.val.l; + break; + + case fex_float: + d = info->res.val.f; + break; + + case fex_double: + d = info->res.val.d; + break; + + case fex_ldouble: + d = info->res.val.q; + break; + + default: + break; + } + inst->op1->d[0] = d; + } else { + switch 
(info->res.type) { + case fex_int: + f = info->res.val.i; + break; + + case fex_llong: + f = info->res.val.l; + break; + + case fex_float: + f = info->res.val.f; + break; + + case fex_double: + f = info->res.val.d; + break; + + case fex_ldouble: + f = info->res.val.q; + break; + + default: + break; + } + inst->op1->f[0] = f; + } +} + +/* + * Store the results from a SIMD instruction. For each i, store + * the result value from info[i] in the i-th part of the destination + * of the SIMD SSE instruction specified by *inst. If no result + * is given but the exception indicated by e[i] is underflow or + * overflow, supply the default trapped result. + * + * This routine does not work if the instruction specified by *inst + * is not a SIMD instruction. + */ +void +__fex_st_simd_result(ucontext_t *uap, sseinst_t *inst, enum fex_exception *e, + fex_info_t *info) +{ + sseinst_t dummy; + int i; + + /* store each part */ + switch (inst->op) { + case cmpps: + dummy.op = cmpss; + dummy.imm = inst->imm; + for (i = 0; i < 4; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; + __fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + break; + + case minps: + dummy.op = minss; + for (i = 0; i < 4; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; + __fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + break; + + case maxps: + dummy.op = maxss; + for (i = 0; i < 4; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; + __fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + break; + + case addps: + dummy.op = addss; + for (i = 0; i < 4; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; + __fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + break; + + case subps: + dummy.op = subss; + for (i = 0; i < 4; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; + dummy.op2 = 
(sseoperand_t *)&inst->op2->f[i]; + __fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + break; + + case mulps: + dummy.op = mulss; + for (i = 0; i < 4; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; + __fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + break; + + case divps: + dummy.op = divss; + for (i = 0; i < 4; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; + __fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + break; + + case sqrtps: + dummy.op = sqrtss; + for (i = 0; i < 4; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; + __fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + break; + + case cvtdq2ps: + dummy.op = cvtsi2ss; + for (i = 0; i < 4; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->i[i]; + __fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + break; + + case cvttps2dq: + dummy.op = cvttss2si; + for (i = 0; i < 4; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; + __fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + break; + + case cvtps2dq: + dummy.op = cvtss2si; + for (i = 0; i < 4; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; + __fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + break; + + case cvtpi2ps: + dummy.op = cvtsi2ss; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->i[i]; + __fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + break; + + case cvttps2pi: + dummy.op = cvttss2si; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; + __fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + break; + + case cvtps2pi: + dummy.op = cvtss2si; + for (i = 0; i < 2; 
i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; + __fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + break; + + case cmppd: + dummy.op = cmpsd; + dummy.imm = inst->imm; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; + __fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + break; + + case minpd: + dummy.op = minsd; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; + __fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + break; + + case maxpd: + dummy.op = maxsd; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; + __fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + break; + + case addpd: + dummy.op = addsd; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; + __fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + break; + + case subpd: + dummy.op = subsd; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; + __fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + break; + + case mulpd: + dummy.op = mulsd; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; + __fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + break; + + case divpd: + dummy.op = divsd; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; + __fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + break; + + case sqrtpd: + dummy.op = sqrtsd; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; + __fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + break; + + 
case cvtpi2pd: + case cvtdq2pd: + dummy.op = cvtsi2sd; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->i[i]; + __fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + break; + + case cvttpd2pi: + case cvttpd2dq: + dummy.op = cvttsd2si; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; + __fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + /* for cvttpd2dq, zero the high 64 bits of the destination */ + if (inst->op == cvttpd2dq) + inst->op1->l[1] = 0ll; + break; + + case cvtpd2pi: + case cvtpd2dq: + dummy.op = cvtsd2si; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; + __fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + /* for cvtpd2dq, zero the high 64 bits of the destination */ + if (inst->op == cvtpd2dq) + inst->op1->l[1] = 0ll; + break; + + case cvtps2pd: + dummy.op = cvtss2sd; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; + __fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + break; + + case cvtpd2ps: + dummy.op = cvtsd2ss; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; + __fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + /* zero the high 64 bits of the destination */ + inst->op1->l[1] = 0ll; + + default: + break; + } +} + diff --git a/usr/src/lib/libm/common/m9x/__fex_sym.c b/usr/src/lib/libm/common/m9x/__fex_sym.c new file mode 100644 index 0000000000..66a1d9bfe5 --- /dev/null +++ b/usr/src/lib/libm/common/m9x/__fex_sym.c @@ -0,0 +1,307 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include "fenv_synonyms.h" +#include <elf.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <fcntl.h> +#include <procfs.h> +#include <string.h> +#include <sys/stat.h> + +#if defined(__sparcv9) || defined(__amd64) + +#define Elf_Ehdr Elf64_Ehdr +#define Elf_Phdr Elf64_Phdr +#define Elf_Shdr Elf64_Shdr +#define Elf_Sym Elf64_Sym +#define ELF_ST_BIND ELF64_ST_BIND +#define ELF_ST_TYPE ELF64_ST_TYPE + +#else + +#define Elf_Ehdr Elf32_Ehdr +#define Elf_Phdr Elf32_Phdr +#define Elf_Shdr Elf32_Shdr +#define Elf_Sym Elf32_Sym +#define ELF_ST_BIND ELF32_ST_BIND +#define ELF_ST_TYPE ELF32_ST_TYPE + +#endif /* __sparcv9 */ + +/* semi-permanent data established by __fex_sym_init */ +static prmap_t *pm = NULL; /* prmap_t array */ +static int npm = 0; /* number of entries in pm */ + +/* transient data modified by __fex_sym */ +static prmap_t *lpm = NULL; /* prmap_t found in last call */ +static Elf_Phdr *ph = NULL; /* program header array */ +static int phsize = 0; /* size of ph */ +static int nph; /* number of entries in ph */ +static char *stbuf = NULL; /* symbol and string table buffer */ +static int stbufsize = 0; /* size of stbuf */ +static int stoffset; /* offset 
of string table in stbuf */ +static int nsyms; /* number of symbols in stbuf */ + +/* get a current prmap_t list (must call this before each stack trace) */ +void +__fex_sym_init() +{ + struct stat statbuf; + long n; + int i; + + /* clear out the previous prmap_t list */ + if (pm != NULL) + free(pm); + pm = lpm = NULL; + npm = 0; + + /* get the current prmap_t list */ + if (stat("/proc/self/map", &statbuf) < 0 || statbuf.st_size <= 0 || + (pm = (prmap_t*)malloc(statbuf.st_size)) == NULL) + return; + if ((i = open("/proc/self/map", O_RDONLY)) < 0) + { + free(pm); + pm = NULL; + return; + } + n = read(i, pm, statbuf.st_size); + close(i); + if (n != statbuf.st_size) + { + free(pm); + pm = NULL; + } + else + npm = (int) (n / sizeof(prmap_t)); +} + +/* read ELF program headers and symbols; return -1 on error, 0 otherwise */ +static int +__fex_read_syms(int fd) +{ + Elf_Ehdr h; + Elf_Shdr *sh; + int i, size; + + /* read the ELF header */ + if (read(fd, &h, sizeof(h)) != sizeof(h)) + return -1; + if (h.e_ident[EI_MAG0] != ELFMAG0 || + h.e_ident[EI_MAG1] != ELFMAG1 || + h.e_ident[EI_MAG2] != ELFMAG2 || + h.e_ident[EI_MAG3] != ELFMAG3 || + h.e_phentsize != sizeof(Elf_Phdr) || + h.e_shentsize != sizeof(Elf_Shdr)) + return -1; + + /* get space for the program headers */ + size = h.e_phnum * h.e_phentsize; + if (size > phsize) + { + if (ph) + free(ph); + phsize = nph = 0; + if ((ph = (Elf_Phdr*)malloc(size)) == NULL) + return -1; + phsize = size; + } + + /* read the program headers */ + if (lseek(fd, h.e_phoff, SEEK_SET) != h.e_phoff || + read(fd, ph, size) != (ssize_t)size) + { + nph = 0; + return -1; + } + nph = h.e_phnum; + + /* read the section headers */ + size = h.e_shnum * h.e_shentsize; + if ((sh = (Elf_Shdr*)malloc(size)) == NULL) + return -1; + if (lseek(fd, h.e_shoff, SEEK_SET) != h.e_shoff || + read(fd, sh, size) != (ssize_t)size) + { + free(sh); + return -1; + } + + /* find the symtab section header */ + for (i = 0; i < h.e_shnum; i++) + { + if (sh[i].sh_type == 
SHT_SYMTAB) + break; /* assume there is only one */ + } + if (i == h.e_shnum || sh[i].sh_size == 0 || + sh[i].sh_entsize != sizeof(Elf_Sym) || + sh[i].sh_link < 1 || sh[i].sh_link >= h.e_shnum || + sh[sh[i].sh_link].sh_type != SHT_STRTAB || + sh[sh[i].sh_link].sh_size == 0) + { + free(sh); + return -1; + } + + /* get space for the symbol and string tables */ + size = (int) (sh[i].sh_size + sh[sh[i].sh_link].sh_size); + if (size > stbufsize) + { + if (stbuf) + free(stbuf); + stbufsize = nsyms = 0; + if ((stbuf = (char*)malloc(size)) == NULL) + { + free(sh); + return -1; + } + stbufsize = size; + } + + /* read the symbol and string tables */ + if (lseek(fd, sh[i].sh_offset, SEEK_SET) != sh[i].sh_offset || + read(fd, stbuf, sh[i].sh_size) != sh[i].sh_size || + lseek(fd, sh[sh[i].sh_link].sh_offset, SEEK_SET) != + sh[sh[i].sh_link].sh_offset || + read(fd, stbuf + sh[i].sh_size, sh[sh[i].sh_link].sh_size) != + sh[sh[i].sh_link].sh_size) + { + free(sh); + return (-1); + } + nsyms = (int) (sh[i].sh_size / sh[i].sh_entsize); + stoffset = (int) sh[i].sh_size; + + free(sh); + return (0); +} + +/* find the symbol corresponding to the given text address; + return NULL on error, symbol address otherwise */ +char * +__fex_sym(char *a, char **name) +{ + Elf_Sym *s; + unsigned long fo, va, value; + int fd, i, j, nm; + char fname[PRMAPSZ+20]; + + /* see if the last prmap_t found contains the indicated address */ + if (lpm) + { + if (a >= (char*)lpm->pr_vaddr && a < (char*)lpm->pr_vaddr + + lpm->pr_size) + goto cont; + } + + /* look for a prmap_t that contains the indicated address */ + for (i = 0; i < npm; i++) + { + if (a >= (char*)pm[i].pr_vaddr && a < (char*)pm[i].pr_vaddr + + pm[i].pr_size) + break; + } + if (i == npm) + return NULL; + + /* get an open file descriptor for the mapped object */ + if (pm[i].pr_mapname[0] == '\0') + return NULL; + strcpy(fname, "/proc/self/object/"); + strncat(fname, pm[i].pr_mapname, PRMAPSZ); + fd = open(fname, O_RDONLY); + if (fd < 0) + return 
NULL; + + /* read the program headers and symbols */ + lpm = NULL; + j = __fex_read_syms(fd); + close(fd); + if (j < 0) + return NULL; + lpm = &pm[i]; + +cont: + /* compute the file offset corresponding to the mapped address */ + fo = (a - (char*)lpm->pr_vaddr) + lpm->pr_offset; + + /* find the program header containing the file offset */ + for (i = 0; i < nph; i++) + { + if (ph[i].p_type == PT_LOAD && fo >= ph[i].p_offset && + fo < ph[i].p_offset + ph[i].p_filesz) + break; + } + if (i == nph) + return NULL; + + /* compute the virtual address corresponding to the file offset */ + va = (fo - ph[i].p_offset) + ph[i].p_vaddr; + + /* find the symbol in this segment with the highest value + less than or equal to the virtual address */ + s = (Elf_Sym*)stbuf; + value = nm = 0; + for (j = 0; j < nsyms; j++) + { + if (s[j].st_name == 0 || s[j].st_shndx == SHN_UNDEF || + (ELF_ST_BIND(s[j].st_info) != STB_LOCAL && + ELF_ST_BIND(s[j].st_info) != STB_GLOBAL && + ELF_ST_BIND(s[j].st_info) != STB_WEAK) || + (ELF_ST_TYPE(s[j].st_info) != STT_NOTYPE && + ELF_ST_TYPE(s[j].st_info) != STT_OBJECT && + ELF_ST_TYPE(s[j].st_info) != STT_FUNC)) + { + continue; + } + + if (s[j].st_value < ph[i].p_vaddr || s[j].st_value >= ph[i].p_vaddr + + ph[i].p_memsz) + { + continue; + } + + if (s[j].st_value < value || s[j].st_value > va) + continue; + + value = s[j].st_value; + nm = s[j].st_name; + } + if (nm == 0) + return NULL; + + /* pass back the name and return the mapped address of the symbol */ + *name = stbuf + stoffset + nm; + fo = (value - ph[i].p_vaddr) + ph[i].p_offset; + return (char*)lpm->pr_vaddr + (fo - lpm->pr_offset); +} diff --git a/usr/src/lib/libm/common/m9x/fdim.c b/usr/src/lib/libm/common/m9x/fdim.c new file mode 100644 index 0000000000..405c9c8dc1 --- /dev/null +++ b/usr/src/lib/libm/common/m9x/fdim.c @@ -0,0 +1,52 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). 
+ * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#if defined(ELFOBJ) +#pragma weak fdim = __fdim +#endif + +/* + * fdim(x,y) returns x - y if x > y, +0 if x <= y, and NaN if x and + * y are unordered. + * + * fdim(x,y) raises overflow or inexact if x > y and x - y overflows + * or is inexact. It raises invalid if either operand is a signaling + * NaN. Otherwise, it raises no exceptions. + */ + +#include "libm.h" /* for islessequal macro */ + +double +__fdim(double x, double y) { + if (islessequal(x, y)) { + x = 0.0; + y = -x; + } + return (x - y); +} diff --git a/usr/src/lib/libm/common/m9x/fdimf.c b/usr/src/lib/libm/common/m9x/fdimf.c new file mode 100644 index 0000000000..db8524946b --- /dev/null +++ b/usr/src/lib/libm/common/m9x/fdimf.c @@ -0,0 +1,55 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#if defined(ELFOBJ) +#pragma weak fdimf = __fdimf +#endif + +#include "libm.h" /* for islessequal macro */ + +float +__fdimf(float x, float y) { + /* + * On SPARC v8plus/v9, this could be implemented as follows + * (assuming %f0 = x, %f1 = y, return value left in %f0): + * + * fcmps %fcc0,%f0,%f1 + * st %g0,[scratch] ! use fzero instead of st/ld + * ld [scratch],%f2 ! if VIS is available + * fnegs %f2,%f3 + * fmovsle %fcc0,%f2,%f0 + * fmovsle %fcc0,%f3,%f1 + * fsubs %f0,%f1,%f0 + */ + if (islessequal(x, y)) { + x = 0.0f; + y = -x; + } + return (x - y); +} diff --git a/usr/src/lib/libm/common/m9x/fdiml.c b/usr/src/lib/libm/common/m9x/fdiml.c new file mode 100644 index 0000000000..967e8f03e3 --- /dev/null +++ b/usr/src/lib/libm/common/m9x/fdiml.c @@ -0,0 +1,43 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#if defined(ELFOBJ) +#pragma weak fdiml = __fdiml +#endif + +#include "libm.h" /* for islessequal macro */ + +long double +__fdiml(long double x, long double y) { + if (islessequal(x, y)) { + x = 0.0l; + y = -x; + } + return (x - y); +} diff --git a/usr/src/lib/libm/common/m9x/feexcept.c b/usr/src/lib/libm/common/m9x/feexcept.c new file mode 100644 index 0000000000..03fe34fc5f --- /dev/null +++ b/usr/src/lib/libm/common/m9x/feexcept.c @@ -0,0 +1,138 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. 
All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak feclearexcept = __feclearexcept +#pragma weak feraiseexcept = __feraiseexcept +#pragma weak fetestexcept = __fetestexcept +#pragma weak fegetexceptflag = __fegetexceptflag +#pragma weak fesetexceptflag = __fesetexceptflag + +#pragma weak feclearexcept96 = __feclearexcept +#pragma weak feraiseexcept96 = __feraiseexcept +#pragma weak fetestexcept96 = __fetestexcept +#pragma weak fegetexceptflag96 = __fegetexceptflag +#pragma weak fesetexceptflag96 = __fesetexceptflag + +#include "fenv_synonyms.h" +#include <fenv.h> +#include <sys/ieeefp.h> +#include <ucontext.h> +#include <thread.h> +#include "fex_handler.h" +#include "fenv_inlines.h" + + +int feclearexcept(int e) +{ + unsigned long fsr; + + __fenv_getfsr(&fsr); + __fenv_set_ex(fsr, __fenv_get_ex(fsr) & ~e); + __fenv_setfsr(&fsr); + if (fex_get_log()) + __fex_update_te(); + return 0; +} + +/* +* note - __fex_hdlr depends on fetestexcept following feraiseexcept +*/ +int feraiseexcept(int e) +{ + volatile double t; + unsigned long fsr; + + if (e & FE_INVALID) { + t = 0.0; + t /= 0.0; + } + if (e & FE_DIVBYZERO) { + t = 1.0e300; + t /= 0.0; + } + if (e & FE_OVERFLOW) { + /* if overflow is not trapped, avoid raising inexact */ + __fenv_getfsr(&fsr); + if (!(__fenv_get_te(fsr) & (1 << fp_trap_overflow))) { + __fenv_set_ex(fsr, __fenv_get_ex(fsr) | FE_OVERFLOW); + __fenv_setfsr(&fsr); + } + else { + t = 1.0e300; + t *= 1.0e300; + } + } + if (e & FE_UNDERFLOW) { + /* if underflow is not trapped, avoid raising inexact */ + __fenv_getfsr(&fsr); + if (!(__fenv_get_te(fsr) & (1 << fp_trap_underflow))) { + __fenv_set_ex(fsr, __fenv_get_ex(fsr) | FE_UNDERFLOW); + __fenv_setfsr(&fsr); + } + else { + t = 1.0e-307; + t -= 1.001e-307; + } + } + if (e & FE_INEXACT) { + t = 1.0e300; + t += 1.0e-307; + } + return 0; +} + +int fetestexcept(int e) +{ + unsigned long fsr; + + __fenv_getfsr(&fsr); + return (int)__fenv_get_ex(fsr) & e; +} + +int 
fegetexceptflag(fexcept_t *p, int e) +{ + unsigned long fsr; + + __fenv_getfsr(&fsr); + *p = (int)__fenv_get_ex(fsr) & e; + return 0; +} + +int fesetexceptflag(const fexcept_t *p, int e) +{ + unsigned long fsr; + + __fenv_getfsr(&fsr); + __fenv_set_ex(fsr, (((int)__fenv_get_ex(fsr) & ~e) | (*p & e)) & + FE_ALL_EXCEPT); + __fenv_setfsr(&fsr); + if (fex_get_log()) + __fex_update_te(); + return 0; +} diff --git a/usr/src/lib/libm/common/m9x/fenv.c b/usr/src/lib/libm/common/m9x/fenv.c new file mode 100644 index 0000000000..cf8f2ca72b --- /dev/null +++ b/usr/src/lib/libm/common/m9x/fenv.c @@ -0,0 +1,118 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma weak fex_merge_flags = __fex_merge_flags + +#pragma weak feholdexcept = __feholdexcept +#pragma weak feupdateenv = __feupdateenv +#pragma weak fegetenv = __fegetenv +#pragma weak fesetenv = __fesetenv + +#pragma weak feholdexcept96 = __feholdexcept96 +#pragma weak feupdateenv96 = __feupdateenv +#pragma weak fegetenv96 = __fegetenv +#pragma weak fesetenv96 = __fesetenv + +#include "fenv_synonyms.h" +#include <fenv.h> +#include <ucontext.h> +#include <thread.h> +#include "fex_handler.h" +#include "fenv_inlines.h" + +const fenv_t __fenv_dfl_env = { + { + { FEX_NONSTOP, (void(*)())0 }, + { FEX_NONSTOP, (void(*)())0 }, + { FEX_NONSTOP, (void(*)())0 }, + { FEX_NONSTOP, (void(*)())0 }, + { FEX_NONSTOP, (void(*)())0 }, + { FEX_NONSTOP, (void(*)())0 }, + { FEX_NONSTOP, (void(*)())0 }, + { FEX_NONSTOP, (void(*)())0 }, + { FEX_NONSTOP, (void(*)())0 }, + { FEX_NONSTOP, (void(*)())0 }, + { FEX_NONSTOP, (void(*)())0 }, + { FEX_NONSTOP, (void(*)())0 }, + }, +#ifdef __x86 + 0x13000000 +#else + 0 +#endif +}; + +int feholdexcept(fenv_t *p) +{ + (void) fegetenv(p); + (void) feclearexcept(FE_ALL_EXCEPT); + return !fex_set_handling(FEX_ALL, FEX_NONSTOP, NULL); +} + +int feholdexcept96(fenv_t *p) +{ + (void) fegetenv(p); + (void) feclearexcept(FE_ALL_EXCEPT); + return fex_set_handling(FEX_ALL, FEX_NONSTOP, NULL); +} + +int feupdateenv(const fenv_t *p) +{ + unsigned long fsr; + + __fenv_getfsr(&fsr); + (void) fesetenv(p); + (void) feraiseexcept((int)__fenv_get_ex(fsr)); + return 0; +} + +int fegetenv(fenv_t *p) +{ + fex_getexcepthandler(&p->__handlers, FEX_ALL); + __fenv_getfsr(&p->__fsr); + return 0; +} + +int fesetenv(const fenv_t *p) +{ + __fenv_setfsr(&p->__fsr); + fex_setexcepthandler(&p->__handlers, FEX_ALL); + return 0; +} + +void fex_merge_flags(const fenv_t *p) +{ + unsigned long fsr; + + __fenv_getfsr(&fsr); + __fenv_set_ex(fsr, __fenv_get_ex(fsr) | __fenv_get_ex(p->__fsr)); + __fenv_setfsr(&fsr); + if (fex_get_log()) + __fex_update_te(); +} diff --git 
a/usr/src/lib/libm/common/m9x/fenv_inlines.h b/usr/src/lib/libm/common/m9x/fenv_inlines.h new file mode 100644 index 0000000000..945ec424d0 --- /dev/null +++ b/usr/src/lib/libm/common/m9x/fenv_inlines.h @@ -0,0 +1,687 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2011, Richard Lowe + */ + +#ifndef _FENV_INLINES_H +#define _FENV_INLINES_H + +#ifdef __GNUC__ + +#ifdef __cplusplus +extern "C" { +#endif + +#include <sys/types.h> + +#if defined(__x86) + +/* + * Floating point Control Word and Status Word + * Definition should actually be shared with x86 + * (much of this 'amd64' code can be, in fact.) 
+ */ +union fp_cwsw { + uint32_t cwsw; + struct { + uint16_t cw; + uint16_t sw; + } words; +}; + +extern __inline__ void +__fenv_getcwsw(unsigned int *value) +{ + union fp_cwsw *u = (union fp_cwsw *)value; + + __asm__ __volatile__( + "fstsw %0\n\t" + "fstcw %1\n\t" + : "=m" (u->words.cw), "=m" (u->words.sw)); +} + +extern __inline__ void +__fenv_setcwsw(const unsigned int *value) +{ + union fp_cwsw cwsw; + short fenv[16]; + + cwsw.cwsw = *value; + + __asm__ __volatile__( + "fstenv %0\n\t" + "movw %4,%1\n\t" + "movw %3,%2\n\t" + "fldenv %0\n\t" + "fwait\n\t" + : "=m" (fenv), "=m" (fenv[0]), "=m" (fenv[2]) + : "r" (cwsw.words.cw), "r" (cwsw.words.sw) + /* For practical purposes, we clobber the whole FPU */ + : "cc", "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", + "st(6)", "st(7)"); +} + +extern __inline__ void +__fenv_getmxcsr(unsigned int *value) +{ + __asm__ __volatile__("stmxcsr %0" : "=m" (*value)); +} + +extern __inline__ void +__fenv_setmxcsr(const unsigned int *value) +{ + __asm__ __volatile__("ldmxcsr %0" : : "m" (*value)); +} + +extern __inline__ long double +f2xm1(long double x) +{ + long double ret; + + __asm__ __volatile__("f2xm1" : "=t" (ret) : "0" (x) : "cc"); + return (ret); +} + +extern __inline__ long double +fyl2x(long double y, long double x) +{ + long double ret; + + __asm__ __volatile__("fyl2x" + : "=t" (ret) + : "0" (x), "u" (y) + : "st(1)", "cc"); + return (ret); +} + +extern __inline__ long double +fptan(long double x) +{ + /* + * fptan pushes 1.0 then the result on completion, so we want to pop + * the FP stack twice, so we need a dummy value into which to pop it. 
+ */ + long double ret; + long double dummy; + + __asm__ __volatile__("fptan" + : "=t" (dummy), "=u" (ret) + : "0" (x) + : "cc"); + return (ret); +} + +extern __inline__ long double +fpatan(long double x, long double y) +{ + long double ret; + + __asm__ __volatile__("fpatan" + : "=t" (ret) + : "0" (y), "u" (x) + : "st(1)", "cc"); + return (ret); +} + +extern __inline__ long double +fxtract(long double x) +{ + __asm__ __volatile__("fxtract" : "+t" (x) : : "cc"); + return (x); +} + +extern __inline__ long double +fprem1(long double idend, long double div) +{ + __asm__ __volatile__("fprem1" : "+t" (div) : "u" (idend) : "cc"); + return (div); +} + +extern __inline__ long double +fprem(long double idend, long double div) +{ + __asm__ __volatile__("fprem" : "+t" (div) : "u" (idend) : "cc"); + return (div); +} + +extern __inline__ long double +fyl2xp1(long double y, long double x) +{ + long double ret; + + __asm__ __volatile__("fyl2xp1" + : "=t" (ret) + : "0" (x), "u" (y) + : "st(1)", "cc"); + return (ret); +} + +extern __inline__ long double +fsqrt(long double x) +{ + __asm__ __volatile__("fsqrt" : "+t" (x) : : "cc"); + return (x); +} + +extern __inline__ long double +fsincos(long double x) +{ + long double dummy; + + __asm__ __volatile__("fsincos" : "+t" (x), "=u" (dummy) : : "cc"); + return (x); +} + +extern __inline__ long double +frndint(long double x) +{ + __asm__ __volatile__("frndint" : "+t" (x) : : "cc"); + return (x); +} + +extern __inline__ long double +fscale(long double x, long double y) +{ + long double ret; + + __asm__ __volatile__("fscale" : "=t" (ret) : "0" (y), "u" (x) : "cc"); + return (ret); +} + +extern __inline__ long double +fsin(long double x) +{ + __asm__ __volatile__("fsin" : "+t" (x) : : "cc"); + return (x); +} + +extern __inline__ long double +fcos(long double x) +{ + __asm__ __volatile__("fcos" : "+t" (x) : : "cc"); + return (x); +} + +extern __inline__ void +sse_cmpeqss(float *f1, float *f2, int *i1) +{ + __asm__ __volatile__( + "cmpeqss %2, 
%1\n\t" + "movss %1, %0" + : "=m" (*i1), "+x" (*f1) + : "x" (*f2) + : "cc"); +} + +extern __inline__ void +sse_cmpltss(float *f1, float *f2, int *i1) +{ + __asm__ __volatile__( + "cmpltss %2, %1\n\t" + "movss %1, %0" + : "=m" (*i1), "+x" (*f1) + : "x" (*f2) + : "cc"); +} + +extern __inline__ void +sse_cmpless(float *f1, float *f2, int *i1) +{ + __asm__ __volatile__( + "cmpless %2, %1\n\t" + "movss %1, %0" + : "=m" (*i1), "+x" (*f1) + : "x" (*f2) + : "cc"); +} + +extern __inline__ void +sse_cmpunordss(float *f1, float *f2, int *i1) +{ + __asm__ __volatile__( + "cmpunordss %2, %1\n\t" + "movss %1, %0" + : "=m" (*i1), "+x" (*f1) + : "x" (*f2) + : "cc"); +} + +extern __inline__ void +sse_minss(float *f1, float *f2, float *f3) +{ + __asm__ __volatile__( + "minss %2, %1\n\t" + "movss %1, %0" + : "=m" (*f3), "+x" (*f1) + : "x" (*f2)); +} + +extern __inline__ void +sse_maxss(float *f1, float *f2, float *f3) +{ + __asm__ __volatile__( + "maxss %2, %1\n\t" + "movss %1, %0" + : "=m" (*f3), "+x" (*f1) + : "x" (*f2)); +} + +extern __inline__ void +sse_addss(float *f1, float *f2, float *f3) +{ + __asm__ __volatile__( + "addss %2, %1\n\t" + "movss %1, %0" + : "=m" (*f3), "+x" (*f1) + : "x" (*f2)); +} + +extern __inline__ void +sse_subss(float *f1, float *f2, float *f3) +{ + __asm__ __volatile__( + "subss %2, %1\n\t" + "movss %1, %0" + : "=m" (*f3), "+x" (*f1) + : "x" (*f2)); +} + +extern __inline__ void +sse_mulss(float *f1, float *f2, float *f3) +{ + __asm__ __volatile__( + "mulss %2, %1\n\t" + "movss %1, %0" + : "=m" (*f3), "+x" (*f1) + : "x" (*f2)); +} + +extern __inline__ void +sse_divss(float *f1, float *f2, float *f3) +{ + __asm__ __volatile__( + "divss %2, %1\n\t" + "movss %1, %0" + : "=m" (*f3), "+x" (*f1) + : "x" (*f2)); +} + +extern __inline__ void +sse_sqrtss(float *f1, float *f2) +{ + double tmp; + + __asm__ __volatile__( + "sqrtss %2, %1\n\t" + "movss %1, %0" + : "=m" (*f2), "=x" (tmp) + : "m" (*f1)); +} + +extern __inline__ void +sse_ucomiss(float *f1, float *f2) +{ 
+ __asm__ __volatile__("ucomiss %1, %0" : : "x" (*f1), "x" (*f2)); + +} + +extern __inline__ void +sse_comiss(float *f1, float *f2) +{ + __asm__ __volatile__("comiss %1, %0" : : "x" (*f1), "x" (*f2)); +} + +extern __inline__ void +sse_cvtss2sd(float *f1, double *d1) +{ + double tmp; + + __asm__ __volatile__( + "cvtss2sd %2, %1\n\t" + "movsd %1, %0" + : "=m" (*d1), "=x" (tmp) + : "m" (*f1)); +} + +extern __inline__ void +sse_cvtsi2ss(int *i1, float *f1) +{ + double tmp; + + __asm__ __volatile__( + "cvtsi2ss %2, %1\n\t" + "movss %1, %0" + : "=m" (*f1), "=x" (tmp) + : "m" (*i1)); +} + +extern __inline__ void +sse_cvttss2si(float *f1, int *i1) +{ + int tmp; + + __asm__ __volatile__( + "cvttss2si %2, %1\n\t" + "movl %1, %0" + : "=m" (*i1), "=r" (tmp) + : "m" (*f1)); +} + +extern __inline__ void +sse_cvtss2si(float *f1, int *i1) +{ + int tmp; + + __asm__ __volatile__( + "cvtss2si %2, %1\n\t" + "movl %1, %0" + : "=m" (*i1), "=r" (tmp) + : "m" (*f1)); +} + +#if defined(__amd64) +extern __inline__ void +sse_cvtsi2ssq(long long *ll1, float *f1) +{ + double tmp; + + __asm__ __volatile__( + "cvtsi2ssq %2, %1\n\t" + "movss %1, %0" + : "=m" (*f1), "=x" (tmp) + : "m" (*ll1)); +} + +extern __inline__ void +sse_cvttss2siq(float *f1, long long *ll1) +{ + uint64_t tmp; + + __asm__ __volatile__( + "cvttss2siq %2, %1\n\t" + "movq %1, %0" + : "=m" (*ll1), "=r" (tmp) + : "m" (*f1)); +} + +extern __inline__ void +sse_cvtss2siq(float *f1, long long *ll1) +{ + uint64_t tmp; + + __asm__ __volatile__( + "cvtss2siq %2, %1\n\t" + "movq %1, %0" + : "=m" (*ll1), "=r" (tmp) + : "m" (*f1)); +} + +#endif + +extern __inline__ void +sse_cmpeqsd(double *d1, double *d2, long long *ll1) +{ + __asm__ __volatile__( + "cmpeqsd %2,%1\n\t" + "movsd %1,%0" + : "=m" (*ll1), "+x" (*d1) + : "x" (*d2)); +} + +extern __inline__ void +sse_cmpltsd(double *d1, double *d2, long long *ll1) +{ + __asm__ __volatile__( + "cmpltsd %2,%1\n\t" + "movsd %1,%0" + : "=m" (*ll1), "+x" (*d1) + : "x" (*d2)); +} + +extern __inline__ 
void +sse_cmplesd(double *d1, double *d2, long long *ll1) +{ + __asm__ __volatile__( + "cmplesd %2,%1\n\t" + "movsd %1,%0" + : "=m" (*ll1), "+x" (*d1) + : "x" (*d2)); +} + +extern __inline__ void +sse_cmpunordsd(double *d1, double *d2, long long *ll1) +{ + __asm__ __volatile__( + "cmpunordsd %2,%1\n\t" + "movsd %1,%0" + : "=m" (*ll1), "+x" (*d1) + : "x" (*d2)); +} + + +extern __inline__ void +sse_minsd(double *d1, double *d2, double *d3) +{ + __asm__ __volatile__( + "minsd %2,%1\n\t" + "movsd %1,%0" + : "=m" (*d3), "+x" (*d1) + : "x" (*d2)); +} + +extern __inline__ void +sse_maxsd(double *d1, double *d2, double *d3) +{ + __asm__ __volatile__( + "maxsd %2,%1\n\t" + "movsd %1,%0" + : "=m" (*d3), "+x" (*d1) + : "x" (*d2)); +} + +extern __inline__ void +sse_addsd(double *d1, double *d2, double *d3) +{ + __asm__ __volatile__( + "addsd %2,%1\n\t" + "movsd %1,%0" + : "=m" (*d3), "+x" (*d1) + : "x" (*d2)); +} + +extern __inline__ void +sse_subsd(double *d1, double *d2, double *d3) +{ + __asm__ __volatile__( + "subsd %2,%1\n\t" + "movsd %1,%0" + : "=m" (*d3), "+x" (*d1) + : "x" (*d2)); +} + +extern __inline__ void +sse_mulsd(double *d1, double *d2, double *d3) +{ + __asm__ __volatile__( + "mulsd %2,%1\n\t" + "movsd %1,%0" + : "=m" (*d3), "+x" (*d1) + : "x" (*d2)); +} + +extern __inline__ void +sse_divsd(double *d1, double *d2, double *d3) +{ + __asm__ __volatile__( + "divsd %2,%1\n\t" + "movsd %1,%0" + : "=m" (*d3), "+x" (*d1) + : "x" (*d2)); +} + +extern __inline__ void +sse_sqrtsd(double *d1, double *d2) +{ + double tmp; + + __asm__ __volatile__( + "sqrtsd %2, %1\n\t" + "movsd %1, %0" + : "=m" (*d2), "=x" (tmp) + : "m" (*d1)); +} + +extern __inline__ void +sse_ucomisd(double *d1, double *d2) +{ + __asm__ __volatile__("ucomisd %1, %0" : : "x" (*d1), "x" (*d2)); +} + +extern __inline__ void +sse_comisd(double *d1, double *d2) +{ + __asm__ __volatile__("comisd %1, %0" : : "x" (*d1), "x" (*d2)); +} + +extern __inline__ void +sse_cvtsd2ss(double *d1, float *f1) +{ + double 
tmp; + + __asm__ __volatile__( + "cvtsd2ss %2,%1\n\t" + "movss %1,%0" + : "=m" (*f1), "=x" (tmp) + : "m" (*d1)); +} + +extern __inline__ void +sse_cvtsi2sd(int *i1, double *d1) +{ + double tmp; + __asm__ __volatile__( + "cvtsi2sd %2,%1\n\t" + "movsd %1,%0" + : "=m" (*d1), "=x" (tmp) + : "m" (*i1)); +} + +extern __inline__ void +sse_cvttsd2si(double *d1, int *i1) +{ + int tmp; + + __asm__ __volatile__( + "cvttsd2si %2,%1\n\t" + "movl %1,%0" + : "=m" (*i1), "=r" (tmp) + : "m" (*d1)); +} + +extern __inline__ void +sse_cvtsd2si(double *d1, int *i1) +{ + int tmp; + + __asm__ __volatile__( + "cvtsd2si %2,%1\n\t" + "movl %1,%0" + : "=m" (*i1), "=r" (tmp) + : "m" (*d1)); +} + +#if defined(__amd64) +extern __inline__ void +sse_cvtsi2sdq(long long *ll1, double *d1) +{ + double tmp; + + __asm__ __volatile__( + "cvtsi2sdq %2,%1\n\t" + "movsd %1,%0" + : "=m" (*d1), "=x" (tmp) + : "m" (*ll1)); +} + +extern __inline__ void +sse_cvttsd2siq(double *d1, long long *ll1) +{ + uint64_t tmp; + + __asm__ __volatile__( + "cvttsd2siq %2,%1\n\t" + "movq %1,%0" + : "=m" (*ll1), "=r" (tmp) + : "m" (*d1)); +} + +extern __inline__ void +sse_cvtsd2siq(double *d1, long long *ll1) +{ + uint64_t tmp; + + __asm__ __volatile__( + "cvtsd2siq %2,%1\n\t" + "movq %1,%0" + : "=m" (*ll1), "=r" (tmp) + : "m" (*d1)); +} +#endif + +#elif defined(__sparc) +extern __inline__ void +__fenv_getfsr(unsigned long *l) +{ + __asm__ __volatile__( +#if defined(__sparcv9) + "stx %%fsr,%0\n\t" +#else + "st %%fsr,%0\n\t" +#endif + : "=m" (*l)); +} + +extern __inline__ void +__fenv_setfsr(const unsigned long *l) +{ + __asm__ __volatile__( +#if defined(__sparcv9) + "ldx %0,%%fsr\n\t" +#else + "ld %0,%%fsr\n\t" +#endif + : : "m" (*l) : "cc"); +} + +extern __inline__ void +__fenv_getfsr32(unsigned int *l) +{ + __asm__ __volatile__("st %%fsr,%0\n\t" : "=m" (*l)); +} + +extern __inline__ void +__fenv_setfsr32(const unsigned int *l) +{ + __asm__ __volatile__("ld %0,%%fsr\n\t" : : "m" (*l)); +} +#else +#error "GCC FENV inlines not 
implemented for this platform" +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* __GNUC__ */ + +#endif /* _FENV_INLINES_H */ diff --git a/usr/src/lib/libm/common/m9x/fenv_synonyms.h b/usr/src/lib/libm/common/m9x/fenv_synonyms.h new file mode 100644 index 0000000000..3c7e3b0caa --- /dev/null +++ b/usr/src/lib/libm/common/m9x/fenv_synonyms.h @@ -0,0 +1,108 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */

/*
 * Private name-mapping header for the m9x sources.
 *
 * Every macro below rewrites the identifier on the left to the spelling
 * on the right for the rest of any file that includes this header.  For
 * the fe* and fex_* names this means a source file that appears to
 * define, say, fegetround() really defines __fegetround(); feprec.c,
 * feround.c, fex_handler.c and fex_log.c pair this with
 * "#pragma weak name = __name" placed before the #include.
 */
#ifndef _M9X_FENV_SYNONYMS_H
#define	_M9X_FENV_SYNONYMS_H

#include <sys/isa_defs.h>

/* feexcept.c */
#define	feclearexcept	__feclearexcept
#define	feraiseexcept	__feraiseexcept
#define	fetestexcept	__fetestexcept
#define	fegetexceptflag	__fegetexceptflag
#define	fesetexceptflag	__fesetexceptflag

/* fenv.c */
#define	feholdexcept	__feholdexcept
#define	feholdexcept96	__feholdexcept96
#define	feupdateenv	__feupdateenv
#define	fegetenv	__fegetenv
#define	fesetenv	__fesetenv
#define	fex_merge_flags	__fex_merge_flags

/* The precision-control entry points are only mapped on x86. */
#if defined(__x86)
/* feprec.c */
#define	fegetprec	__fegetprec
#define	fesetprec	__fesetprec
#endif

/* feround.c */
#define	fegetround	__fegetround
#define	fesetround	__fesetround
#define	fesetround96	__fesetround96

/* fex_handler.c */
#define	fex_get_handling	__fex_get_handling
#define	fex_set_handling	__fex_set_handling
#define	fex_getexcepthandler	__fex_getexcepthandler
#define	fex_setexcepthandler	__fex_setexcepthandler

/* fex_log.c */
#define	fex_get_log	__fex_get_log
#define	fex_set_log	__fex_set_log
#define	fex_get_log_depth	__fex_get_log_depth
#define	fex_set_log_depth	__fex_set_log_depth
#define	fex_log_entry	__fex_log_entry

/*
 * libc, libthread
 *
 * Calls to these functions are redirected to single-underscore names.
 * NOTE(review): presumably these are the private libc/libthread entry
 * points, used so that an application's own definition of e.g. open()
 * cannot be picked up instead -- confirm against libc.
 *
 * Because these are plain object-like macros they also rewrite any
 * local variable, struct tag or member spelled "stat", "read", "open",
 * etc. in a file that includes this header.
 */
#define	close	_close
#define	getcontext	_getcontext
#define	getpid	_getpid
#define	kill	_kill
#define	lseek	_lseek
#define	mutex_lock	_mutex_lock
#define	mutex_unlock	_mutex_unlock
#define	open	_open
#define	read	_read
#define	sigaction	_sigaction
#define	sigemptyset	_sigemptyset
#define	sigismember	_sigismember
#define	sigprocmask	_sigprocmask
#define	stat	_stat
#define	thr_getspecific	_thr_getspecific
#define	thr_keycreate	_thr_keycreate
#define	thr_main	_thr_main
#define	thr_setspecific	_thr_setspecific
#define	write	_write

/*
 * fileno is remapped only for 64-bit SPARC.  The original author's note
 * reads "??? see V9 /usr/include/stdio.h"; the reason is not recorded
 * here.  TODO confirm against the sparcv9 <stdio.h> whether this alias
 * is still required.
 */
#ifdef __sparcv9
#define	fileno	_fileno
#endif

#ifdef __sparc
/* libm, libsunmath */
#define	fp_class	__fp_class
#define	fp_classf	__fp_classf
#define	sqrt	__sqrt
#define	sqrtf	__sqrtf
#endif

#endif /* _M9X_FENV_SYNONYMS_H */
diff --git a/usr/src/lib/libm/common/m9x/feprec.c b/usr/src/lib/libm/common/m9x/feprec.c new file mode 100644 index 0000000000..975210ce05 --- /dev/null +++ b/usr/src/lib/libm/common/m9x/feprec.c @@ -0,0 +1,57 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma weak fegetprec = __fegetprec +#pragma weak fesetprec = __fesetprec + +#include "fenv_synonyms.h" +#include <fenv.h> +#include <ucontext.h> +#include <thread.h> +#include "fex_handler.h" + +int fegetprec(void) +{ + unsigned long fsr; + + __fenv_getfsr(&fsr); + return __fenv_get_rp(fsr); +} + +int fesetprec(int r) +{ + unsigned long fsr; + + if (r != FE_FLTPREC && r != FE_DBLPREC && r != FE_LDBLPREC) + return 0; + __fenv_getfsr(&fsr); + __fenv_set_rp(fsr, r); + __fenv_setfsr(&fsr); + return 1; +} diff --git a/usr/src/lib/libm/common/m9x/feround.c b/usr/src/lib/libm/common/m9x/feround.c new file mode 100644 index 0000000000..5d56ae0c89 --- /dev/null +++ b/usr/src/lib/libm/common/m9x/feround.c @@ -0,0 +1,83 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma weak fegetround = __fegetround +#pragma weak fesetround = __fesetround + +#pragma weak fegetround96 = __fegetround +#pragma weak fesetround96 = __fesetround96 + +#include "fenv_synonyms.h" +#include <fenv.h> +#include <ucontext.h> +#include <thread.h> +#include "fex_handler.h" +#include "fenv_inlines.h" + +#if defined(__i386) && !defined(__amd64) +#include <float.h> +#endif + +int fegetround(void) +{ + unsigned long fsr; + + __fenv_getfsr(&fsr); + return (int)__fenv_get_rd(fsr); +} + +int fesetround(int r) +{ + unsigned long fsr; + + if (r & ~3) + return -1; + __fenv_getfsr(&fsr); + __fenv_set_rd(fsr, r); + __fenv_setfsr(&fsr); +#if defined(__i386) && !defined(__amd64) + FLT_ROUNDS = (0x2D >> (r << 1)) & 3; /* 0->1, 1->3, 2->2, 3->0 */ +#endif + return 0; +} + +int fesetround96(int r) +{ + unsigned long fsr; + + if (r & ~3) + return 0; + __fenv_getfsr(&fsr); + __fenv_set_rd(fsr, r); + __fenv_setfsr(&fsr); +#if defined(__i386) && !defined(__amd64) + FLT_ROUNDS = (0x2D >> (r << 1)) & 3; /* 0->1, 1->3, 2->2, 3->0 */ +#endif + return 1; +} diff --git a/usr/src/lib/libm/common/m9x/fex_handler.c b/usr/src/lib/libm/common/m9x/fex_handler.c new file mode 100644 index 0000000000..9e6210e523 --- /dev/null +++ b/usr/src/lib/libm/common/m9x/fex_handler.c @@ -0,0 +1,92 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak fex_get_handling = __fex_get_handling +#pragma weak fex_set_handling = __fex_set_handling +#pragma weak fex_getexcepthandler = __fex_getexcepthandler +#pragma weak fex_setexcepthandler = __fex_setexcepthandler + +#include "fenv_synonyms.h" +#include <fenv.h> +#include <ucontext.h> +#include <thread.h> +#include "fex_handler.h" + +int fex_get_handling(int e) +{ + struct fex_handler_data *thr_handlers; + int i; + + thr_handlers = __fex_get_thr_handlers(); + for (i = 0; i < FEX_NUM_EXC; i++) + if (e & (1 << i)) + return thr_handlers[i].__mode; + return FEX_NOHANDLER; +} + +int fex_set_handling(int e, int mode, void (*handler)()) +{ + struct fex_handler_data *thr_handlers; + int i; + + if (e & ~((1 << FEX_NUM_EXC) - 1)) + return 0; + thr_handlers = __fex_get_thr_handlers(); + for (i = 0; i < FEX_NUM_EXC; i++) { + if (e & (1 << i)) { + thr_handlers[i].__mode = mode; + thr_handlers[i].__handler = handler; + } + } + __fex_update_te(); + return 1; +} + +void fex_getexcepthandler(fex_handler_t *buf, int e) +{ + struct fex_handler_data *thr_handlers; + int i; + + thr_handlers = __fex_get_thr_handlers(); + for (i = 0; i < FEX_NUM_EXC; i++) + if (e & (1 << i)) + (*buf)[i] = thr_handlers[i]; +} + +void fex_setexcepthandler(const fex_handler_t *buf, int e) +{ + struct fex_handler_data *thr_handlers; + int i; + + thr_handlers = __fex_get_thr_handlers(); + for (i = 0; i < FEX_NUM_EXC; i++) + if (e & (1 << i)) + thr_handlers[i] = (*buf)[i]; + __fex_update_te(); +} diff --git a/usr/src/lib/libm/common/m9x/fex_handler.h 
b/usr/src/lib/libm/common/m9x/fex_handler.h new file mode 100644 index 0000000000..45f2d0713f --- /dev/null +++ b/usr/src/lib/libm/common/m9x/fex_handler.h @@ -0,0 +1,217 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _M9X_FEX_HANDLER_H +#define _M9X_FEX_HANDLER_H + +/* the following enums must match the bit positions in fenv.h */ +enum fex_exception { + fex_inexact = 0, + fex_division = 1, + fex_underflow = 2, + fex_overflow = 3, + fex_inv_zdz = 4, + fex_inv_idi = 5, + fex_inv_isi = 6, + fex_inv_zmi = 7, + fex_inv_sqrt = 8, + fex_inv_snan = 9, + fex_inv_int = 10, + fex_inv_cmp = 11 +}; + + +/* auxiliary functions in __fex_hdlr.c */ +extern struct fex_handler_data *__fex_get_thr_handlers(void); +extern void __fex_update_te(void); + +/* auxiliary functions in __fex_sym.c */ +extern void __fex_sym_init(void); +extern char *__fex_sym(char *, char **); + +/* auxiliary functions in fex_log.c */ +extern void __fex_mklog(ucontext_t *, char *, int, enum fex_exception, + int, void *); + +/* system-dependent auxiliary functions */ +extern enum fex_exception __fex_get_invalid_type(siginfo_t *, ucontext_t *); +extern void __fex_get_op(siginfo_t *, ucontext_t *, fex_info_t *); +extern void __fex_st_result(siginfo_t *, ucontext_t *, fex_info_t *); + +/* inline templates and macros for accessing fp state */ +extern void __fenv_getfsr(unsigned long *); +extern void __fenv_setfsr(const unsigned long *); + +#if defined(__sparc) + +#define __fenv_get_rd(X) ((X>>30)&0x3) +#define __fenv_set_rd(X,Y) X=(X&~0xc0000000ul)|((Y)<<30) + +#define __fenv_get_te(X) ((X>>23)&0x1f) +#define __fenv_set_te(X,Y) X=(X&~0x0f800000ul)|((Y)<<23) + +#define __fenv_get_ex(X) ((X>>5)&0x1f) +#define __fenv_set_ex(X,Y) X=(X&~0x000003e0ul)|((Y)<<5) + +#elif defined(__x86) + +extern void __fenv_getcwsw(unsigned int *); +extern void __fenv_setcwsw(const unsigned int *); + +extern void __fenv_getmxcsr(unsigned int *); +extern void __fenv_setmxcsr(const unsigned int *); + +#define __fenv_get_rd(X) ((X>>26)&3) +#define __fenv_set_rd(X,Y) X=(X&~0x0c000000)|((Y)<<26) + +#define __fenv_get_rp(X) ((X>>24)&3) +#define __fenv_set_rp(X,Y) X=(X&~0x03000000)|((Y)<<24) + +#define __fenv_get_te(X) ((X>>16)&0x3d) +#define 
__fenv_set_te(X,Y) X=(X&~0x003d0000)|((Y)<<16) + +#define __fenv_get_ex(X) (X&0x3d) +#define __fenv_set_ex(X,Y) X=(X&~0x0000003d)|(Y) + +/* + * These macros define some useful distinctions between various + * SSE instructions. In some cases, distinctions are made for + * the purpose of simplifying the decoding of instructions, while + * in other cases, they are made for the purpose of simplying the + * emulation. Note that these values serve as bit flags within + * the enum values in sseinst_t. + */ +#define DOUBLE 0x100 +#define SIMD 0x080 +#define INTREG 0x040 + +typedef union { + double d[2]; + long long l[2]; + float f[4]; + int i[4]; +} sseoperand_t; + +/* structure to hold a decoded SSE instruction */ +typedef struct { + enum { + /* single precision scalar instructions */ + cmpss = 0, + minss = 1, + maxss = 2, + addss = 3, + subss = 4, + mulss = 5, + divss = 6, + sqrtss = 7, + ucomiss = 16, + comiss = 17, + cvtss2sd = 32, + cvtsi2ss = INTREG + 0, + cvttss2si = INTREG + 1, + cvtss2si = INTREG + 2, + cvtsi2ssq = INTREG + 8, + cvttss2siq = INTREG + 9, + cvtss2siq = INTREG + 10, + + /* single precision SIMD instructions */ + cmpps = SIMD + 0, + minps = SIMD + 1, + maxps = SIMD + 2, + addps = SIMD + 3, + subps = SIMD + 4, + mulps = SIMD + 5, + divps = SIMD + 6, + sqrtps = SIMD + 7, + cvtps2pd = SIMD + 32, + cvtdq2ps = SIMD + 34, + cvttps2dq = SIMD + 35, + cvtps2dq = SIMD + 36, + cvtpi2ps = SIMD + INTREG + 0, + cvttps2pi = SIMD + INTREG + 1, + cvtps2pi = SIMD + INTREG + 2, + + /* double precision scalar instructions */ + cmpsd = DOUBLE + 0, + minsd = DOUBLE + 1, + maxsd = DOUBLE + 2, + addsd = DOUBLE + 3, + subsd = DOUBLE + 4, + mulsd = DOUBLE + 5, + divsd = DOUBLE + 6, + sqrtsd = DOUBLE + 7, + ucomisd = DOUBLE + 16, + comisd = DOUBLE + 17, + cvtsd2ss = DOUBLE + 32, + cvtsi2sd = DOUBLE + INTREG + 0, + cvttsd2si = DOUBLE + INTREG + 1, + cvtsd2si = DOUBLE + INTREG + 2, + cvtsi2sdq = DOUBLE + INTREG + 8, + cvttsd2siq = DOUBLE + INTREG + 9, + cvtsd2siq = DOUBLE + 
INTREG + 10, + + /* double precision SIMD instructions */ + cmppd = DOUBLE + SIMD + 0, + minpd = DOUBLE + SIMD + 1, + maxpd = DOUBLE + SIMD + 2, + addpd = DOUBLE + SIMD + 3, + subpd = DOUBLE + SIMD + 4, + mulpd = DOUBLE + SIMD + 5, + divpd = DOUBLE + SIMD + 6, + sqrtpd = DOUBLE + SIMD + 7, + cvtpd2ps = DOUBLE + SIMD + 32, + cvtdq2pd = DOUBLE + SIMD + 34, + cvttpd2dq = DOUBLE + SIMD + 35, + cvtpd2dq = DOUBLE + SIMD + 36, + cvtpi2pd = DOUBLE + SIMD + INTREG + 0, + cvttpd2pi = DOUBLE + SIMD + INTREG + 1, + cvtpd2pi = DOUBLE + SIMD + INTREG + 2, + } op; + int imm; + sseoperand_t *op1, *op2; +} sseinst_t; + +/* x86-specific auxiliary functions */ +extern int *__fex_accrued(void); +extern void __fex_get_x86_exc(siginfo_t *, ucontext_t *); +extern int __fex_parse_sse(ucontext_t *, sseinst_t *); +extern enum fex_exception __fex_get_sse_op(ucontext_t *, sseinst_t *, + fex_info_t *); +extern void __fex_get_simd_op(ucontext_t *, sseinst_t *, + enum fex_exception *, fex_info_t *); +extern void __fex_st_sse_result(ucontext_t *, sseinst_t *, + enum fex_exception, fex_info_t *); +extern void __fex_st_simd_result(ucontext_t *, sseinst_t *, + enum fex_exception *, fex_info_t *); + +#else +#error Unknown architecture +#endif + +#endif /* _M9X_FEX_HANDLER_H */ diff --git a/usr/src/lib/libm/common/m9x/fex_log.c b/usr/src/lib/libm/common/m9x/fex_log.c new file mode 100644 index 0000000000..6840719fae --- /dev/null +++ b/usr/src/lib/libm/common/m9x/fex_log.c @@ -0,0 +1,399 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak fex_get_log = __fex_get_log +#pragma weak fex_set_log = __fex_set_log +#pragma weak fex_get_log_depth = __fex_get_log_depth +#pragma weak fex_set_log_depth = __fex_set_log_depth +#pragma weak fex_log_entry = __fex_log_entry + +#include "fenv_synonyms.h" +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <string.h> +#include <signal.h> +#include <ucontext.h> +#include <sys/frame.h> +#include <fenv.h> +#include <sys/ieeefp.h> +#include <thread.h> +#include "fex_handler.h" + +#if !defined(PC) +#if defined(REG_PC) +#define PC REG_PC +#else +#error Neither PC nor REG_PC is defined! 
+#endif +#endif + +static FILE *log_fp = NULL; +static mutex_t log_lock = DEFAULTMUTEX; +static int log_depth = 100; + +FILE *fex_get_log(void) +{ + FILE *fp; + + mutex_lock(&log_lock); + fp = log_fp; + mutex_unlock(&log_lock); + return fp; +} + +int fex_set_log(FILE *fp) +{ + mutex_lock(&log_lock); + log_fp = fp; + mutex_unlock(&log_lock); + __fex_update_te(); + return 1; +} + +int fex_get_log_depth(void) +{ + int d; + + mutex_lock(&log_lock); + d = log_depth; + mutex_unlock(&log_lock); + return d; +} + +int fex_set_log_depth(int d) +{ + if (d < 0) + return 0; + mutex_lock(&log_lock); + log_depth = d; + mutex_unlock(&log_lock); + return 1; +} + +static struct exc_list { + struct exc_list *next; + char *addr; + unsigned long code; + int nstack; + char *stack[1]; /* actual length is max(1,nstack) */ +} *list = NULL; + +#ifdef __sparcv9 +#define FRAMEP(X) (struct frame *)((char*)(X)+(((long)(X)&1)?2047:0)) +#else +#define FRAMEP(X) (struct frame *)(X) +#endif + +#ifdef _LP64 +#define PDIG "16" +#else +#define PDIG "8" +#endif + +/* look for a matching exc_list; return 1 if one is found, + otherwise add this one to the list and return 0 */ +static int check_exc_list(char *addr, unsigned long code, char *stk, + struct frame *fp) +{ + struct exc_list *l, *ll = NULL; + struct frame *f; + int i, n; + + if (list) { + for (l = list; l; ll = l, l = l->next) { + if (l->addr != addr || l->code != code) + continue; + if (log_depth < 1 || l->nstack < 1) + return 1; + if (l->stack[0] != stk) + continue; + n = 1; + for (i = 1, f = fp; i < log_depth && i < l->nstack && + f && f->fr_savpc; i++, f = FRAMEP(f->fr_savfp)) + if (l->stack[i] != (char *)f->fr_savpc) { + n = 0; + break; + } + if (n) + return 1; + } + } + + /* create a new exc_list structure and tack it on the list */ + for (n = 1, f = fp; n < log_depth && f && f->fr_savpc; + n++, f = FRAMEP(f->fr_savfp)) ; + if ((l = (struct exc_list *)malloc(sizeof(struct exc_list) + + (n - 1) * sizeof(char *))) != NULL) { + l->next = 
NULL; + l->addr = addr; + l->code = code; + l->nstack = ((log_depth < 1)? 0 : n); + l->stack[0] = stk; + for (i = 1; i < n; i++) { + l->stack[i] = (char *)fp->fr_savpc; + fp = FRAMEP(fp->fr_savfp); + } + if (list) + ll->next = l; + else + list = l; + } + return 0; +} + +/* +* Warning: cleverness ahead +* +* In the following code, the use of sprintf+write rather than fprintf +* to send output to the log file is intentional. The reason is that +* fprintf is not async-signal-safe. "But," you protest, "SIGFPE is +* not an asynchronous signal! It's always handled by the same thread +* that executed the fpop that provoked it." That's true, but a prob- +* lem arises because (i) base conversion in fprintf can cause a fp +* exception and (ii) my signal handler acquires a mutex lock before +* sending output to the log file (so that outputs for entries from +* different threads aren't interspersed). Therefore, if the code +* were to use fprintf, a deadlock could occur as follows: +* +* Thread A Thread B +* +* Incurs a fp exception, Calls fprintf, +* acquires log_lock acquires file rmutex lock +* +* Calls fprintf, Incurs a fp exception, +* waits for file rmutex lock waits for log_lock +* +* (I could just verify that fprintf doesn't hold the rmutex lock while +* it's doing the base conversion, but since efficiency is of little +* concern here, I opted for the safe and dumb route.) 
+*/ + +static void print_stack(int fd, char *addr, struct frame *fp) +{ + int i; + char *name, buf[30]; + + for (i = 0; i < log_depth && addr != NULL; i++) { + if (__fex_sym(addr, &name) != NULL) { + write(fd, buf, sprintf(buf, " 0x%0" PDIG "lx ", + (long)addr)); + write(fd, name, strlen(name)); + write(fd, "\n", 1); + if (!strcmp(name, "main")) + break; + } else { + write(fd, buf, sprintf(buf, " 0x%0" PDIG "lx\n", + (long)addr)); + } + if (fp == NULL) + break; + addr = (char *)fp->fr_savpc; + fp = FRAMEP(fp->fr_savfp); + } +} + +void fex_log_entry(const char *msg) +{ + ucontext_t uc; + struct frame *fp; + char *stk; + int fd; + + /* if logging is disabled, just return */ + mutex_lock(&log_lock); + if (log_fp == NULL) { + mutex_unlock(&log_lock); + return; + } + + /* get the frame pointer from the current context and + pop our own frame */ + getcontext(&uc); +#if defined(__sparc) || defined(__amd64) + fp = FRAMEP(uc.uc_mcontext.gregs[REG_SP]); +#elif defined(__i386) /* !defined(__amd64) */ + fp = FRAMEP(uc.uc_mcontext.gregs[EBP]); +#else +#error Unknown architecture +#endif + if (fp == NULL) { + mutex_unlock(&log_lock); + return; + } + stk = (char *)fp->fr_savpc; + fp = FRAMEP(fp->fr_savfp); + + /* if we've already logged this message here, don't make an entry */ + if (check_exc_list(stk, (unsigned long)msg, stk, fp)) { + mutex_unlock(&log_lock); + return; + } + + /* make an entry */ + fd = fileno(log_fp); + write(fd, "fex_log_entry: ", 15); + write(fd, msg, strlen(msg)); + write(fd, "\n", 1); + __fex_sym_init(); + print_stack(fd, stk, fp); + mutex_unlock(&log_lock); +} + +static const char *exception[FEX_NUM_EXC] = { + "inexact result", + "division by zero", + "underflow", + "overflow", + "invalid operation (0/0)", + "invalid operation (inf/inf)", + "invalid operation (inf-inf)", + "invalid operation (0*inf)", + "invalid operation (sqrt)", + "invalid operation (snan)", + "invalid operation (int)", + "invalid operation (cmp)" +}; + +void +__fex_mklog(ucontext_t 
*uap, char *addr, int f, enum fex_exception e, + int m, void *p) +{ + struct frame *fp; + char *stk, *name, buf[30]; + int fd; + + /* if logging is disabled, just return */ + mutex_lock(&log_lock); + if (log_fp == NULL) { + mutex_unlock(&log_lock); + return; + } + + /* get stack info */ +#if defined(__sparc) + stk = (char*)uap->uc_mcontext.gregs[REG_PC]; + fp = FRAMEP(uap->uc_mcontext.gregs[REG_SP]); +#elif defined(__amd64) + stk = (char*)uap->uc_mcontext.gregs[REG_PC]; + fp = FRAMEP(uap->uc_mcontext.gregs[REG_RBP]); +#elif defined(__i386) /* !defined(__amd64) */ + stk = (char*)uap->uc_mcontext.gregs[PC]; + fp = FRAMEP(uap->uc_mcontext.gregs[EBP]); +#else +#error Unknown architecture +#endif + + /* if the handling mode is the default and this exception's + flag is already raised, don't make an entry */ + if (m == FEX_NONSTOP) { + switch (e) { + case fex_inexact: + if (f & FE_INEXACT) { + mutex_unlock(&log_lock); + return; + } + break; + case fex_underflow: + if (f & FE_UNDERFLOW) { + mutex_unlock(&log_lock); + return; + } + break; + case fex_overflow: + if (f & FE_OVERFLOW) { + mutex_unlock(&log_lock); + return; + } + break; + case fex_division: + if (f & FE_DIVBYZERO) { + mutex_unlock(&log_lock); + return; + } + break; + default: + if (f & FE_INVALID) { + mutex_unlock(&log_lock); + return; + } + break; + } + } + + /* if we've already logged this exception at this address, + don't make an entry */ + if (check_exc_list(addr, (unsigned long)e, stk, fp)) { + mutex_unlock(&log_lock); + return; + } + + /* make an entry */ + fd = fileno(log_fp); + write(fd, "Floating point ", 15); + write(fd, exception[e], strlen(exception[e])); + write(fd, buf, sprintf(buf, " at 0x%0" PDIG "lx", (long)addr)); + __fex_sym_init(); + if (__fex_sym(addr, &name) != NULL) { + write(fd, " ", 1); + write(fd, name, strlen(name)); + } + switch (m) { + case FEX_NONSTOP: + write(fd, ", nonstop mode\n", 15); + break; + + case FEX_ABORT: + write(fd, ", abort\n", 8); + break; + + case FEX_NOHANDLER: + 
if (p == (void *)SIG_DFL) { + write(fd, ", handler: SIG_DFL\n", 19); + break; + } + else if (p == (void *)SIG_IGN) { + write(fd, ", handler: SIG_IGN\n", 19); + break; + } + /* fall through*/ + default: + write(fd, ", handler: ", 11); + if (__fex_sym((char *)p, &name) != NULL) { + write(fd, name, strlen(name)); + write(fd, "\n", 1); + } else { + write(fd, buf, sprintf(buf, "0x%0" PDIG "lx\n", + (long)p)); + } + break; + } + print_stack(fd, stk, fp); + mutex_unlock(&log_lock); +} diff --git a/usr/src/lib/libm/common/m9x/fma.c b/usr/src/lib/libm/common/m9x/fma.c new file mode 100644 index 0000000000..f06349a2c4 --- /dev/null +++ b/usr/src/lib/libm/common/m9x/fma.c @@ -0,0 +1,497 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#if defined(ELFOBJ) +#pragma weak fma = __fma +#endif + +#include "libm.h" +#include "fma.h" +#include "fenv_inlines.h" + +#if defined(__sparc) +/* + * Table of constants, each given as the high and low 32-bit words of a + * double; the macros that follow name the individual entries. + */ +static const union { + unsigned i[2]; + double d; +} C[] = { + { 0x3fe00000u, 0 }, + { 0x40000000u, 0 }, + { 0x43300000u, 0 }, + { 0x41a00000u, 0 }, + { 0x3e500000u, 0 }, + { 0x3df00000u, 0 }, + { 0x3bf00000u, 0 }, + { 0x7fe00000u, 0 }, + { 0x00100000u, 0 }, + { 0x00100001u, 0 } +}; + +#define half C[0].d +#define two C[1].d +#define two52 C[2].d +#define two27 C[3].d +#define twom26 C[4].d +#define twom32 C[5].d +#define twom64 C[6].d +#define huge C[7].d +#define tiny C[8].d +#define tiny2 C[9].d +/* value stored into the fsr by __fma to select round-to-negative-infinity mode */ +static const unsigned int fsr_rm = 0xc0000000u; + +/* + * fma for SPARC: 64-bit double precision, big-endian + * + * Returns x * y + z.  Operands that are inf, nan, or zero are handled + * with ordinary arithmetic.  Otherwise the product and the sum are formed + * in 32-bit integer words while the fsr is set to fsr_rm, the result is + * rounded once according to the rounding direction saved from the + * caller's fsr, and the fsr is restored with exceptions raised as needed. + */ +double +__fma(double x, double y, double z) { + union { + unsigned i[2]; + double d; + } xx, yy, zz; + double xhi, yhi, xlo, ylo, t; + unsigned int xy0, xy1, xy2, xy3, z0, z1, z2, z3, fsr, rm, sticky; + int hx, hy, hz, ex, ey, ez, exy, sxy, sz, e, ibit; + volatile double dummy; + + /* extract the high order words of the arguments */ + xx.d = x; + yy.d = y; + zz.d = z; + hx = xx.i[0] & ~0x80000000; + hy = yy.i[0] & ~0x80000000; + hz = zz.i[0] & ~0x80000000; + + /* dispense with inf, nan, and zero cases */ + if (hx >= 0x7ff00000 || hy >= 0x7ff00000 || (hx | xx.i[1]) == 0 || + (hy | yy.i[1]) == 0) /* x or y is inf, nan, or zero */ + return (x * y + z); + + if (hz >= 0x7ff00000) /* z is inf or nan */ + return (x + z); /* avoid spurious under/overflow in x * y */ + + if ((hz | zz.i[1]) == 0) /* z is zero */ + /* + * x * y isn't zero but could underflow to zero, + * so don't add z, lest we perturb the sign + */ + return (x * y); + + /* + * now x, y, and z are all finite and nonzero; save the fsr and + * set round-to-negative-infinity mode (and clear nonstandard + * mode before we try to scale subnormal operands) + */ + __fenv_getfsr32(&fsr); + __fenv_setfsr32(&fsr_rm); + + /* extract signs and exponents, and 
normalize subnormals */ + sxy = (xx.i[0] ^ yy.i[0]) & 0x80000000; + sz = zz.i[0] & 0x80000000; + ex = hx >> 20; + if (!ex) { + xx.d = x * two52; + ex = ((xx.i[0] & ~0x80000000) >> 20) - 52; + } + ey = hy >> 20; + if (!ey) { + yy.d = y * two52; + ey = ((yy.i[0] & ~0x80000000) >> 20) - 52; + } + ez = hz >> 20; + if (!ez) { + zz.d = z * two52; + ez = ((zz.i[0] & ~0x80000000) >> 20) - 52; + } + + /* multiply x*y to 106 bits */ + exy = ex + ey - 0x3ff; + xx.i[0] = (xx.i[0] & 0xfffff) | 0x3ff00000; + yy.i[0] = (yy.i[0] & 0xfffff) | 0x3ff00000; + x = xx.d; + y = yy.d; /* x and y now carry exponent field 0x3ff, i.e. lie in [1, 2) */ + xhi = ((x + twom26) + two27) - two27; + yhi = ((y + twom26) + two27) - two27; + xlo = x - xhi; + ylo = y - yhi; /* xlo, ylo: what is left of x and y once xhi and yhi are removed */ + x *= y; + y = ((xhi * yhi - x) + xhi * ylo + xlo * yhi) + xlo * ylo; + if (x >= two) { + x *= half; + y *= half; + exy++; + } + + /* extract the significands */ + xx.d = x; + xy0 = (xx.i[0] & 0xfffff) | 0x100000; + xy1 = xx.i[1]; + yy.d = t = y + twom32; + xy2 = yy.i[1]; + yy.d = (y - (t - twom32)) + twom64; + xy3 = yy.i[1]; + z0 = (zz.i[0] & 0xfffff) | 0x100000; + z1 = zz.i[1]; + z2 = z3 = 0; + + /* + * now x*y is represented by sxy, exy, and xy[0-3], and z is + * represented likewise; swap if need be so |xy| <= |z| + */ + if (exy > ez || (exy == ez && (xy0 > z0 || (xy0 == z0 && + (xy1 > z1 || (xy1 == z1 && (xy2 | xy3) != 0)))))) { + e = sxy; sxy = sz; sz = e; + e = exy; exy = ez; ez = e; + e = xy0; xy0 = z0; z0 = e; + e = xy1; xy1 = z1; z1 = e; + z2 = xy2; xy2 = 0; + z3 = xy3; xy3 = 0; + } + + /* shift the significand of xy keeping a sticky bit */ + e = ez - exy; + if (e > 116) { + xy0 = xy1 = xy2 = 0; + xy3 = 1; + } else if (e >= 96) { + sticky = xy3 | xy2 | xy1 | ((xy0 << 1) << (127 - e)); + xy3 = xy0 >> (e - 96); + if (sticky) + xy3 |= 1; + xy0 = xy1 = xy2 = 0; + } else if (e >= 64) { + sticky = xy3 | xy2 | ((xy1 << 1) << (95 - e)); + xy3 = (xy1 >> (e - 64)) | ((xy0 << 1) << (95 - e)); + if (sticky) + xy3 |= 1; + xy2 = xy0 >> (e - 64); + xy0 = xy1 = 0; + } else if (e >= 32) { + sticky = 
xy3 | ((xy2 << 1) << (63 - e)); + xy3 = (xy2 >> (e - 32)) | ((xy1 << 1) << (63 - e)); + if (sticky) + xy3 |= 1; + xy2 = (xy1 >> (e - 32)) | ((xy0 << 1) << (63 - e)); + xy1 = xy0 >> (e - 32); + xy0 = 0; + } else if (e) { + sticky = (xy3 << 1) << (31 - e); + xy3 = (xy3 >> e) | ((xy2 << 1) << (31 - e)); + if (sticky) + xy3 |= 1; + xy2 = (xy2 >> e) | ((xy1 << 1) << (31 - e)); + xy1 = (xy1 >> e) | ((xy0 << 1) << (31 - e)); + xy0 >>= e; + } + + /* + * if this is a magnitude subtract, negate the significand of xy: + * complement the upper three words, negate the lowest, and carry + * upward when the lowest word is zero + */ + if (sxy ^ sz) { + xy0 = ~xy0; + xy1 = ~xy1; + xy2 = ~xy2; + xy3 = -xy3; + if (xy3 == 0) + if (++xy2 == 0) + if (++xy1 == 0) + xy0++; + } + + /* add, propagating carries */ + z3 += xy3; + e = (z3 < xy3); + z2 += xy2; + if (e) { + z2++; + e = (z2 <= xy2); + } else + e = (z2 < xy2); + z1 += xy1; + if (e) { + z1++; + e = (z1 <= xy1); + } else + e = (z1 < xy1); + z0 += xy0; + if (e) + z0++; + + /* postnormalize and collect rounding information into z2 */ + if (ez < 1) { + /* result is tiny; shift right until exponent is within range */ + e = 1 - ez; + if (e > 56) { + z2 = 1; /* result can't be exactly zero */ + z0 = z1 = 0; + } else if (e >= 32) { + sticky = z3 | z2 | ((z1 << 1) << (63 - e)); + z2 = (z1 >> (e - 32)) | ((z0 << 1) << (63 - e)); + if (sticky) + z2 |= 1; + z1 = z0 >> (e - 32); + z0 = 0; + } else { + sticky = z3 | (z2 << 1) << (31 - e); + z2 = (z2 >> e) | ((z1 << 1) << (31 - e)); + if (sticky) + z2 |= 1; + z1 = (z1 >> e) | ((z0 << 1) << (31 - e)); + z0 >>= e; + } + ez = 1; + } else if (z0 >= 0x200000) { + /* carry out; shift right by one */ + sticky = (z2 & 1) | z3; + z2 = (z2 >> 1) | (z1 << 31); + if (sticky) + z2 |= 1; + z1 = (z1 >> 1) | (z0 << 31); + z0 >>= 1; + ez++; + } else { + if (z0 < 0x100000 && (z0 | z1 | z2 | z3) != 0) { + /* + * borrow/cancellation; shift left as much as + * exponent allows + */ + while (!(z0 | (z1 & 0xffe00000)) && ez >= 33) { + z0 = z1; + z1 = z2; + z2 = z3; + z3 = 0; + ez -= 32; + } + while (z0 < 0x100000 && ez > 1) { + z0 
= (z0 << 1) | (z1 >> 31); + z1 = (z1 << 1) | (z2 >> 31); + z2 = (z2 << 1) | (z3 >> 31); + z3 <<= 1; + ez--; + } + } + if (z3) + z2 |= 1; + } + + /* get the rounding mode and clear current exceptions */ + rm = fsr >> 30; + fsr &= ~FSR_CEXC; + + /* strip off the integer bit, if there is one */ + ibit = z0 & 0x100000; + if (ibit) + z0 -= 0x100000; + else { + ez = 0; + if (!(z0 | z1 | z2)) { /* exact zero */ + zz.i[0] = rm == FSR_RM ? 0x80000000 : 0; + zz.i[1] = 0; + __fenv_setfsr32(&fsr); + return (zz.d); + } + } + + /* + * flip the sense of directed roundings if the result is negative; + * the logic below applies to a positive result + */ + if (sz) + rm ^= rm >> 1; + + /* round and raise exceptions */ + if (z2) { + fsr |= FSR_NXC; + + /* decide whether to round the fraction up */ + if (rm == FSR_RP || (rm == FSR_RN && (z2 > 0x80000000u || + (z2 == 0x80000000u && (z1 & 1))))) { + /* round up and renormalize if necessary */ + if (++z1 == 0) { + if (++z0 == 0x100000) { + z0 = 0; + ez++; + } + } + } + } + + /* check for under/overflow */ + if (ez >= 0x7ff) { + if (rm == FSR_RN || rm == FSR_RP) { + zz.i[0] = sz | 0x7ff00000; + zz.i[1] = 0; + } else { + zz.i[0] = sz | 0x7fefffff; + zz.i[1] = 0xffffffff; + } + fsr |= FSR_OFC | FSR_NXC; + } else { + zz.i[0] = sz | (ez << 20) | z0; + zz.i[1] = z1; + + /* + * !ibit => exact result was tiny before rounding, + * z2 nonzero => result delivered is inexact + */ + if (!ibit) { + if (z2) + fsr |= FSR_UFC | FSR_NXC; + else if (fsr & FSR_UFM) + fsr |= FSR_UFC; + } + } + + /* + * restore the fsr and emulate exceptions as needed; fsr >> 23 lines + * the trap enable bits up with the current exception bits, so the + * first branch is taken when an exception being raised has its trap + * enabled, and otherwise the current exception bits are copied into + * the accrued field (<< 5) + */ + if ((fsr & FSR_CEXC) & (fsr >> 23)) { + __fenv_setfsr32(&fsr); + if (fsr & FSR_OFC) { + dummy = huge; + dummy *= huge; + } else if (fsr & FSR_UFC) { + dummy = tiny; + if (fsr & FSR_NXC) + dummy *= tiny; + else + dummy -= tiny2; + } else { + dummy = huge; + dummy += tiny; + } + } else { + fsr |= (fsr & 0x1f) << 5; + __fenv_setfsr32(&fsr); + } + return (zz.d); +} + +#elif defined(__x86) + +#if defined(__amd64) 
+#define NI 4 +#else +#define NI 3 +#endif +/* NI: number of 32-bit words overlaying a long double in the unions below */ +/* + * fma for x86: 64-bit double precision, little-endian + * + * Returns x * y + z.  The operands are widened to double extended; when + * x or y is inf, nan, or zero, or z is inf or nan, the result comes from + * ordinary extended arithmetic.  Otherwise the product is split into a + * rounded part and a correction, the three terms are summed with + * compensated additions, and the final extended value is converted to + * double after the caller's control and status words are restored. + */ +double +__fma(double x, double y, double z) { + union { + unsigned i[NI]; + long double e; + } xx, yy, zz; + long double xe, ye, xhi, xlo, yhi, ylo; + int ex, ey, ez; + unsigned cwsw, oldcwsw, rm; + + /* convert the operands to double extended */ + xx.e = (long double) x; + yy.e = (long double) y; + zz.e = (long double) z; + + /* extract the exponents of the arguments */ + ex = xx.i[2] & 0x7fff; + ey = yy.i[2] & 0x7fff; + ez = zz.i[2] & 0x7fff; + + /* dispense with inf, nan, and zero cases */ + if (ex == 0x7fff || ey == 0x7fff || ex == 0 || ey == 0) + /* x or y is inf, nan, or zero */ + return ((double) (xx.e * yy.e + zz.e)); + + if (ez >= 0x7fff) /* z is inf or nan */ + return ((double) (xx.e + zz.e)); + /* avoid spurious inexact in x * y */ + + /* + * save the control and status words, mask all exceptions, and + * set rounding to 64-bit precision and to-nearest; in terms of + * fma.h, 0xf0c0ffff clears the rounding direction, rounding + * precision, and exception mask fields, and 0x033f0000 is + * FCW_P64 | FCW_ALLM (the cleared direction field is FCW_RN) + */ + __fenv_getcwsw(&oldcwsw); + cwsw = (oldcwsw & 0xf0c0ffff) | 0x033f0000; + __fenv_setcwsw(&cwsw); + + /* multiply x*y to 106 bits */ + xe = xx.e; + xx.i[0] = 0; + xhi = xx.e; /* hi 32 bits */ + xlo = xe - xhi; /* lo 21 bits */ + ye = yy.e; + yy.i[0] = 0; + yhi = yy.e; + ylo = ye - yhi; + xe = xe * ye; + ye = ((xhi * yhi - xe) + xhi * ylo + xlo * yhi) + xlo * ylo; + + /* distill the sum of xe, ye, and z */ + xhi = ye + zz.e; + yhi = xhi - ye; + xlo = (zz.e - yhi) + (ye - (xhi - yhi)); + /* now (xhi,xlo) = ye + z */ + + yhi = xe + xhi; + ye = yhi - xe; + ylo = (xhi - ye) + (xe - (yhi - ye)); /* now (yhi,ylo) = xe + xhi */ + + xhi = xlo + ylo; + xe = xhi - xlo; + xlo = (ylo - xe) + (xlo - (xhi - xe)); /* now (xhi,xlo) = xlo + ylo */ + + yy.e = yhi + xhi; + ylo = (yhi - yy.e) + xhi; /* now (yy.e,ylo) = xhi + yhi */ + + if (yy.i[1] != 0) { /* yy.e is nonzero */ + /* perturb yy.e if its least significant 10 bits are zero */ + if (!(yy.i[0] & 0x3ff)) { + xx.e = ylo + xlo; + if (xx.i[1] != 0) { + 
xx.i[2] = (xx.i[2] & 0x8000) | + ((yy.i[2] & 0x7fff) - 63); + xx.i[1] = 0x80000000; + xx.i[0] = 0; + yy.e += xx.e; + } + } + } else { + /* + * set sign of zero result according to rounding direction: + * the sign bit is set only when the saved direction is FCW_RM + */ + rm = oldcwsw & 0x0c000000; + yy.i[2] = ((rm == FCW_RM)? 0x8000 : 0); + } + + /* + * restore the control and status words and convert the result + * to double + */ + __fenv_setcwsw(&oldcwsw); + return ((double) yy.e); +} + +#else +#error Unknown architecture +#endif diff --git a/usr/src/lib/libm/common/m9x/fma.h b/usr/src/lib/libm/common/m9x/fma.h new file mode 100644 index 0000000000..07a497b2b0 --- /dev/null +++ b/usr/src/lib/libm/common/m9x/fma.h @@ -0,0 +1,126 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _FMA_H +#define _FMA_H + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef __sparc + +/* + * Common definitions for fma routines (SPARC) + */ + +/* fsr fields */ + +/* current exception bits */ +#define FSR_NXC 0x1 +#define FSR_DZC 0x2 +#define FSR_UFC 0x4 +#define FSR_OFC 0x8 +#define FSR_NVC 0x10 +#define FSR_CEXC 0x1f /* mask for all cexc bits */ + +/* accrued exception bits (the current exception bits shifted left by 5) */ +#define FSR_NXA 0x20 +#define FSR_DZA 0x40 +#define FSR_UFA 0x80 +#define FSR_OFA 0x100 +#define FSR_NVA 0x200 + +/* trap enable bits (the current exception bits shifted left by 23) */ +#define FSR_NXM 0x00800000 +#define FSR_DZM 0x01000000 +#define FSR_UFM 0x02000000 +#define FSR_OFM 0x04000000 +#define FSR_NVM 0x08000000 + +/* rounding directions (right-adjusted: fma.c compares these with fsr >> 30) */ +#define FSR_RN 0 +#define FSR_RZ 1 +#define FSR_RP 2 +#define FSR_RM 3 + +/* inline templates */ +extern void __fenv_getfsr32(unsigned int *); +extern void __fenv_setfsr32(const unsigned int *); + +#endif /* __sparc */ + + +#if defined(__x86) + +/* + * Common definitions for fma routines (x86) + */ + +/* control and status word fields */ + +/* exception flags */ +#define FSW_NV 0x1 +#define FSW_DN 0x2 +#define FSW_DZ 0x4 +#define FSW_OF 0x8 +#define FSW_UF 0x10 +#define FSW_NX 0x20 + +/* exception masks (the exception flags shifted left by 16) */ +#define FCW_NVM 0x00010000 +#define FCW_DNM 0x00020000 +#define FCW_DZM 0x00040000 +#define FCW_OFM 0x00080000 +#define FCW_UFM 0x00100000 +#define FCW_NXM 0x00200000 +#define FCW_ALLM 0x003f0000 /* all six masks above */ + +/* rounding directions */ +#define FCW_RN 0x00000000 +#define FCW_RM 0x04000000 +#define FCW_RP 0x08000000 +#define FCW_RZ 0x0c000000 + +/* rounding precisions */ +#define FCW_P24 0x00000000 +#define FCW_P53 0x02000000 +#define FCW_P64 0x03000000 + +/* inline templates */ +extern void __fenv_getcwsw(unsigned int *); +extern void __fenv_setcwsw(const unsigned int *); + +#endif /* __x86 */ + +#ifdef __cplusplus +} +#endif + +#endif /* _FMA_H */ diff --git a/usr/src/lib/libm/common/m9x/fmaf.c b/usr/src/lib/libm/common/m9x/fmaf.c new file mode 100644 index 
0000000000..ea925ba726 --- /dev/null +++ b/usr/src/lib/libm/common/m9x/fmaf.c @@ -0,0 +1,243 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#if defined(ELFOBJ) +#pragma weak fmaf = __fmaf +#endif + +#include "libm.h" +#include "fma.h" +#include "fenv_inlines.h" + +#if defined(__sparc) + +/* + * fmaf for SPARC: 32-bit single precision, big-endian + */ +float +__fmaf(float x, float y, float z) { + union { + unsigned i[2]; + double d; + } xy, zz; + unsigned u, s; + int exy, ez; + + /* + * the following operations can only raise the invalid exception, + * and then only if either x*y is of the form Inf*0 or one of x, + * y, or z is a signaling NaN + */ + xy.d = (double) x * y; + zz.d = (double) z; + + /* + * if the sum xy + z will be exact, just compute it and cast the + * result to float + */ + exy = (xy.i[0] >> 20) & 0x7ff; + ez = (zz.i[0] >> 20) & 0x7ff; + if ((ez - exy <= 4 && exy - ez <= 28) || exy == 0x7ff || exy == 0 || + ez == 0x7ff || ez == 0) { + return ((float) (xy.d + zz.d)); + } + + /* + * collapse the tail of the smaller summand into a "sticky bit" + * so that the sum can be computed without error + */ + if (ez > exy) { + if (ez - exy < 31) { + u = xy.i[1]; + s = 2 << (ez - exy); + if (u & (s - 1)) + u |= s; + xy.i[1] = u & ~(s - 1); + } else if (ez - exy < 51) { + u = xy.i[0]; + s = 1 << (ez - exy - 31); + if ((u & (s - 1)) | xy.i[1]) + u |= s; + xy.i[0] = u & ~(s - 1); + xy.i[1] = 0; + } else { + /* collapse all of xy into a single bit */ + xy.i[0] = (xy.i[0] & 0x80000000) | ((ez - 51) << 20); + xy.i[1] = 0; + } + } else { + if (exy - ez < 31) { + u = zz.i[1]; + s = 2 << (exy - ez); + if (u & (s - 1)) + u |= s; + zz.i[1] = u & ~(s - 1); + } else if (exy - ez < 51) { + u = zz.i[0]; + s = 1 << (exy - ez - 31); + if ((u & (s - 1)) | zz.i[1]) + u |= s; + zz.i[0] = u & ~(s - 1); + zz.i[1] = 0; + } else { + /* collapse all of zz into a single bit */ + zz.i[0] = (zz.i[0] & 0x80000000) | ((exy - 51) << 20); + zz.i[1] = 0; + } + } + + return ((float) (xy.d + zz.d)); +} + +#elif defined(__x86) + +#if defined(__amd64) +#define NI 4 +#else +#define NI 3 +#endif + +/* + * fmaf for x86: 32-bit 
single precision, little-endian + */ +float +__fmaf(float x, float y, float z) { + union { + unsigned i[NI]; + long double e; + } xy, zz; + unsigned u, s, cwsw, oldcwsw; + int exy, ez; + + /* set rounding precision to 64 bits */ + __fenv_getcwsw(&oldcwsw); + cwsw = (oldcwsw & 0xfcffffff) | 0x03000000; + __fenv_setcwsw(&cwsw); + + /* + * the following operations can only raise the invalid exception, + * and then only if either x*y is of the form Inf*0 or one of x, + * y, or z is a signaling NaN + */ + xy.e = (long double) x * y; + zz.e = (long double) z; + + /* + * if the sum xy + z will be exact, just compute it and cast the + * result to float + */ + exy = xy.i[2] & 0x7fff; + ez = zz.i[2] & 0x7fff; + if ((ez - exy <= 15 && exy - ez <= 39) || exy == 0x7fff || exy == 0 || + ez == 0x7fff || ez == 0) { + goto cont; + } + + /* + * collapse the tail of the smaller summand into a "sticky bit" + * so that the sum can be computed without error + */ + if (ez > exy) { + if (ez - exy < 31) { + u = xy.i[0]; + s = 2 << (ez - exy); + if (u & (s - 1)) + u |= s; + xy.i[0] = u & ~(s - 1); + } else if (ez - exy < 62) { + u = xy.i[1]; + s = 1 << (ez - exy - 31); + if ((u & (s - 1)) | xy.i[0]) + u |= s; + xy.i[1] = u & ~(s - 1); + xy.i[0] = 0; + } else { + /* collapse all of xy into a single bit */ + xy.i[0] = 0; + xy.i[1] = 0x80000000; + xy.i[2] = (xy.i[2] & 0x8000) | (ez - 62); + } + } else { + if (exy - ez < 62) { + u = zz.i[1]; + s = 1 << (exy - ez - 31); + if ((u & (s - 1)) | zz.i[0]) + u |= s; + zz.i[1] = u & ~(s - 1); + zz.i[0] = 0; + } else { + /* collapse all of zz into a single bit */ + zz.i[0] = 0; + zz.i[1] = 0x80000000; + zz.i[2] = (zz.i[2] & 0x8000) | (exy - 62); + } + } + +cont: + xy.e += zz.e; + + /* restore the rounding precision */ + __fenv_getcwsw(&cwsw); + cwsw = (cwsw & 0xfcffffff) | (oldcwsw & 0x03000000); + __fenv_setcwsw(&cwsw); + + return ((float) xy.e); +} + +#if 0 +/* + * another fmaf for x86: assumes return value will be left in + * long double (80-bit 
double extended) precision + */ +long double +__fmaf(float x, float y, float z) { + /* + * Note: This implementation assumes the rounding precision mode + * is set to the default, rounding to 64 bit precision. If this + * routine must work in non-default rounding precision modes, do + * the following instead: + * + * long double t; + * + * <set rp mode to round to 64 bit precision> + * t = x * y; + * <restore rp mode> + * return t + z; + * + * Note that the code to change rounding precision must not alter + * the exception masks or flags, since the product x * y may raise + * an invalid operation exception. + */ + return ((long double) x * y + z); +} +#endif + +#else +#error Unknown architecture +#endif diff --git a/usr/src/lib/libm/common/m9x/fmal.c b/usr/src/lib/libm/common/m9x/fmal.c new file mode 100644 index 0000000000..b60e034139 --- /dev/null +++ b/usr/src/lib/libm/common/m9x/fmal.c @@ -0,0 +1,1227 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#if defined(ELFOBJ) +#pragma weak fmal = __fmal +#endif + +#include "libm.h" +#include "fma.h" +#include "fenv_inlines.h" + +#if defined(__sparc) + +static const union { + unsigned i[2]; + double d; +} C[] = { + { 0x3fe00000u, 0 }, + { 0x40000000u, 0 }, + { 0x3ef00000u, 0 }, + { 0x3e700000u, 0 }, + { 0x41300000u, 0 }, + { 0x3e300000u, 0 }, + { 0x3b300000u, 0 }, + { 0x38300000u, 0 }, + { 0x42300000u, 0 }, + { 0x3df00000u, 0 }, + { 0x7fe00000u, 0 }, + { 0x00100000u, 0 }, + { 0x00100001u, 0 }, + { 0, 0 }, + { 0x7ff00000u, 0 }, + { 0x7ff00001u, 0 } +}; + +#define half C[0].d +#define two C[1].d +#define twom16 C[2].d +#define twom24 C[3].d +#define two20 C[4].d +#define twom28 C[5].d +#define twom76 C[6].d +#define twom124 C[7].d +#define two36 C[8].d +#define twom32 C[9].d +#define huge C[10].d +#define tiny C[11].d +#define tiny2 C[12].d +#define zero C[13].d +#define inf C[14].d +#define snan C[15].d + +static const unsigned int fsr_rm = 0xc0000000u; + +/* + * fmal for SPARC: 128-bit quad precision, big-endian + */ +long double +__fmal(long double x, long double y, long double z) { + union { + unsigned int i[4]; + long double q; + } xx, yy, zz; + union { + unsigned int i[2]; + double d; + } u; + double dx[5], dy[5], dxy[9], c, s; + unsigned int xy0, xy1, xy2, xy3, xy4, xy5, xy6, xy7; + unsigned int z0, z1, z2, z3, z4, z5, z6, z7; + unsigned int rm, sticky; + unsigned int fsr; + int hx, hy, hz, ex, ey, ez, exy, sxy, sz, e, ibit; + int cx, cy, cz; + volatile double dummy; + + /* extract the high order words of the arguments */ + xx.q = x; + yy.q = y; + zz.q = z; + hx = xx.i[0] & ~0x80000000; + hy = yy.i[0] & ~0x80000000; + hz = zz.i[0] & ~0x80000000; + + /* + * distinguish zero, finite nonzero, infinite, and quiet nan + * arguments; raise invalid and return for signaling nans + */ + if (hx >= 0x7fff0000) { + if ((hx & 0xffff) | xx.i[1] | xx.i[2] | xx.i[3]) { + if (!(hx & 0x8000)) { + /* signaling nan, raise invalid */ + dummy = snan; + dummy += snan; + xx.i[0] 
|= 0x8000; + return (xx.q); + } + cx = 3; /* quiet nan */ + } else + cx = 2; /* inf */ + } else if (hx == 0) { + cx = (xx.i[1] | xx.i[2] | xx.i[3]) ? 1 : 0; + /* subnormal or zero */ + } else + cx = 1; /* finite nonzero */ + + if (hy >= 0x7fff0000) { + if ((hy & 0xffff) | yy.i[1] | yy.i[2] | yy.i[3]) { + if (!(hy & 0x8000)) { + dummy = snan; + dummy += snan; + yy.i[0] |= 0x8000; + return (yy.q); + } + cy = 3; + } else + cy = 2; + } else if (hy == 0) { + cy = (yy.i[1] | yy.i[2] | yy.i[3]) ? 1 : 0; + } else + cy = 1; + + if (hz >= 0x7fff0000) { + if ((hz & 0xffff) | zz.i[1] | zz.i[2] | zz.i[3]) { + if (!(hz & 0x8000)) { + dummy = snan; + dummy += snan; + zz.i[0] |= 0x8000; + return (zz.q); + } + cz = 3; + } else + cz = 2; + } else if (hz == 0) { + cz = (zz.i[1] | zz.i[2] | zz.i[3]) ? 1 : 0; + } else + cz = 1; + + /* get the fsr and clear current exceptions */ + __fenv_getfsr32(&fsr); + fsr &= ~FSR_CEXC; + + /* handle all other zero, inf, and nan cases */ + if (cx != 1 || cy != 1 || cz != 1) { + /* if x or y is a quiet nan, return it */ + if (cx == 3) { + __fenv_setfsr32(&fsr); + return (x); + } + if (cy == 3) { + __fenv_setfsr32(&fsr); + return (y); + } + + /* if x*y is 0*inf, raise invalid and return the default nan */ + if ((cx == 0 && cy == 2) || (cx == 2 && cy == 0)) { + dummy = zero; + dummy *= inf; + zz.i[0] = 0x7fffffff; + zz.i[1] = zz.i[2] = zz.i[3] = 0xffffffff; + return (zz.q); + } + + /* if z is a quiet nan, return it */ + if (cz == 3) { + __fenv_setfsr32(&fsr); + return (z); + } + + /* + * now none of x, y, or z is nan; handle cases where x or y + * is inf + */ + if (cx == 2 || cy == 2) { + /* + * if z is also inf, either we have inf-inf or + * the result is the same as z depending on signs + */ + if (cz == 2) { + if ((int) ((xx.i[0] ^ yy.i[0]) ^ zz.i[0]) < 0) { + dummy = inf; + dummy -= inf; + zz.i[0] = 0x7fffffff; + zz.i[1] = zz.i[2] = zz.i[3] = + 0xffffffff; + return (zz.q); + } + __fenv_setfsr32(&fsr); + return (z); + } + + /* otherwise the result is 
inf with appropriate sign */ + zz.i[0] = ((xx.i[0] ^ yy.i[0]) & 0x80000000) | + 0x7fff0000; + zz.i[1] = zz.i[2] = zz.i[3] = 0; + __fenv_setfsr32(&fsr); + return (zz.q); + } + + /* if z is inf, return it */ + if (cz == 2) { + __fenv_setfsr32(&fsr); + return (z); + } + + /* + * now x, y, and z are all finite; handle cases where x or y + * is zero + */ + if (cx == 0 || cy == 0) { + /* either we have 0-0 or the result is the same as z */ + if (cz == 0 && (int) ((xx.i[0] ^ yy.i[0]) ^ zz.i[0]) < + 0) { + zz.i[0] = (fsr >> 30) == FSR_RM ? 0x80000000 : + 0; + __fenv_setfsr32(&fsr); + return (zz.q); + } + __fenv_setfsr32(&fsr); + return (z); + } + + /* if we get here, x and y are nonzero finite, z must be zero */ + return (x * y); + } + + /* + * now x, y, and z are all finite and nonzero; set round-to- + * negative-infinity mode + */ + __fenv_setfsr32(&fsr_rm); + + /* + * get the signs and exponents and normalize the significands + * of x and y + */ + sxy = (xx.i[0] ^ yy.i[0]) & 0x80000000; + ex = hx >> 16; + hx &= 0xffff; + if (!ex) { + if (hx | (xx.i[1] & 0xfffe0000)) { + ex = 1; + } else if (xx.i[1] | (xx.i[2] & 0xfffe0000)) { + hx = xx.i[1]; + xx.i[1] = xx.i[2]; + xx.i[2] = xx.i[3]; + xx.i[3] = 0; + ex = -31; + } else if (xx.i[2] | (xx.i[3] & 0xfffe0000)) { + hx = xx.i[2]; + xx.i[1] = xx.i[3]; + xx.i[2] = xx.i[3] = 0; + ex = -63; + } else { + hx = xx.i[3]; + xx.i[1] = xx.i[2] = xx.i[3] = 0; + ex = -95; + } + while ((hx & 0x10000) == 0) { + hx = (hx << 1) | (xx.i[1] >> 31); + xx.i[1] = (xx.i[1] << 1) | (xx.i[2] >> 31); + xx.i[2] = (xx.i[2] << 1) | (xx.i[3] >> 31); + xx.i[3] <<= 1; + ex--; + } + } else + hx |= 0x10000; + ey = hy >> 16; + hy &= 0xffff; + if (!ey) { + if (hy | (yy.i[1] & 0xfffe0000)) { + ey = 1; + } else if (yy.i[1] | (yy.i[2] & 0xfffe0000)) { + hy = yy.i[1]; + yy.i[1] = yy.i[2]; + yy.i[2] = yy.i[3]; + yy.i[3] = 0; + ey = -31; + } else if (yy.i[2] | (yy.i[3] & 0xfffe0000)) { + hy = yy.i[2]; + yy.i[1] = yy.i[3]; + yy.i[2] = yy.i[3] = 0; + ey = -63; + } else 
{ + hy = yy.i[3]; + yy.i[1] = yy.i[2] = yy.i[3] = 0; + ey = -95; + } + while ((hy & 0x10000) == 0) { + hy = (hy << 1) | (yy.i[1] >> 31); + yy.i[1] = (yy.i[1] << 1) | (yy.i[2] >> 31); + yy.i[2] = (yy.i[2] << 1) | (yy.i[3] >> 31); + yy.i[3] <<= 1; + ey--; + } + } else + hy |= 0x10000; + exy = ex + ey - 0x3fff; + + /* convert the significands of x and y to doubles */ + c = twom16; + dx[0] = (double) ((int) hx) * c; + dy[0] = (double) ((int) hy) * c; + + c *= twom24; + dx[1] = (double) ((int) (xx.i[1] >> 8)) * c; + dy[1] = (double) ((int) (yy.i[1] >> 8)) * c; + + c *= twom24; + dx[2] = (double) ((int) (((xx.i[1] << 16) | (xx.i[2] >> 16)) & + 0xffffff)) * c; + dy[2] = (double) ((int) (((yy.i[1] << 16) | (yy.i[2] >> 16)) & + 0xffffff)) * c; + + c *= twom24; + dx[3] = (double) ((int) (((xx.i[2] << 8) | (xx.i[3] >> 24)) & + 0xffffff)) * c; + dy[3] = (double) ((int) (((yy.i[2] << 8) | (yy.i[3] >> 24)) & + 0xffffff)) * c; + + c *= twom24; + dx[4] = (double) ((int) (xx.i[3] & 0xffffff)) * c; + dy[4] = (double) ((int) (yy.i[3] & 0xffffff)) * c; + + /* form the "digits" of the product */ + dxy[0] = dx[0] * dy[0]; + dxy[1] = dx[0] * dy[1] + dx[1] * dy[0]; + dxy[2] = dx[0] * dy[2] + dx[1] * dy[1] + dx[2] * dy[0]; + dxy[3] = dx[0] * dy[3] + dx[1] * dy[2] + dx[2] * dy[1] + + dx[3] * dy[0]; + dxy[4] = dx[0] * dy[4] + dx[1] * dy[3] + dx[2] * dy[2] + + dx[3] * dy[1] + dx[4] * dy[0]; + dxy[5] = dx[1] * dy[4] + dx[2] * dy[3] + dx[3] * dy[2] + + dx[4] * dy[1]; + dxy[6] = dx[2] * dy[4] + dx[3] * dy[3] + dx[4] * dy[2]; + dxy[7] = dx[3] * dy[4] + dx[4] * dy[3]; + dxy[8] = dx[4] * dy[4]; + + /* split odd-numbered terms and combine into even-numbered terms */ + c = (dxy[1] + two20) - two20; + dxy[0] += c; + dxy[1] -= c; + c = (dxy[3] + twom28) - twom28; + dxy[2] += c + dxy[1]; + dxy[3] -= c; + c = (dxy[5] + twom76) - twom76; + dxy[4] += c + dxy[3]; + dxy[5] -= c; + c = (dxy[7] + twom124) - twom124; + dxy[6] += c + dxy[5]; + dxy[8] += (dxy[7] - c); + + /* propagate carries, adjusting the 
exponent if need be */ + dxy[7] = dxy[6] + dxy[8]; + dxy[5] = dxy[4] + dxy[7]; + dxy[3] = dxy[2] + dxy[5]; + dxy[1] = dxy[0] + dxy[3]; + if (dxy[1] >= two) { + dxy[0] *= half; + dxy[1] *= half; + dxy[2] *= half; + dxy[3] *= half; + dxy[4] *= half; + dxy[5] *= half; + dxy[6] *= half; + dxy[7] *= half; + dxy[8] *= half; + exy++; + } + + /* extract the significand of x*y */ + s = two36; + u.d = c = dxy[1] + s; + xy0 = u.i[1]; + c -= s; + dxy[1] -= c; + dxy[0] -= c; + + s *= twom32; + u.d = c = dxy[1] + s; + xy1 = u.i[1]; + c -= s; + dxy[2] += (dxy[0] - c); + dxy[3] = dxy[2] + dxy[5]; + + s *= twom32; + u.d = c = dxy[3] + s; + xy2 = u.i[1]; + c -= s; + dxy[4] += (dxy[2] - c); + dxy[5] = dxy[4] + dxy[7]; + + s *= twom32; + u.d = c = dxy[5] + s; + xy3 = u.i[1]; + c -= s; + dxy[4] -= c; + dxy[5] = dxy[4] + dxy[7]; + + s *= twom32; + u.d = c = dxy[5] + s; + xy4 = u.i[1]; + c -= s; + dxy[6] += (dxy[4] - c); + dxy[7] = dxy[6] + dxy[8]; + + s *= twom32; + u.d = c = dxy[7] + s; + xy5 = u.i[1]; + c -= s; + dxy[8] += (dxy[6] - c); + + s *= twom32; + u.d = c = dxy[8] + s; + xy6 = u.i[1]; + c -= s; + dxy[8] -= c; + + s *= twom32; + u.d = c = dxy[8] + s; + xy7 = u.i[1]; + + /* extract the sign, exponent, and significand of z */ + sz = zz.i[0] & 0x80000000; + ez = hz >> 16; + z0 = hz & 0xffff; + if (!ez) { + if (z0 | (zz.i[1] & 0xfffe0000)) { + z1 = zz.i[1]; + z2 = zz.i[2]; + z3 = zz.i[3]; + ez = 1; + } else if (zz.i[1] | (zz.i[2] & 0xfffe0000)) { + z0 = zz.i[1]; + z1 = zz.i[2]; + z2 = zz.i[3]; + z3 = 0; + ez = -31; + } else if (zz.i[2] | (zz.i[3] & 0xfffe0000)) { + z0 = zz.i[2]; + z1 = zz.i[3]; + z2 = z3 = 0; + ez = -63; + } else { + z0 = zz.i[3]; + z1 = z2 = z3 = 0; + ez = -95; + } + while ((z0 & 0x10000) == 0) { + z0 = (z0 << 1) | (z1 >> 31); + z1 = (z1 << 1) | (z2 >> 31); + z2 = (z2 << 1) | (z3 >> 31); + z3 <<= 1; + ez--; + } + } else { + z0 |= 0x10000; + z1 = zz.i[1]; + z2 = zz.i[2]; + z3 = zz.i[3]; + } + z4 = z5 = z6 = z7 = 0; + + /* + * now x*y is represented by sxy, exy, and 
xy[0-7], and z is + * represented likewise; swap if need be so |xy| <= |z| + */ + if (exy > ez || (exy == ez && (xy0 > z0 || (xy0 == z0 && (xy1 > z1 || + (xy1 == z1 && (xy2 > z2 || (xy2 == z2 && (xy3 > z3 || + (xy3 == z3 && (xy4 | xy5 | xy6 | xy7) != 0)))))))))) { + e = sxy; sxy = sz; sz = e; + e = exy; exy = ez; ez = e; + e = xy0; xy0 = z0; z0 = e; + e = xy1; xy1 = z1; z1 = e; + e = xy2; xy2 = z2; z2 = e; + e = xy3; xy3 = z3; z3 = e; + z4 = xy4; xy4 = 0; + z5 = xy5; xy5 = 0; + z6 = xy6; xy6 = 0; + z7 = xy7; xy7 = 0; + } + + /* shift the significand of xy keeping a sticky bit */ + e = ez - exy; + if (e > 236) { + xy0 = xy1 = xy2 = xy3 = xy4 = xy5 = xy6 = 0; + xy7 = 1; + } else if (e >= 224) { + sticky = xy7 | xy6 | xy5 | xy4 | xy3 | xy2 | xy1 | + ((xy0 << 1) << (255 - e)); + xy7 = xy0 >> (e - 224); + if (sticky) + xy7 |= 1; + xy0 = xy1 = xy2 = xy3 = xy4 = xy5 = xy6 = 0; + } else if (e >= 192) { + sticky = xy7 | xy6 | xy5 | xy4 | xy3 | xy2 | + ((xy1 << 1) << (223 - e)); + xy7 = (xy1 >> (e - 192)) | ((xy0 << 1) << (223 - e)); + if (sticky) + xy7 |= 1; + xy6 = xy0 >> (e - 192); + xy0 = xy1 = xy2 = xy3 = xy4 = xy5 = 0; + } else if (e >= 160) { + sticky = xy7 | xy6 | xy5 | xy4 | xy3 | + ((xy2 << 1) << (191 - e)); + xy7 = (xy2 >> (e - 160)) | ((xy1 << 1) << (191 - e)); + if (sticky) + xy7 |= 1; + xy6 = (xy1 >> (e - 160)) | ((xy0 << 1) << (191 - e)); + xy5 = xy0 >> (e - 160); + xy0 = xy1 = xy2 = xy3 = xy4 = 0; + } else if (e >= 128) { + sticky = xy7 | xy6 | xy5 | xy4 | ((xy3 << 1) << (159 - e)); + xy7 = (xy3 >> (e - 128)) | ((xy2 << 1) << (159 - e)); + if (sticky) + xy7 |= 1; + xy6 = (xy2 >> (e - 128)) | ((xy1 << 1) << (159 - e)); + xy5 = (xy1 >> (e - 128)) | ((xy0 << 1) << (159 - e)); + xy4 = xy0 >> (e - 128); + xy0 = xy1 = xy2 = xy3 = 0; + } else if (e >= 96) { + sticky = xy7 | xy6 | xy5 | ((xy4 << 1) << (127 - e)); + xy7 = (xy4 >> (e - 96)) | ((xy3 << 1) << (127 - e)); + if (sticky) + xy7 |= 1; + xy6 = (xy3 >> (e - 96)) | ((xy2 << 1) << (127 - e)); + xy5 = (xy2 >> (e - 
96)) | ((xy1 << 1) << (127 - e)); + xy4 = (xy1 >> (e - 96)) | ((xy0 << 1) << (127 - e)); + xy3 = xy0 >> (e - 96); + xy0 = xy1 = xy2 = 0; + } else if (e >= 64) { + sticky = xy7 | xy6 | ((xy5 << 1) << (95 - e)); + xy7 = (xy5 >> (e - 64)) | ((xy4 << 1) << (95 - e)); + if (sticky) + xy7 |= 1; + xy6 = (xy4 >> (e - 64)) | ((xy3 << 1) << (95 - e)); + xy5 = (xy3 >> (e - 64)) | ((xy2 << 1) << (95 - e)); + xy4 = (xy2 >> (e - 64)) | ((xy1 << 1) << (95 - e)); + xy3 = (xy1 >> (e - 64)) | ((xy0 << 1) << (95 - e)); + xy2 = xy0 >> (e - 64); + xy0 = xy1 = 0; + } else if (e >= 32) { + sticky = xy7 | ((xy6 << 1) << (63 - e)); + xy7 = (xy6 >> (e - 32)) | ((xy5 << 1) << (63 - e)); + if (sticky) + xy7 |= 1; + xy6 = (xy5 >> (e - 32)) | ((xy4 << 1) << (63 - e)); + xy5 = (xy4 >> (e - 32)) | ((xy3 << 1) << (63 - e)); + xy4 = (xy3 >> (e - 32)) | ((xy2 << 1) << (63 - e)); + xy3 = (xy2 >> (e - 32)) | ((xy1 << 1) << (63 - e)); + xy2 = (xy1 >> (e - 32)) | ((xy0 << 1) << (63 - e)); + xy1 = xy0 >> (e - 32); + xy0 = 0; + } else if (e) { + sticky = (xy7 << 1) << (31 - e); + xy7 = (xy7 >> e) | ((xy6 << 1) << (31 - e)); + if (sticky) + xy7 |= 1; + xy6 = (xy6 >> e) | ((xy5 << 1) << (31 - e)); + xy5 = (xy5 >> e) | ((xy4 << 1) << (31 - e)); + xy4 = (xy4 >> e) | ((xy3 << 1) << (31 - e)); + xy3 = (xy3 >> e) | ((xy2 << 1) << (31 - e)); + xy2 = (xy2 >> e) | ((xy1 << 1) << (31 - e)); + xy1 = (xy1 >> e) | ((xy0 << 1) << (31 - e)); + xy0 >>= e; + } + + /* if this is a magnitude subtract, negate the significand of xy */ + if (sxy ^ sz) { + xy0 = ~xy0; + xy1 = ~xy1; + xy2 = ~xy2; + xy3 = ~xy3; + xy4 = ~xy4; + xy5 = ~xy5; + xy6 = ~xy6; + xy7 = -xy7; + if (xy7 == 0) + if (++xy6 == 0) + if (++xy5 == 0) + if (++xy4 == 0) + if (++xy3 == 0) + if (++xy2 == 0) + if (++xy1 == 0) + xy0++; + } + + /* add, propagating carries */ + z7 += xy7; + e = (z7 < xy7); + z6 += xy6; + if (e) { + z6++; + e = (z6 <= xy6); + } else + e = (z6 < xy6); + z5 += xy5; + if (e) { + z5++; + e = (z5 <= xy5); + } else + e = (z5 < xy5); + z4 += xy4; 
+ if (e) { + z4++; + e = (z4 <= xy4); + } else + e = (z4 < xy4); + z3 += xy3; + if (e) { + z3++; + e = (z3 <= xy3); + } else + e = (z3 < xy3); + z2 += xy2; + if (e) { + z2++; + e = (z2 <= xy2); + } else + e = (z2 < xy2); + z1 += xy1; + if (e) { + z1++; + e = (z1 <= xy1); + } else + e = (z1 < xy1); + z0 += xy0; + if (e) + z0++; + + /* postnormalize and collect rounding information into z4 */ + if (ez < 1) { + /* result is tiny; shift right until exponent is within range */ + e = 1 - ez; + if (e > 116) { + z4 = 1; /* result can't be exactly zero */ + z0 = z1 = z2 = z3 = 0; + } else if (e >= 96) { + sticky = z7 | z6 | z5 | z4 | z3 | z2 | + ((z1 << 1) << (127 - e)); + z4 = (z1 >> (e - 96)) | ((z0 << 1) << (127 - e)); + if (sticky) + z4 |= 1; + z3 = z0 >> (e - 96); + z0 = z1 = z2 = 0; + } else if (e >= 64) { + sticky = z7 | z6 | z5 | z4 | z3 | + ((z2 << 1) << (95 - e)); + z4 = (z2 >> (e - 64)) | ((z1 << 1) << (95 - e)); + if (sticky) + z4 |= 1; + z3 = (z1 >> (e - 64)) | ((z0 << 1) << (95 - e)); + z2 = z0 >> (e - 64); + z0 = z1 = 0; + } else if (e >= 32) { + sticky = z7 | z6 | z5 | z4 | ((z3 << 1) << (63 - e)); + z4 = (z3 >> (e - 32)) | ((z2 << 1) << (63 - e)); + if (sticky) + z4 |= 1; + z3 = (z2 >> (e - 32)) | ((z1 << 1) << (63 - e)); + z2 = (z1 >> (e - 32)) | ((z0 << 1) << (63 - e)); + z1 = z0 >> (e - 32); + z0 = 0; + } else { + sticky = z7 | z6 | z5 | (z4 << 1) << (31 - e); + z4 = (z4 >> e) | ((z3 << 1) << (31 - e)); + if (sticky) + z4 |= 1; + z3 = (z3 >> e) | ((z2 << 1) << (31 - e)); + z2 = (z2 >> e) | ((z1 << 1) << (31 - e)); + z1 = (z1 >> e) | ((z0 << 1) << (31 - e)); + z0 >>= e; + } + ez = 1; + } else if (z0 >= 0x20000) { + /* carry out; shift right by one */ + sticky = (z4 & 1) | z5 | z6 | z7; + z4 = (z4 >> 1) | (z3 << 31); + if (sticky) + z4 |= 1; + z3 = (z3 >> 1) | (z2 << 31); + z2 = (z2 >> 1) | (z1 << 31); + z1 = (z1 >> 1) | (z0 << 31); + z0 >>= 1; + ez++; + } else { + if (z0 < 0x10000 && (z0 | z1 | z2 | z3 | z4 | z5 | z6 | z7) + != 0) { + /* + * 
borrow/cancellation; shift left as much as + * exponent allows + */ + while (!(z0 | (z1 & 0xfffe0000)) && ez >= 33) { + z0 = z1; + z1 = z2; + z2 = z3; + z3 = z4; + z4 = z5; + z5 = z6; + z6 = z7; + z7 = 0; + ez -= 32; + } + while (z0 < 0x10000 && ez > 1) { + z0 = (z0 << 1) | (z1 >> 31); + z1 = (z1 << 1) | (z2 >> 31); + z2 = (z2 << 1) | (z3 >> 31); + z3 = (z3 << 1) | (z4 >> 31); + z4 = (z4 << 1) | (z5 >> 31); + z5 = (z5 << 1) | (z6 >> 31); + z6 = (z6 << 1) | (z7 >> 31); + z7 <<= 1; + ez--; + } + } + if (z5 | z6 | z7) + z4 |= 1; + } + + /* get the rounding mode */ + rm = fsr >> 30; + + /* strip off the integer bit, if there is one */ + ibit = z0 & 0x10000; + if (ibit) + z0 -= 0x10000; + else { + ez = 0; + if (!(z0 | z1 | z2 | z3 | z4)) { /* exact zero */ + zz.i[0] = rm == FSR_RM ? 0x80000000 : 0; + zz.i[1] = zz.i[2] = zz.i[3] = 0; + __fenv_setfsr32(&fsr); + return (zz.q); + } + } + + /* + * flip the sense of directed roundings if the result is negative; + * the logic below applies to a positive result + */ + if (sz) + rm ^= rm >> 1; + + /* round and raise exceptions */ + if (z4) { + fsr |= FSR_NXC; + + /* decide whether to round the fraction up */ + if (rm == FSR_RP || (rm == FSR_RN && (z4 > 0x80000000u || + (z4 == 0x80000000u && (z3 & 1))))) { + /* round up and renormalize if necessary */ + if (++z3 == 0) + if (++z2 == 0) + if (++z1 == 0) + if (++z0 == 0x10000) { + z0 = 0; + ez++; + } + } + } + + /* check for under/overflow */ + if (ez >= 0x7fff) { + if (rm == FSR_RN || rm == FSR_RP) { + zz.i[0] = sz | 0x7fff0000; + zz.i[1] = zz.i[2] = zz.i[3] = 0; + } else { + zz.i[0] = sz | 0x7ffeffff; + zz.i[1] = zz.i[2] = zz.i[3] = 0xffffffff; + } + fsr |= FSR_OFC | FSR_NXC; + } else { + zz.i[0] = sz | (ez << 16) | z0; + zz.i[1] = z1; + zz.i[2] = z2; + zz.i[3] = z3; + + /* + * !ibit => exact result was tiny before rounding, + * z4 nonzero => result delivered is inexact + */ + if (!ibit) { + if (z4) + fsr |= FSR_UFC | FSR_NXC; + else if (fsr & FSR_UFM) + fsr |= FSR_UFC; + } + } + 
/*
 * fmal for x86: 80-bit extended double precision, little-endian
 *
 * Returns x * y + z.  Inf, NaN and zero operands are handed to ordinary
 * floating-point expressions.  For finite nonzero operands the product is
 * expanded into 32-bit words, aligned with z, added or subtracted in
 * integer arithmetic, and rounded once according to the rounding-control
 * field of the caller's control word.
 *
 * Layout of the unions used below (as the code reads them): i[0] holds the
 * low 32 significand bits, i[1] the high 32 significand bits including the
 * explicit integer bit, and the low 16 bits of i[2] hold the sign (0x8000)
 * and the biased exponent (0x7fff).
 *
 * Overflow, underflow and inexact are signalled at the end by doing small
 * double-precision operations on the volatile variable `dummy`.
 */
long double
__fmal(long double x, long double y, long double z) {
	union {
		unsigned i[NI];
		long double e;
	} xx, yy, zz;
	long double xhi, yhi, xlo, ylo, t;
	/* xy0..xy4 and z0..z4: 32-bit significand words, index 0 most significant */
	unsigned xy0, xy1, xy2, xy3, xy4, z0, z1, z2, z3, z4;
	unsigned oldcwsw, cwsw, rm, sticky, carry;
	int ex, ey, ez, exy, sxy, sz, e, tinyafter;
	volatile double dummy;

	/* extract the exponents of the arguments */
	xx.e = x;
	yy.e = y;
	zz.e = z;
	ex = xx.i[2] & 0x7fff;
	ey = yy.i[2] & 0x7fff;
	ez = zz.i[2] & 0x7fff;

	/* dispense with inf, nan, and zero cases */
	if (ex == 0x7fff || ey == 0x7fff || (ex | xx.i[1] | xx.i[0]) == 0 ||
	    (ey | yy.i[1] | yy.i[0]) == 0)	/* x or y is inf, nan, or 0 */
		return (x * y + z);

	if (ez == 0x7fff)	/* z is inf or nan */
		return (x + z);	/* avoid spurious under/overflow in x * y */

	if ((ez | zz.i[1] | zz.i[0]) == 0)	/* z is zero */
		/*
		 * x * y isn't zero but could underflow to zero,
		 * so don't add z, lest we perturb the sign
		 */
		return (x * y);

	/*
	 * now x, y, and z are all finite and nonzero; extract signs and
	 * normalize the significands (this will raise the denormal operand
	 * exception if need be)
	 */
	sxy = (xx.i[2] ^ yy.i[2]) & 0x8000;
	sz = zz.i[2] & 0x8000;
	/*
	 * a zero exponent field here means a subnormal operand: scale it by
	 * two63 and subtract 63 from the exponent to compensate
	 */
	if (!ex) {
		xx.e = x * two63;
		ex = (xx.i[2] & 0x7fff) - 63;
	}
	if (!ey) {
		yy.e = y * two63;
		ey = (yy.i[2] & 0x7fff) - 63;
	}
	if (!ez) {
		zz.e = z * two63;
		ez = (zz.i[2] & 0x7fff) - 63;
	}

	/*
	 * save the control and status words, mask all exceptions, and
	 * set rounding to 64-bit precision and toward-zero
	 */
	__fenv_getcwsw(&oldcwsw);
	cwsw = (oldcwsw & 0xf0c0ffff) | 0x0f3f0000;
	__fenv_setcwsw(&cwsw);

	/* multiply x*y to 128 bits */
	exy = ex + ey - 0x3fff;
	/* force both exponent fields to the bias and clear the signs */
	xx.i[2] = 0x3fff;
	yy.i[2] = 0x3fff;
	x = xx.e;
	y = yy.e;
	/*
	 * NOTE(review): the add/subtract sequence below appears to split
	 * each operand into a high part (xhi, yhi) and a remainder
	 * (xlo, ylo) so the partial products are exact -- confirm
	 */
	xhi = ((x + twom32) + two32) - two32;
	yhi = ((y + twom32) + two32) - two32;
	xlo = x - xhi;
	ylo = y - yhi;
	x *= y;
	/* y now receives the part of the product that did not fit in x */
	y = ((xhi * yhi - x) + xhi * ylo + xlo * yhi) + xlo * ylo;
	if (x >= two) {
		x *= half;
		y *= half;
		exy++;
	}

	/* extract the significands */
	xx.e = x;
	xy0 = xx.i[1];
	xy1 = xx.i[0];
	/*
	 * NOTE(review): adding twom32 / twom64 presumably positions the
	 * next 32 product bits in the low significand word -- confirm
	 */
	yy.e = t = y + twom32;
	xy2 = yy.i[0];
	yy.e = (y - (t - twom32)) + twom64;
	xy3 = yy.i[0];
	xy4 = 0;
	z0 = zz.i[1];
	z1 = zz.i[0];
	z2 = z3 = z4 = 0;

	/*
	 * now x*y is represented by sxy, exy, and xy[0-4], and z is
	 * represented likewise; swap if need be so |xy| <= |z|
	 */
	if (exy > ez || (exy == ez && (xy0 > z0 || (xy0 == z0 &&
	    (xy1 > z1 || (xy1 == z1 && (xy2 | xy3) != 0)))))) {
		/* e is used as the swap temporary */
		e = sxy; sxy = sz; sz = e;
		e = exy; exy = ez; ez = e;
		e = xy0; xy0 = z0; z0 = e;
		e = xy1; xy1 = z1; z1 = e;
		/* z's low words were zero, so these are moves, not swaps */
		z2 = xy2; xy2 = 0;
		z3 = xy3; xy3 = 0;
	}

	/* shift the significand of xy keeping a sticky bit */
	e = ez - exy;
	/*
	 * each branch handles one range of whole-word shifts; shifts are
	 * written as (w << 1) << (k - e) so the shift count never reaches 32
	 */
	if (e > 130) {
		xy0 = xy1 = xy2 = xy3 = 0;
		xy4 = 1;
	} else if (e >= 128) {
		sticky = xy3 | xy2 | xy1 | ((xy0 << 1) << (159 - e));
		xy4 = xy0 >> (e - 128);
		if (sticky)
			xy4 |= 1;
		xy0 = xy1 = xy2 = xy3 = 0;
	} else if (e >= 96) {
		sticky = xy3 | xy2 | ((xy1 << 1) << (127 - e));
		xy4 = (xy1 >> (e - 96)) | ((xy0 << 1) << (127 - e));
		if (sticky)
			xy4 |= 1;
		xy3 = xy0 >> (e - 96);
		xy0 = xy1 = xy2 = 0;
	} else if (e >= 64) {
		sticky = xy3 | ((xy2 << 1) << (95 - e));
		xy4 = (xy2 >> (e - 64)) | ((xy1 << 1) << (95 - e));
		if (sticky)
			xy4 |= 1;
		xy3 = (xy1 >> (e - 64)) | ((xy0 << 1) << (95 - e));
		xy2 = xy0 >> (e - 64);
		xy0 = xy1 = 0;
	} else if (e >= 32) {
		sticky = (xy3 << 1) << (63 - e);
		xy4 = (xy3 >> (e - 32)) | ((xy2 << 1) << (63 - e));
		if (sticky)
			xy4 |= 1;
		xy3 = (xy2 >> (e - 32)) | ((xy1 << 1) << (63 - e));
		xy2 = (xy1 >> (e - 32)) | ((xy0 << 1) << (63 - e));
		xy1 = xy0 >> (e - 32);
		xy0 = 0;
	} else if (e) {
		/* xy4 is zero on entry, so nothing is lost off the end */
		xy4 = (xy3 << 1) << (31 - e);
		xy3 = (xy3 >> e) | ((xy2 << 1) << (31 - e));
		xy2 = (xy2 >> e) | ((xy1 << 1) << (31 - e));
		xy1 = (xy1 >> e) | ((xy0 << 1) << (31 - e));
		xy0 >>= e;
	}

	/* if this is a magnitude subtract, negate the significand of xy */
	if (sxy ^ sz) {
		/* two's complement across five words: invert, then add one */
		xy0 = ~xy0;
		xy1 = ~xy1;
		xy2 = ~xy2;
		xy3 = ~xy3;
		xy4 = -xy4;
		if (xy4 == 0)
			if (++xy3 == 0)
				if (++xy2 == 0)
					if (++xy1 == 0)
						xy0++;
	}

	/* add, propagating carries */
	z4 += xy4;
	carry = (z4 < xy4);
	z3 += xy3;
	if (carry) {
		z3++;
		carry = (z3 <= xy3);
	} else
		carry = (z3 < xy3);
	z2 += xy2;
	if (carry) {
		z2++;
		carry = (z2 <= xy2);
	} else
		carry = (z2 < xy2);
	z1 += xy1;
	if (carry) {
		z1++;
		carry = (z1 <= xy1);
	} else
		carry = (z1 < xy1);
	z0 += xy0;
	if (carry) {
		z0++;
		carry = (z0 <= xy0);
	} else
		carry = (z0 < xy0);

	/* for a magnitude subtract, ignore the last carry out */
	if (sxy ^ sz)
		carry = 0;

	/* postnormalize and collect rounding information into z2 */
	if (ez < 1) {
		/* result is tiny; shift right until exponent is within range */
		e = 1 - ez;
		if (e > 67) {
			z2 = 1;	/* result can't be exactly zero */
			z0 = z1 = 0;
		} else if (e >= 64) {
			sticky = z4 | z3 | z2 | z1 | ((z0 << 1) << (95 - e));
			z2 = (z0 >> (e - 64)) | ((carry << 1) << (95 - e));
			if (sticky)
				z2 |= 1;
			z1 = carry >> (e - 64);
			z0 = 0;
		} else if (e >= 32) {
			sticky = z4 | z3 | z2 | ((z1 << 1) << (63 - e));
			z2 = (z1 >> (e - 32)) | ((z0 << 1) << (63 - e));
			if (sticky)
				z2 |= 1;
			z1 = (z0 >> (e - 32)) | ((carry << 1) << (63 - e));
			z0 = carry >> (e - 32);
		} else {
			sticky = z4 | z3 | (z2 << 1) << (31 - e);
			z2 = (z2 >> e) | ((z1 << 1) << (31 - e));
			if (sticky)
				z2 |= 1;
			z1 = (z1 >> e) | ((z0 << 1) << (31 - e));
			z0 = (z0 >> e) | ((carry << 1) << (31 - e));
		}
		ez = 1;
	} else if (carry) {
		/* carry out; shift right by one */
		sticky = (z2 & 1) | z3 | z4;
		z2 = (z2 >> 1) | (z1 << 31);
		if (sticky)
			z2 |= 1;
		z1 = (z1 >> 1) | (z0 << 31);
		/* the carry becomes the new top significand bit */
		z0 = (z0 >> 1) | 0x80000000;
		ez++;
	} else {
		if (z0 < 0x80000000u && (z0 | z1 | z2 | z3 | z4) != 0) {
			/*
			 * borrow/cancellation; shift left as much as
			 * exponent allows
			 */
			/* whole words first, then single bits */
			while (!z0 && ez >= 33) {
				z0 = z1;
				z1 = z2;
				z2 = z3;
				z3 = z4;
				z4 = 0;
				ez -= 32;
			}
			while (z0 < 0x80000000u && ez > 1) {
				z0 = (z0 << 1) | (z1 >> 31);
				z1 = (z1 << 1) | (z2 >> 31);
				z2 = (z2 << 1) | (z3 >> 31);
				z3 = (z3 << 1) | (z4 >> 31);
				z4 <<= 1;
				ez--;
			}
		}
		/* fold everything below z2 into its sticky bit */
		if (z3 | z4)
			z2 |= 1;
	}

	/* get the rounding mode */
	rm = oldcwsw & 0x0c000000;

	/* adjust exponent if result is subnormal */
	tinyafter = 0;
	if (!(z0 & 0x80000000)) {
		ez = 0;
		tinyafter = 1;
		if (!(z0 | z1 | z2)) {	/* exact zero */
			/* the zero is negative only when rm is FCW_RM */
			zz.i[2] = rm == FCW_RM ? 0x8000 : 0;
			zz.i[1] = zz.i[0] = 0;
			__fenv_setcwsw(&oldcwsw);
			return (zz.e);
		}
	}

	/*
	 * flip the sense of directed roundings if the result is negative;
	 * the logic below applies to a positive result
	 */
	if (sz && (rm == FCW_RM || rm == FCW_RP))
		rm = (FCW_RM + FCW_RP) - rm;

	/* round */
	if (z2) {
		/*
		 * z2 holds the discarded bits: above 0x80000000 is more than
		 * half an ulp, equal to it is a tie resolved by the low bit
		 * of z1
		 */
		if (rm == FCW_RP || (rm == FCW_RN && (z2 > 0x80000000u ||
		    (z2 == 0x80000000u && (z1 & 1))))) {
			/* round up and renormalize if necessary */
			if (++z1 == 0) {
				if (++z0 == 0) {
					z0 = 0x80000000;
					ez++;
				} else if (z0 == 0x80000000) {
					/* rounded up to smallest normal */
					ez = 1;
					if ((rm == FCW_RP && z2 >
					    0x80000000u) || (rm == FCW_RN &&
					    z2 >= 0xc0000000u))
						/*
						 * would have rounded up to
						 * smallest normal even with
						 * unbounded range
						 */
						tinyafter = 0;
				}
			}
		}
	}

	/* restore the control and status words, check for over/underflow */
	__fenv_setcwsw(&oldcwsw);
	if (ez >= 0x7fff) {
		/* overflow: deliver infinity or the largest finite value */
		if (rm == FCW_RN || rm == FCW_RP) {
			zz.i[2] = sz | 0x7fff;
			zz.i[1] = 0x80000000;
			zz.i[0] = 0;
		} else {
			zz.i[2] = sz | 0x7ffe;
			zz.i[1] = 0xffffffff;
			zz.i[0] = 0xffffffff;
		}
		/* huge * huge, done in double to signal the overflow */
		dummy = huge;
		dummy *= huge;
	} else {
		zz.i[2] = sz | ez;
		zz.i[1] = z0;
		zz.i[0] = z1;

		/*
		 * tinyafter => result rounded w/ unbounded range would be tiny,
		 * z2 nonzero => result delivered is inexact
		 */
		if (tinyafter) {
			/*
			 * NOTE(review): tiny * tiny presumably signals
			 * underflow with inexact, and tiny - tiny2 underflow
			 * alone -- confirm against the FPU's behaviour
			 */
			dummy = tiny;
			if (z2)
				dummy *= tiny;
			else
				dummy -= tiny2;
		} else if (z2) {
			/* huge + tiny, presumably to signal inexact only */
			dummy = huge;
			dummy += tiny;
		}
	}

	return (zz.e);
}
+ * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#if defined(ELFOBJ) +#pragma weak fmax = __fmax +#endif + +/* + * fmax(x,y) returns the larger of x and y. If just one of the + * arguments is NaN, fmax returns the other argument. If both + * arguments are NaN, fmax returns NaN. + * + * See fmaxf.c for a discussion of implementation trade-offs. 
/*
 * Return the larger of x and y.  When exactly one argument is NaN the
 * other argument is returned; when both are NaN the result is NaN.
 * The sign of the result is cleared whenever at least one of the two
 * candidates has a clear sign, so a mixed pair of zeros yields +0.
 */
double
__fmax(double x, double y)
{
#if defined(__sparc)
	enum { sign_idx = 0 };	/* sign is in the first 32-bit word */
#elif defined(__x86)
	enum { sign_idx = 1 };	/* sign is in the second 32-bit word */
#else
#error Unknown architecture
#endif
	const unsigned sign_mask = 0x80000000;
	union {
		unsigned i[2];
		double d;
	} res, other;
	unsigned shared_sign;

	/* a NaN y takes the value of x */
	if (y != y)
		y = x;

	/* a NaN x takes the value of y */
	if (x != x)
		x = y;

	/* from here on x and y are both numbers or both NaN */
	if (!(isnan(x) || isgreaterequal(x, y)))
		x = y;

	/* keep the sign bit only if both candidates carry it */
	res.d = x;
	other.d = y;
	shared_sign = res.i[sign_idx] & other.i[sign_idx] & sign_mask;
	res.i[sign_idx] = (res.i[sign_idx] & ~sign_mask) | shared_sign;

	return (res.d);
}
+ */ + +#if defined(ELFOBJ) +#pragma weak fmaxf = __fmaxf +#endif + +/* + * fmax(x,y) returns the larger of x and y. If just one of the + * arguments is NaN, fmax returns the other argument. If both + * arguments are NaN, fmax returns NaN (ideally, one of the + * argument NaNs). + * + * C99 does not require that fmax(-0,+0) = fmax(+0,-0) = +0, but + * ideally fmax should satisfy this. + * + * C99 makes no mention of exceptions for fmax. I suppose ideally + * either fmax never raises any exceptions or else it raises the + * invalid operation exception if and only if some argument is a + * signaling NaN. In the former case, fmax should always return + * one of its arguments. In the latter, fmax shouldn't return a + * signaling NaN, although when both arguments are signaling NaNs, + * this ideal is at odds with the stipulation that fmax should + * always return one of its arguments. + * + * Commutativity of fmax follows from the properties listed above + * except when both arguments are NaN. In that case, fmax may be + * declared commutative by fiat because there is no portable way + * to tell different NaNs apart. Ideally fmax would be truly com- + * mutative for all arguments. + * + * On SPARC V8, fmax must involve tests and branches. Ideally, + * an implementation on SPARC V9 should avoid branching, using + * conditional moves instead where necessary, and be as efficient + * as possible in its use of other resources. + * + * It appears to be impossible to attain all of the aforementioned + * ideals simultaneously. The implementation below satisfies the + * following (on SPARC): + * + * 1. fmax(x,y) returns the larger of x and y if neither x nor y + * is NaN and the non-NaN argument if just one of x or y is NaN. + * If both x and y are NaN, fmax(x,y) returns x unchanged. + * 2. fmax(-0,+0) = fmax(+0,-0) = +0. + * 3. If either argument is a signaling NaN, fmax raises the invalid + * operation exception. Otherwise, it raises no exceptions. 
/*
 * Single-precision fmax: return the larger of x and y, treating a lone
 * NaN argument as missing and giving +0 for a mixed pair of zeros.
 *
 * Branch-free alternatives for SPARC v8plus/v9, with %f0 = x, %f1 = y
 * and the result left in %f0:
 *
 *	fcmps	%fcc0,%f1,%f1
 *	fmovsu	%fcc0,%f0,%f1
 *	fcmps	%fcc0,%f0,%f1
 *	fmovsul	%fcc0,%f1,%f0
 *	st	%f0,[x]
 *	st	%f1,[y]
 *	ld	[x],%l0
 *	ld	[y],%l1
 *	and	%l0,%l1,%l2
 *	sethi	%hi(0x80000000),%l3
 *	andn	%l3,%l2,%l2
 *	andn	%l0,%l2,%l0
 *	st	%l0,[x]
 *	ld	[x],%f0
 *
 * With VIS instructions:
 *
 *	fcmps	%fcc0,%f1,%f1
 *	fmovsu	%fcc0,%f0,%f1
 *	fcmps	%fcc0,%f0,%f1
 *	fmovsul	%fcc0,%f1,%f0
 *	fands	%f0,%f1,%f2
 *	fzeros	%f3
 *	fnegs	%f3,%f3
 *	fandnot2s %f3,%f2,%f2
 *	fandnot2s %f0,%f2,%f0
 *
 * With VIS 3.0 instructions:
 *
 *	flcmps	%fcc0,%f0,%f1
 *	fmovslg	%fcc0,%f1,%f0	! move if %fcc0 is 1 or 2
 */
float
__fmaxf(float x, float y)
{
	const unsigned sign_mask = 0x80000000;
	union {
		unsigned i;
		float f;
	} res, other;
	unsigned shared_sign;

	/* a NaN y takes the value of x */
	if (y != y)
		y = x;

	/* a NaN x takes the value of y */
	if (x != x)
		x = y;

	/* from here on x and y are both numbers or both NaN */
	if (!(isnan(x) || isgreaterequal(x, y)))
		x = y;

	/* keep the sign bit only if both candidates carry it */
	res.f = x;
	other.f = y;
	shared_sign = res.i & other.i & sign_mask;
	res.i = (res.i & ~sign_mask) | shared_sign;

	return (res.f);
}
/*
 * Long double fmax: return the larger of x and y.  A lone NaN argument
 * is ignored; two NaNs give NaN.  The sign of the result is cleared
 * whenever at least one candidate has a clear sign.
 */
long double
__fmaxl(long double x, long double y)
{
#if defined(__sparc)
	/* four 32-bit words, sign in bit 31 of the first */
	enum { nwords = 4, sign_idx = 0 };
	const unsigned sign_mask = 0x80000000;
#elif defined(__x86)
	/* three 32-bit words, sign in bit 15 of the third */
	enum { nwords = 3, sign_idx = 2 };
	const unsigned sign_mask = 0x8000;
#else
#error Unknown architecture
#endif
	union {
		unsigned i[nwords];
		long double ld;
	} res, other;
	unsigned shared_sign;

	/* a NaN y takes the value of x */
	if (y != y)
		y = x;

	/* a NaN x takes the value of y */
	if (x != x)
		x = y;

	/* from here on x and y are both numbers or both NaN */
	if (!(isnan(x) || isgreaterequal(x, y)))
		x = y;

	/* keep the sign bit only if both candidates carry it */
	res.ld = x;
	other.ld = y;
	shared_sign = res.i[sign_idx] & other.i[sign_idx] & sign_mask;
	res.i[sign_idx] = (res.i[sign_idx] & ~sign_mask) | shared_sign;

	return (res.ld);
}
+ * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#if defined(ELFOBJ) +#pragma weak fmin = __fmin +#endif + +/* + * fmin(x,y) returns the smaller of x and y. If just one of the + * arguments is NaN, fmin returns the other argument. If both + * arguments are NaN, fmin returns NaN. + * + * See fmaxf.c for a discussion of implementation trade-offs. 
/*
 * Return the smaller of x and y.  When exactly one argument is NaN the
 * other argument is returned; when both are NaN the result is NaN.
 * The sign of the result is set whenever at least one of the two
 * candidates has its sign set, so a mixed pair of zeros yields -0.
 */
double
__fmin(double x, double y)
{
#if defined(_BIG_ENDIAN)
	enum { sign_idx = 0 };	/* sign is in the first 32-bit word */
#else
	enum { sign_idx = 1 };	/* sign is in the second 32-bit word */
#endif
	const unsigned sign_mask = 0x80000000;
	union {
		unsigned i[2];
		double d;
	} res, other;

	/* a NaN y takes the value of x */
	if (y != y)
		y = x;

	/* a NaN x takes the value of y */
	if (x != x)
		x = y;

	/* from here on x and y are both numbers or both NaN */
	if (!(isnan(x) || islessequal(x, y)))
		x = y;

	/* pick up the sign bit from the other candidate as well */
	res.d = x;
	other.d = y;
	res.i[sign_idx] |= other.i[sign_idx] & sign_mask;

	return (res.d);
}
/*
 * Single-precision fmin: return the smaller of x and y, treating a lone
 * NaN argument as missing and giving -0 for a mixed pair of zeros.
 *
 * Branch-free alternatives for SPARC v8plus/v9, with %f0 = x, %f1 = y
 * and the result left in %f0:
 *
 *	fcmps	%fcc0,%f1,%f1
 *	fmovsu	%fcc0,%f0,%f1
 *	fcmps	%fcc0,%f0,%f1
 *	fmovsug	%fcc0,%f1,%f0
 *	st	%f0,[x]
 *	st	%f1,[y]
 *	ld	[x],%l0
 *	ld	[y],%l1
 *	or	%l0,%l1,%l2
 *	sethi	%hi(0x80000000),%l3
 *	and	%l3,%l2,%l2
 *	or	%l0,%l2,%l0
 *	st	%l0,[x]
 *	ld	[x],%f0
 *
 * With VIS instructions:
 *
 *	fcmps	%fcc0,%f1,%f1
 *	fmovsu	%fcc0,%f0,%f1
 *	fcmps	%fcc0,%f0,%f1
 *	fmovsug	%fcc0,%f1,%f0
 *	fors	%f0,%f1,%f2
 *	fzeros	%f3
 *	fnegs	%f3,%f3
 *	fands	%f3,%f2,%f2
 *	fors	%f0,%f2,%f0
 *
 * With VIS 3.0 instructions:
 *
 *	flcmps	%fcc0,%f0,%f1
 *	fmovsge	%fcc0,%f1,%f0	! move if %fcc0 is 0 or 2
 */
float
__fminf(float x, float y)
{
	const unsigned sign_mask = 0x80000000;
	union {
		unsigned i;
		float f;
	} res, other;

	/* a NaN y takes the value of x */
	if (y != y)
		y = x;

	/* a NaN x takes the value of y */
	if (x != x)
		x = y;

	/* from here on x and y are both numbers or both NaN */
	if (!(isnan(x) || islessequal(x, y)))
		x = y;

	/* pick up the sign bit from the other candidate as well */
	res.f = x;
	other.f = y;
	res.i |= other.i & sign_mask;

	return (res.f);
}
/*
 * __fminl(x, y) returns the smaller of x and y.
 *
 * If exactly one argument is a NaN, the other argument is returned; if
 * both are NaN, a NaN is returned.  The sign of the result is negative
 * whenever either operand (after NaN replacement) is negative, so zeros
 * of opposite sign yield -0.
 *
 * The sign is examined with signbit() rather than by overlaying the
 * long double with an architecture-specific word array, so this routine
 * does not depend on the storage layout of long double.
 */
long double
__fminl(long double x, long double y) {
	/* if y is nan, replace it by x */
	if (y != y)
		y = x;

	/* if x is nan, replace it by y */
	if (x != x)
		x = y;

	/* At this point, x and y are either both numeric, or both NaN */
	if (!isnan(x) && !islessequal(x, y))
		x = y;

	/*
	 * set the sign of the result if either x or y has its sign set;
	 * negation only flips the sign bit, so this matches OR-ing it in
	 */
	if (signbit(y) && !signbit(x))
		x = -x;

	return (x);
}
of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#if defined(ELFOBJ) +#pragma weak frexp = __frexp +#endif + +/* + * frexp(x, exp) returns the normalized significand of x and sets + * *exp so that x = r*2^(*exp) where r is the return value. If x + * is finite and nonzero, 1/2 <= |r| < 1. + * + * If x is zero, infinite or NaN, frexp returns x and sets *exp = 0. + * (The relevant standards do not specify *exp when x is infinite or + * NaN, but this code sets it anyway.) + * + * If x is a signaling NaN, this code returns x without attempting + * to raise the invalid operation exception. If x is subnormal, + * this code treats it as nonzero regardless of nonstandard mode. 
+ */ + +#include "libm.h" + +double +__frexp(double x, int *exp) { + union { + unsigned i[2]; + double d; + } xx, yy; + double t; + unsigned hx; + int e; + + xx.d = x; + hx = xx.i[HIWORD] & ~0x80000000; + + if (hx >= 0x7ff00000) { /* x is infinite or NaN */ + *exp = 0; + return (x); + } + + e = 0; + if (hx < 0x00100000) { /* x is subnormal or zero */ + if ((hx | xx.i[LOWORD]) == 0) { + *exp = 0; + return (x); + } + + /* + * normalize x by regarding it as an integer + * + * Here we use 32-bit integer arithmetic to avoid trapping + * or emulating 64-bit arithmetic. If 64-bit arithmetic is + * available (e.g., in SPARC V9), do this instead: + * + * long lx = ((long) hx << 32) | xx.i[LOWORD]; + * xx.d = (xx.i[HIWORD] < 0)? -lx : lx; + * + * If subnormal arithmetic doesn't trap, just multiply x by + * a power of two. + */ + yy.i[HIWORD] = 0x43300000 | hx; + yy.i[LOWORD] = xx.i[LOWORD]; + t = yy.d; + yy.i[HIWORD] = 0x43300000; + yy.i[LOWORD] = 0; + t -= yy.d; /* t = |x| scaled */ + xx.d = ((int)xx.i[HIWORD] < 0)? -t : t; + hx = xx.i[HIWORD] & ~0x80000000; + e = -1074; + } + + /* now xx.d is normal */ + xx.i[HIWORD] = (xx.i[HIWORD] & ~0x7ff00000) | 0x3fe00000; + *exp = e + (hx >> 20) - 0x3fe; + return (xx.d); +} diff --git a/usr/src/lib/libm/common/m9x/frexpf.c b/usr/src/lib/libm/common/m9x/frexpf.c new file mode 100644 index 0000000000..7a89fbb428 --- /dev/null +++ b/usr/src/lib/libm/common/m9x/frexpf.c @@ -0,0 +1,70 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
/*
 * __frexpf(x, exp) splits x into a significand r and a power of two so
 * that x = r * 2^(*exp); r is returned.  Zero, infinite and NaN inputs
 * are returned unchanged with *exp set to 0.
 */
float
__frexpf(float x, int *exp)
{
	union {
		unsigned bits;
		float val;
	} u;
	unsigned mag;
	int adj = 0;

	u.val = x;
	mag = u.bits & ~0x80000000;

	/* infinite, NaN, or zero: nothing to normalize */
	if (mag >= 0x7f800000 || mag == 0) {
		*exp = 0;
		return (x);
	}

	if (mag < 0x00800000) {	/* x is subnormal */
		/* normalize x by regarding it as an integer */
		u.val = ((int)u.bits < 0) ? -(int)mag : (int)mag;
		mag = u.bits & ~0x80000000;
		adj = -149;
	}

	/* u.val is normal here: report its exponent, then force it to -1 */
	*exp = adj + (int)(mag >> 23) - 0x7e;
	u.bits = (u.bits & ~0x7f800000) | 0x3f000000;
	return (u.val);
}
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#if defined(ELFOBJ) +#pragma weak frexpl = __frexpl +#endif + +#include "libm.h" + +#if defined(__sparc) + +long double +__frexpl(long double x, int *exp) { + union { + unsigned i[4]; + long double q; + } xx; + unsigned hx; + int e, s; + + xx.q = x; + hx = xx.i[0] & ~0x80000000; + + if (hx >= 0x7fff0000) { /* x is infinite or NaN */ + *exp = 0; + return (x); + } + + e = 0; + if (hx < 0x00010000) { /* x is subnormal or zero */ + if ((hx | xx.i[1] | xx.i[2] | xx.i[3]) == 0) { + *exp = 0; + return (x); + } + + /* normalize x */ + s = xx.i[0] & 0x80000000; + while ((hx | (xx.i[1] & 0xffff0000)) == 0) { + hx = xx.i[1]; + xx.i[1] = xx.i[2]; + xx.i[2] = xx.i[3]; + xx.i[3] = 0; + e -= 32; + } + while (hx < 0x10000) { + hx = (hx << 1) | (xx.i[1] >> 31); + xx.i[1] = (xx.i[1] << 1) | (xx.i[2] >> 31); + xx.i[2] = (xx.i[2] << 1) | (xx.i[3] >> 31); + xx.i[3] <<= 1; + e--; + } + xx.i[0] = s | hx; + } + + /* now xx.q is normal */ + xx.i[0] = (xx.i[0] & ~0x7fff0000) | 0x3ffe0000; + *exp = e + (hx >> 16) - 0x3ffe; + return (xx.q); +} + +#elif defined(__x86) + +long double +__frexpl(long double x, int *exp) { + union { + unsigned i[3]; + long double e; + } xx; + unsigned hx; + int e; + + xx.e = x; + hx = xx.i[2] & 0x7fff; + + if (hx >= 0x7fff) { /* x is infinite or NaN */ + *exp = 0; + return (x); + } + + e = 0; + if (hx < 0x0001) { /* x is subnormal or zero */ + if ((xx.i[0] | xx.i[1]) == 0) { + *exp = 0; + return (x); + } + 
+ /* normalize x */ + xx.e *= 18446744073709551616.0L; /* 2^64 */ + hx = xx.i[2] & 0x7fff; + e = -64; + } + + /* now xx.e is normal */ + xx.i[2] = (xx.i[2] & 0x8000) | 0x3ffe; + *exp = e + hx - 0x3ffe; + return (xx.e); +} + +#else +#error Unknown architecture +#endif diff --git a/usr/src/lib/libm/common/m9x/ldexp.c b/usr/src/lib/libm/common/m9x/ldexp.c new file mode 100644 index 0000000000..ac0fcc8dd3 --- /dev/null +++ b/usr/src/lib/libm/common/m9x/ldexp.c @@ -0,0 +1,57 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#if defined(ELFOBJ) +#pragma weak ldexp = __ldexp +#endif + +#include "libm.h" +#include <errno.h> + +double +ldexp(double x, int n) { + int *px = (int *) &x, ix = px[HIWORD] & ~0x80000000; + + if (ix >= 0x7ff00000 || (px[LOWORD] | ix) == 0) +#if defined(FPADD_TRAPS_INCOMPLETE_ON_NAN) + return (ix >= 0x7ff80000 ? 
x : x + x); + /* assumes sparc-like QNaN */ +#else + return (x + x); +#endif + x = scalbn(x, n); + ix = px[HIWORD] & ~0x80000000; + /* + * SVID3 requires both overflow and underflow cases to set errno + * XPG3/XPG4/XPG4.2/SUSv2 requires overflow to set errno + */ + if (ix >= 0x7ff00000 || (px[LOWORD] | ix) == 0) + errno = ERANGE; + return (x); +} diff --git a/usr/src/lib/libm/common/m9x/ldexpf.c b/usr/src/lib/libm/common/m9x/ldexpf.c new file mode 100644 index 0000000000..1655ea1996 --- /dev/null +++ b/usr/src/lib/libm/common/m9x/ldexpf.c @@ -0,0 +1,39 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
/*
 * From libm/common/m9x/ldexpf.c
 *
 * ldexpf(x, n) returns whatever scalbnf(x, n) returns.
 */
float
ldexpf(float x, int n)
{
	float scaled = scalbnf(x, n);

	return (scaled);
}

/*
 * From libm/common/m9x/ldexpl.c
 *
 * ldexpl(x, n) returns whatever scalbnl(x, n) returns.
 */
long double
ldexpl(long double x, int n)
{
	long double scaled = scalbnl(x, n);

	return (scaled);
}
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#if defined(ELFOBJ) +#pragma weak llrint = __llrint +#if defined(__sparcv9) || defined(__amd64) +#pragma weak lrint = __llrint +#pragma weak __lrint = __llrint +#endif +#endif + +/* + * llrint(x) rounds its argument to the nearest integer according + * to the current rounding direction and converts the result to a + * 64 bit signed integer. + * + * If x is NaN, infinite, or so large that the nearest integer would + * exceed 64 bits, the invalid operation exception is raised. If x + * is not an integer, the inexact exception is raised. + */ + +#include "libm.h" + +long long +llrint(double x) { + /* + * Note: The following code works on x86 (in the default rounding + * precision mode), but one should just use the fistpll instruction + * instead. 
+ */ + union { + unsigned i[2]; + double d; + } xx, yy; + unsigned hx; + + xx.d = x; + hx = xx.i[HIWORD] & ~0x80000000; + + if (hx < 0x43300000) { /* |x| < 2^52 */ + /* add and subtract a power of two to round x to an integer */ +#if defined(__sparc) || defined(__amd64) + yy.i[HIWORD] = (xx.i[HIWORD] & 0x80000000) | 0x43300000; +#elif defined(__i386) /* !defined(__amd64) */ + yy.i[HIWORD] = (xx.i[HIWORD] & 0x80000000) | 0x43e00000; +#else +#error Unknown architecture +#endif + yy.i[LOWORD] = 0; + x = (x + yy.d) - yy.d; + } + + /* now x is nan, inf, or integral */ + return ((long long) x); +} diff --git a/usr/src/lib/libm/common/m9x/llrintf.c b/usr/src/lib/libm/common/m9x/llrintf.c new file mode 100644 index 0000000000..abdc3772b8 --- /dev/null +++ b/usr/src/lib/libm/common/m9x/llrintf.c @@ -0,0 +1,77 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
/*
 * llrintf(x) rounds its argument to an integer according to the
 * current rounding direction and converts the result to a long long.
 *
 * Note: this code works on x86 (in the default rounding precision
 * mode), but one should just use the fistpll instruction instead.
 */
long long
llrintf(float x)
{
	union {
		unsigned bits;
		float val;
	} in, magic;

	in.val = x;

	/* |x| >= 2^23: x is nan, inf, or already integral */
	if ((in.bits & ~0x80000000) >= 0x4b000000)
		return ((long long) x);

	/*
	 * Build a power of two carrying the sign of x, then add and
	 * subtract it so the sum is rounded to an integer.
	 */
#if defined(__sparc) || defined(__amd64)
	magic.bits = (in.bits & 0x80000000) | 0x4b000000;
#elif defined(__i386)
	/* assume 64-bit precision */
	magic.bits = (in.bits & 0x80000000) | 0x5f000000;
#else
#error Unknown architecture
#endif
	x = (x + magic.val) - magic.val;

	/*
	 * on LP32 architectures, we can just convert x to a 32-bit
	 * integer and sign-extend it
	 */
	return ((long) x);
}
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#if defined(ELFOBJ) +#pragma weak llrintl = __llrintl +#if defined(__sparcv9) || defined(__amd64) +#pragma weak lrintl = __llrintl +#pragma weak __lrintl = __llrintl +#endif +#endif + +#include "libm.h" + +#if defined(__sparc) + +#include "fma.h" +#include "fenv_inlines.h" + +long long +llrintl(long double x) { + union { + unsigned i[4]; + long double q; + } xx; + union { + unsigned i[2]; + long long l; + } zz; + union { + unsigned i; + float f; + } tt; + unsigned int hx, sx, frac, fsr; + int rm, j; + volatile float dummy; + + xx.q = x; + sx = xx.i[0] & 0x80000000; + hx = xx.i[0] & ~0x80000000; + + /* handle trivial cases */ + if (hx > 0x403e0000) { /* |x| > 2^63 + ... or x is nan */ + /* convert an out-of-range float */ + tt.i = sx | 0x7f000000; + return ((long long) tt.f); + } else if ((hx | xx.i[1] | xx.i[2] | xx.i[3]) == 0) /* x is zero */ + return (0LL); + + /* get the rounding mode */ + __fenv_getfsr32(&fsr); + rm = fsr >> 30; + + /* flip the sense of directed roundings if x is negative */ + if (sx) + rm ^= rm >> 1; + + /* handle |x| < 1 */ + if (hx < 0x3fff0000) { + dummy = 1.0e30f; /* x is nonzero, so raise inexact */ + dummy += 1.0e-30f; + if (rm == FSR_RP || (rm == FSR_RN && (hx >= 0x3ffe0000 && + ((hx & 0xffff) | xx.i[1] | xx.i[2] | xx.i[3])))) + return (sx ? 
-1LL : 1LL); + return (0LL); + } + + /* extract the integer and fractional parts of x */ + j = 0x406f - (hx >> 16); + xx.i[0] = 0x10000 | (xx.i[0] & 0xffff); + if (j >= 96) { + zz.i[0] = 0; + zz.i[1] = xx.i[0] >> (j - 96); + frac = ((xx.i[0] << 1) << (127 - j)) | (xx.i[1] >> (j - 96)); + if (((xx.i[1] << 1) << (127 - j)) | xx.i[2] | xx.i[3]) + frac |= 1; + } else if (j >= 64) { + zz.i[0] = xx.i[0] >> (j - 64); + zz.i[1] = ((xx.i[0] << 1) << (95 - j)) | (xx.i[1] >> (j - 64)); + frac = ((xx.i[1] << 1) << (95 - j)) | (xx.i[2] >> (j - 64)); + if (((xx.i[2] << 1) << (95 - j)) | xx.i[3]) + frac |= 1; + } else { + zz.i[0] = ((xx.i[0] << 1) << (63 - j)) | (xx.i[1] >> (j - 32)); + zz.i[1] = ((xx.i[1] << 1) << (63 - j)) | (xx.i[2] >> (j - 32)); + frac = ((xx.i[2] << 1) << (63 - j)) | (xx.i[3] >> (j - 32)); + if ((xx.i[3] << 1) << (63 - j)) + frac |= 1; + } + + /* round */ + if (frac && (rm == FSR_RP || (rm == FSR_RN && (frac > 0x80000000u || + (frac == 0x80000000 && (zz.i[1] & 1)))))) { + if (++zz.i[1] == 0) + zz.i[0]++; + } + + /* check for result out of range (note that z is |x| at this point) */ + if (zz.i[0] > 0x80000000u || (zz.i[0] == 0x80000000 && (zz.i[1] || + !sx))) { + tt.i = sx | 0x7f000000; + return ((long long) tt.f); + } + + /* raise inexact if need be */ + if (frac) { + dummy = 1.0e30F; + dummy += 1.0e-30F; + } + + /* negate result if need be */ + if (sx) { + zz.i[0] = ~zz.i[0]; + zz.i[1] = -zz.i[1]; + if (zz.i[1] == 0) + zz.i[0]++; + } + return (zz.l); +} +#elif defined(__x86) +long long +llrintl(long double x) { + /* + * Note: The following code works on x86 (in the default rounding + * precision mode), but one ought to just use the fistpll instruction + * instead. 
+ */ + union { + unsigned i[3]; + long double e; + } xx, yy; + int ex; + + xx.e = x; + ex = xx.i[2] & 0x7fff; + + if (ex < 0x403e) { /* |x| < 2^63 */ + /* add and subtract a power of two to round x to an integer */ + yy.i[2] = (xx.i[2] & 0x8000) | 0x403e; + yy.i[1] = 0x80000000; + yy.i[0] = 0; + x = (x + yy.e) - yy.e; + } + + /* now x is nan, inf, or integral */ + return ((long long) x); +} +#else +#error Unknown architecture +#endif diff --git a/usr/src/lib/libm/common/m9x/llround.c b/usr/src/lib/libm/common/m9x/llround.c new file mode 100644 index 0000000000..77fd84a084 --- /dev/null +++ b/usr/src/lib/libm/common/m9x/llround.c @@ -0,0 +1,85 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
/*
 * llround(x) rounds its argument to the nearest integer, rounding
 * ties away from zero, and converts the result to a 64 bit signed
 * integer.
 *
 * If x is NaN, infinite, or so large that the nearest integer
 * would exceed 64 bits, the invalid operation exception is raised.
 *
 * The rounding is done on the 64-bit image of x with an unsigned
 * 64-bit shift.  This avoids evaluating 1 << 31 in a signed int
 * (undefined behavior) for 2^20 <= |x| < 2^21, and removes the need
 * to treat the high and low words separately.
 */
long long
llround(double x) {
	union {
		unsigned long long u;
		double d;
	} xx;
	unsigned long long half;
	unsigned hx, sx;

	xx.d = x;
	hx = (unsigned)(xx.u >> 32) & ~0x80000000u;
	sx = (unsigned)(xx.u >> 32) & 0x80000000u;

	if (hx < 0x43300000) { /* |x| < 2^52 */
		/* handle |x| < 1 */
		if (hx < 0x3ff00000) {
			if (hx >= 0x3fe00000)
				return (sx ? -1LL : 1LL);
			return (0LL);
		}

		/*
		 * round x at the integer bit: add the bit worth 1/2, then
		 * clear it and everything below it (a carry out of the
		 * fraction correctly bumps the exponent)
		 */
		half = 1ULL << (0x432 - (hx >> 20));
		xx.u = (xx.u + half) & ~(half | (half - 1));
	}

	/* now x is nan, inf, or integral */
	return ((long long) xx.d);
}
/*
 * llroundf(x) rounds its argument to the nearest integer, rounding
 * ties away from zero, and converts the result to a long long.
 */
long long
llroundf(float x)
{
	union {
		unsigned bits;
		float val;
	} u;
	unsigned mag, half;

	u.val = x;
	mag = u.bits & ~0x80000000;

	/* |x| >= 2^23: x is nan, inf, or already integral */
	if (mag >= 0x4b000000)
		return ((long long) x);

	/* handle |x| < 1 */
	if (mag < 0x3f800000) {
		if (mag < 0x3f000000)
			return (0LL);
		return ((u.bits & 0x80000000) ? -1LL : 1LL);
	}

	/*
	 * round x at the integer bit: add the bit worth 1/2, then clear
	 * it and everything below it
	 */
	half = 1 << (0x95 - (mag >> 23));
	u.bits = (u.bits + half) & ~((half << 1) - 1);

	/*
	 * on LP32 architectures, we can just convert x to a 32-bit
	 * integer and sign-extend it
	 */
	return ((long) u.val);
}
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#if defined(ELFOBJ) +#pragma weak llroundl = __llroundl +#if defined(__sparcv9) || defined(__amd64) +#pragma weak lroundl = __llroundl +#pragma weak __lroundl = __llroundl +#endif +#endif + +#include "libm.h" + +#if defined(__sparc) +long long +llroundl(long double x) { + union { + unsigned i[4]; + long double q; + } xx; + union { + unsigned i[2]; + long long l; + } zz; + union { + unsigned i; + float f; + } tt; + unsigned hx, sx, frac; + int j; + + xx.q = x; + sx = xx.i[0] & 0x80000000; + hx = xx.i[0] & ~0x80000000; + + /* handle trivial cases */ + if (hx > 0x403e0000) { /* |x| > 2^63 + ... or x is nan */ + /* convert an out-of-range float */ + tt.i = sx | 0x7f000000; + return ((long long) tt.f); + } + + /* handle |x| < 1 */ + if (hx < 0x3fff0000) { + if (hx >= 0x3ffe0000) + return (sx ? 
-1LL : 1LL); + return (0LL); + } + + /* extract the integer and fractional parts of x */ + j = 0x406f - (hx >> 16); + xx.i[0] = 0x10000 | (xx.i[0] & 0xffff); + if (j >= 96) { + zz.i[0] = 0; + zz.i[1] = xx.i[0] >> (j - 96); + frac = ((xx.i[0] << 1) << (127 - j)) | (xx.i[1] >> (j - 96)); + if (((xx.i[1] << 1) << (127 - j)) | xx.i[2] | xx.i[3]) + frac |= 1; + } else if (j >= 64) { + zz.i[0] = xx.i[0] >> (j - 64); + zz.i[1] = ((xx.i[0] << 1) << (95 - j)) | (xx.i[1] >> (j - 64)); + frac = ((xx.i[1] << 1) << (95 - j)) | (xx.i[2] >> (j - 64)); + if (((xx.i[2] << 1) << (95 - j)) | xx.i[3]) + frac |= 1; + } else { + zz.i[0] = ((xx.i[0] << 1) << (63 - j)) | (xx.i[1] >> (j - 32)); + zz.i[1] = ((xx.i[1] << 1) << (63 - j)) | (xx.i[2] >> (j - 32)); + frac = ((xx.i[2] << 1) << (63 - j)) | (xx.i[3] >> (j - 32)); + if ((xx.i[3] << 1) << (63 - j)) + frac |= 1; + } + + /* round */ + if (frac >= 0x80000000u) { + if (++zz.i[1] == 0) + zz.i[0]++; + } + + /* check for result out of range (note that z is |x| at this point) */ + if (zz.i[0] > 0x80000000u || (zz.i[0] == 0x80000000 && (zz.i[1] || + !sx))) { + tt.i = sx | 0x7f000000; + return ((long long) tt.f); + } + + /* negate result if need be */ + if (sx) { + zz.i[0] = ~zz.i[0]; + zz.i[1] = -zz.i[1]; + if (zz.i[1] == 0) + zz.i[0]++; + } + + return (zz.l); +} +#elif defined(__x86) +long long +llroundl(long double x) { + union { + unsigned i[3]; + long double e; + } xx; + int ex, sx, i; + + xx.e = x; + ex = xx.i[2] & 0x7fff; + sx = xx.i[2] & 0x8000; + + if (ex < 0x403e) { /* |x| < 2^63 */ + /* handle |x| < 1 */ + if (ex < 0x3fff) { + if (ex >= 0x3ffe) + return (sx ? 
-1LL : 1LL); + return (0LL); + } + + /* round x at the integer bit */ + if (ex < 0x401e) { + i = 1 << (0x401d - ex); + xx.i[1] = (xx.i[1] + i) & ~(i | (i - 1)); + xx.i[0] = 0; + } else { + i = 1 << (0x403d - ex); + xx.i[0] += i; + if (xx.i[0] < i) + xx.i[1]++; + xx.i[0] &= ~(i | (i - 1)); + } + if (xx.i[1] == 0) { + xx.i[2] = sx | ++ex; + xx.i[1] = 0x80000000U; + } + } + + /* now x is nan, inf, or integral */ + return ((long long) xx.e); +} +#else +#error Unknown architecture +#endif diff --git a/usr/src/lib/libm/common/m9x/lrint.c b/usr/src/lib/libm/common/m9x/lrint.c new file mode 100644 index 0000000000..c3e4ed495d --- /dev/null +++ b/usr/src/lib/libm/common/m9x/lrint.c @@ -0,0 +1,81 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#if defined(ELFOBJ) +#pragma weak lrint = __lrint +#endif + +/* + * lrint(x) rounds its argument to the nearest integer according + * to the current rounding direction and converts the result to + * a 32 bit signed integer. 
+ * + * If x is NaN, infinite, or so large that the nearest integer + * would exceed 32 bits, the invalid operation exception is raised. + * If x is not an integer, the inexact exception is raised. + */ + +#include <sys/isa_defs.h> /* _ILP32 */ +#include "libm.h" + +#if defined(_ILP32) +long +lrint(double x) { + /* + * Note: The following code works on x86 (in the default rounding + * precision mode), but one should just use the fistpl instruction + * instead. + */ + union { + unsigned i[2]; + double d; + } xx, yy; + unsigned hx; + + xx.d = x; + hx = xx.i[HIWORD] & ~0x80000000; + if (hx < 0x43300000) { /* |x| < 2^52 */ + /* add and subtract a power of two to round x to an integer */ +#if defined(__sparc) + yy.i[HIWORD] = (xx.i[HIWORD] & 0x80000000) | 0x43300000; +#elif defined(__x86) + yy.i[HIWORD] = (xx.i[HIWORD] & 0x80000000) | 0x43e00000; +#else +#error Unknown architecture +#endif + yy.i[LOWORD] = 0; + x = (x + yy.d) - yy.d; + } + + /* now x is nan, inf, or integral */ + return ((long) x); +} +#else +#error Unsupported architecture +#endif /* defined(_ILP32) */ diff --git a/usr/src/lib/libm/common/m9x/lrintf.c b/usr/src/lib/libm/common/m9x/lrintf.c new file mode 100644 index 0000000000..06d5754853 --- /dev/null +++ b/usr/src/lib/libm/common/m9x/lrintf.c @@ -0,0 +1,72 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#if defined(ELFOBJ) +#pragma weak lrintf = __lrintf +#endif + +#include <sys/isa_defs.h> /* _ILP32 */ +#include "libm.h" + +#if defined(_ILP32) +long +lrintf(float x) { + /* + * Note: The following code works on x86 (in the default rounding + * precision mode), but one should just use the fistpl instruction + * instead. + */ + union { + unsigned i; + float f; + } xx, yy; + unsigned hx; + + xx.f = x; + hx = xx.i & ~0x80000000; + if (hx < 0x4b000000) { /* |x| < 2^23 */ + /* add and subtract a power of two to round x to an integer */ +#if defined(__sparc) + yy.i = (xx.i & 0x80000000) | 0x4b000000; +#elif defined(__x86) + /* assume 64-bit precision */ + yy.i = (xx.i & 0x80000000) | 0x5f000000; +#else +#error Unknown architecture +#endif + x = (x + yy.f) - yy.f; + return ((long) x); + } + + /* now x is nan, inf, or integral */ + return ((long) x); +} +#else +#error Unsupported architecture +#endif /* defined(_ILP32) */ diff --git a/usr/src/lib/libm/common/m9x/lrintl.c b/usr/src/lib/libm/common/m9x/lrintl.c new file mode 100644 index 0000000000..00d29926a0 --- /dev/null +++ b/usr/src/lib/libm/common/m9x/lrintl.c @@ -0,0 +1,156 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
/* usr/src/lib/libm/common/m9x/lrintl.c */

/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
 */
/*
 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#if defined(ELFOBJ)
#pragma weak lrintl = __lrintl
#endif

#include <sys/isa_defs.h>	/* _ILP32 */
#include "libm.h"

#if defined(_ILP32)
#if defined(__sparc)

#include "fma.h"
#include "fenv_inlines.h"

/*
 * lrintl(x), sparc version: round the quad precision value x to an
 * integer according to the rounding mode read from the FSR and return
 * it as a long.  The integer and fraction bits are picked out of the
 * four 32-bit words of x by hand, and inexact is raised explicitly
 * whenever a nonzero fraction is discarded.
 */
long
lrintl(long double x) {
	union {
		unsigned int i[4];
		long double q;
	} xx;
	union {
		unsigned int i;
		float f;
	} tt;
	unsigned int hx, sx, frac, l, fsr;
	int rm, j;
	volatile float dummy;	/* volatile so the inexact-raising add is kept */

	xx.q = x;
	sx = xx.i[0] & 0x80000000;	/* sign bit */
	hx = xx.i[0] & ~0x80000000;	/* exponent and top of significand */

	/* handle trivial cases */
	if (hx > 0x401e0000) { /* |x| > 2^31 + ... or x is nan */
		/* convert an out-of-range float */
		tt.i = sx | 0x7f000000;
		return ((long) tt.f);
	} else if ((hx | xx.i[1] | xx.i[2] | xx.i[3]) == 0) /* x is zero */
		return (0L);

	/* get the rounding mode (top two bits of the FSR) */
	__fenv_getfsr32(&fsr);
	rm = fsr >> 30;

	/*
	 * flip the sense of directed roundings if x is negative, so that
	 * the code below only has to reason about the magnitude of x
	 */
	if (sx)
		rm ^= rm >> 1;

	/* handle |x| < 1 */
	if (hx < 0x3fff0000) {
		dummy = 1.0e30F; /* x is nonzero, so raise inexact */
		dummy += 1.0e-30F;
		/* to-nearest rounds up only when |x| is strictly above 1/2 */
		if (rm == FSR_RP || (rm == FSR_RN && (hx >= 0x3ffe0000 &&
		    ((hx & 0xffff) | xx.i[1] | xx.i[2] | xx.i[3]))))
			return (sx ? -1L : 1L);
		return (0L);
	}

	/*
	 * extract the integer and fractional parts of x; with
	 * 0x3fff <= (hx >> 16) <= 0x401e here, 81 <= j <= 112
	 */
	j = 0x406f - (hx >> 16);
	xx.i[0] = 0x10000 | (xx.i[0] & 0xffff);	/* make leading 1 explicit */
	if (j >= 96) {				/* 96 <= j <= 112 */
		l = xx.i[0] >> (j - 96);
		frac = ((xx.i[0] << 1) << (127 - j)) | (xx.i[1] >> (j - 96));
		/* fold any lower nonzero bits into a sticky bit */
		if (((xx.i[1] << 1) << (127 - j)) | xx.i[2] | xx.i[3])
			frac |= 1;
	} else {				/* 81 <= j <= 95 */
		l = (xx.i[0] << (96 - j)) | (xx.i[1] >> (j - 64));
		frac = (xx.i[1] << (96 - j)) | (xx.i[2] >> (j - 64));
		if ((xx.i[2] << (96 - j)) | xx.i[3])
			frac |= 1;
	}

	/* round; in to-nearest mode an exact tie goes to the even integer */
	if (frac && (rm == FSR_RP || (rm == FSR_RN && (frac > 0x80000000U ||
	    (frac == 0x80000000 && (l & 1))))))
		l++;

	/* check for result out of range (note that l is |x| at this point) */
	if (l > 0x80000000U || (l == 0x80000000U && !sx)) {
		tt.i = sx | 0x7f000000;
		return ((long) tt.f);
	}

	/* raise inexact if need be */
	if (frac) {
		dummy = 1.0e30F;
		dummy += 1.0e-30F;
	}

	/* negate result if need be */
	if (sx)
		l = -l;
	return ((long) l);
}
#elif defined(__x86)
/*
 * lrintl(x), x86 version: round x to an integer by adding and
 * subtracting 2^63 with the sign of x, then convert to long.
 */
long
lrintl(long double x) {
	/*
	 * Note: The following code works on x86 (in the default rounding
	 * precision mode), but one ought to just use the fistpl instruction
	 * instead.
	 */
	union {
		unsigned i[3];
		long double e;
	} xx, yy;
	int ex;

	xx.e = x;
	ex = xx.i[2] & 0x7fff;
	if (ex < 0x403e) {	/* |x| < 2^63 */
		/* add and subtract a power of two to round x to an integer */
		yy.i[2] = (xx.i[2] & 0x8000) | 0x403e;
		yy.i[1] = 0x80000000;
		yy.i[0] = 0;
		x = (x + yy.e) - yy.e;
	}

	/* now x is nan, inf, or integral */
	return ((long) x);
}
#else
#error Unknown architecture
#endif	/* defined(__sparc) || defined(__x86) */
#else
#error Unsupported architecture
#endif	/* defined(_ILP32) */
+ * + * If x is NaN, infinite, or so large that the nearest integer + * would exceed 32 bits, the invalid operation exception is raised. + */ + +#include <sys/isa_defs.h> /* _ILP32 */ +#include "libm.h" + +#if defined(_ILP32) +long +lround(double x) { + union { + unsigned i[2]; + double d; + } xx; + unsigned hx, sx, i; + + xx.d = x; + hx = xx.i[HIWORD] & ~0x80000000; + sx = xx.i[HIWORD] & 0x80000000; + if (hx < 0x43300000) { /* |x| < 2^52 */ + if (hx < 0x3ff00000) { /* |x| < 1 */ + if (hx >= 0x3fe00000) + return (sx ? -1L : 1L); + return (0L); + } + + /* round x at the integer bit */ + if (hx < 0x41300000) { + i = 1 << (0x412 - (hx >> 20)); + xx.i[HIWORD] = (xx.i[HIWORD] + i) & ~(i | (i - 1)); + xx.i[LOWORD] = 0; + } else { + i = 1 << (0x432 - (hx >> 20)); + xx.i[LOWORD] += i; + if (xx.i[LOWORD] < i) + xx.i[HIWORD]++; + xx.i[LOWORD] &= ~(i | (i - 1)); + } + } + + /* now x is nan, inf, or integral */ + return ((long) xx.d); +} +#else +#error Unsupported architecture +#endif /* defined(_ILP32) */ diff --git a/usr/src/lib/libm/common/m9x/lroundf.c b/usr/src/lib/libm/common/m9x/lroundf.c new file mode 100644 index 0000000000..552316411f --- /dev/null +++ b/usr/src/lib/libm/common/m9x/lroundf.c @@ -0,0 +1,67 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#if defined(ELFOBJ) +#pragma weak lroundf = __lroundf +#endif + +#include <sys/isa_defs.h> /* _ILP32 */ +#include "libm.h" + +#if defined(_ILP32) +long +lroundf(float x) { + union { + unsigned i; + float f; + } xx; + unsigned hx, sx, i; + + xx.f = x; + hx = xx.i & ~0x80000000; + sx = xx.i & 0x80000000; + if (hx < 0x4b000000) { /* |x| < 2^23 */ + if (hx < 0x3f800000) { /* |x| < 1 */ + if (hx >= 0x3f000000) + return (sx ? -1L : 1L); + return (0L); + } + + /* round x at the integer bit */ + i = 1 << (0x95 - (hx >> 23)); + xx.i = (xx.i + i) & ~((i << 1) - 1); + return ((long) xx.f); + } + + /* now x is nan, inf, or integral */ + return ((long) x); +} +#else +#error Unsupported architecture +#endif /* defined(_ILP32) */ diff --git a/usr/src/lib/libm/common/m9x/lroundl.c b/usr/src/lib/libm/common/m9x/lroundl.c new file mode 100644 index 0000000000..d2536a83be --- /dev/null +++ b/usr/src/lib/libm/common/m9x/lroundl.c @@ -0,0 +1,145 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#if defined(ELFOBJ) +#pragma weak lroundl = __lroundl +#endif + +#include <sys/isa_defs.h> /* _ILP32 */ +#include "libm.h" + +#if defined(_ILP32) +#if defined(__sparc) +long +lroundl(long double x) { + union { + unsigned i[4]; + long double q; + } xx; + union { + unsigned i; + float f; + } tt; + unsigned hx, sx, frac, l; + int j; + + xx.q = x; + sx = xx.i[0] & 0x80000000; + hx = xx.i[0] & ~0x80000000; + + /* handle trivial cases */ + if (hx > 0x401e0000) { /* |x| > 2^31 + ... or x is nan */ + /* convert an out-of-range float */ + tt.i = sx | 0x7f000000; + return ((long) tt.f); + } + + /* handle |x| < 1 */ + if (hx < 0x3fff0000) { + if (hx >= 0x3ffe0000) + return (sx ? 
-1L : 1L); + return (0L); + } + + /* extract the integer and fractional parts of x */ + j = 0x406f - (hx >> 16); /* 91 <= j <= 112 */ + xx.i[0] = 0x10000 | (xx.i[0] & 0xffff); + if (j >= 96) { /* 96 <= j <= 112 */ + l = xx.i[0] >> (j - 96); + frac = ((xx.i[0] << 1) << (127 - j)) | (xx.i[1] >> (j - 96)); + if (((xx.i[1] << 1) << (127 - j)) | xx.i[2] | xx.i[3]) + frac |= 1; + } else { /* 91 <= j <= 95 */ + l = (xx.i[0] << (96 - j)) | (xx.i[1] >> (j - 64)); + frac = (xx.i[1] << (96 - j)) | (xx.i[2] >> (j - 64)); + if ((xx.i[2] << (96 - j)) | xx.i[3]) + frac |= 1; + } + + /* round */ + if (frac >= 0x80000000U) + l++; + + /* check for result out of range (note that z is |x| at this point) */ + if (l > 0x80000000U || (l == 0x80000000U && !sx)) { + tt.i = sx | 0x7f000000; + return ((long) tt.f); + } + + /* negate result if need be */ + if (sx) + l = -l; + return ((long) l); +} +#elif defined(__x86) +long +lroundl(long double x) { + union { + unsigned i[3]; + long double e; + } xx; + int ex, sx, i; + + xx.e = x; + ex = xx.i[2] & 0x7fff; + sx = xx.i[2] & 0x8000; + if (ex < 0x403e) { /* |x| < 2^63 */ + if (ex < 0x3fff) { /* |x| < 1 */ + if (ex >= 0x3ffe) + return (sx ? 
-1L : 1L); + return (0L); + } + + /* round x at the integer bit */ + if (ex < 0x401e) { + i = 1 << (0x401d - ex); + xx.i[1] = (xx.i[1] + i) & ~(i | (i - 1)); + xx.i[0] = 0; + } else { + i = 1 << (0x403d - ex); + xx.i[0] += i; + if (xx.i[0] < i) + xx.i[1]++; + xx.i[0] &= ~(i | (i - 1)); + } + if (xx.i[1] == 0) { + xx.i[2] = sx | ++ex; + xx.i[1] = 0x80000000U; + } + } + + /* now x is nan, inf, or integral */ + return ((long) xx.e); +} +#else +#error Unknown architecture +#endif /* defined(__sparc) || defined(__x86) */ +#else +#error Unsupported architecture +#endif /* defined(_ILP32) */ diff --git a/usr/src/lib/libm/common/m9x/modf.c b/usr/src/lib/libm/common/m9x/modf.c new file mode 100644 index 0000000000..636bb2967d --- /dev/null +++ b/usr/src/lib/libm/common/m9x/modf.c @@ -0,0 +1,93 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#if defined(ELFOBJ) +#pragma weak modf = __modf +#pragma weak _modf = __modf +#endif + +/* + * modf(x, iptr) decomposes x into an integral part and a fractional + * part both having the same sign as x. It stores the integral part + * in *iptr and returns the fractional part. + * + * If x is infinite, modf sets *iptr to x and returns copysign(0.0,x). + * If x is NaN, modf sets *iptr to x and returns x. + * + * If x is a signaling NaN, this code does not attempt to raise the + * invalid operation exception. + */ + +#include "libm.h" + +double +__modf(double x, double *iptr) { + union { + unsigned i[2]; + double d; + } xx, yy; + unsigned hx, s; + + xx.d = x; + hx = xx.i[HIWORD] & ~0x80000000; + + if (hx >= 0x43300000) { /* x is NaN, infinite, or integral */ + *iptr = x; + if (hx < 0x7ff00000 || (hx == 0x7ff00000 && + xx.i[LOWORD] == 0)) { + xx.i[HIWORD] &= 0x80000000; + xx.i[LOWORD] = 0; + } + return (xx.d); + } + + if (hx < 0x3ff00000) { /* |x| < 1 */ + xx.i[HIWORD] &= 0x80000000; + xx.i[LOWORD] = 0; + *iptr = xx.d; + return (x); + } + + /* split x at the binary point */ + s = xx.i[HIWORD] & 0x80000000; + if (hx < 0x41400000) { + yy.i[HIWORD] = xx.i[HIWORD] & ~((1 << (0x413 - (hx >> 20))) - + 1); + yy.i[LOWORD] = 0; + } else { + yy.i[HIWORD] = xx.i[HIWORD]; + yy.i[LOWORD] = xx.i[LOWORD] & ~((1 << (0x433 - (hx >> 20))) - + 1); + } + *iptr = yy.d; + xx.d -= yy.d; + xx.i[HIWORD] = (xx.i[HIWORD] & ~0x80000000) | s; + /* keep sign of x */ + return (xx.d); +} diff --git a/usr/src/lib/libm/common/m9x/modff.c b/usr/src/lib/libm/common/m9x/modff.c new file mode 100644 index 0000000000..1add78a7c5 --- /dev/null +++ b/usr/src/lib/libm/common/m9x/modff.c @@ -0,0 +1,69 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#if defined(ELFOBJ) +#pragma weak modff = __modff +#pragma weak _modff = __modff +#endif + +#include "libm.h" + +float +__modff(float x, float *iptr) { + union { + unsigned i; + float f; + } xx, yy; + unsigned hx, s; + + xx.f = x; + hx = xx.i & ~0x80000000; + + if (hx >= 0x4b000000) { /* x is NaN, infinite, or integral */ + *iptr = x; + if (hx <= 0x7f800000) + xx.i &= 0x80000000; + return (xx.f); + } + + if (hx < 0x3f800000) { /* |x| < 1 */ + xx.i &= 0x80000000; + *iptr = xx.f; + return (x); + } + + /* split x at the binary point */ + s = xx.i & 0x80000000; + yy.i = xx.i & ~((1 << (0x96 - (hx >> 23))) - 1); + *iptr = yy.f; + xx.f -= yy.f; + xx.i = (xx.i & ~0x80000000) | s; + /* restore sign in case difference is 0 */ + return (xx.f); +} diff --git a/usr/src/lib/libm/common/m9x/modfl.c b/usr/src/lib/libm/common/m9x/modfl.c new file mode 100644 index 0000000000..000b03d41f --- /dev/null +++ b/usr/src/lib/libm/common/m9x/modfl.c @@ -0,0 +1,150 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#if defined(ELFOBJ) +#pragma weak modfl = __modfl +#endif + +#include "libm.h" + +#if defined(__sparc) + +long double +__modfl(long double x, long double *iptr) { + union { + unsigned i[4]; + long double q; + } xx, yy; + unsigned hx, s; + + xx.q = x; + hx = xx.i[0] & ~0x80000000; + + if (hx >= 0x406f0000) { /* x is NaN, infinite, or integral */ + *iptr = x; + if (hx < 0x7fff0000 || (hx == 0x7fff0000 && + (xx.i[1] | xx.i[2] | xx.i[3]) == 0)) { + xx.i[0] &= 0x80000000; + xx.i[1] = xx.i[2] = xx.i[3] = 0; + } + return (xx.q); + } + + if (hx < 0x3fff0000) { /* |x| < 1 */ + xx.i[0] &= 0x80000000; + xx.i[1] = xx.i[2] = xx.i[3] = 0; + *iptr = xx.q; + return (x); + } + + /* split x at the binary point */ + s = xx.i[0] & 0x80000000; + if (hx < 0x40100000) { + yy.i[0] = xx.i[0] & ~((1 << (0x400f - (hx >> 16))) - 1); + yy.i[1] = yy.i[2] = yy.i[3] = 0; + } else if (hx < 0x40300000) { + yy.i[0] = xx.i[0]; + yy.i[1] = xx.i[1] & ~((1 << (0x402f - (hx >> 16))) - 1); + yy.i[2] = yy.i[3] = 0; + } else if (hx < 0x40500000) { + yy.i[0] = xx.i[0]; + yy.i[1] = xx.i[1]; + yy.i[2] = xx.i[2] & ~((1 << (0x404f - (hx >> 16))) - 1); + yy.i[3] = 0; + } else { + yy.i[0] = xx.i[0]; + yy.i[1] = xx.i[1]; + yy.i[2] 
= xx.i[2]; + yy.i[3] = xx.i[3] & ~((1 << (0x406f - (hx >> 16))) - 1); + } + *iptr = yy.q; + + /* + * we could implement the following more efficiently than by using + * software emulation of fsubq, but we'll do it this way for now + * (and hope hardware support becomes commonplace) + */ + xx.q -= yy.q; + xx.i[0] = (xx.i[0] & ~0x80000000) | s; /* keep sign of x */ + return (xx.q); +} + +#elif defined(__x86) + +long double +__modfl(long double x, long double *iptr) { + union { + unsigned i[3]; + long double e; + } xx, yy; + unsigned hx, s; + + /* + * It might be faster to use one of the x86 fpops instead of + * the following. + */ + xx.e = x; + hx = xx.i[2] & 0x7fff; + + if (hx >= 0x403e) { /* x is NaN, infinite, or integral */ + *iptr = x; + if (hx < 0x7fff || (hx == 0x7fff && + ((xx.i[1] << 1) | xx.i[0]) == 0)) { + xx.i[2] &= 0x8000; + xx.i[1] = xx.i[0] = 0; + } + return (xx.e); + } + + if (hx < 0x3fff) { /* |x| < 1 */ + xx.i[2] &= 0x8000; + xx.i[1] = xx.i[0] = 0; + *iptr = xx.e; + return (x); + } + + /* split x at the binary point */ + s = xx.i[2] & 0x8000; + yy.i[2] = xx.i[2]; + if (hx < 0x401f) { + yy.i[1] = xx.i[1] & ~((1 << (0x401e - hx)) - 1); + yy.i[0] = 0; + } else { + yy.i[1] = xx.i[1]; + yy.i[0] = xx.i[0] & ~((1 << (0x403e - hx)) - 1); + } + *iptr = yy.e; + xx.e -= yy.e; + xx.i[2] = (xx.i[2] & ~0x8000) | s; /* keep sign of x */ + return (xx.e); +} + +#else +#error Unknown architecture +#endif diff --git a/usr/src/lib/libm/common/m9x/nan.c b/usr/src/lib/libm/common/m9x/nan.c new file mode 100644 index 0000000000..da153ad515 --- /dev/null +++ b/usr/src/lib/libm/common/m9x/nan.c @@ -0,0 +1,63 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#if defined(ELFOBJ) +#pragma weak nan = __nan +#endif + +/* + * nan(c) returns a NaN. This implementation ignores c. + */ + +#include "libm.h" +#include <sys/isa_defs.h> + +#if defined(__sparc) + +static const union { + unsigned i[2]; + double d; +} __nan_union = { 0x7fffffff, 0xffffffff }; + +#elif defined(__i386) || defined(__amd64) + +static const union { + unsigned i[2]; + double d; +} __nan_union = { 0xffffffff, 0x7fffffff }; + +#else +#error Unknown architecture +#endif + +/* ARGSUSED0 */ +double +__nan(const char *c) { + return (__nan_union.d); +} diff --git a/usr/src/lib/libm/common/m9x/nanf.c b/usr/src/lib/libm/common/m9x/nanf.c new file mode 100644 index 0000000000..0e580b9923 --- /dev/null +++ b/usr/src/lib/libm/common/m9x/nanf.c @@ -0,0 +1,45 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#if defined(ELFOBJ) +#pragma weak nanf = __nanf +#endif + +#include "libm.h" + +static const union { + unsigned i; + float f; +} __nanf_union = { 0x7fffffff }; + +/* ARGSUSED0 */ +float +__nanf(const char *c) { + return (__nanf_union.f); +} diff --git a/usr/src/lib/libm/common/m9x/nanl.c b/usr/src/lib/libm/common/m9x/nanl.c new file mode 100644 index 0000000000..f1f7a52e43 --- /dev/null +++ b/usr/src/lib/libm/common/m9x/nanl.c @@ -0,0 +1,58 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 
+ * Use is subject to license terms. + */ + +#if defined(ELFOBJ) +#pragma weak nanl = __nanl +#endif + +#include "libm.h" + +#if defined(__sparc) + +static const union { + unsigned i[4]; + long double ld; +} __nanl_union = { 0x7fffffff, 0xffffffff, 0xffffffff, 0xffffffff }; + +#elif defined(__x86) + +static const union { + unsigned i[3]; + long double ld; +} __nanl_union = { 0xffffffff, 0xffffffff, 0x7fff }; + +#else +#error Unknown architecture +#endif + +/* ARGSUSED0 */ +long double +__nanl(const char *c) { + return (__nanl_union.ld); +} diff --git a/usr/src/lib/libm/common/m9x/nearbyint.c b/usr/src/lib/libm/common/m9x/nearbyint.c new file mode 100644 index 0000000000..5f988fb818 --- /dev/null +++ b/usr/src/lib/libm/common/m9x/nearbyint.c @@ -0,0 +1,122 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#if defined(ELFOBJ) +#pragma weak nearbyint = __nearbyint +#endif + +/* + * nearbyint(x) returns the nearest fp integer to x in the direction + * corresponding to the current rounding direction without raising + * the inexact exception. + * + * nearbyint(x) is x unchanged if x is +/-0 or +/-inf. If x is NaN, + * nearbyint(x) is also NaN. + */ + +#include "libm.h" +#include "fenv_synonyms.h" +#include <fenv.h> + +double +__nearbyint(double x) { + union { + unsigned i[2]; + double d; + } xx; + unsigned hx, sx, i, frac; + int rm, j; + + xx.d = x; + sx = xx.i[HIWORD] & 0x80000000; + hx = xx.i[HIWORD] & ~0x80000000; + + /* handle trivial cases */ + if (hx >= 0x43300000) { /* x is nan, inf, or already integral */ + if (hx >= 0x7ff00000) /* x is inf or nan */ +#if defined(FPADD_TRAPS_INCOMPLETE_ON_NAN) + return (hx >= 0x7ff80000 ? x : x + x); + /* assumes sparc-like QNaN */ +#else + return (x + x); +#endif + return (x); + } else if ((hx | xx.i[LOWORD]) == 0) /* x is zero */ + return (x); + + /* get the rounding mode */ + rm = fegetround(); + + /* flip the sense of directed roundings if x is negative */ + if (sx && (rm == FE_UPWARD || rm == FE_DOWNWARD)) + rm = (FE_UPWARD + FE_DOWNWARD) - rm; + + /* handle |x| < 1 */ + if (hx < 0x3ff00000) { + if (rm == FE_UPWARD || (rm == FE_TONEAREST && + (hx >= 0x3fe00000 && ((hx & 0xfffff) | xx.i[LOWORD])))) + xx.i[HIWORD] = sx | 0x3ff00000; + else + xx.i[HIWORD] = sx; + xx.i[LOWORD] = 0; + return (xx.d); + } + + /* round x at the integer bit */ + j = 0x433 - (hx >> 20); + if (j >= 32) { + i = 1 << (j - 32); + frac = ((xx.i[HIWORD] << 1) << (63 - j)) | + (xx.i[LOWORD] >> (j - 32)); + if (xx.i[LOWORD] & (i - 1)) + frac |= 1; + if (!frac) + return (x); + xx.i[LOWORD] = 0; + xx.i[HIWORD] &= ~(i - 1); + if ((rm == FE_UPWARD) || ((rm == FE_TONEAREST) && + ((frac > 0x80000000u) || ((frac == 0x80000000) && + (xx.i[HIWORD] & i))))) + xx.i[HIWORD] += i; + } else { + i = 1 << j; + frac = (xx.i[LOWORD] << 1) << (31 - j); + if (!frac) + 
return (x); + xx.i[LOWORD] &= ~(i - 1); + if ((rm == FE_UPWARD) || ((rm == FE_TONEAREST) && + (frac > 0x80000000u || ((frac == 0x80000000) && + (xx.i[LOWORD] & i))))) { + xx.i[LOWORD] += i; + if (xx.i[LOWORD] == 0) + xx.i[HIWORD]++; + } + } + return (xx.d); +} diff --git a/usr/src/lib/libm/common/m9x/nearbyintf.c b/usr/src/lib/libm/common/m9x/nearbyintf.c new file mode 100644 index 0000000000..99377c187e --- /dev/null +++ b/usr/src/lib/libm/common/m9x/nearbyintf.c @@ -0,0 +1,186 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#if defined(ELFOBJ) +#pragma weak nearbyintf = __nearbyintf +#endif + +#include "libm.h" +#include "fenv_synonyms.h" +#include <fenv.h> + +float +__nearbyintf(float x) { + union { + unsigned i; + float f; + } xx; + unsigned hx, sx, i, frac; + int rm; + + xx.f = x; + sx = xx.i & 0x80000000; + hx = xx.i & ~0x80000000; + + /* handle trivial cases */ + if (hx >= 0x4b000000) { /* x is nan, inf, or already integral */ + if (hx > 0x7f800000) /* x is nan */ + return (x * x); /* + -> * for Cheetah */ + return (x); + } else if (hx == 0) /* x is zero */ + return (x); + + /* get the rounding mode */ + rm = fegetround(); + + /* flip the sense of directed roundings if x is negative */ + if (sx && (rm == FE_UPWARD || rm == FE_DOWNWARD)) + rm = (FE_UPWARD + FE_DOWNWARD) - rm; + + /* handle |x| < 1 */ + if (hx < 0x3f800000) { + if (rm == FE_UPWARD || (rm == FE_TONEAREST && hx > 0x3f000000)) + xx.i = sx | 0x3f800000; + else + xx.i = sx; + return (xx.f); + } + + /* round x at the integer bit */ + i = 1 << (0x96 - (hx >> 23)); + frac = hx & (i - 1); + if (!frac) + return (x); + + hx &= ~(i - 1); + if (rm == FE_UPWARD || (rm == FE_TONEAREST && (frac > (i >> 1) || + ((frac == (i >> 1)) && (hx & i))))) + xx.i = sx | (hx + i); + else + xx.i = sx | hx; + return (xx.f); +} + +#if 0 + +/* + * Alternate implementations for SPARC, x86, using fp ops. These may + * be faster depending on how expensive saving and restoring the fp + * modes and status flags is. 
+ */ + +#include "libm.h" +#include "fma.h" + +#if defined(__sparc) + +float +__nearbyintf(float x) { + union { + unsigned i; + float f; + } xx, yy; + float z; + unsigned hx, sx, fsr, oldfsr; + int rm; + + xx.f = x; + sx = xx.i & 0x80000000; + hx = xx.i & ~0x80000000; + + /* handle trivial cases */ + if (hx >= 0x4b000000) /* x is nan, inf, or already integral */ + return (x + 0.0f); + else if (hx == 0) /* x is zero */ + return (x); + + /* save the fsr */ + __fenv_getfsr(&oldfsr); + + /* handle |x| < 1 */ + if (hx < 0x3f800000) { + /* flip the sense of directed roundings if x is negative */ + rm = oldfsr >> 30; + if (sx) + rm ^= rm >> 1; + if (rm == FSR_RP || (rm == FSR_RN && hx > 0x3f000000)) + xx.i = sx | 0x3f800000; + else + xx.i = sx; + return (xx.f); + } + + /* clear the inexact trap */ + fsr = oldfsr & ~FSR_NXM; + __fenv_setfsr(&fsr); + + /* round x at the integer bit */ + yy.i = sx | 0x4b000000; + z = (x + yy.f) - yy.f; + + /* restore the old fsr */ + __fenv_setfsr(&oldfsr); + + return (z); +} + +#elif defined(__x86) + +/* inline template */ +extern long double frndint(long double); + +float +__nearbyintf(float x) { + long double z; + unsigned oldcwsw, cwsw; + + /* save the control and status words, mask the inexact exception */ + __fenv_getcwsw(&oldcwsw); + cwsw = oldcwsw | 0x00200000; + __fenv_setcwsw(&cwsw); + + z = frndint((long double) x); + + /* + * restore the control and status words, preserving all but the + * inexact flag + */ + __fenv_getcwsw(&cwsw); + oldcwsw |= (cwsw & 0x1f); + __fenv_setcwsw(&oldcwsw); + + /* note: the value of z is representable in single precision */ + return (z); +} + +#else +#error Unknown architecture +#endif + +#endif diff --git a/usr/src/lib/libm/common/m9x/nearbyintl.c b/usr/src/lib/libm/common/m9x/nearbyintl.c new file mode 100644 index 0000000000..0c1148728b --- /dev/null +++ b/usr/src/lib/libm/common/m9x/nearbyintl.c @@ -0,0 +1,186 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms 
of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#if defined(ELFOBJ) +#pragma weak nearbyintl = __nearbyintl +#endif + +#include "libm.h" +#include "fma.h" +#include "fenv_inlines.h" + +#if defined(__sparc) + +static union { + unsigned i; + float f; +} snan = { 0x7f800001 }; + +long double +__nearbyintl(long double x) { + union { + unsigned i[4]; + long double q; + } xx; + unsigned hx, sx, i, frac; + unsigned int fsr; + int rm, j; + volatile float dummy; + + xx.q = x; + sx = xx.i[0] & 0x80000000; + hx = xx.i[0] & ~0x80000000; + + /* handle trivial cases */ + if (hx >= 0x406f0000) { /* x is nan, inf, or already integral */ + /* check for signaling nan */ + if ((hx > 0x7fff0000 || (hx == 0x7fff0000 && + (xx.i[1] | xx.i[2] | xx.i[3]))) && !(hx & 0x8000)) { + dummy = snan.f; + dummy += snan.f; + xx.i[0] = sx | hx | 0x8000; + } + return (xx.q); + } else if ((hx | xx.i[1] | xx.i[2] | xx.i[3]) == 0) /* x is zero */ + return (x); + + /* get the rounding mode */ + __fenv_getfsr32(&fsr); + rm = fsr >> 30; + + /* flip the sense of directed roundings if x is negative */ + if (sx) + rm ^= rm >> 1; + + /* 
handle |x| < 1 */ + if (hx < 0x3fff0000) { + if (rm == FSR_RP || (rm == FSR_RN && (hx >= 0x3ffe0000 && + ((hx & 0xffff) | xx.i[1] | xx.i[2] | xx.i[3])))) + xx.i[0] = sx | 0x3fff0000; + else + xx.i[0] = sx; + xx.i[1] = xx.i[2] = xx.i[3] = 0; + return (xx.q); + } + + /* round x at the integer bit */ + j = 0x406f - (hx >> 16); + if (j >= 96) { + i = 1 << (j - 96); + frac = ((xx.i[0] << 1) << (127 - j)) | (xx.i[1] >> (j - 96)); + if ((xx.i[1] & (i - 1)) | xx.i[2] | xx.i[3]) + frac |= 1; + if (!frac) + return (x); + xx.i[1] = xx.i[2] = xx.i[3] = 0; + xx.i[0] &= ~(i - 1); + if (rm == FSR_RP || (rm == FSR_RN && (frac > 0x80000000u || + (frac == 0x80000000 && (xx.i[0] & i))))) + xx.i[0] += i; + } else if (j >= 64) { + i = 1 << (j - 64); + frac = ((xx.i[1] << 1) << (95 - j)) | (xx.i[2] >> (j - 64)); + if ((xx.i[2] & (i - 1)) | xx.i[3]) + frac |= 1; + if (!frac) + return (x); + xx.i[2] = xx.i[3] = 0; + xx.i[1] &= ~(i - 1); + if (rm == FSR_RP || (rm == FSR_RN && (frac > 0x80000000u || + (frac == 0x80000000 && (xx.i[1] & i))))) { + xx.i[1] += i; + if (xx.i[1] == 0) + xx.i[0]++; + } + } else if (j >= 32) { + i = 1 << (j - 32); + frac = ((xx.i[2] << 1) << (63 - j)) | (xx.i[3] >> (j - 32)); + if (xx.i[3] & (i - 1)) + frac |= 1; + if (!frac) + return (x); + xx.i[3] = 0; + xx.i[2] &= ~(i - 1); + if (rm == FSR_RP || (rm == FSR_RN && (frac > 0x80000000u || + (frac == 0x80000000 && (xx.i[2] & i))))) { + xx.i[2] += i; + if (xx.i[2] == 0) + if (++xx.i[1] == 0) + xx.i[0]++; + } + } else { + i = 1 << j; + frac = (xx.i[3] << 1) << (31 - j); + if (!frac) + return (x); + xx.i[3] &= ~(i - 1); + if (rm == FSR_RP || (rm == FSR_RN && (frac > 0x80000000u || + (frac == 0x80000000 && (xx.i[3] & i))))) { + xx.i[3] += i; + if (xx.i[3] == 0) + if (++xx.i[2] == 0) + if (++xx.i[1] == 0) + xx.i[0]++; + } + } + + return (xx.q); +} + +#elif defined(__x86) + +/* inline template */ +extern long double frndint(long double); + +long double +__nearbyintl(long double x) { + long double z; + unsigned oldcwsw, 
cwsw; + + /* save the control and status words, mask the inexact exception */ + __fenv_getcwsw(&oldcwsw); + cwsw = oldcwsw | 0x00200000; + __fenv_setcwsw(&cwsw); + + z = frndint(x); + + /* + * restore the control and status words, preserving all but the + * inexact flag + */ + __fenv_getcwsw(&cwsw); + oldcwsw |= (cwsw & 0x1f); + __fenv_setcwsw(&oldcwsw); + + return (z); +} + +#else +#error Unknown architecture +#endif diff --git a/usr/src/lib/libm/common/m9x/nexttoward.c b/usr/src/lib/libm/common/m9x/nexttoward.c new file mode 100644 index 0000000000..ceb9969eef --- /dev/null +++ b/usr/src/lib/libm/common/m9x/nexttoward.c @@ -0,0 +1,223 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#if defined(ELFOBJ) +#pragma weak nexttoward = __nexttoward +#endif + +/* + * nexttoward(x, y) delivers the next representable number after x + * in the direction of y. If x and y are both zero, the result is + * zero with the same sign as y. If either x or y is NaN, the result + * is NaN. 
+ * + * If x != y and the result is infinite, overflow is raised; if + * x != y and the result is subnormal or zero, underflow is raised. + * (This is wrong, but it's what C99 apparently wants.) + */ + +#include "libm.h" + +#if defined(__sparc) + +static union { + unsigned i[2]; + double d; +} C[] = { + 0x00100000, 0, + 0x7fe00000, 0, + 0x7fffffff, 0xffffffff +}; + +#define tiny C[0].d +#define huge C[1].d +#define qnan C[2].d + +enum fcc_type { + fcc_equal = 0, + fcc_less = 1, + fcc_greater = 2, + fcc_unordered = 3 +}; + +#ifdef __sparcv9 +#define _Q_cmp _Qp_cmp +#endif + +extern enum fcc_type _Q_cmp(const long double *, const long double *); + +double +__nexttoward(double x, long double y) { + union { + unsigned i[2]; + double d; + } xx; + union { + unsigned i[4]; + long double q; + } yy; + long double lx; + unsigned hx; + volatile double dummy; + enum fcc_type rel; + + /* + * It would be somewhat more efficient to check for NaN and + * zero operands before converting x to long double and then + * to code the comparison in line rather than calling _Q_cmp. + * However, since this code probably won't get used much, + * I'm opting in favor of simplicity instead. 
+ */ + lx = xx.d = x; + hx = (xx.i[0] & ~0x80000000) | xx.i[1]; + + /* check for each of four possible orderings */ + rel = _Q_cmp(&lx, &y); + if (rel == fcc_unordered) + return (qnan); + + if (rel == fcc_equal) { + if (hx == 0) { /* x is zero; return zero with y's sign */ + yy.q = y; + xx.i[0] = yy.i[0]; + return (xx.d); + } + return (x); + } + + if (rel == fcc_less) { + if (hx == 0) { /* x is zero */ + xx.i[0] = 0; + xx.i[1] = 0x00000001; + } else if ((int)xx.i[0] >= 0) { /* x is positive */ + if (++xx.i[1] == 0) + xx.i[0]++; + } else { + if (xx.i[1]-- == 0) + xx.i[0]--; + } + } else { + if (hx == 0) { /* x is zero */ + xx.i[0] = 0x80000000; + xx.i[1] = 0x00000001; + } else if ((int)xx.i[0] >= 0) { /* x is positive */ + if (xx.i[1]-- == 0) + xx.i[0]--; + } else { + if (++xx.i[1] == 0) + xx.i[0]++; + } + } + + /* raise exceptions as needed */ + hx = xx.i[0] & ~0x80000000; + if (hx == 0x7ff00000) { + dummy = huge; + dummy *= huge; + } else if (hx < 0x00100000) { + dummy = tiny; + dummy *= tiny; + } + + return (xx.d); +} + +#elif defined(__x86) + +static union { + unsigned i[2]; + double d; +} C[] = { + 0, 0x00100000, + 0, 0x7fe00000, +}; + +#define tiny C[0].d +#define huge C[1].d + +double +__nexttoward(double x, long double y) { + union { + unsigned i[2]; + double d; + } xx; + unsigned hx; + long double lx; + volatile double dummy; + + lx = xx.d = x; + hx = (xx.i[1] & ~0x80000000) | xx.i[0]; + + /* check for each of four possible orderings */ + if (isunordered(lx, y)) + return ((double) (lx + y)); + + if (lx == y) + return ((double) y); + + if (lx < y) { + if (hx == 0) { /* x is zero */ + xx.i[0] = 0x00000001; + xx.i[1] = 0; + } else if ((int)xx.i[1] >= 0) { /* x is positive */ + if (++xx.i[0] == 0) + xx.i[1]++; + } else { + if (xx.i[0]-- == 0) + xx.i[1]--; + } + } else { + if (hx == 0) { /* x is zero */ + xx.i[0] = 0x00000001; + xx.i[1] = 0x80000000; + } else if ((int)xx.i[1] >= 0) { /* x is positive */ + if (xx.i[0]-- == 0) + xx.i[1]--; + } else { + if 
(++xx.i[0] == 0) + xx.i[1]++; + } + } + + /* raise exceptions as needed */ + hx = xx.i[1] & ~0x80000000; + if (hx == 0x7ff00000) { + dummy = huge; + dummy *= huge; + } else if (hx < 0x00100000) { + dummy = tiny; + dummy *= tiny; + } + + return (xx.d); +} + +#else +#error Unknown architecture +#endif diff --git a/usr/src/lib/libm/common/m9x/nexttowardf.c b/usr/src/lib/libm/common/m9x/nexttowardf.c new file mode 100644 index 0000000000..55ad74d106 --- /dev/null +++ b/usr/src/lib/libm/common/m9x/nexttowardf.c @@ -0,0 +1,185 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#if defined(ELFOBJ) +#pragma weak nexttowardf = __nexttowardf +#endif + +#include "libm.h" + +static union { + unsigned i; + float f; +} C[] = { + 0x00800000, + 0x7f000000, + 0x7fffffff +}; + +#define tiny C[0].f +#define huge C[1].f +#define qnan C[2].f + +#if defined(__sparc) + +enum fcc_type { + fcc_equal = 0, + fcc_less = 1, + fcc_greater = 2, + fcc_unordered = 3 +}; + +#ifdef __sparcv9 +#define _Q_cmp _Qp_cmp +#endif + +extern enum fcc_type _Q_cmp(const long double *, const long double *); + +float +__nexttowardf(float x, long double y) { + union { + unsigned i; + float f; + } xx; + union { + unsigned i[4]; + long double q; + } yy; + long double lx; + unsigned hx; + volatile float dummy; + enum fcc_type rel; + + /* + * It would be somewhat more efficient to check for NaN and + * zero operands before converting x to long double and then + * to code the comparison in line rather than calling _Q_cmp. + * However, since this code probably won't get used much, + * I'm opting in favor of simplicity instead. 
+ */ + lx = xx.f = x; + hx = xx.i & ~0x80000000; + + /* check for each of four possible orderings */ + rel = _Q_cmp(&lx, &y); + if (rel == fcc_unordered) + return (qnan); + + if (rel == fcc_equal) { + if (hx == 0) { /* x is zero; return zero with y's sign */ + yy.q = y; + xx.i = yy.i[0]; + return (xx.f); + } + return (x); + } + + if (rel == fcc_less) { + if (hx == 0) /* x is zero */ + xx.i = 0x00000001; + else if ((int) xx.i >= 0) /* x is positive */ + xx.i++; + else + xx.i--; + } else { + if (hx == 0) /* x is zero */ + xx.i = 0x80000001; + else if ((int) xx.i >= 0) /* x is positive */ + xx.i--; + else + xx.i++; + } + + /* raise exceptions as needed */ + hx = xx.i & ~0x80000000; + if (hx == 0x7f800000) { + dummy = huge; + dummy *= huge; + } else if (hx < 0x00800000) { + dummy = tiny; + dummy *= tiny; + } + + return (xx.f); +} + +#elif defined(__x86) + +float +__nexttowardf(float x, long double y) { + union { + unsigned i; + float f; + } xx; + unsigned hx; + long double lx; + volatile float dummy; + + lx = xx.f = x; + hx = xx.i & ~0x80000000; + + /* check for each of four possible orderings */ + if (isunordered(lx, y)) + return ((float) (lx + y)); + + if (lx == y) + return ((float) y); + + if (lx < y) { + if (hx == 0) /* x is zero */ + xx.i = 0x00000001; + else if ((int) xx.i >= 0) /* x is positive */ + xx.i++; + else + xx.i--; + } else { + if (hx == 0) /* x is zero */ + xx.i = 0x80000001; + else if ((int) xx.i >= 0) /* x is positive */ + xx.i--; + else + xx.i++; + } + + /* raise exceptions as needed */ + hx = xx.i & ~0x80000000; + if (hx == 0x7f800000) { + dummy = huge; + dummy *= huge; + } else if (hx < 0x00800000) { + dummy = tiny; + dummy *= tiny; + } + + return (xx.f); +} + +#else +#error Unknown architecture +#endif diff --git a/usr/src/lib/libm/common/m9x/nexttowardl.c b/usr/src/lib/libm/common/m9x/nexttowardl.c new file mode 100644 index 0000000000..87dfc13e66 --- /dev/null +++ b/usr/src/lib/libm/common/m9x/nexttowardl.c @@ -0,0 +1,119 @@ +/* + * CDDL HEADER 
START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#if defined(ELFOBJ) +#pragma weak nexttowardl = __nexttowardl +#endif + +#include "libm.h" +#include <float.h> /* LDBL_MAX, LDBL_MIN */ + +#if defined(__sparc) +#define n0 0 +#define n1 1 +#define n2 2 +#define n3 3 +#define X86PDNRM1(x) +#define INC(px) { \ + if (++px[n3] == 0) \ + if (++px[n2] == 0) \ + if (++px[n1] == 0) \ + ++px[n0]; \ + } +#define DEC(px) { \ + if (--px[n3] == 0xffffffff) \ + if (--px[n2] == 0xffffffff) \ + if (--px[n1] == 0xffffffff) \ + --px[n0]; \ + } +#elif defined(__x86) +#define n0 2 +#define n1 1 +#define n2 0 +#define n3 0 +/* + * if pseudo-denormal, replace by the equivalent normal + */ +#define X86PDNRM1(x) if (XBIASED_EXP(x) == 0 && (((int *) &x)[1] & \ + 0x80000000) != 0) \ + ((int *) &x)[2] |= 1 +#define INC(px) { \ + if (++px[n2] == 0) \ + if ((++px[n1] & ~0x80000000) == 0) \ + px[n1] = 0x80000000, ++px[n0]; \ + } +#define DEC(px) { \ + if (--px[n2] == 0xffffffff) \ + if (--px[n1] == 0x7fffffff) \ + if ((--px[n0] & 0x7fff) != 0) \ + px[n1] |= 
0x80000000; \ + } +#endif + +long double +nexttowardl(long double x, long double y) { + int *px = (int *) &x; + int *py = (int *) &y; + + if (x == y) + return (y); /* C99 requirement */ + if (x != x || y != y) + return (x * y); + + if (ISZEROL(x)) { /* x == 0.0 */ + px[n0] = py[n0] & XSGNMSK; + px[n1] = px[n2] = 0; + px[n3] = 1; + } else { + X86PDNRM1(x); + if ((px[n0] & XSGNMSK) == 0) { /* x > 0.0 */ + if (x > y) /* x > y */ + DEC(px) + else + INC(px) + } else { + if (x < y) /* x < y */ + DEC(px) + else + INC(px) + } + } +#ifndef lint + { + volatile long double dummy; + int k = XBIASED_EXP(x); + + if (k == 0) + dummy = LDBL_MIN * copysignl(LDBL_MIN, x); + else if (k == 0x7fff) + dummy = LDBL_MAX * copysignl(LDBL_MAX, x); + } +#endif + return (x); +} diff --git a/usr/src/lib/libm/common/m9x/remquo.c b/usr/src/lib/libm/common/m9x/remquo.c new file mode 100644 index 0000000000..2d05ec6de6 --- /dev/null +++ b/usr/src/lib/libm/common/m9x/remquo.c @@ -0,0 +1,267 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
#pragma weak remquo = __remquo

/* INDENT OFF */
/*
 * double remquo(double x, double y, int *quo) returns remainder(x,y) and
 * stores through the integer pointer quo a value such that
 * *quo = N mod {2**31}, where N is the exact integral part of x/y
 * rounded to nearest even.
 *
 * remquo calls the internal helper fmodquo.
 */
/* INDENT ON */

/*
 * Index of the 32-bit word of a double that carries the sign and
 * exponent (HIWORD) and of the word with the low fraction bits (LOWORD).
 */
#if defined(_BIG_ENDIAN)
#define	HIWORD	0
#define	LOWORD	1
#else
#define	HIWORD	1
#define	LOWORD	0
#endif
#define	__HI(x)	((int *) &x)[HIWORD]
#define	__LO(x)	((int *) &x)[LOWORD]

static const double one = 1.0, Zero[] = {0.0, -0.0};

/*
 * fmodquo(x, y, quo): fixed-point (shift and subtract) fmod.
 *
 * Returns the remainder of |x| reduced by |y|, carrying the sign of x,
 * and stores in *quo the low 31 bits of the quotient accumulated by the
 * subtraction loop, negated when x and y have opposite signs.  *quo is
 * set to 0 on the exceptional paths and when |x| < |y|.
 */
static double
fmodquo(double x, double y, int *quo) {
	int n, hx, hy, hz, ix, iy, sx, sq, i, m;
	unsigned lx, ly, lz;

	hx = __HI(x);		/* high word of x */
	lx = __LO(x);		/* low word of x */
	hy = __HI(y);		/* high word of y */
	ly = __LO(y);		/* low word of y */
	sx = hx & 0x80000000;	/* sign of x */
	sq = (hx ^ hy) & 0x80000000;	/* sign of x/y */
	hx ^= sx;		/* |x| */
	hy &= 0x7fffffff;	/* |y| */

	/* purge off exception values */
	*quo = 0;
	/* ((ly | -ly) >> 31) is 1 exactly when ly is nonzero */
	if ((hy | ly) == 0 || hx >= 0x7ff00000 ||	/* y=0, or x !finite */
	    (hy | ((ly | -ly) >> 31)) > 0x7ff00000)	/* or y is NaN */
		return ((x * y) / (x * y));
	if (hx <= hy) {
		if (hx < hy || lx < ly)
			return (x);	/* |x|<|y| return x */
		if (lx == ly) {
			/*
			 * sq is 0 or 0x80000000; with an arithmetic right
			 * shift sq >> 30 is 0 or -2, giving +1 or -1
			 */
			*quo = 1 + (sq >> 30);
			/* |x|=|y| return x*0 */
			return (Zero[(unsigned) sx >> 31]);
		}
	}

	/* determine ix = ilogb(x) */
	if (hx < 0x00100000) {	/* subnormal x */
		if (hx == 0) {
			for (ix = -1043, i = lx; i > 0; i <<= 1)
				ix -= 1;
		} else {
			for (ix = -1022, i = (hx << 11); i > 0; i <<= 1)
				ix -= 1;
		}
	} else
		ix = (hx >> 20) - 1023;

	/* determine iy = ilogb(y) */
	if (hy < 0x00100000) {	/* subnormal y */
		if (hy == 0) {
			for (iy = -1043, i = ly; i > 0; i <<= 1)
				iy -= 1;
		} else {
			for (iy = -1022, i = (hy << 11); i > 0; i <<= 1)
				iy -= 1;
		}
	} else
		iy = (hy >> 20) - 1023;

	/* set up {hx,lx}, {hy,ly} and align y to x */
	if (ix >= -1022)
		hx = 0x00100000 | (0x000fffff & hx);
	else {	/* subnormal x, shift x to normal */
		n = -1022 - ix;
		if (n <= 31) {
			hx = (hx << n) | (lx >> (32 - n));
			lx <<= n;
		} else {
			hx = lx << (n - 32);
			lx = 0;
		}
	}
	if (iy >= -1022)
		hy = 0x00100000 | (0x000fffff & hy);
	else {	/* subnormal y, shift y to normal */
		n = -1022 - iy;
		if (n <= 31) {
			hy = (hy << n) | (ly >> (32 - n));
			ly <<= n;
		} else {
			hy = ly << (n - 32);
			ly = 0;
		}
	}

	/*
	 * fix point fmod: one trial subtraction per exponent step; m
	 * collects one quotient bit per iteration
	 */
	n = ix - iy;
	m = 0;
	while (n--) {
		/* {hz,lz} = {hx,lx} - {hy,ly} with borrow from the low word */
		hz = hx - hy;
		lz = lx - ly;
		if (lx < ly)
			hz -= 1;
		if (hz < 0) {
			hx = hx + hx + (lx >> 31);
			lx = lx + lx;
		} else {
			m += 1;
			if ((hz | lz) == 0) {	/* return sign(x)*0 */
				/*
				 * exact division: the remaining n steps plus
				 * this one would only shift m left
				 */
				if (n < 31)
					m <<= 1 + n;
				else
					m = 0;
				m &= 0x7fffffff;
				*quo = sq >= 0 ? m : -m;
				return (Zero[(unsigned) sx >> 31]);
			}
			hx = hz + hz + (lz >> 31);
			lx = lz + lz;
		}
		m += m;
	}
	/* final trial subtraction, without the doubling */
	hz = hx - hy;
	lz = lx - ly;
	if (lx < ly)
		hz -= 1;
	if (hz >= 0) {
		hx = hz;
		lx = lz;
		m += 1;
	}
	m &= 0x7fffffff;
	*quo = sq >= 0 ? m : -m;

	/* convert back to floating value and restore the sign */
	if ((hx | lx) == 0) {	/* return sign(x)*0 */
		return (Zero[(unsigned) sx >> 31]);
	}
	while (hx < 0x00100000) {	/* normalize x */
		hx = hx + hx + (lx >> 31);
		lx = lx + lx;
		iy -= 1;
	}
	if (iy >= -1022) {	/* normalize output */
		hx = (hx - 0x00100000) | ((iy + 1023) << 20);
		__HI(x) = hx | sx;
		__LO(x) = lx;
	} else {	/* subnormal output */
		n = -1022 - iy;
		if (n <= 20) {
			lx = (lx >> n) | ((unsigned) hx << (32 - n));
			hx >>= n;
		} else if (n <= 31) {
			lx = (hx << (32 - n)) | (lx >> n);
			hx = sx;
		} else {
			lx = hx >> (n - 32);
			hx = sx;
		}
		__HI(x) = hx | sx;
		__LO(x) = lx;
		x *= one;	/* create necessary signal */
	}
	return (x);	/* exact output */
}

#define	zero	Zero[0]

/*
 * remquo(x, y, quo): IEEE-style remainder of x by y together with the
 * low bits of the rounded quotient.
 *
 * After rejecting y == 0, non-finite x and NaN y, the routine works on
 * |x| and |y|: |x| is first reduced modulo 2|y| by fmodquo (skipped
 * when hy > 0x7fdfffff, i.e. when y + y is not attempted), the quotient
 * obtained is doubled, and then up to two further subtractions of |y|
 * bring the remainder into range, bumping *quo each time.  The signs of
 * the quotient and of the remainder are restored at the end.
 */
double
remquo(double x, double y, int *quo) {
	int hx, hy, sx, sq;
	double v;
	unsigned ly;

	hx = __HI(x);		/* high word of x */
	hy = __HI(y);		/* high word of y */
	ly = __LO(y);		/* low word of y */
	sx = hx & 0x80000000;	/* sign of x */
	sq = (hx ^ hy) & 0x80000000;	/* sign of x/y */
	hx ^= sx;		/* |x| */
	hy &= 0x7fffffff;	/* |y| */

	/* purge off exception values */
	*quo = 0;
	if ((hy | ly) == 0 || hx >= 0x7ff00000 ||	/* y=0, or x !finite */
	    (hy | ((ly | -ly) >> 31)) > 0x7ff00000)	/* or y is NaN */
		return ((x * y) / (x * y));

	y = fabs(y);
	x = fabs(x);
	if (hy <= 0x7fdfffff) {
		x = fmodquo(x, y + y, quo);
		/* quotient by 2|y| -> quotient by |y| */
		*quo = ((*quo) & 0x3fffffff) << 1;
	}
	if (hy < 0x00200000) {
		/*
		 * small y: compare x + x against y rather than x against
		 * 0.5 * y (presumably so that halving y cannot lose bits;
		 * NOTE(review): confirm)
		 */
		if (x + x > y) {
			*quo += 1;
			if (x == y)
				x = zero;
			else
				x -= y;
			if (x + x >= y) {
				x -= y;
				*quo += 1;
			}
		}
	} else {
		v = 0.5 * y;
		if (x > v) {
			*quo += 1;
			if (x == y)
				x = zero;
			else
				x -= y;
			if (x >= v) {
				x -= y;
				*quo += 1;
			}
		}
	}
	if (sq != 0)
		*quo = -(*quo);
	return (sx == 0 ? x : -x);
}
#pragma weak remquof = __remquof

/* INDENT OFF */
/*
 * float remquof(float x, float y, int *quo) returns remainderf(x,y) and
 * stores through the integer pointer quo a value such that
 * *quo = N mod (2**31), where N is the exact integral part of x/y
 * rounded to nearest even.
 *
 * remquof calls the internal helper fmodquof.
 */

extern float fabsf(float);

/*
 * Bit masks for the 32-bit image of a float:
 *	is	sign bit
 *	im	fraction field
 *	ii	exponent field (also the image of +infinity)
 *	iu	lowest exponent bit, i.e. the implicit-one position of a
 *		normalized significand
 */
static const int
	is = (int) 0x80000000,
	im = 0x007fffff,
	ii = 0x7f800000,
	iu = 0x00800000;

static const float zero = 0.0F, half = 0.5F;
/* INDENT ON */

/*
 * fmodquof(x, y, quo): fixed-point (shift and subtract) fmodf.
 *
 * Returns the remainder of |x| reduced by |y|, carrying the sign of x,
 * and stores in *quo the low 31 bits of the quotient accumulated by the
 * subtraction loop, negated when x and y have opposite signs.  *quo is
 * set to 0 on the exceptional path and when |x| < |y|.
 */
static float
fmodquof(float x, float y, int *quo) {
	float w;
	int hx, ix, iy, iz, k, ny, nd, m, sq;

	hx = *(int *) &x;
	ix = hx & 0x7fffffff;	/* |x| */
	iy = *(int *) &y;
	sq = (iy ^ hx) & is;	/* sign of x/y */
	iy &= 0x7fffffff;	/* |y| */

	/* purge off exception values */
	*quo = 0;
	if (ix >= ii || iy > ii || iy == 0) {
		/* x not finite, y NaN, or y zero: produce a NaN */
		w = x * y;
		w = w / w;
	} else if (ix <= iy) {
		if (ix < iy)
			w = x;	/* return x if |x|<|y| */
		else {
			/*
			 * sq is 0 or 0x80000000; with an arithmetic right
			 * shift sq >> 30 is 0 or -2, giving +1 or -1
			 */
			*quo = 1 + (sq >> 30);
			w = zero * x;	/* return sign(x)*0.0 */
		}
	} else {
		/* INDENT OFF */
		/*
		 * scale x,y to "normal" with
		 *	ny = exponent of y
		 *	nd = exponent of x minus exponent of y
		 */
		/* INDENT ON */
		ny = iy >> 23;
		k = ix >> 23;

		/* special case for subnormal y or x */
		if (ny == 0) {
			ny = 1;
			while (iy < iu) {
				ny -= 1;
				iy += iy;
			}
			nd = k - ny;
			if (k == 0) {
				nd += 1;
				while (ix < iu) {
					nd -= 1;
					ix += ix;
				}
			} else
				ix = iu | (ix & im);
		} else {
			nd = k - ny;
			ix = iu | (ix & im);
			iy = iu | (iy & im);
		}
		/* INDENT OFF */
		/* fix point fmod for normalized ix and iy */
		/*
		 * while (nd--) {
		 *	iz = ix - iy;
		 *	if (iz < 0)
		 *		ix = ix + ix;
		 *	else if (iz == 0) {
		 *		*(int *) &w = is & hx;
		 *		return w;
		 *	} else
		 *		ix = iz + iz;
		 * }
		 */
		/* INDENT ON */
		/* unroll the above loop 4 times to gain performance */
		m = 0;		/* quotient bits, one per step */
		k = nd >> 2;	/* number of unrolled groups of four */
		nd -= (k << 2);	/* leftover single steps */
		while (k--) {
			iz = ix - iy;
			if (iz >= 0) {
				m += 1;
				ix = iz + iz;
			} else
				ix += ix;
			m += m;
			iz = ix - iy;
			if (iz >= 0) {
				m += 1;
				ix = iz + iz;
			} else
				ix += ix;
			m += m;
			iz = ix - iy;
			if (iz >= 0) {
				m += 1;
				ix = iz + iz;
			} else
				ix += ix;
			m += m;
			iz = ix - iy;
			if (iz >= 0) {
				m += 1;
				ix = iz + iz;
			} else
				ix += ix;
			m += m;
			if (iz == 0) {
				/*
				 * exact division: the (k << 2) + nd steps
				 * still outstanding would only shift m left
				 */
				iz = (k << 2) + nd;
				if (iz < 32)
					m <<= iz;
				else
					m = 0;
				m &= 0x7fffffff;
				*quo = sq >= 0 ? m : -m;
				*(int *) &w = is & hx;
				return (w);
			}
		}
		while (nd--) {
			iz = ix - iy;
			if (iz >= 0) {
				m += 1;
				ix = iz + iz;
			} else
				ix += ix;
			m += m;
		}
		/* end of unrolling */

		/* final trial subtraction, without the doubling */
		iz = ix - iy;
		if (iz >= 0) {
			m += 1;
			ix = iz;
		}
		m &= 0x7fffffff;
		*quo = sq >= 0 ? m : -m;

		/* convert back to floating value and restore the sign */
		if (ix == 0) {
			*(int *) &w = is & hx;
			return (w);
		}
		while (ix < iu) {
			ix += ix;
			ny -= 1;
		}
		while (ix > (iu + iu)) {
			ny += 1;
			ix >>= 1;
		}
		if (ny > 0)
			*(int *) &w = (is & hx) | (ix & im) | (ny << 23);
		else {	/* subnormal output */
			k = -ny + 1;
			ix >>= k;
			*(int *) &w = (is & hx) | ix;
		}
	}
	return (w);
}

/*
 * remquof(x, y, quo): IEEE-style remainder of x by y together with the
 * low bits of the rounded quotient.
 *
 * After rejecting y == 0, non-finite x and NaN y, the routine works on
 * |x| and |y|: |x| is first reduced modulo 2|y| by fmodquof (skipped
 * when hy > 0x7f7fffff, i.e. when y + y is not attempted), the quotient
 * obtained is doubled, and then up to two further subtractions of |y|
 * bring the remainder into range, bumping *quo each time.  The signs of
 * the quotient and of the remainder are restored at the end.
 */
float
remquof(float x, float y, int *quo) {
	int hx, hy, sx, sq;
	float v;

	hx = *(int *) &x;	/* high word of x */
	hy = *(int *) &y;	/* high word of y */
	sx = hx & is;		/* sign of x */
	sq = (hx ^ hy) & is;	/* sign of x/y */
	hx ^= sx;		/* |x| */
	hy &= 0x7fffffff;	/* |y| */

	/* purge off exception values: y is 0 or NaN, x is Inf or NaN */
	*quo = 0;
	if (hx >= ii || hy > ii || hy == 0) {
		v = x * y;
		return (v / v);
	}

	y = fabsf(y);
	x = fabsf(x);
	if (hy <= 0x7f7fffff) {
		x = fmodquof(x, y + y, quo);
		/* quotient by 2|y| -> quotient by |y| */
		*quo = ((*quo) & 0x3fffffff) << 1;
	}
	if (hy < 0x01000000) {
		/*
		 * small y: compare x + x against y rather than x against
		 * half * y (presumably so that halving y cannot lose bits;
		 * NOTE(review): confirm)
		 */
		if (x + x > y) {
			*quo += 1;
			if (x == y)
				x = zero;
			else
				x -= y;
			if (x + x >= y) {
				x -= y;
				*quo += 1;
			}
		}
	} else {
		v = half * y;
		if (x > v) {
			*quo += 1;
			if (x == y)
				x = zero;
			else
				x -= y;
			if (x >= v) {
				x -= y;
				*quo += 1;
			}
		}
	}
	if (sq != 0)
		*quo = -(*quo);
	return (sx == 0 ? x : -x);
}
+ */ + +#pragma weak remquol = __remquol + +#include "libm.h" +#include "libm_synonyms.h" +#if defined(__SUNPRO_C) +#include <sunmath.h> /* fabsl */ +#endif +/* INDENT OFF */ +static const int + is = -0x7fffffff - 1, + im = 0x0000ffff, + iu = 0x00010000; + +static const long double zero = 0.0L, one = 1.0L; +/* INDENT ON */ + +#if defined(__sparc) +#define __H0(x) ((int *) &x)[0] +#define __H1(x) ((int *) &x)[1] +#define __H2(x) ((int *) &x)[2] +#define __H3(x) ((int *) &x)[3] +#else +#error Unsupported architecture +#endif + +/* + * On entrance: *quo is initialized to 0, x finite and y non-zero & ordered + */ +static long double +fmodquol(long double x, long double y, int *quo) { + long double a, b; + int n, ix, iy, k, sx, sq, m; + int hx; + int x0, y0, z0, carry; + unsigned x1, x2, x3, y1, y2, y3, z1, z2, z3; + + hx = __H0(x); + x1 = __H1(x); + x2 = __H2(x); + x3 = __H3(x); + y0 = __H0(y); + y1 = __H1(y); + y2 = __H2(y); + y3 = __H3(y); + + sx = hx & is; + sq = (hx ^ y0) & is; + x0 = hx ^ sx; + y0 &= ~0x80000000; + + a = fabsl(x); + b = fabsl(y); + if (a <= b) { + if (a < b) + return (x); + else { + *quo = 1 + (sq >> 30); + return (zero * x); + } + } + /* determine ix = ilogbl(x) */ + if (x0 < iu) { /* subnormal x */ + ix = 0; + ix = -16382; + while (x0 == 0) { + ix -= 16; + x0 = x1 >> 16; + x1 = (x1 << 16) | (x2 >> 16); + x2 = (x2 << 16) | (x3 >> 16); + x3 = (x3 << 16); + } + while (x0 < iu) { + ix -= 1; + x0 = (x0 << 1) | (x1 >> 31); + x1 = (x1 << 1) | (x2 >> 31); + x2 = (x2 << 1) | (x3 >> 31); + x3 <<= 1; + } + } else { + ix = (x0 >> 16) - 16383; + x0 = iu | (x0 & im); + } + + /* determine iy = ilogbl(y) */ + if (y0 < iu) { /* subnormal y */ + iy = -16382; + while (y0 == 0) { + iy -= 16; + y0 = y1 >> 16; + y1 = (y1 << 16) | (y2 >> 16); + y2 = (y2 << 16) | (y3 >> 16); + y3 = (y3 << 16); + } + while (y0 < iu) { + iy -= 1; + y0 = (y0 << 1) | (y1 >> 31); + y1 = (y1 << 1) | (y2 >> 31); + y2 = (y2 << 1) | (y3 >> 31); + y3 <<= 1; + } + } else { + iy = (y0 >> 16) - 
16383; + y0 = iu | (y0 & im); + } + + + /* fix point fmod */ + n = ix - iy; + m = 0; + while (n--) { + while (x0 == 0 && n >= 16) { + m <<= 16; + n -= 16; + x0 = x1 >> 16; + x1 = (x1 << 16) | (x2 >> 16); + x2 = (x2 << 16) | (x3 >> 16); + x3 = (x3 << 16); + } + while (x0 < iu && n >= 1) { + m += m; + n -= 1; + x0 = (x0 << 1) | (x1 >> 31); + x1 = (x1 << 1) | (x2 >> 31); + x2 = (x2 << 1) | (x3 >> 31); + x3 = (x3 << 1); + } + carry = 0; + z3 = x3 - y3; + carry = z3 > x3; + if (carry == 0) { + z2 = x2 - y2; + carry = z2 > x2; + } else { + z2 = x2 - y2 - 1; + carry = z2 >= x2; + } + if (carry == 0) { + z1 = x1 - y1; + carry = z1 > x1; + } else { + z1 = x1 - y1 - 1; + carry = z1 >= x1; + } + z0 = x0 - y0 - carry; + if (z0 < 0) { /* double x */ + x0 = x0 + x0 + ((x1 & is) != 0); + x1 = x1 + x1 + ((x2 & is) != 0); + x2 = x2 + x2 + ((x3 & is) != 0); + x3 = x3 + x3; + m += m; + } else { + m += 1; + if (z0 == 0) { + if ((z1 | z2 | z3) == 0) { + /* 0: we are done */ + if (n < 31) + m <<= (1 + n); + else + m = 0; + m &= ~0x80000000; + *quo = sq >= 0 ? m : -m; + __H0(a) = hx & is; + __H1(a) = __H2(a) = __H3(a) = 0; + return (a); + } + } + /* x = z << 1 */ + z0 = z0 + z0 + ((z1 & is) != 0); + z1 = z1 + z1 + ((z2 & is) != 0); + z2 = z2 + z2 + ((z3 & is) != 0); + z3 = z3 + z3; + x0 = z0; + x1 = z1; + x2 = z2; + x3 = z3; + m += m; + } + } + carry = 0; + z3 = x3 - y3; + carry = z3 > x3; + if (carry == 0) { + z2 = x2 - y2; + carry = z2 > x2; + } else { + z2 = x2 - y2 - 1; + carry = z2 >= x2; + } + if (carry == 0) { + z1 = x1 - y1; + carry = z1 > x1; + } else { + z1 = x1 - y1 - 1; + carry = z1 >= x1; + } + z0 = x0 - y0 - carry; + if (z0 >= 0) { + x0 = z0; + x1 = z1; + x2 = z2; + x3 = z3; + m += 1; + } + m &= ~0x80000000; + *quo = sq >= 0 ? 
m : -m; + + /* convert back to floating value and restore the sign */ + if ((x0 | x1 | x2 | x3) == 0) { + __H0(a) = hx & is; + __H1(a) = __H2(a) = __H3(a) = 0; + return (a); + } + while (x0 < iu) { + if (x0 == 0) { + iy -= 16; + x0 = x1 >> 16; + x1 = (x1 << 16) | (x2 >> 16); + x2 = (x2 << 16) | (x3 >> 16); + x3 = (x3 << 16); + } else { + x0 = x0 + x0 + ((x1 & is) != 0); + x1 = x1 + x1 + ((x2 & is) != 0); + x2 = x2 + x2 + ((x3 & is) != 0); + x3 = x3 + x3; + iy -= 1; + } + } + + /* normalize output */ + if (iy >= -16382) { + __H0(a) = sx | (x0 - iu) | ((iy + 16383) << 16); + __H1(a) = x1; + __H2(a) = x2; + __H3(a) = x3; + } else { /* subnormal output */ + n = -16382 - iy; + k = n & 31; + if (k <= 16) { + x3 = (x2 << (32 - k)) | (x3 >> k); + x2 = (x1 << (32 - k)) | (x2 >> k); + x1 = (x0 << (32 - k)) | (x1 >> k); + x0 >>= k; + } else { + x3 = (x2 << (32 - k)) | (x3 >> k); + x2 = (x1 << (32 - k)) | (x2 >> k); + x1 = (x0 << (32 - k)) | (x1 >> k); + x0 = 0; + } + while (n >= 32) { + n -= 32; + x3 = x2; + x2 = x1; + x1 = x0; + x0 = 0; + } + __H0(a) = x0 | sx; + __H1(a) = x1; + __H2(a) = x2; + __H3(a) = x3; + a *= one; + } + return (a); +} + +long double +remquol(long double x, long double y, int *quo) { + int hx, hy, sx, sq; + long double v; + + hx = __H0(x); /* high word of x */ + hy = __H0(y); /* high word of y */ + sx = hx & is; /* sign of x */ + sq = (hx ^ hy) & is; /* sign of x/y */ + hx ^= sx; /* |x| */ + hy &= ~0x80000000; + + /* purge off exception values */ + *quo = 0; + /* y=0, y is NaN, x is NaN or inf */ + if (y == 0.0L || y != y || hx >= 0x7fff0000) + return ((x * y) / (x * y)); + + y = fabsl(y); + x = fabsl(x); + if (hy <= 0x7ffdffff) { + x = fmodquol(x, y + y, quo); + *quo = ((*quo) & 0x3fffffff) << 1; + } + if (hy < 0x00020000) { + if (x + x > y) { + *quo += 1; + if (x == y) + x = zero; + else + x -= y; + if (x + x >= y) { + x -= y; + *quo += 1; + } + } + } else { + v = 0.5L * y; + if (x > v) { + *quo += 1; + if (x == y) + x = zero; + else + x -= y; + if (x 
>= v) { + x -= y; + *quo += 1; + } + } + } + if (sq != 0) + *quo = -(*quo); + return (sx == 0 ? x : -x); +} diff --git a/usr/src/lib/libm/common/m9x/round.c b/usr/src/lib/libm/common/m9x/round.c new file mode 100644 index 0000000000..f4f5a48f2f --- /dev/null +++ b/usr/src/lib/libm/common/m9x/round.c @@ -0,0 +1,76 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#if defined(ELFOBJ) +#pragma weak round = __round +#endif + +#include "libm.h" + +double +round(double x) { + union { + unsigned i[2]; + double d; + } xx; + unsigned hx, sx, i; + + xx.d = x; + hx = xx.i[HIWORD] & ~0x80000000; + sx = xx.i[HIWORD] & 0x80000000; + if (hx < 0x43300000) { /* |x| < 2^52 */ + if (hx < 0x3ff00000) { /* |x| < 1 */ + if (hx >= 0x3fe00000) + return (sx ? -1.0 : 1.0); + return (sx ? 
-0.0 : 0.0); + } + + /* round x at the integer bit */ + if (hx < 0x41300000) { + i = 1 << (0x412 - (hx >> 20)); + xx.i[HIWORD] = (xx.i[HIWORD] + i) & ~(i | (i - 1)); + xx.i[LOWORD] = 0; + } else { + i = 1 << (0x432 - (hx >> 20)); + xx.i[LOWORD] += i; + if (xx.i[LOWORD] < i) + xx.i[HIWORD]++; + xx.i[LOWORD] &= ~(i | (i - 1)); + } + return (xx.d); + } else if (hx < 0x7ff00000) + return (x); + else +#if defined(FPADD_TRAPS_INCOMPLETE_ON_NAN) + return (hx >= 0x7ff80000 ? x : x + x); + /* assumes sparc-like QNaN */ +#else + return (x + x); +#endif +} diff --git a/usr/src/lib/libm/common/m9x/roundf.c b/usr/src/lib/libm/common/m9x/roundf.c new file mode 100644 index 0000000000..acb6d840d3 --- /dev/null +++ b/usr/src/lib/libm/common/m9x/roundf.c @@ -0,0 +1,66 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#if defined(ELFOBJ) +#pragma weak roundf = __roundf +#endif + +#include "libm.h" +/* + * roundf(x): round x to the nearest integral value, halfway cases away + * from zero, working directly on the IEEE single bit pattern. + * + * |x| below 0.5 returns a zero carrying the sign of x, |x| in [0.5, 1) + * returns 1 with the sign of x, and |x| in [1, 2^23) adds the mantissa bit + * worth 0.5 and then clears every fraction bit. Finite |x| of 2^23 or more + * is already integral and is returned unchanged. Inf and NaN are passed + * through an arithmetic operation on x; see the + * FPADD_TRAPS_INCOMPLETE_ON_NAN conditional at the end of the function. + */ +float +roundf(float x) { + union { + unsigned i; + float f; + } xx; + unsigned hx, sx, i; + + xx.f = x; + hx = xx.i & ~0x80000000; /* bits of |x| */ + sx = xx.i & 0x80000000; /* sign bit of x */ + if (hx < 0x4b000000) { /* |x| < 2^23 */ + if (hx < 0x3f800000) { /* |x| < 1 */ + if (hx >= 0x3f000000) /* 0.5 <= |x| < 1 */ + return (sx ? -1.0F : 1.0F); + return (sx ? -0.0F : 0.0F); + } + + /* round x at the integer bit */ + i = 1 << (0x95 - (hx >> 23)); /* mantissa bit worth 0.5 */ + xx.i = (xx.i + i) & ~((i << 1) - 1); /* add 0.5, drop fraction */ + return (xx.f); + } else if (hx < 0x7f800000) /* |x| is integral */ + return (x); + else +#if defined(FPADD_TRAPS_INCOMPLETE_ON_NAN) + return (hx > 0x7f800000 ? x * x : x + x); +#else + return (x + x); +#endif +} diff --git a/usr/src/lib/libm/common/m9x/roundl.c b/usr/src/lib/libm/common/m9x/roundl.c new file mode 100644 index 0000000000..957b93251b --- /dev/null +++ b/usr/src/lib/libm/common/m9x/roundl.c @@ -0,0 +1,166 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#if defined(ELFOBJ) +#pragma weak roundl = __roundl +#endif + +#include "libm.h" + +#if defined(__sparc) +long double +roundl(long double x) { + union { + unsigned i[4]; + long double q; + } xx; + unsigned hx, sx, v; + int j; + + xx.q = x; + sx = xx.i[0] & 0x80000000; + hx = xx.i[0] & ~0x80000000; + + /* handle trivial cases */ + if (hx >= 0x406f0000) /* |x| >= 2^112 + ... or x is nan */ + return (hx >= 0x7fff0000 ? x + x : x); + + /* handle |x| < 1 */ + if (hx < 0x3fff0000) { + if (hx >= 0x3ffe0000) + return (sx ? -1.0L : 1.0L); + return (sx ? -0.0L : 0.0L); + } + + xx.i[0] = hx; + j = 0x406f - (hx >> 16); /* 1 <= j <= 112 */ + if (j >= 96) { /* 96 <= j <= 112 */ + v = (1U << (j - 96)) >> 1; + if (v) { + if (xx.i[0] & v) + xx.i[0] += v; + xx.i[0] &= ~(v - 1); + } else if (xx.i[1] & 0x80000000) + ++xx.i[0]; + xx.i[1] = xx.i[2] = xx.i[3] = 0; + } else if (j >= 64) { /* 64 <= j <= 95 */ + v = (1U << (j - 64)) >> 1; + if (v) { + if (xx.i[1] & v) { + xx.i[1] += v; + if (xx.i[1] < v) + ++xx.i[0]; + } + xx.i[1] &= ~(v - 1); + } else if (xx.i[2] & 0x80000000) { + if (++xx.i[1] == 0) + ++xx.i[0]; + } + xx.i[2] = xx.i[3] = 0; + } else if (j >= 32) { /* 32 <= j <= 63 */ + v = (1U << (j - 32)) >> 1; + if (v) { + if (xx.i[2] & v) { + xx.i[2] += v; + if (xx.i[2] < v) { + if (++xx.i[1] == 0) + ++xx.i[0]; + } + } + xx.i[2] &= ~(v - 1); + } else if (xx.i[3] & 0x80000000) { + if (++xx.i[2] == 0) { + if (++xx.i[1] == 0) + ++xx.i[0]; + } + } + xx.i[3] = 0; + } else { /* 1 <= j <= 31 */ + v = 1U << (j - 1); + if (xx.i[3] & v) { + xx.i[3] += v; + if (xx.i[3] < v) { + if (++xx.i[2] == 0) { + if (++xx.i[1] == 0) + ++xx.i[0]; + } + } + } + xx.i[3] &= ~(v - 1); + } + + /* negate result if need be */ + if (sx) + xx.i[0] |= 0x80000000; + return (xx.q); +} +#elif defined(__x86) +long double +roundl(long double x) { + union { + unsigned i[3]; + long double e; + } xx; + int ex, sx, i; + + xx.e = x; + ex = xx.i[2] & 0x7fff; + sx = xx.i[2] & 0x8000; + if (ex < 0x403e) { /* |x| < 2^63 */ + 
if (ex < 0x3fff) { /* |x| < 1 */ + if (ex >= 0x3ffe) + return (sx ? -1.0L : 1.0L); + return (sx ? -0.0L : 0.0L); + } + + /* round x at the integer bit */ + if (ex < 0x401e) { + i = 1 << (0x401d - ex); + xx.i[1] = (xx.i[1] + i) & ~(i | (i - 1)); + xx.i[0] = 0; + } else { + i = 1 << (0x403d - ex); + xx.i[0] += i; + if (xx.i[0] < i) + xx.i[1]++; + xx.i[0] &= ~(i | (i - 1)); + } + if (xx.i[1] == 0) { + xx.i[2] = sx | ++ex; + xx.i[1] = 0x80000000U; + } + return (xx.e); + } else if (ex < 0x7fff) /* x is integral */ + return (x); + else /* inf or nan */ + return (x + x); +} +#else +#error Unknown architecture +#endif /* defined(__sparc) || defined(__x86) */ diff --git a/usr/src/lib/libm/common/m9x/scalbln.c b/usr/src/lib/libm/common/m9x/scalbln.c new file mode 100644 index 0000000000..eb11e56a54 --- /dev/null +++ b/usr/src/lib/libm/common/m9x/scalbln.c @@ -0,0 +1,109 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#if defined(ELFOBJ) +#pragma weak scalbln = __scalbln +#endif + +#include "libm.h" +#include <float.h> /* DBL_MAX, DBL_MIN */ + +static const double twom54 = 5.5511151231257827021181583404541015625e-17; +#if defined(USE_FPSCALE) || defined(__x86) +static const double two52 = 4503599627370496.0; +#else +/* + * Normalize non-zero subnormal x and return biased exponent of x in [-51,0] + */ +static int +ilogb_biased(unsigned *px) { + int s = 52; + unsigned v = px[HIWORD] & ~0x80000000, w = px[LOWORD], t = v; + + if (t) + s -= 32; + else + t = w; + if (t & 0xffff0000) + s -= 16, t >>= 16; + if (t & 0xff00) + s -= 8, t >>= 8; + if (t & 0xf0) + s -= 4, t >>= 4; + t <<= 1; + s -= (0xffffaa50 >> t) & 0x3; + if (s < 32) { + v = (v << s) | w >> (32 - s); + w <<= s; + } else { + v = w << (s - 32); + w = 0; + } + px[HIWORD] = (px[HIWORD] & 0x80000000) | v; + px[LOWORD] = w; + return (1 - s); +} +#endif /* defined(USE_FPSCALE) */ + +double +scalbln(double x, long n) { + int *px = (int *) &x, ix, k; + + ix = px[HIWORD] & ~0x80000000; + k = ix >> 20; + if (k == 0x7ff) +#if defined(FPADD_TRAPS_INCOMPLETE_ON_NAN) + return ((px[HIWORD] & 0x80000) != 0 ? 
x : x + x); + /* assumes sparc-like QNaN */ +#else + return (x + x); +#endif + if ((px[LOWORD] | ix) == 0 || n == 0) + return (x); + if (k == 0) { +#if defined(USE_FPSCALE) || defined(__x86) + x *= two52; + k = ((px[HIWORD] & ~0x80000000) >> 20) - 52; +#else + k = ilogb_biased((unsigned *) px); +#endif + } + k += (int) n; + if (n > 5000 || k > 0x7fe) + return (DBL_MAX * copysign(DBL_MAX, x)); + if (n < -5000 || k <= -54) + return (DBL_MIN * copysign(DBL_MIN, x)); + if (k > 0) { + px[HIWORD] = (px[HIWORD] & ~0x7ff00000) | (k << 20); + return (x); + } + k += 54; + px[HIWORD] = (px[HIWORD] & ~0x7ff00000) | (k << 20); + return (x * twom54); +} diff --git a/usr/src/lib/libm/common/m9x/scalblnf.c b/usr/src/lib/libm/common/m9x/scalblnf.c new file mode 100644 index 0000000000..dc0132378c --- /dev/null +++ b/usr/src/lib/libm/common/m9x/scalblnf.c @@ -0,0 +1,94 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#if defined(ELFOBJ) +#pragma weak scalblnf = __scalblnf +#endif + +#include "libm.h" +#include <float.h> /* FLT_MAX, FLT_MIN */ + +static const float twom25f = 2.98023223876953125e-8F; /* 2^-25 */ +#if defined(USE_FPSCALE) || defined(__x86) +static const float two23f = 8388608.0F; /* 2^23 */ +#else +/* + * v: a non-zero subnormal |x|; returns [-22, 0] + * + * The result is (index of the highest set bit of v) - 22. The three tests + * narrow v down to a 4-bit value; 0xffffaa50 is then read as a table of + * 2-bit entries holding the highest-bit index (0..3) of that value, which + * is why v is doubled before it is used as the shift count. + */ +static int +ilogbf_biased(unsigned v) { + int r = -22; + + if (v & 0xffff0000) + r += 16, v >>= 16; + if (v & 0xff00) + r += 8, v >>= 8; + if (v & 0xf0) + r += 4, v >>= 4; + v <<= 1; + return (r + ((0xffffaa50 >> v) & 0x3)); +} +#endif /* defined(USE_FPSCALE) */ +/* + * scalblnf(x, n): scale x by 2^n by rewriting the exponent field of its + * IEEE single bit pattern. + * + * Inf and NaN (exponent field 0xff) are answered by the first conditional + * in the body; a zero x, or n == 0, returns x unchanged. A subnormal x is + * normalized first so that k holds the exponent it would have as a normal + * number. When n exceeds 5000 or the new exponent exceeds 0xfe the result + * is the product of FLT_MAX and copysignf(FLT_MAX, x); when n is below + * -5000 or the new exponent is -25 or less the result is the product of + * FLT_MIN and copysignf(FLT_MIN, x). A new exponent in (-25, 0] is stored + * raised by 25 and the value is then multiplied by twom25f, so the final + * multiply produces the subnormal result. + * + * NOTE(review): k += (int) n is evaluated before n is range-checked, so an + * n near INT_MAX or INT_MIN can overflow the int addition; confirm this is + * acceptable on the supported compilers. + */ +float +scalblnf(float x, long n) { + int *px = (int *) &x, ix, k; + + ix = *px & ~0x80000000; /* bits of |x| */ + k = ix >> 23; /* biased exponent field */ + if (k == 0xff) +#if defined(FPADD_TRAPS_INCOMPLETE_ON_NAN) + return (ix > 0x7f800000 ? x * x : x); +#else + return (x + x); +#endif + if (ix == 0 || n == 0) + return (x); + if (k == 0) { +#if defined(USE_FPSCALE) || defined(__x86) + x *= two23f; + k = ((*px & ~0x80000000) >> 23) - 23; +#else + k = ilogbf_biased(ix); + *px = (*px & 0x80000000) | (ix << (-k + 1)); +#endif + } + k += (int) n; + if (n > 5000 || k > 0xfe) + return (FLT_MAX * copysignf(FLT_MAX, x)); + if (n < -5000 || k <= -25) + return (FLT_MIN * copysignf(FLT_MIN, x)); + if (k > 0) { + *px = (*px & ~0x7f800000) | (k << 23); + return (x); + } + k += 25; + *px = (*px & ~0x7f800000) | (k << 23); + return (x * twom25f); +} diff --git a/usr/src/lib/libm/common/m9x/scalblnl.c b/usr/src/lib/libm/common/m9x/scalblnl.c new file mode 100644 index 0000000000..d5ffc989c6 --- /dev/null +++ b/usr/src/lib/libm/common/m9x/scalblnl.c @@ -0,0 +1,82 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#if defined(ELFOBJ) +#pragma weak scalblnl = __scalblnl +#endif + +#include "libm.h" +#include <float.h> /* LDBL_MAX, LDBL_MIN */ + +#if defined(__sparc) +#define XSET_EXP(k, x) ((int *) &x)[0] = (((int *) &x)[0] & ~0x7fff0000) | \ + (k << 16) +#define ISINFNANL(k, x) (k == 0x7fff) +#define XTWOT_OFFSET 113 +static const long double xtwot = 10384593717069655257060992658440192.0L, + /* 2^113 */ + twomtm1 = 4.814824860968089632639944856462318296E-35L; /* 2^-114 */ +#elif defined(__x86) +#define XSET_EXP(k, x) ((int *) &x)[2] = (((int *) &x)[2] & ~0x7fff) | k +#if defined(HANDLE_UNSUPPORTED) +#define ISINFNANL(k, x) (k == 0x7fff || \ + (k != 0 && (((int *) &x)[1] & 0x80000000) == 0)) +#else +#define ISINFNANL(k, x) (k == 0x7fff) +#endif +#define XTWOT_OFFSET 64 +static const long double xtwot = 18446744073709551616.0L, /* 2^64 */ + twomtm1 = 2.7105054312137610850186E-20L; /* 2^-65 */ +#endif + +long double +scalblnl(long double x, long n) { + int k = XBIASED_EXP(x); + + if (ISINFNANL(k, x)) + return (x + x); + if (ISZEROL(x) || n == 0) + return (x); + if (k == 0) { + x *= xtwot; + k = XBIASED_EXP(x) - XTWOT_OFFSET; + } + k += (int) n; + if (n > 50000 || k > 0x7ffe) + return (LDBL_MAX * copysignl(LDBL_MAX, x)); + if (n < -50000 || k <= -XTWOT_OFFSET - 1) + return (LDBL_MIN * copysignl(LDBL_MIN, x)); + if (k 
> 0) { + XSET_EXP(k, x); + return (x); + } + k += XTWOT_OFFSET + 1; + XSET_EXP(k, x); + return (x * twomtm1); +} diff --git a/usr/src/lib/libm/common/m9x/tgamma.c b/usr/src/lib/libm/common/m9x/tgamma.c new file mode 100644 index 0000000000..651fa7d125 --- /dev/null +++ b/usr/src/lib/libm/common/m9x/tgamma.c @@ -0,0 +1,1704 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#if defined(ELFOBJ) +#pragma weak tgamma = __tgamma +#endif + +/* INDENT OFF */ +/* + * True gamma function + * double tgamma(double x) + * + * Error: + * ------ + * Less that one ulp for both positive and negative arguments. 
+ * + * Algorithm: + * --------- + * A: For negative argument + * (1) gamma(-n or -inf) is NaN + * (2) Underflow Threshold + * (3) Reduction to gamma(1+x) + * B: For x between 1 and 2 + * C: For x between 0 and 1 + * D: For x between 2 and 8 + * E: Overflow threshold {see over.c} + * F: For overflow_threshold >= x >= 8 + * + * Implementation details + * ----------------------- + * (A) For negative argument, use + * gamma(-x) = -pi / (sin(pi*x)*gamma(1+x)). + * + * (1) gamma(-n or -inf) is NaN with invalid signal by SUSv3 spec. + * (Ideally, gamma(-n) = 1/sinpi(n) = (-1)**(n+1) * inf.) + * + * (2) Underflow Threshold. For each precision, there is a value T + * such that when x>T and when x is not an integer, gamma(-x) will + * always underflow. A table of the underflow threshold value is given + * below. For proof, see file "under.c". + * + * Precision underflow threshold T = + * ---------------------------------------------------------------------- + * single 41.000041962 = 41 + 11 ULP + * (machine format) 4224000B + * double 183.000000000000312639 = 183 + 11 ULP + * (machine format) 4066E000 0000000B + * quad 1774.0000000000000000000000000000017749370 = 1774 + 9 ULP + * (machine format) 4009BB80000000000000000000000009 + * ---------------------------------------------------------------------- + * + * (3) Reduction to gamma(1+x). + * Because of (1) and (2), we need only consider non-integral x + * such that 0<x<T. Let k = [x] and z = x-[x]. Define + * kpsin(x) = sin(x*pi)/pi and kpcos(x) = cos(x*pi)/pi. Then + * gamma(-x) = 1 / (-kpsin(x)*gamma(1+x)). + * Since x = k+z, + * -sin(x*pi) = -sin(k*pi+z*pi) = (-1)**(k+1) * sin(z*pi), + * we have -kpsin(x) = (-1)**(k+1) * kpsin(z). We can further + * reduce z to t by + * (I) t = z when 0.00000 <= z < 0.31830... + * (II) t = 0.5-z when 0.31830... <= z < 0.681690... + * (III) t = 1-z when 0.681690... 
<= z < 1.00000 + * and correspondingly + * (I) kpsin(z) = kpsin(t) ... 0<= z < 0.3184 + * (II) kpsin(z) = kpcos(t) ... |t| < 0.182 + * (III) kpsin(z) = kpsin(t) ... 0<= t < 0.3184 + * + * Using a special Remez algorithm, we obtain the following polynomial + * approximation for kpsin(t) for 0<=t<0.3184: + * + * Computation note: in simulating higher precision arithmetic, kcpsin + * return head = t and tail = ks[0]*t^3 + (...) to maintain extra bits. + * + * Quad precision, remez error <= 2**(-129.74) + * 3 5 27 + * kpsin(t) = t + ks[0] * t + ks[1] * t + ... + ks[12] * t + * + * ks[ 0] = -1.64493406684822643647241516664602518705158902870e+0000 + * ks[ 1] = 8.11742425283353643637002772405874238094995726160e-0001 + * ks[ 2] = -1.90751824122084213696472111835337366232282723933e-0001 + * ks[ 3] = 2.61478478176548005046532613563241288115395517084e-0002 + * ks[ 4] = -2.34608103545582363750893072647117829448016479971e-0003 + * ks[ 5] = 1.48428793031071003684606647212534027556262040158e-0004 + * ks[ 6] = -6.97587366165638046518462722252768122615952898698e-0006 + * ks[ 7] = 2.53121740413702536928659271747187500934840057929e-0007 + * ks[ 8] = -7.30471182221385990397683641695766121301933621956e-0009 + * ks[ 9] = 1.71653847451163495739958249695549313987973589884e-0010 + * ks[10] = -3.34813314714560776122245796929054813458341420565e-0012 + * ks[11] = 5.50724992262622033449487808306969135431411753047e-0014 + * ks[12] = -7.67678132753577998601234393215802221104236979928e-0016 + * + * Double precision, Remez error <= 2**(-62.9) + * 3 5 15 + * kpsin(t) = t + ks[0] * t + ks[1] * t + ... 
+ ks[6] * t + * + * ks[0] = -1.644934066848226406065691 (0x3ffa51a6 625307d3) + * ks[1] = 8.11742425283341655883668741874008920850698590621e-0001 + * ks[2] = -1.90751824120862873825597279118304943994042258291e-0001 + * ks[3] = 2.61478477632554278317289628332654539353521911570e-0002 + * ks[4] = -2.34607978510202710377617190278735525354347705866e-0003 + * ks[5] = 1.48413292290051695897242899977121846763824221705e-0004 + * ks[6] = -6.87730769637543488108688726777687262485357072242e-0006 + * + * Single precision, Remez error <= 2**(-34.09) + * 3 5 9 + * kpsin(t) = t + ks[0] * t + ks[1] * t + ... + ks[3] * t + * + * ks[0] = -1.64493404985645811354476665052005342839447790544e+0000 + * ks[1] = 8.11740794458351064092797249069438269367389272270e-0001 + * ks[2] = -1.90703144603551216933075809162889536878854055202e-0001 + * ks[3] = 2.55742333994264563281155312271481108635575331201e-0002 + * + * Computation note: in simulating higher precision arithmetic, kcpsin + * return head = t and tail = kc[0]*t^3 + (...) to maintain extra bits + * precision. + * + * And for kpcos(t) for |t|< 0.183: + * + * Quad precision, remez <= 2**(-122.48) + * 2 4 22 + * kpcos(t) = 1/pi + pi/2 * t + kc[2] * t + ... 
+ kc[11] * t + * + * kc[2] = 1.29192819501249250731151312779548918765320728489e+0000 + * kc[3] = -4.25027339979557573976029596929319207009444090366e-0001 + * kc[4] = 7.49080661650990096109672954618317623888421628613e-0002 + * kc[5] = -8.21458866111282287985539464173976555436050215120e-0003 + * kc[6] = 6.14202578809529228503205255165761204750211603402e-0004 + * kc[7] = -3.33073432691149607007217330302595267179545908740e-0005 + * kc[8] = 1.36970959047832085796809745461530865597993680204e-0006 + * kc[9] = -4.41780774262583514450246512727201806217271097336e-0008 + * kc[10]= 1.14741409212381858820016567664488123478660705759e-0009 + * kc[11]= -2.44261236114707374558437500654381006300502749632e-0011 + * + * Double precision, remez < 2**(-61.91) + * kpcos(t) = 1/pi + pi/2 * t^2 + kc[2] * t^4 + ... + kc[6] * t^12 + * + * kc[2] = 1.29192819501230224953283586722575766189551966008e+0000 + * kc[3] = -4.25027339940149518500158850753393173519732149213e-0001 + * kc[4] = 7.49080625187015312373925142219429422375556727752e-0002 + * kc[5] = -8.21442040906099210866977352284054849051348692715e-0003 + * kc[6] = 6.10411356829515414575566564733632532333904115968e-0004 + * + * Single precision, remez < 2**(-30.13) + * kpcos(t) = kc[0] + kc[1] * t^2 + ... + kc[3] * t^6 + * + * kc[0] = 3.18309886183790671537767526745028724068919291480e-0001 + * kc[1] = -1.57079581447762568199467875065854538626594937791e+0000 + * kc[2] = 1.29183528092558692844073004029568674027807393862e+0000 + * kc[3] = -4.20232949771307685981015914425195471602739075537e-0001 + * + * Computation note: in simulating higher precision arithmetic, kcpcos + * returns head = 1/pi chopped, and tail = pi/2 *t^2 + (tail part of 1/pi + * + ...) to maintain extra bits of precision. In particular, pi/2 * t^2 + * is calculated with great care. + * + * Thus, the computation of gamma(-x), x>0, is: + * Let k = int(x), z = x-k. 
+ * For z in (I) + * k+1 + * (-1) + * gamma(-x) = ------------------- ; + * kpsin(z)*gamma(1+x) + * + * otherwise, for z in (II), + * k+1 + * (-1) + * gamma(-x) = ----------------------- ; + * kpcos(0.5-z)*gamma(1+x) + * + * otherwise, for z in (III), + * k+1 + * (-1) + * gamma(-x) = --------------------- . + * kpsin(1-z)*gamma(1+x) + * + * Thus, the computation of gamma(-x) reduced to the computation of + * gamma(1+x) and kpsin(), kpcos(). + * + * (B) For x between 1 and 2. We break [1,2] into three parts: + * GT1 = [1.0000, 1.2845] + * GT2 = [1.2844, 1.6374] + * GT3 = [1.6373, 2.0000] + * + * For x in GTi, i=1,2,3, let + * z1 = 1.134861805732790769689793935774652917006 + * gz1 = gamma(z1) = 0.9382046279096824494097535615803269576988 + * tz1 = gamma'(z1) = -0.3517214357852935791015625000000000000000 + * + * z2 = 1.461632144968362341262659542325721328468e+0000 + * gz2 = gamma(z2) = 0.8856031944108887002788159005825887332080 + * tz2 = gamma'(z2) = 0.00 + * + * z3 = 1.819773101100500601787868704921606996312e+0000 + * gz3 = gamma(z3) = 0.9367814114636523216188468970808378497426 + * tz3 = gamma'(z3) = 0.2805306315422058105468750000000000000000 + * + * and + * y = x-zi ... for extra precision, write y = y.h + y.l + * Then + * gamma(x) = gzi + tzi*(y.h+y.l) + y*y*Ri(y), + * = gzi.h + (tzi*y.h + ((tzi*y.l+gzi.l) + y*y*Ri(y))) + * = gy.h + gy.l + * where + * (I) For double precision + * + * Ri(y) = Pi(y)/Qi(y), i=1,2,3; + * + * P1(y) = p1[0] + p1[1]*y + ... + p1[4]*y^4 + * Q1(y) = q1[0] + q1[1]*y + ... + q1[5]*y^5 + * + * P2(y) = p2[0] + p2[1]*y + ... + p2[3]*y^3 + * Q2(y) = q2[0] + q2[1]*y + ... + q2[6]*y^6 + * + * P3(y) = p3[0] + p3[1]*y + ... + p3[4]*y^4 + * Q3(y) = q3[0] + q3[1]*y + ... + q3[5]*y^5 + * + * Remez precision of Ri(y): + * |gamma(x)-(gzi+tzi*y) - y*y*Ri(y)| <= 2**-62.3 ... for i = 1 + * <= 2**-59.4 ... for i = 2 + * <= 2**-62.1 ... for i = 3 + * + * (II) For quad precision + * + * Ri(y) = Pi(y)/Qi(y), i=1,2,3; + * + * P1(y) = p1[0] + p1[1]*y + ... 
+ p1[9]*y^9 + * Q1(y) = q1[0] + q1[1]*y + ... + q1[8]*y^8 + * + * P2(y) = p2[0] + p2[1]*y + ... + p2[9]*y^9 + * Q2(y) = q2[0] + q2[1]*y + ... + q2[9]*y^9 + * + * P3(y) = p3[0] + p3[1]*y + ... + p3[9]*y^9 + * Q3(y) = q3[0] + q3[1]*y + ... + q3[9]*y^9 + * + * Remez precision of Ri(y): + * |gamma(x)-(gzi+tzi*y) - y*y*Ri(y)| <= 2**-118.2 ... for i = 1 + * <= 2**-126.8 ... for i = 2 + * <= 2**-119.5 ... for i = 3 + * + * (III) For single precision + * + * Ri(y) = Pi(y), i=1,2,3; + * + * P1(y) = p1[0] + p1[1]*y + ... + p1[5]*y^5 + * + * P2(y) = p2[0] + p2[1]*y + ... + p2[5]*y^5 + * + * P3(y) = p3[0] + p3[1]*y + ... + p3[4]*y^4 + * + * Remez precision of Ri(y): + * |gamma(x)-(gzi+tzi*y) - y*y*Ri(y)| <= 2**-30.8 ... for i = 1 + * <= 2**-31.6 ... for i = 2 + * <= 2**-29.5 ... for i = 3 + * + * Notes. (1) GTi and zi are choosen to balance the interval width and + * minimize the distant between gamma(x) and the tangent line at + * zi. In particular, we have + * |gamma(x)-(gzi+tzi*(x-zi))| <= 0.01436... for x in [1,z2] + * <= 0.01265... for x in [z2,2] + * + * (2) zi are slightly adjusted so that tzi=gamma'(zi) is very + * close to a single precision value. 
+ * + * Coefficents: Single precision + * i= 1: + * P1[0] = 7.09087253435088360271451613398019280077561279443e-0001 + * P1[1] = -5.17229560788652108545141978238701790105241761089e-0001 + * P1[2] = 5.23403394528150789405825222323770647162337764327e-0001 + * P1[3] = -4.54586308717075010784041566069480411732634814899e-0001 + * P1[4] = 4.20596490915239085459964590559256913498190955233e-0001 + * P1[5] = -3.57307589712377520978332185838241458642142185789e-0001 + * + * i = 2: + * p2[0] = 4.28486983980295198166056119223984284434264344578e-0001 + * p2[1] = -1.30704539487709138528680121627899735386650103914e-0001 + * p2[2] = 1.60856285038051955072861219352655851542955430871e-0001 + * p2[3] = -9.22285161346010583774458802067371182158937943507e-0002 + * p2[4] = 7.19240511767225260740890292605070595560626179357e-0002 + * p2[5] = -4.88158265593355093703112238534484636193260459574e-0002 + * + * i = 3 + * p3[0] = 3.82409531118807759081121479786092134814808872880e-0001 + * p3[1] = 2.65309888180188647956400403013495759365167853426e-0002 + * p3[2] = 8.06815109775079171923561169415370309376296739835e-0002 + * p3[3] = -1.54821591666137613928840890835174351674007764799e-0002 + * p3[4] = 1.76308239242717268530498313416899188157165183405e-0002 + * + * Coefficents: Double precision + * i = 1: + * p1[0] = 0.70908683619977797008004927192814648151397705078125000 + * p1[1] = 1.71987061393048558089579513384356441668351720061e-0001 + * p1[2] = -3.19273345791990970293320316122813960527705450671e-0002 + * p1[3] = 8.36172645419110036267169600390549973563534476989e-0003 + * p1[4] = 1.13745336648572838333152213474277971244629758101e-0003 + * q1[0] = 1.0 + * q1[1] = 9.71980217826032937526460731778472389791321968082e-0001 + * q1[2] = -7.43576743326756176594084137256042653497087666030e-0002 + * q1[3] = -1.19345944932265559769719470515102012246995255372e-0001 + * q1[4] = 1.59913445751425002620935120470781382215050284762e-0002 + * q1[5] = 1.12601136853374984566572691306402321911547550783e-0003 + * i = 2: 
+ * p2[0] = 0.42848681585558601181418225678498856723308563232421875 + * p2[1] = 6.53596762668970816023718845105667418483122103629e-0002 + * p2[2] = -6.97280829631212931321050770925128264272768936731e-0003 + * p2[3] = 6.46342359021981718947208605674813260166116632899e-0003 + * q2[0] = 1.0 + * q2[1] = 4.57572620560506047062553957454062012327519313936e-0001 + * q2[2] = -2.52182594886075452859655003407796103083422572036e-0001 + * q2[3] = -1.82970945407778594681348166040103197178711552827e-0002 + * q2[4] = 2.43574726993169566475227642128830141304953840502e-0002 + * q2[5] = -5.20390406466942525358645957564897411258667085501e-0003 + * q2[6] = 4.79520251383279837635552431988023256031951133885e-0004 + * i = 3: + * p3[0] = 0.382409479734567459008331979930517263710498809814453125 + * p3[1] = 1.42876048697668161599069814043449301572928034140e-0001 + * p3[2] = 3.42157571052250536817923866013561760785748899071e-0003 + * p3[3] = -5.01542621710067521405087887856991700987709272937e-0004 + * p3[4] = 8.89285814866740910123834688163838287618332122670e-0004 + * q3[0] = 1.0 + * q3[1] = 3.04253086629444201002215640948957897906299633168e-0001 + * q3[2] = -2.23162407379999477282555672834881213873185520006e-0001 + * q3[3] = -1.05060867741952065921809811933670131427552903636e-0002 + * q3[4] = 1.70511763916186982473301861980856352005926669320e-0002 + * q3[5] = -2.12950201683609187927899416700094630764182477464e-0003 + * + * Note that all pi0 are exact in double, which is obtained by a + * special Remez Algorithm. 
+ * + * Coefficents: Quad precision + * i = 1: + * p1[0] = 0.709086836199777919037185741507610124611513720557 + * p1[1] = 4.45754781206489035827915969367354835667391606951e-0001 + * p1[2] = 3.21049298735832382311662273882632210062918153852e-0002 + * p1[3] = -5.71296796342106617651765245858289197369688864350e-0003 + * p1[4] = 6.04666892891998977081619174969855831606965352773e-0003 + * p1[5] = 8.99106186996888711939627812174765258822658645168e-0004 + * p1[6] = -6.96496846144407741431207008527018441810175568949e-0005 + * p1[7] = 1.52597046118984020814225409300131445070213882429e-0005 + * p1[8] = 5.68521076168495673844711465407432189190681541547e-0007 + * p1[9] = 3.30749673519634895220582062520286565610418952979e-0008 + * q1[0] = 1.0+0000 + * q1[1] = 1.35806511721671070408570853537257079579490650668e+0000 + * q1[2] = 2.97567810153429553405327140096063086994072952961e-0001 + * q1[3] = -1.52956835982588571502954372821681851681118097870e-0001 + * q1[4] = -2.88248519561420109768781615289082053597954521218e-0002 + * q1[5] = 1.03475311719937405219789948456313936302378395955e-0002 + * q1[6] = 4.12310203243891222368965360124391297374822742313e-0004 + * q1[7] = -3.12653708152290867248931925120380729518332507388e-0004 + * q1[8] = 2.36672170850409745237358105667757760527014332458e-0005 + * + * i = 2: + * p2[0] = 0.428486815855585429730209907810650616737756697477 + * p2[1] = 2.63622124067885222919192651151581541943362617352e-0001 + * p2[2] = 3.85520683670028865731877276741390421744971446855e-0002 + * p2[3] = 3.05065978278128549958897133190295325258023525862e-0003 + * p2[4] = 2.48232934951723128892080415054084339152450445081e-0003 + * p2[5] = 3.67092777065632360693313762221411547741550105407e-0004 + * p2[6] = 3.81228045616085789674530902563145250532194518946e-0006 + * p2[7] = 4.61677225867087554059531455133839175822537617677e-0006 + * p2[8] = 2.18209052385703200438239200991201916609364872993e-0007 + * p2[9] = 1.00490538985245846460006244065624754421022542454e-0008 + * q2[0] = 1.0 + 
* q2[1] = 9.20276350207639290567783725273128544224570775056e-0001 + * q2[2] = -4.79533683654165107448020515733883781138947771495e-0003 + * q2[3] = -1.24538337585899300494444600248687901947684291683e-0001 + * q2[4] = 4.49866050763472358547524708431719114204535491412e-0003 + * q2[5] = 7.20715455697920560621638325356292640604078591907e-0003 + * q2[6] = -8.68513169029126780280798337091982780598228096116e-0004 + * q2[7] = -1.25104431629401181525027098222745544809974229874e-0004 + * q2[8] = 3.10558344839000038489191304550998047521253437464e-0005 + * q2[9] = -1.76829227852852176018537139573609433652506765712e-0006 + * + * i = 3 + * p3[0] = 0.3824094797345675048502747661075355640070439388902 + * p3[1] = 3.42198093076618495415854906335908427159833377774e-0001 + * p3[2] = 9.63828189500585568303961406863153237440702754858e-0002 + * p3[3] = 8.76069421042696384852462044188520252156846768667e-0003 + * p3[4] = 1.86477890389161491224872014149309015261897537488e-0003 + * p3[5] = 8.16871354540309895879974742853701311541286944191e-0004 + * p3[6] = 6.83783483674600322518695090864659381650125625216e-0005 + * p3[7] = -1.10168269719261574708565935172719209272190828456e-0006 + * p3[8] = 9.66243228508380420159234853278906717065629721016e-0007 + * p3[9] = 2.31858885579177250541163820671121664974334728142e-0008 + * q3[0] = 1.0 + * q3[1] = 8.25479821168813634632437430090376252512793067339e-0001 + * q3[2] = -1.62251363073937769739639623669295110346015576320e-0002 + * q3[3] = -1.10621286905916732758745130629426559691187579852e-0001 + * q3[4] = 3.48309693970985612644446415789230015515365291459e-0003 + * q3[5] = 6.73553737487488333032431261131289672347043401328e-0003 + * q3[6] = -7.63222008393372630162743587811004613050245128051e-0004 + * q3[7] = -1.35792670669190631476784768961953711773073251336e-0004 + * q3[8] = 3.19610150954223587006220730065608156460205690618e-0005 + * q3[9] = -1.82096553862822346610109522015129585693354348322e-0006 + * + * (C) For x between 0 and 1. 
+ * Let P stand for the number of significant bits in the working precision. + * (1) For 0 <= x <= 2**(-P), gamma(x) is computed by 1/x rounded to + * nearest. The error is bounded by 0.789 ulp(gamma(x)) in IEEE double + * precision. + * Proof. + * Since 1/gamma(x) ~ x + 0.577...*x**2 - ..., we have, for small x, + * 1/(x*(1+0.578x)) < gamma(x) < 1/x and + * 0 < 1/x - gamma(x) <= 1/x - 1/(x*(1+0.578x)) < 0.578 + * The error is thus bounded by (1/2)*ulp(1/x) + 0.578. Since + * x <= 2**(-P), 1/x >= 2**P and ulp(1/x) >= ulp(2**P) >= 2. Thus + * 0.578 = 0.289*2 <= 0.289*ulp(1/x). + * Thus + * | gamma(x) - [1/x] rounded | <= (0.5+0.289)*ulp(1/x). + * Note that for x <= 2**(-P), it is easy to see that + * ulp(1/x) = ulp(gamma(x)) except only when x = 2**n, (n <= -53). In + * such cases, 1/x is exact and therefore the error is bounded by + * 0.289*ulp(1/x) = 0.289*2*ulp(gamma(x)) = 0.578 ulp(gamma(x)). + * Thus we conclude that the error in gamma is less than 0.789 ulp. + * + * (2) Otherwise, for x in GTi-1 (see B), let y = x-(zi-1). From (B) we + * obtain gamma(1+x) = gy.h + gy.l, then compute gamma(x) by + * gamma(1+x)/x. + * Implementation note. Write x = x.h+x.l, and let th = gy.h/x chopped + * to 20 bits, then + * gamma(x) = th + ((gy.h+gy.l)/x - th) + * = th + (1/x)*(gy.h-th*x.h+gy.l-th*x.l) + * + * (D) For x between 2 and 8. Let n = x-1 chopped to an integer. Then + * + * gamma(x)=(x-1)*(x-2)*...*(x-n)*gamma(x-n) + * + * Since x-n is between 1 and 2, we can apply (B) to compute gamma(x). + * + * Implementation detail. The computation of (x-1)(x-2)...(x-n) in simulated + * higher precision arithmetic can be somewhat optimized. For example, in + * computing (x-1)*(x-2)*(x-3)*(x-4), if we compute (x-1)*(x-4) = z.h+z.l, + * then (x-2)(x-3) = z.h+2+z.l readily. 
Below, we list the expressions + * of the formula to compute gamma(x). + * + * Assume x-n is in GTi (i=1,2, or 3, see B for detail). Let y = x - n - zi. + * By (B) we have gamma(x-n) = gy.h+gy.l. If x = x.h+x.l, then we have + * n=1 (x in [2,3]): + * gamma(x) = (x-1)*gamma(x-1) = (x-1)*(gy.h+gy.l) + * = [(x.h-1)+x.l]*(gy.h+gy.l) + * n=2 (x in [3,4]): + * gamma(x) = (x-1)(x-2)*gamma(x-2) = (x-1)*(x-2)*(gy.h+gy.l) + * = ((x.h-2)+x.l)*((x.h-1)+x.l)*(gy.h+gy.l) + * = [x.h*(x.h-3)+2+x.l*(x+(x.h-3))]*(gy.h+gy.l) + * n=3 (x in [4,5]) + * gamma(x) = (x-1)(x-2)(x-3)*(gy.h+gy.l) + * = (x.h*(x.h-3)+2+x.l*(x+(x.h-3)))*[((x.h-3)+x.l)(gy.h+gy.l)] + * n=4 (x in [5,6]) + * gamma(x) = [(x-1)(x-4)]*[(x-2)(x-3)]*(gy.h+gy.l) + * = [(x.h*(x.h-5)+4+x.l(x+(x.h-5)))]*[(x-2)*(x-3)]*(gy.h+gy.l) + * = (y.h+y.l)*(y.h+2+y.l)*(gy.h+gy.l) + * n=5 (x in [6,7]) + * gamma(x) = [(x-1)(x-4)]*[(x-2)(x-3)]*[(x-5)*(gy.h+gy.l)] + * n=6 (x in [7,8]) + * gamma(x) = [(x-1)(x-6)]*[(x-2)(x-5)]*[(x-3)(x-4)*(gy.h+gy.l)] + * = [(y.h+y.l)(y.h+4+y.l)][(y.h+6+y.l)(gy.h+gy.l)] + * + * (E)Overflow Threshold. For x > Overflow threshold of gamma, + * return huge*huge (overflow). + * + * By checking whether lgamma(x) >= 2**{128,1024,16384}, one can + * determine the overflow threshold for x in single, double, and + * quad precision. See over.c for details. + * + * The overflow thresholds of gamma(x) are + * + * single: x = 3.5040096283e+01 + * = 0x420C290F (IEEE single) + * double: x = 1.71624376956302711505e+02 + * = 0x406573FAE561F647 (IEEE double) + * quad: x = 1.7555483429044629170038892160702032034177e+03 + * = 0x4009B6E3180CD66A5C4206F128BA77F4 (quad) + * + * (F)For overflow_threshold >= x >= 8, we use asymptotic approximation. 
+ * (1) Stirling's formula + * + * log(G(x)) ~= (x-.5)*(log(x)-1) + .5(log(2*pi)-1) + (1/x)*P(1/(x*x)) + * = L1 + L2 + L3, + * where + * L1(x) = (x-.5)*(log(x)-1), + * L2 = .5(log(2pi)-1) = 0.41893853...., + * L3(x) = (1/x)P(1/(x*x)), + * + * The ranges of L1, L2, and L3 are as follows: + * + * ------------------------------------------------------------------ + * Range(L1) = (single) [8.09..,88.30..] =[2** 3.01..,2** 6.46..] + * (double) [8.09..,709.3..] =[2** 3.01..,2** 9.47..] + * (quad) [8.09..,11356.10..]=[2** 3.01..,2** 13.47..] + * Range(L2) = 0.41893853..... + * Range(L3) = [0.0104...., 0.00048....] =[2**-6.58..,2**-11.02..] + * ------------------------------------------------------------------ + * + * Gamma(x) is then computed by exp(L1+L2+L3). + * + * (2) Error analysis of (F): + * -------------------------- + * The error in Gamma(x) depends on the error inherited in the computation + * of L= L1+L2+L3. Let L' be the computed value of L. The absolute error + * in L' is t = L-L'. Since exp(L') = exp(L-t) = exp(L)*exp(-t) ~ + * (1-t)*exp(L), the relative error in exp(L') is approximately |t|. + * + * To guarantee the relative accuracy in exp(L'), we would like + * |t| < 2**(-P-5) where P denotes the number of significant bits + * of the working precision. Consequently, each of L1, L2, and L3 + * must be computed with absolute error bounded by 2**(-P-5) in absolute + * value. + * + * Since L2 is a constant, it can be pre-computed to the desired accuracy. + * Also |L3| < 2**-6; therefore, it suffices to compute L3 with the + * working precision. That is, + * L3(x) approximates log(G(x))-(x-.5)(log(x)-1)-.5(log(2pi)-1) + * to a precision bounded by 2**(-P-5). 
+ * + * 2**(-6) + * _________V___________________ + * L1(x): |_________|___________________| + * __ ________________________ + * L2: |__|________________________| + * __________________________ + * + L3(x): |__________________________| + * ------------------------------------------- + * [leading] + [Trailing] + * + * For L1(x)=(x-0.5)*(log(x)-1), we need ilogb(L1(x))+5 extra bits for + * both multiplicands to guarantee L1(x)'s absolute error is bounded by + * 2**(-P-5) in absolute value. Here ilogb(y) is defined to be the unbiased + * binary exponent of y in IEEE format. We can get x-0.5 to the desired + * accuracy easily. It remains to compute log(x)-1 with ilogb(L1(x))+5 + * extra bits of accuracy. Since the range of L1 extends to 88.30.., + * 709.3.., and 11356.10... for single, double, and quadruple precision + * respectively, we have + * + * single double quadruple + * ------------------------------------ + * ilogb(L1(x))+5 <= 11 14 18 + * ------------------------------------ + * + * (3) Table Driven Method for log(x)-1: + * -------------------------------------- + * Let x = 2**n * y, where 1 <= y < 2. Let Z={z(i),i=1,...,m} + * be a set of predetermined evenly distributed floating point numbers + * in [1, 2]. Let z(j) be the closest one to y, then + * log(x)-1 = n*log(2)-1 + log(y) + * = n*log(2)-1 + log(z(j)*y/z(j)) + * = n*log(2)-1 + log(z(j)) + log(y/z(j)) + * = T1(n) + T2(j) + T3, + * + * where T1(n) = n*log(2)-1 and T2(j) = log(z(j)). Both T1 and T2 can be + * pre-calculated and be looked-up in a table. Note that 8 <= x < 1756 + * implies 3<=n<=10 implies 1.079.. < T1(n) < 6.931. + * + * + * For T3, let s = (y-z(i))/(y+z(i)); then y/z(i) = (1+s)/(1-s) and + * T3 = log((1+s)/(1-s)) = 2s + (2/3)*s**3 + (2/5)*s**5 + .... + * + * Suppose the first term 2s is computed in extra precision. The + * dominating error in T3 would then be the rounding error of the + * second term 2/3*s**3. 
To force the rounding bounded by + * the required accuracy, we have + * single: |2/3*s**3| < 2**-11 ==> |s|<0.09014... + * double: |2/3*s**3| < 2**-14 ==> |s|<0.04507... + * quad : |2/3*s**3| < 2**-18 ==> |s|<0.01788... = 2**(-5.80..) + * + * Based on this analysis, we choose Z = {z(i)|z(i)=1+i/64+1/128, 0<=i<=63}. + * For any y in [1,2), let j = [64*y] chopped to integer, then z(j) is + * the closest to y, and it is not difficult to see that |s| < 2**(-8). + * Please note that the polynomial approximation of T3 must be accurate + * to 2**(-24-11) = 2**(-35), 2**(-53-14) = 2**(-67), and + * 2**(-113-18) = 2**(-131) + * for single, double, and quadruple precision respectively. + * + * Implementation notes. + * (1) Table look-up entries for T1(n) and T2(j), as well as the calculation + * of the leading term 2s in T3, are broken up into leading and trailing + * parts such that (leading part)* 2**24 will always be an integer. That + * will guarantee the addition of the leading parts will be exact. + * + * 2**(-24) + * _________V___________________ + * T1(n): |_________|___________________| + * _______ ______________________ + * T2(j): |_______|______________________| + * ____ _______________________ + * 2s: |____|_______________________| + * __________________________ + * + T3(s)-2s: |__________________________| + * ------------------------------------------- + * [leading] + [Trailing] + * + * (2) How to compute 2s accurately. + * (A) Compute v = 2s to the working precision. If |v| < 2**(-18), + * stop. + * (B) chop v to 2**(-24): v = ((int)(v*2**24))/2**24 + * (C) 2s = v + (2s - v), where + * 2s - v = (1/(y+z)) * (2(y-z) - v*(y+z)) + * = (1/(y+z)) * ( [2(y-z) - v*(y+z)_h ] - v*(y+z)_l ) + * where (y+z)_h = (y+z) rounded to 24 bits by (double)(float), + * and (y+z)_l = ((z+z)-(y+z)_h)+(y-z). Note that the quantity + * in [] is exact. 
+ * 2 4 + * (3) Remez approximation for (T3(s)-2s)/s = T3[0]*s + T3[1]*s + ...: + * Single precision: 1 term (compute in double precision arithmetic) + * T3(s) = 2s + S1*s^3, S1 = 0.6666717231848518054693623697539230 + * Remez error: |T3(s)/s - (2s+S1*s^3)| < 2**(-35.87) + * Double precision: 3 terms, Remez error is bounded by 2**(-72.40), + * see "tgamma_log" + * Quad precision: 7 terms, Remez error is bounded by 2**(-136.54), + * see "tgammal_log" + * + * The computation of 0.5*(ln(2pi)-1): + * 0.5*(ln(2pi)-1) = 0.4189385332046727417803297364056176398614... + * split 0.5*(ln(2pi)-1) to hln2pi_h + hln2pi_l, where hln2pi_h is the + * leading 21 bits of the constant. + * hln2pi_h= 0.4189383983612060546875 + * hln2pi_l= 1.348434666870928297364056176398612173648e-07 + * + * The computation of 1/x*P(1/x^2) = log(G(x))-(x-.5)(ln(x)-1)-(.5ln(2pi)-1): + * Let s = 1/x <= 1/8 < 0.125. We have + * quad precision + * |GP(s) - s*P(s^2)| <= 2**(-120.6), where + * 3 5 39 + * GP(s) = GP0*s+GP1*s +GP2*s +... 
+GP19*s , + * GP0 = 0.083333333333333333333333333333333172839171301 + * hex 0x3ffe5555 55555555 55555555 55555548 + * GP1 = -2.77777777777777777777777777492501211999399424104e-0003 + * GP2 = 7.93650793650793650793635650541638236350020883243e-0004 + * GP3 = -5.95238095238095238057299772679324503339241961704e-0004 + * GP4 = 8.41750841750841696138422987977683524926142600321e-0004 + * GP5 = -1.91752691752686682825032547823699662178842123308e-0003 + * GP6 = 6.41025641022403480921891559356473451161279359322e-0003 + * GP7 = -2.95506535798414019189819587455577003732808185071e-0002 + * GP8 = 1.79644367229970031486079180060923073476568732136e-0001 + * GP9 = -1.39243086487274662174562872567057200255649290646e+0000 + * GP10 = 1.34025874044417962188677816477842265259608269775e+0001 + * GP11 = -1.56803713480127469414495545399982508700748274318e+0002 + * GP12 = 2.18739841656201561694927630335099313968924493891e+0003 + * GP13 = -3.55249848644100338419187038090925410976237921269e+0004 + * GP14 = 6.43464880437835286216768959439484376449179576452e+0005 + * GP15 = -1.20459154385577014992600342782821389605893904624e+0007 + * GP16 = 2.09263249637351298563934942349749718491071093210e+0008 + * GP17 = -2.96247483183169219343745316433899599834685703457e+0009 + * GP18 = 2.88984933605896033154727626086506756972327292981e+0010 + * GP19 = -1.40960434146030007732838382416230610302678063984e+0011 + * + * double precision + * |GP(s) - s*P(s^2)| <= 2**(-63.5), where + * 3 5 7 9 11 13 15 + * GP(s) = GP0*s+GP1*s +GP2*s +GP3*s +GP4*s +GP5*s +GP6*s +GP7*s , + * + * GP0= 0.0833333333333333287074040640618477 (3FB55555 55555555) + * GP1= -2.77777777776649355200565611114627670089130772843e-0003 + * GP2= 7.93650787486083724805476194170211775784158551509e-0004 + * GP3= -5.95236628558314928757811419580281294593903582971e-0004 + * GP4= 8.41566473999853451983137162780427812781178932540e-0004 + * GP5= -1.90424776670441373564512942038926168175921303212e-0003 + * GP6= 
5.84933161530949666312333949534482303007354299178e-0003 + * GP7= -1.59453228931082030262124832506144392496561694550e-0002 + * single precision + * |GP(s) - s*P(s^2)| <= 2**(-37.78), where + * 3 5 + * GP(s) = GP0*s+GP1*s +GP2*s + * GP0 = 8.33333330959694065245736888749042811909994573178e-0002 + * GP1 = -2.77765545601667179767706600890361535225507762168e-0003 + * GP2 = 7.77830853479775281781085278324621033523037489883e-0004 + * + * + * Implementation note: + * z = (1/x), z2 = z*z, z4 = z2*z2; + * p = z*(GP0+z2*(GP1+....+z2*GP7)) + * = z*(GP0+(z4*(GP2+z4*(GP4+z4*GP6))+z2*(GP1+z4*(GP3+z4*(GP5+z4*GP7))))) + * + * Adding everything up: + * t = rr.h*ww.h+hln2pi_h ... exact + * w = (hln2pi_l + ((x-0.5)*ww.l+rr.l*ww.h)) + p + * + * Computing exp(t+w): + * s = t+w; write s = (n+j/32)*ln2+r, |r|<=(1/64)*ln2, then + * exp(s) = 2**n * (2**(j/32) + 2**(j/32)*expm1(r)), where + * expm1(r) = r + Et1*r^2 + Et2*r^3 + ... + Et5*r^6, and + * 2**(j/32) is obtained by table look-up S[j]+S_trail[j]. + * Remez error bound: + * |exp(r) - (1+r+Et1*r^2+...+Et5*r^6)| <= 2^(-63). 
+ */ + +#include "libm.h" + +#define __HI(x) ((int *) &x)[HIWORD] +#define __LO(x) ((unsigned *) &x)[LOWORD] + +/* A value carried as a head (h) plus tail (l) pair, as in y = y.h + y.l above. */ +struct Double { + double h; + double l; +}; + +/* + * Constant table; entries are referenced through the #define aliases that + * follow it (one, two, half, tiny, A1..A3, GP0..GP7, hln2pi_h, ...). + * The hex value of GP0 should be 3FB55555 55555555. + */ +static const double c[] = { + +1.0, + +2.0, + +0.5, + +1.0e-300, + +6.66666666666666740682e-01, /* A1=T3[0] */ + +3.99999999955626478023093908674902212920e-01, /* A2=T3[1] */ + +2.85720221533145659809237398709372330980e-01, /* A3=T3[2] */ + +0.0833333333333333287074040640618477, /* GP[0] */ + -2.77777777776649355200565611114627670089130772843e-03, + +7.93650787486083724805476194170211775784158551509e-04, + -5.95236628558314928757811419580281294593903582971e-04, + +8.41566473999853451983137162780427812781178932540e-04, + -1.90424776670441373564512942038926168175921303212e-03, + +5.84933161530949666312333949534482303007354299178e-03, + -1.59453228931082030262124832506144392496561694550e-02, + +4.18937683105468750000e-01, /* hln2pi_h */ + +8.50099203991780279640e-07, /* hln2pi_l */ + +4.18938533204672741744150788368695779923320328369e-01, /* hln2pi */ + +2.16608493865351192653e-02, /* ln2_32hi */ + +5.96317165397058656257e-12, /* ln2_32lo */ + +4.61662413084468283841e+01, /* invln2_32 */ + +5.0000000000000000000e-1, /* Et1 */ + +1.66666666665223585560605991943703896196054020060e-01, /* Et2 */ + +4.16666666665895103520154073534275286743788421687e-02, /* Et3 */ + +8.33336844093536520775865096538773197505523826029e-03, /* Et4 */ + +1.38889201930843436040204096950052984793587640227e-03, /* Et5 */ +}; + +#define one c[0] +#define two c[1] +#define half c[2] +#define tiny c[3] +#define A1 c[4] +#define A2 c[5] +#define A3 c[6] +#define GP0 c[7] +#define GP1 c[8] +#define GP2 c[9] +#define GP3 c[10] +#define GP4 c[11] +#define GP5 c[12] +#define GP6 c[13] +#define GP7 c[14] +#define hln2pi_h c[15] +#define hln2pi_l c[16] +#define hln2pi c[17] +#define ln2_32hi c[18] +#define ln2_32lo c[19] +#define invln2_32 c[20] +#define Et1 c[21] +#define Et2 c[22] +#define Et3 
c[23] +#define Et4 c[24] +#define Et5 c[25] + +/* + * double precision coefficients for computing log(x)-1 in tgamma. + * See "algorithm" for details + * + * log(x) - 1 = T1(n) + T2(j) + T3(s), where x = 2**n * y, 1<=y<2, + * j=[64*y], z[j]=1+j/64+1/128, s = (y-z[j])/(y+z[j]), and + * T1(n) = T1[2n,2n+1] = n*log(2)-1, + * T2(j) = T2[2j,2j+1] = log(z[j]), + * T3(s) = 2s + T3[0]s^3 + T3[1]s^5 + T3[2]s^7 + * = 2s + A1*s^3 + A2*s^5 + A3*s^7 (see const A1,A2,A3) + * Note + * (1) the leading entries are truncated to 24 binary point. + * See Remezpak/sun/tgamma_log_64.c + * (2) Remez error for T3(s) is bounded by 2**(-72.4) + * See mpremez/work/Log/tgamma_log_4_outr2 + */ + +static const double T1[] = { + -1.00000000000000000000e+00, /* 0xBFF00000 0x00000000 */ + +0.00000000000000000000e+00, /* 0x00000000 0x00000000 */ + -3.06852817535400390625e-01, /* 0xBFD3A37A 0x00000000 */ + -1.90465429995776763166e-09, /* 0xBE205C61 0x0CA86C38 */ + +3.86294305324554443359e-01, /* 0x3FD8B90B 0xC0000000 */ + +5.57953361754750897367e-08, /* 0x3E6DF473 0xDE6AF279 */ + +1.07944148778915405273e+00, /* 0x3FF14564 0x70000000 */ + +5.38906818755173187963e-08, /* 0x3E6CEEAD 0xCDA06BB5 */ + +1.77258867025375366211e+00, /* 0x3FFC5C85 0xF0000000 */ + +5.19860275755595544734e-08, /* 0x3E6BE8E7 0xBCD5E4F2 */ + +2.46573585271835327148e+00, /* 0x4003B9D3 0xB8000000 */ + +5.00813732756017835330e-08, /* 0x3E6AE321 0xAC0B5E2E */ + +3.15888303518295288086e+00, /* 0x40094564 0x78000000 */ + +4.81767189756440192100e-08, /* 0x3E69DD5B 0x9B40D76B */ + +3.85203021764755249023e+00, /* 0x400ED0F5 0x38000000 */ + +4.62720646756862482697e-08, /* 0x3E68D795 0x8A7650A7 */ + +4.54517740011215209961e+00, /* 0x40122E42 0xFC000000 */ + +4.43674103757284839467e-08, /* 0x3E67D1CF 0x79ABC9E4 */ + +5.23832458257675170898e+00, /* 0x4014F40B 0x5C000000 */ + +4.24627560757707130063e-08, /* 0x3E66CC09 0x68E14320 */ + +5.93147176504135131836e+00, /* 0x4017B9D3 0xBC000000 */ + +4.05581017758129486834e-08, /* 0x3E65C643 
0x5816BC5D */ +}; + +static const double T2[] = { + +7.78210163116455078125e-03, /* 0x3F7FE020 0x00000000 */ + +3.88108903981662140884e-08, /* 0x3E64D620 0xCF11F86F */ + +2.31670141220092773438e-02, /* 0x3F97B918 0x00000000 */ + +4.51595251008850513740e-08, /* 0x3E683EAD 0x88D54940 */ + +3.83188128471374511719e-02, /* 0x3FA39E86 0x00000000 */ + +5.14549991480218823411e-08, /* 0x3E6B9FEB 0xD5FA9016 */ + +5.32444715499877929688e-02, /* 0x3FAB42DC 0x00000000 */ + +4.29688244898971182165e-08, /* 0x3E671197 0x1BEC28D1 */ + +6.79506063461303710938e-02, /* 0x3FB16536 0x00000000 */ + +5.55623773783008185114e-08, /* 0x3E6DD46F 0x5C1D0C4C */ + +8.24436545372009277344e-02, /* 0x3FB51B07 0x00000000 */ + +1.46738736635337847313e-08, /* 0x3E4F830C 0x1FB493C7 */ + +9.67295765876770019531e-02, /* 0x3FB8C345 0x00000000 */ + +4.98708741103424492282e-08, /* 0x3E6AC633 0x641EB597 */ + +1.10814332962036132812e-01, /* 0x3FBC5E54 0x00000000 */ + +3.33782539813823062226e-08, /* 0x3E61EB78 0xE862BAC3 */ + +1.24703466892242431641e-01, /* 0x3FBFEC91 0x00000000 */ + +1.16087148042227818450e-08, /* 0x3E48EDF5 0x5D551729 */ + +1.38402283191680908203e-01, /* 0x3FC1B72A 0x80000000 */ + +3.96674382274822001957e-08, /* 0x3E654BD9 0xE80A4181 */ + +1.51916027069091796875e-01, /* 0x3FC371FC 0x00000000 */ + +1.49567501781968021494e-08, /* 0x3E500F47 0xBA1DE6CB */ + +1.65249526500701904297e-01, /* 0x3FC526E5 0x80000000 */ + +4.63946052585787334062e-08, /* 0x3E68E86D 0x0DE8B900 */ + +1.78407609462738037109e-01, /* 0x3FC6D60F 0x80000000 */ + +4.80100802600100279538e-08, /* 0x3E69C674 0x8723551E */ + +1.91394805908203125000e-01, /* 0x3FC87FA0 0x00000000 */ + +4.70914263296092971436e-08, /* 0x3E694832 0x44240802 */ + +2.04215526580810546875e-01, /* 0x3FCA23BC 0x00000000 */ + +1.48478803446288209001e-08, /* 0x3E4FE2B5 0x63193712 */ + +2.16873884201049804688e-01, /* 0x3FCBC286 0x00000000 */ + +5.40995645549315919488e-08, /* 0x3E6D0B63 0x358A7E74 */ + +2.29374051094055175781e-01, /* 0x3FCD5C21 0x00000000 */ + 
+4.99707906542102284117e-08, /* 0x3E6AD3EE 0xE456E443 */ + +2.41719901561737060547e-01, /* 0x3FCEF0AD 0x80000000 */ + +3.53254081075974352804e-08, /* 0x3E62F716 0x4D948638 */ + +2.53915190696716308594e-01, /* 0x3FD04025 0x80000000 */ + +1.92842471355435739091e-08, /* 0x3E54B4D0 0x40DAE27C */ + +2.65963494777679443359e-01, /* 0x3FD1058B 0xC0000000 */ + +5.37194584979797487125e-08, /* 0x3E6CD725 0x6A8C4FD0 */ + +2.77868449687957763672e-01, /* 0x3FD1C898 0xC0000000 */ + +1.31549854251447496506e-09, /* 0x3E16999F 0xAFBC68E7 */ + +2.89633274078369140625e-01, /* 0x3FD2895A 0x00000000 */ + +1.85046735362538929911e-08, /* 0x3E53DE86 0xA35EB493 */ + +3.01261305809020996094e-01, /* 0x3FD347DD 0x80000000 */ + +2.47691407849191245052e-08, /* 0x3E5A987D 0x54D64567 */ + +3.12755703926086425781e-01, /* 0x3FD40430 0x80000000 */ + +6.07781046260499658610e-09, /* 0x3E3A1A9F 0x8EF4304A */ + +3.24119448661804199219e-01, /* 0x3FD4BE5F 0x80000000 */ + +1.99924077768719198045e-08, /* 0x3E557778 0xA0DB4C99 */ + +3.35355520248413085938e-01, /* 0x3FD57677 0x00000000 */ + +2.16727247443196802771e-08, /* 0x3E57455A 0x6C549AB7 */ + +3.46466720104217529297e-01, /* 0x3FD62C82 0xC0000000 */ + +4.72419910516215900493e-08, /* 0x3E695CE3 0xCA97B7B0 */ + +3.57455849647521972656e-01, /* 0x3FD6E08E 0x80000000 */ + +3.92742818015697624778e-08, /* 0x3E6515D0 0xF1C609CA */ + +3.68325531482696533203e-01, /* 0x3FD792A5 0x40000000 */ + +2.96760111198451042238e-08, /* 0x3E5FDD47 0xA27C15DA */ + +3.79078328609466552734e-01, /* 0x3FD842D1 0xC0000000 */ + +2.43255029056564770289e-08, /* 0x3E5A1E8B 0x17493B14 */ + +3.89716744422912597656e-01, /* 0x3FD8F11E 0x80000000 */ + +6.71711261571421332726e-09, /* 0x3E3CD98B 0x1DF85DA7 */ + +4.00243163108825683594e-01, /* 0x3FD99D95 0x80000000 */ + +1.01818702333557515008e-09, /* 0x3E117E08 0xACBA92EF */ + +4.10659909248352050781e-01, /* 0x3FDA4840 0x80000000 */ + +1.57369163351530571459e-08, /* 0x3E50E5BB 0x0A2BFCA7 */ + +4.20969247817993164062e-01, /* 0x3FDAF129 
0x00000000 */ + +4.68261364720663662040e-08, /* 0x3E6923BC 0x358899C2 */ + +4.31173443794250488281e-01, /* 0x3FDB9858 0x80000000 */ + +2.10241208525779214510e-08, /* 0x3E569310 0xFB598FB1 */ + +4.41274523735046386719e-01, /* 0x3FDC3DD7 0x80000000 */ + +3.70698288427707487748e-08, /* 0x3E63E6D6 0xA6B9D9E1 */ + +4.51274633407592773438e-01, /* 0x3FDCE1AF 0x00000000 */ + +1.07318658117071930723e-08, /* 0x3E470BE7 0xD6F6FA58 */ + +4.61175680160522460938e-01, /* 0x3FDD83E7 0x00000000 */ + +3.49616477054305011286e-08, /* 0x3E62C517 0x9F2828AE */ + +4.70979690551757812500e-01, /* 0x3FDE2488 0x00000000 */ + +2.46670332000468969567e-08, /* 0x3E5A7C6C 0x261CBD8F */ + +4.80688512325286865234e-01, /* 0x3FDEC399 0xC0000000 */ + +1.70204650424422423704e-08, /* 0x3E52468C 0xC0175CEE */ + +4.90303933620452880859e-01, /* 0x3FDF6123 0xC0000000 */ + +5.44247409572909703749e-08, /* 0x3E6D3814 0x5630A2B6 */ + +4.99827861785888671875e-01, /* 0x3FDFFD2E 0x00000000 */ + +7.77056065794633071345e-09, /* 0x3E40AFE9 0x30AB2FA0 */ + +5.09261846542358398438e-01, /* 0x3FE04BDF 0x80000000 */ + +5.52474495483665749052e-08, /* 0x3E6DA926 0xD265FCC1 */ + +5.18607735633850097656e-01, /* 0x3FE0986F 0x40000000 */ + +2.85741955344967264536e-08, /* 0x3E5EAE6A 0x41723FB5 */ + +5.27867078781127929688e-01, /* 0x3FE0E449 0x80000000 */ + +1.08397144554263914271e-08, /* 0x3E474732 0x2FDBAB97 */ + +5.37041425704956054688e-01, /* 0x3FE12F71 0x80000000 */ + +4.01919275998792285777e-08, /* 0x3E6593EF 0xBC530123 */ + +5.46132385730743408203e-01, /* 0x3FE179EA 0xA0000000 */ + +5.18673922421792693237e-08, /* 0x3E6BD899 0xA0BFC60E */ + +5.55141448974609375000e-01, /* 0x3FE1C3B8 0x00000000 */ + +5.85658922177154808539e-08, /* 0x3E6F713C 0x24BC94F9 */ + +5.64070105552673339844e-01, /* 0x3FE20CDC 0xC0000000 */ + +3.27321296262276338905e-08, /* 0x3E6192AB 0x6D93503D */ + +5.72919726371765136719e-01, /* 0x3FE2555B 0xC0000000 */ + +2.71900203723740076878e-08, /* 0x3E5D31EF 0x96780876 */ + +5.81691682338714599609e-01, /* 
0x3FE29D37 0xE0000000 */ + +5.72959078829112371070e-08, /* 0x3E6EC2B0 0x8AC85CD7 */ + +5.90387403964996337891e-01, /* 0x3FE2E474 0x20000000 */ + +4.26371800367512948470e-08, /* 0x3E66E402 0x68405422 */ + +5.99008142948150634766e-01, /* 0x3FE32B13 0x20000000 */ + +4.66979327646159769249e-08, /* 0x3E69121D 0x71320557 */ + +6.07555210590362548828e-01, /* 0x3FE37117 0xA0000000 */ + +3.96341792466729582847e-08, /* 0x3E654747 0xB5C5DD02 */ + +6.16029858589172363281e-01, /* 0x3FE3B684 0x40000000 */ + +1.86263416563663175432e-08, /* 0x3E53FFF8 0x455F1DBE */ + +6.24433279037475585938e-01, /* 0x3FE3FB5B 0x80000000 */ + +8.97441791510503832111e-09, /* 0x3E4345BD 0x096D3A75 */ + +6.32766664028167724609e-01, /* 0x3FE43F9F 0xE0000000 */ + +5.54287010493641158796e-09, /* 0x3E37CE73 0x3BD393DD */ + +6.41031146049499511719e-01, /* 0x3FE48353 0xC0000000 */ + +3.33714317793368531132e-08, /* 0x3E61EA88 0xDF73D5E9 */ + +6.49227917194366455078e-01, /* 0x3FE4C679 0xA0000000 */ + +2.94307433638127158696e-08, /* 0x3E5F99DC 0x7362D1DA */ + +6.57358050346374511719e-01, /* 0x3FE50913 0xC0000000 */ + +2.23619855184231409785e-08, /* 0x3E5802D0 0xD6979675 */ + +6.65422618389129638672e-01, /* 0x3FE54B24 0x60000000 */ + +1.41559608102782173188e-08, /* 0x3E4E6652 0x5EA4550A */ + +6.73422634601593017578e-01, /* 0x3FE58CAD 0xA0000000 */ + +4.06105737027198329700e-08, /* 0x3E65CD79 0x893092F2 */ + +6.81359171867370605469e-01, /* 0x3FE5CDB1 0xC0000000 */ + +5.29405324634793230630e-08, /* 0x3E6C6C17 0x648CF6E4 */ + +6.89233243465423583984e-01, /* 0x3FE60E32 0xE0000000 */ + +3.77733853963405370102e-08, /* 0x3E644788 0xD8CA7C89 */ +}; + +/* S[j],S_trail[j] = 2**(j/32.) 
for the final computation of exp(t+w) */ +static const double S[] = { + +1.00000000000000000000e+00, /* 3FF0000000000000 */ + +1.02189714865411662714e+00, /* 3FF059B0D3158574 */ + +1.04427378242741375480e+00, /* 3FF0B5586CF9890F */ + +1.06714040067682369717e+00, /* 3FF11301D0125B51 */ + +1.09050773266525768967e+00, /* 3FF172B83C7D517B */ + +1.11438674259589243221e+00, /* 3FF1D4873168B9AA */ + +1.13878863475669156458e+00, /* 3FF2387A6E756238 */ + +1.16372485877757747552e+00, /* 3FF29E9DF51FDEE1 */ + +1.18920711500272102690e+00, /* 3FF306FE0A31B715 */ + +1.21524735998046895524e+00, /* 3FF371A7373AA9CB */ + +1.24185781207348400201e+00, /* 3FF3DEA64C123422 */ + +1.26905095719173321989e+00, /* 3FF44E086061892D */ + +1.29683955465100964055e+00, /* 3FF4BFDAD5362A27 */ + +1.32523664315974132322e+00, /* 3FF5342B569D4F82 */ + +1.35425554693689265129e+00, /* 3FF5AB07DD485429 */ + +1.38390988196383202258e+00, /* 3FF6247EB03A5585 */ + +1.41421356237309514547e+00, /* 3FF6A09E667F3BCD */ + +1.44518080697704665027e+00, /* 3FF71F75E8EC5F74 */ + +1.47682614593949934623e+00, /* 3FF7A11473EB0187 */ + +1.50916442759342284141e+00, /* 3FF82589994CCE13 */ + +1.54221082540794074411e+00, /* 3FF8ACE5422AA0DB */ + +1.57598084510788649659e+00, /* 3FF93737B0CDC5E5 */ + +1.61049033194925428347e+00, /* 3FF9C49182A3F090 */ + +1.64575547815396494578e+00, /* 3FFA5503B23E255D */ + +1.68179283050742900407e+00, /* 3FFAE89F995AD3AD */ + +1.71861929812247793414e+00, /* 3FFB7F76F2FB5E47 */ + +1.75625216037329945351e+00, /* 3FFC199BDD85529C */ + +1.79470907500310716820e+00, /* 3FFCB720DCEF9069 */ + +1.83400808640934243066e+00, /* 3FFD5818DCFBA487 */ + +1.87416763411029996256e+00, /* 3FFDFC97337B9B5F */ + +1.91520656139714740007e+00, /* 3FFEA4AFA2A490DA */ + +1.95714412417540017941e+00, /* 3FFF50765B6E4540 */ +}; + +static const double S_trail[] = { + +0.00000000000000000000e+00, + +5.10922502897344389359e-17, /* 3C8D73E2A475B465 */ + +8.55188970553796365958e-17, /* 3C98A62E4ADC610A */ + 
-7.89985396684158212226e-17, /* BC96C51039449B3A */ + -3.04678207981247114697e-17, /* BC819041B9D78A76 */ + +1.04102784568455709549e-16, /* 3C9E016E00A2643C */ + +8.91281267602540777782e-17, /* 3C99B07EB6C70573 */ + +3.82920483692409349872e-17, /* 3C8612E8AFAD1255 */ + +3.98201523146564611098e-17, /* 3C86F46AD23182E4 */ + -7.71263069268148813091e-17, /* BC963AEABF42EAE2 */ + +4.65802759183693679123e-17, /* 3C8ADA0911F09EBC */ + +2.66793213134218609523e-18, /* 3C489B7A04EF80D0 */ + +2.53825027948883149593e-17, /* 3C7D4397AFEC42E2 */ + -2.85873121003886075697e-17, /* BC807ABE1DB13CAC */ + +7.70094837980298946162e-17, /* 3C96324C054647AD */ + -6.77051165879478628716e-17, /* BC9383C17E40B497 */ + -9.66729331345291345105e-17, /* BC9BDD3413B26456 */ + -3.02375813499398731940e-17, /* BC816E4786887A99 */ + -3.48399455689279579579e-17, /* BC841577EE04992F */ + -1.01645532775429503911e-16, /* BC9D4C1DD41532D8 */ + +7.94983480969762085616e-17, /* 3C96E9F156864B27 */ + -1.01369164712783039808e-17, /* BC675FC781B57EBC */ + +2.47071925697978878522e-17, /* 3C7C7C46B071F2BE */ + -1.01256799136747726038e-16, /* BC9D2F6EDB8D41E1 */ + +8.19901002058149652013e-17, /* 3C97A1CD345DCC81 */ + -1.85138041826311098821e-17, /* BC75584F7E54AC3B */ + +2.96014069544887330703e-17, /* 3C811065895048DD */ + +1.82274584279120867698e-17, /* 3C7503CBD1E949DB */ + +3.28310722424562658722e-17, /* 3C82ED02D75B3706 */ + -6.12276341300414256164e-17, /* BC91A5CD4F184B5C */ + -1.06199460561959626376e-16, /* BC9E9C23179C2893 */ + +8.96076779103666776760e-17, /* 3C99D3E12DD8A18B */ +}; + +/* Primary interval GTi() */ +static const double cr[] = { +/* p1, q1 */ + +0.70908683619977797008004927192814648151397705078125000, + +1.71987061393048558089579513384356441668351720061e-0001, + -3.19273345791990970293320316122813960527705450671e-0002, + +8.36172645419110036267169600390549973563534476989e-0003, + +1.13745336648572838333152213474277971244629758101e-0003, + +1.0, + 
+9.71980217826032937526460731778472389791321968082e-0001, + -7.43576743326756176594084137256042653497087666030e-0002, + -1.19345944932265559769719470515102012246995255372e-0001, + +1.59913445751425002620935120470781382215050284762e-0002, + +1.12601136853374984566572691306402321911547550783e-0003, +/* p2, q2 */ + +0.42848681585558601181418225678498856723308563232421875, + +6.53596762668970816023718845105667418483122103629e-0002, + -6.97280829631212931321050770925128264272768936731e-0003, + +6.46342359021981718947208605674813260166116632899e-0003, + +1.0, + +4.57572620560506047062553957454062012327519313936e-0001, + -2.52182594886075452859655003407796103083422572036e-0001, + -1.82970945407778594681348166040103197178711552827e-0002, + +2.43574726993169566475227642128830141304953840502e-0002, + -5.20390406466942525358645957564897411258667085501e-0003, + +4.79520251383279837635552431988023256031951133885e-0004, +/* p3, q3 */ + +0.382409479734567459008331979930517263710498809814453125, + +1.42876048697668161599069814043449301572928034140e-0001, + +3.42157571052250536817923866013561760785748899071e-0003, + -5.01542621710067521405087887856991700987709272937e-0004, + +8.89285814866740910123834688163838287618332122670e-0004, + +1.0, + +3.04253086629444201002215640948957897906299633168e-0001, + -2.23162407379999477282555672834881213873185520006e-0001, + -1.05060867741952065921809811933670131427552903636e-0002, + +1.70511763916186982473301861980856352005926669320e-0002, + -2.12950201683609187927899416700094630764182477464e-0003, +}; + +#define P10 cr[0] +#define P11 cr[1] +#define P12 cr[2] +#define P13 cr[3] +#define P14 cr[4] +#define Q10 cr[5] +#define Q11 cr[6] +#define Q12 cr[7] +#define Q13 cr[8] +#define Q14 cr[9] +#define Q15 cr[10] +#define P20 cr[11] +#define P21 cr[12] +#define P22 cr[13] +#define P23 cr[14] +#define Q20 cr[15] +#define Q21 cr[16] +#define Q22 cr[17] +#define Q23 cr[18] +#define Q24 cr[19] +#define Q25 cr[20] +#define Q26 cr[21] +#define P30 cr[22] 
+#define P31 cr[23] +#define P32 cr[24] +#define P33 cr[25] +#define P34 cr[26] +#define Q30 cr[27] +#define Q31 cr[28] +#define Q32 cr[29] +#define Q33 cr[30] +#define Q34 cr[31] +#define Q35 cr[32] + +static const double + GZ1_h = +0.938204627909682398190, + GZ1_l = +5.121952600248205157935e-17, + GZ2_h = +0.885603194410888749921, + GZ2_l = -4.964236872556339810692e-17, + GZ3_h = +0.936781411463652347038, + GZ3_l = -2.541923110834479415023e-17, + TZ1 = -0.3517214357852935791015625, + TZ3 = +0.280530631542205810546875; +/* INDENT ON */ + +/* compute gamma(y=yh+yl) for y in GT1 = [1.0000, 1.2845] */ +/* assume yh got 20 significant bits */ +static struct Double +GT1(double yh, double yl) { + double t3, t4, y, z; + struct Double r; + + y = yh + yl; + z = y * y; + t3 = (z * (P10 + y * ((P11 + y * P12) + z * (P13 + y * P14)))) / + (Q10 + y * ((Q11 + y * Q12) + z * ((Q13 + Q14 * y) + z * Q15))); + t3 += (TZ1 * yl + GZ1_l); + t4 = TZ1 * yh; + r.h = (double) ((float) (t4 + GZ1_h + t3)); + t3 += (t4 - (r.h - GZ1_h)); + r.l = t3; + return (r); +} + +/* compute gamma(y=yh+yl) for y in GT2 = [1.2844, 1.6374] */ +/* assume yh got 20 significant bits */ +static struct Double +GT2(double yh, double yl) { + double t3, y, z; + struct Double r; + + y = yh + yl; + z = y * y; + t3 = (z * (P20 + y * P21 + z * (P22 + y * P23))) / + (Q20 + (y * ((Q21 + Q22 * y) + z * Q23) + + (z * z) * ((Q24 + Q25 * y) + z * Q26))) + GZ2_l; + r.h = (double) ((float) (GZ2_h + t3)); + r.l = t3 - (r.h - GZ2_h); + return (r); +} + +/* compute gamma(y=yh+yl) for y in GT3 = [1.6373, 2.0000] */ +/* assume yh got 20 significant bits */ +static struct Double +GT3(double yh, double yl) { + double t3, t4, y, z; + struct Double r; + + y = yh + yl; + z = y * y; + t3 = (z * (P30 + y * ((P31 + y * P32) + z * (P33 + y * P34)))) / + (Q30 + y * ((Q31 + y * Q32) + z * ((Q33 + Q34 * y) + z * Q35))); + t3 += (TZ3 * yl + GZ3_l); + t4 = TZ3 * yh; + r.h = (double) ((float) (t4 + GZ3_h + t3)); + t3 += (t4 - (r.h - GZ3_h)); + 
r.l = t3; + return (r); +} + +/* INDENT OFF */ +/* + * return tgamma(x) scaled by 2**-m for 8<x<=171.62... using Stirling's formula + * log(G(x)) ~= (x-.5)*(log(x)-1) + .5(log(2*pi)-1) + (1/x)*P(1/(x*x)) + * = L1 + L2 + L3, + */ +/* INDENT ON */ +static struct Double +large_gam(double x, int *m) { + double z, t1, t2, t3, z2, t5, w, y, u, r, z4, v, t24 = 16777216.0, + p24 = 1.0 / 16777216.0; + int n2, j2, k, ix, j; + unsigned lx; + struct Double zz; + double u2, ss_h, ss_l, r_h, w_h, w_l, t4; + +/* INDENT OFF */ +/* + * compute ss = ss.h+ss.l = log(x)-1 (see tgamma_log.h for details) + * + * log(x) - 1 = T1(n) + T2(j) + T3(s), where x = 2**n * y, 1<=y<2, + * j=[64*y], z[j]=1+j/64+1/128, s = (y-z[j])/(y+z[j]), and + * T1(n) = T1[2n,2n+1] = n*log(2)-1, + * T2(j) = T2[2j,2j+1] = log(z[j]), + * T3(s) = 2s + A1[0]s^3 + A2[1]s^5 + A3[2]s^7 + * Note + * (1) the leading entries are truncated to 24 binary point. + * (2) Remez error for T3(s) is bounded by 2**(-72.4) + * 2**(-24) + * _________V___________________ + * T1(n): |_________|___________________| + * _______ ______________________ + * T2(j): |_______|______________________| + * ____ _______________________ + * 2s: |____|_______________________| + * __________________________ + * + T3(s)-2s: |__________________________| + * ------------------------------------------- + * [leading] + [Trailing] + */ +/* INDENT ON */ + ix = __HI(x); + lx = __LO(x); + n2 = (ix >> 20) - 0x3ff; /* exponent of x, range:3-7 */ + n2 += n2; /* 2n */ + ix = (ix & 0x000fffff) | 0x3ff00000; /* y = scale x to [1,2] */ + __HI(y) = ix; + __LO(y) = lx; + __HI(z) = (ix & 0xffffc000) | 0x2000; /* z[j]=1+j/64+1/128 */ + __LO(z) = 0; + j2 = (ix >> 13) & 0x7e; /* 2j */ + t1 = y + z; + t2 = y - z; + r = one / t1; + t1 = (double) ((float) t1); + u = r * t2; /* u = (y-z)/(y+z) */ + t4 = T2[j2 + 1] + T1[n2 + 1]; + z2 = u * u; + k = __HI(u) & 0x7fffffff; + t3 = T2[j2] + T1[n2]; + if ((k >> 20) < 0x3ec) { /* |u|<2**-19 */ + t2 = t4 + u * ((two + z2 * A1) + (z2 
* z2) * (A2 + z2 * A3)); + } else { + t5 = t4 + u * (z2 * A1 + (z2 * z2) * (A2 + z2 * A3)); + u2 = u + u; + v = (double) ((int) (u2 * t24)) * p24; + t2 = t5 + r * ((two * t2 - v * t1) - v * (y - (t1 - z))); + t3 += v; + } + ss_h = (double) ((float) (t2 + t3)); + ss_l = t2 - (ss_h - t3); + + /* + * compute ww = (x-.5)*(log(x)-1) + .5*(log(2pi)-1) + 1/x*(P(1/x^2))) + * where ss = log(x) - 1 in already in extra precision + */ + z = one / x; + r = x - half; + r_h = (double) ((float) r); + w_h = r_h * ss_h + hln2pi_h; + z2 = z * z; + w = (r - r_h) * ss_h + r * ss_l; + z4 = z2 * z2; + t1 = z2 * (GP1 + z4 * (GP3 + z4 * (GP5 + z4 * GP7))); + t2 = z4 * (GP2 + z4 * (GP4 + z4 * GP6)); + t1 += t2; + w += hln2pi_l; + w_l = z * (GP0 + t1) + w; + k = (int) ((w_h + w_l) * invln2_32 + half); + + /* compute the exponential of w_h+w_l */ + j = k & 0x1f; + *m = (k >> 5); + t3 = (double) k; + + /* perform w - k*ln2_32 (represent as w_h - w_l) */ + t1 = w_h - t3 * ln2_32hi; + t2 = t3 * ln2_32lo; + w = w_l - t2; + w_h = t1 + w_l; + w_l = t2 - (w_l - (w_h - t1)); + + /* compute exp(w_h+w_l) */ + z = w_h - w_l; + z2 = z * z; + t1 = z2 * (Et1 + z2 * (Et3 + z2 * Et5)); + t2 = z2 * (Et2 + z2 * Et4); + t3 = w_h - (w_l - (t1 + z * t2)); + zz.l = S_trail[j] * (one + t3) + S[j] * t3; + zz.h = S[j]; + return (zz); +} + +/* INDENT OFF */ +/* + * kpsin(x)= sin(pi*x)/pi + * 3 5 7 9 11 13 15 + * = x+ks[0]*x +ks[1]*x +ks[2]*x +ks[3]*x +ks[4]*x +ks[5]*x +ks[6]*x + */ +static const double ks[] = { + -1.64493406684822640606569, + +8.11742425283341655883668741874008920850698590621e-0001, + -1.90751824120862873825597279118304943994042258291e-0001, + +2.61478477632554278317289628332654539353521911570e-0002, + -2.34607978510202710377617190278735525354347705866e-0003, + +1.48413292290051695897242899977121846763824221705e-0004, + -6.87730769637543488108688726777687262485357072242e-0006, +}; +/* INDENT ON */ + +/* assume x is not tiny and positive */ +static struct Double +kpsin(double x) { + double z, t1, t2, 
t3, t4; + struct Double xx; + + z = x * x; + xx.h = x; + t1 = z * x; + t2 = z * z; + t4 = t1 * ks[0]; + t3 = (t1 * z) * ((ks[1] + z * ks[2] + t2 * ks[3]) + (z * t2) * + (ks[4] + z * ks[5] + t2 * ks[6])); + xx.l = t4 + t3; + return (xx); +} + +/* INDENT OFF */ +/* + * kpcos(x)= cos(pi*x)/pi + * 2 4 6 8 10 12 + * = 1/pi +kc[0]*x +kc[1]*x +kc[2]*x +kc[3]*x +kc[4]*x +kc[5]*x + */ + +static const double one_pi_h = 0.318309886183790635705292970, + one_pi_l = 3.583247455607534006714276420e-17; +static const double npi_2_h = -1.5625, + npi_2_l = -0.00829632679489661923132169163975055099555883223; +static const double kc[] = { + -1.57079632679489661923132169163975055099555883223e+0000, + +1.29192819501230224953283586722575766189551966008e+0000, + -4.25027339940149518500158850753393173519732149213e-0001, + +7.49080625187015312373925142219429422375556727752e-0002, + -8.21442040906099210866977352284054849051348692715e-0003, + +6.10411356829515414575566564733632532333904115968e-0004, +}; +/* INDENT ON */ + +/* assume x is not tiny and positive */ +static struct Double +kpcos(double x) { + double z, t1, t2, t3, t4, x4, x8; + struct Double xx; + + z = x * x; + xx.h = one_pi_h; + t1 = (double) ((float) x); + x4 = z * z; + t2 = npi_2_l * z + npi_2_h * (x + t1) * (x - t1); + t3 = one_pi_l + x4 * ((kc[1] + z * kc[2]) + x4 * (kc[3] + z * + kc[4] + x4 * kc[5])); + t4 = t1 * t1; /* 48 bits mantissa */ + x8 = t2 + t3; + t4 *= npi_2_h; /* npi_2_h is 5 bits const. 
The product is exact */ + xx.l = x8 + t4; /* that will minimized the rounding error in xx.l */ + return (xx); +} + +/* INDENT OFF */ +static const double + /* 0.134861805732790769689793935774652917006 */ + t0z1 = 0.1348618057327907737708, + t0z1_l = -4.0810077708578299022531e-18, + /* 0.461632144968362341262659542325721328468 */ + t0z2 = 0.4616321449683623567850, + t0z2_l = -1.5522348162858676890521e-17, + /* 0.819773101100500601787868704921606996312 */ + t0z3 = 0.8197731011005006118708, + t0z3_l = -1.0082945122487103498325e-17; + /* 1.134861805732790769689793935774652917006 */ +/* INDENT ON */ + +/* gamma(x+i) for 0 <= x < 1 */ +static struct Double +gam_n(int i, double x) { + struct Double rr = {0.0L, 0.0L}, yy; + double r1, r2, t2, z, xh, xl, yh, yl, zh, z1, z2, zl, x5, wh, wl; + + /* compute yy = gamma(x+1) */ + if (x > 0.2845) { + if (x > 0.6374) { + r1 = x - t0z3; + r2 = (double) ((float) (r1 - t0z3_l)); + t2 = r1 - r2; + yy = GT3(r2, t2 - t0z3_l); + } else { + r1 = x - t0z2; + r2 = (double) ((float) (r1 - t0z2_l)); + t2 = r1 - r2; + yy = GT2(r2, t2 - t0z2_l); + } + } else { + r1 = x - t0z1; + r2 = (double) ((float) (r1 - t0z1_l)); + t2 = r1 - r2; + yy = GT1(r2, t2 - t0z1_l); + } + + /* compute gamma(x+i) = (x+i-1)*...*(x+1)*yy, 0<i<8 */ + switch (i) { + case 0: /* yy/x */ + r1 = one / x; + xh = (double) ((float) x); /* x is not tiny */ + rr.h = (double) ((float) ((yy.h + yy.l) * r1)); + rr.l = r1 * (yy.h - rr.h * xh) - + ((r1 * rr.h) * (x - xh) - r1 * yy.l); + break; + case 1: /* yy */ + rr.h = yy.h; + rr.l = yy.l; + break; + case 2: /* (x+1)*yy */ + z = x + one; /* may not be exact */ + zh = (double) ((float) z); + rr.h = zh * yy.h; + rr.l = z * yy.l + (x - (zh - one)) * yy.h; + break; + case 3: /* (x+2)*(x+1)*yy */ + z1 = x + one; + z2 = x + 2.0; + z = z1 * z2; + xh = (double) ((float) z); + zh = (double) ((float) z1); + xl = (x - (zh - one)) * (z2 + zh) - (xh - zh * (zh + one)); + rr.h = xh * yy.h; + rr.l = z * yy.l + xl * yy.h; + break; + + case 4: /* 
(x+1)*(x+3)*(x+2)*yy */ + z1 = x + 2.0; + z2 = (x + one) * (x + 3.0); + zh = z1; + __LO(zh) = 0; + __HI(zh) &= 0xfffffff8; /* zh 18 bits mantissa */ + zl = x - (zh - 2.0); + z = z1 * z2; + xh = (double) ((float) z); + xl = zl * (z2 + zh * (z1 + zh)) - (xh - zh * (zh * zh - one)); + rr.h = xh * yy.h; + rr.l = z * yy.l + xl * yy.h; + break; + case 5: /* ((x+1)*(x+4)*(x+2)*(x+3))*yy */ + z1 = x + 2.0; + z2 = x + 3.0; + z = z1 * z2; + zh = (double) ((float) z1); + yh = (double) ((float) z); + yl = (x - (zh - 2.0)) * (z2 + zh) - (yh - zh * (zh + one)); + z2 = z - 2.0; + z *= z2; + xh = (double) ((float) z); + xl = yl * (z2 + yh) - (xh - yh * (yh - 2.0)); + rr.h = xh * yy.h; + rr.l = z * yy.l + xl * yy.h; + break; + case 6: /* ((x+1)*(x+2)*(x+3)*(x+4)*(x+5))*yy */ + z1 = x + 2.0; + z2 = x + 3.0; + z = z1 * z2; + zh = (double) ((float) z1); + yh = (double) ((float) z); + z1 = x - (zh - 2.0); + yl = z1 * (z2 + zh) - (yh - zh * (zh + one)); + z2 = z - 2.0; + x5 = x + 5.0; + z *= z2; + xh = (double) ((float) z); + zh += 3.0; + xl = yl * (z2 + yh) - (xh - yh * (yh - 2.0)); + /* xh+xl=(x+1)*...*(x+4) */ + /* wh+wl=(x+5)*yy */ + wh = (double) ((float) (x5 * (yy.h + yy.l))); + wl = (z1 * yy.h + x5 * yy.l) - (wh - zh * yy.h); + rr.h = wh * xh; + rr.l = z * wl + xl * wh; + break; + case 7: /* ((x+1)*(x+2)*(x+3)*(x+4)*(x+5)*(x+6))*yy */ + z1 = x + 3.0; + z2 = x + 4.0; + z = z2 * z1; + zh = (double) ((float) z1); + yh = (double) ((float) z); /* yh+yl = (x+3)(x+4) */ + yl = (x - (zh - 3.0)) * (z2 + zh) - (yh - (zh * (zh + one))); + z1 = x + 6.0; + z2 = z - 2.0; /* z2 = (x+2)*(x+5) */ + z *= z2; + xh = (double) ((float) z); + xl = yl * (z2 + yh) - (xh - yh * (yh - 2.0)); + /* xh+xl=(x+2)*...*(x+5) */ + /* wh+wl=(x+1)(x+6)*yy */ + z2 -= 4.0; /* z2 = (x+1)(x+6) */ + wh = (double) ((float) (z2 * (yy.h + yy.l))); + wl = (z2 * yy.l + yl * yy.h) - (wh - (yh - 6.0) * yy.h); + rr.h = wh * xh; + rr.l = z * wl + xl * wh; + } + return (rr); +} + +double +tgamma(double x) { + struct Double ss, 
ww; + double t, t1, t2, t3, t4, t5, w, y, z, z1, z2, z3, z5; + int i, j, k, m, ix, hx, xk; + unsigned lx; + + hx = __HI(x); + lx = __LO(x); + ix = hx & 0x7fffffff; + y = x; + + if (ix < 0x3ca00000) + return (one / x); /* |x| < 2**-53 */ + if (ix >= 0x7ff00000) + /* +Inf -> +Inf, -Inf or NaN -> NaN */ + return (x * ((hx < 0)? 0.0 : x)); + if (hx > 0x406573fa || /* x > 171.62... overflow to +inf */ + (hx == 0x406573fa && lx > 0xE561F647)) { + z = x / tiny; + return (z * z); + } + if (hx >= 0x40200000) { /* x >= 8 */ + ww = large_gam(x, &m); + w = ww.h + ww.l; + __HI(w) += m << 20; + return (w); + } + if (hx > 0) { /* 0 < x < 8 */ + i = (int) x; + ww = gam_n(i, x - (double) i); + return (ww.h + ww.l); + } + + /* negative x */ + /* INDENT OFF */ + /* + * compute: xk = + * -2 ... x is an even int (-inf is even) + * -1 ... x is an odd int + * +0 ... x is not an int but chopped to an even int + * +1 ... x is not an int but chopped to an odd int + */ + /* INDENT ON */ + xk = 0; + if (ix >= 0x43300000) { + if (ix >= 0x43400000) + xk = -2; + else + xk = -2 + (lx & 1); + } else if (ix >= 0x3ff00000) { + k = (ix >> 20) - 0x3ff; + if (k > 20) { + j = lx >> (52 - k); + if ((j << (52 - k)) == lx) + xk = -2 + (j & 1); + else + xk = j & 1; + } else { + j = ix >> (20 - k); + if ((j << (20 - k)) == ix && lx == 0) + xk = -2 + (j & 1); + else + xk = j & 1; + } + } + if (xk < 0) + /* ideally gamma(-n)= (-1)**(n+1) * inf, but c99 expect NaN */ + return ((x - x) / (x - x)); /* 0/0 = NaN */ + + + /* negative underflow thresold */ + if (ix > 0x4066e000 || (ix == 0x4066e000 && lx > 11)) { + /* x < -183.0 - 11ulp */ + z = tiny / x; + if (xk == 1) + z = -z; + return (z * tiny); + } + + /* now compute gamma(x) by -1/((sin(pi*y)/pi)*gamma(1+y)), y = -x */ + + /* + * First compute ss = -sin(pi*y)/pi , so that + * gamma(x) = 1/(ss*gamma(1+y)) + */ + y = -x; + j = (int) y; + z = y - (double) j; + if (z > 0.3183098861837906715377675) + if (z > 0.6816901138162093284622325) + ss = kpsin(one - z); + 
else + ss = kpcos(0.5 - z); + else + ss = kpsin(z); + if (xk == 0) { + ss.h = -ss.h; + ss.l = -ss.l; + } + + /* Then compute ww = gamma(1+y), note that result scale to 2**m */ + m = 0; + if (j < 7) { + ww = gam_n(j + 1, z); + } else { + w = y + one; + if ((lx & 1) == 0) { /* y+1 exact (note that y<184) */ + ww = large_gam(w, &m); + } else { + t = w - one; + if (t == y) { /* y+one exact */ + ww = large_gam(w, &m); + } else { /* use y*gamma(y) */ + if (j == 7) + ww = gam_n(j, z); + else + ww = large_gam(y, &m); + t4 = ww.h + ww.l; + t1 = (double) ((float) y); + t2 = (double) ((float) t4); + /* t4 will not be too large */ + ww.l = y * (ww.l - (t2 - ww.h)) + (y - t1) * t2; + ww.h = t1 * t2; + } + } + } + + /* compute 1/(ss*ww) */ + t3 = ss.h + ss.l; + t4 = ww.h + ww.l; + t1 = (double) ((float) t3); + t2 = (double) ((float) t4); + z1 = ss.l - (t1 - ss.h); /* (t1,z1) = ss */ + z2 = ww.l - (t2 - ww.h); /* (t2,z2) = ww */ + t3 = t3 * t4; /* t3 = ss*ww */ + z3 = one / t3; /* z3 = 1/(ss*ww) */ + t5 = t1 * t2; + z5 = z1 * t4 + t1 * z2; /* (t5,z5) = ss*ww */ + t1 = (double) ((float) t3); /* (t1,z1) = ss*ww */ + z1 = z5 - (t1 - t5); + t2 = (double) ((float) z3); /* leading 1/(ss*ww) */ + z2 = z3 * (t2 * z1 - (one - t2 * t1)); + z = t2 - z2; + + /* check whether z*2**-m underflow */ + if (m != 0) { + hx = __HI(z); + i = hx & 0x80000000; + ix = hx ^ i; + j = ix >> 20; + if (j > m) { + ix -= m << 20; + __HI(z) = ix ^ i; + } else if ((m - j) > 52) { + /* underflow */ + if (xk == 0) + z = -tiny * tiny; + else + z = tiny * tiny; + } else { + /* subnormal */ + m -= 60; + t = one; + __HI(t) -= 60 << 20; + ix -= m << 20; + __HI(z) = ix ^ i; + z *= t; + } + } + return (z); +} diff --git a/usr/src/lib/libm/common/m9x/tgammaf.c b/usr/src/lib/libm/common/m9x/tgammaf.c new file mode 100644 index 0000000000..7f6fc06e45 --- /dev/null +++ b/usr/src/lib/libm/common/m9x/tgammaf.c @@ -0,0 +1,547 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * 
Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma weak tgammaf = __tgammaf + +/* + * True gamma function + * + * float tgammaf(float x) + * + * Algorithm: see tgamma.c + * + * Maximum error observed: 0.87ulp (both positive and negative arguments) + */ + +#include "libm.h" +#include "libm_synonyms.h" +#include <math.h> +#if defined(__SUNPRO_C) +#include <sunmath.h> +#endif +#include <sys/isa_defs.h> + +#if defined(_BIG_ENDIAN) +#define HIWORD 0 +#define LOWORD 1 +#else +#define HIWORD 1 +#define LOWORD 0 +#endif +#define __HI(x) ((int *) &x)[HIWORD] +#define __LO(x) ((unsigned *) &x)[LOWORD] + +/* Coefficients for primary intervals GTi() */ +static const double cr[] = { + /* p1 */ + +7.09087253435088360271451613398019280077561279443e-0001, + -5.17229560788652108545141978238701790105241761089e-0001, + +5.23403394528150789405825222323770647162337764327e-0001, + -4.54586308717075010784041566069480411732634814899e-0001, + +4.20596490915239085459964590559256913498190955233e-0001, + -3.57307589712377520978332185838241458642142185789e-0001, + + /* p2 */ + 
+4.28486983980295198166056119223984284434264344578e-0001, + -1.30704539487709138528680121627899735386650103914e-0001, + +1.60856285038051955072861219352655851542955430871e-0001, + -9.22285161346010583774458802067371182158937943507e-0002, + +7.19240511767225260740890292605070595560626179357e-0002, + -4.88158265593355093703112238534484636193260459574e-0002, + + /* p3 */ + +3.82409531118807759081121479786092134814808872880e-0001, + +2.65309888180188647956400403013495759365167853426e-0002, + +8.06815109775079171923561169415370309376296739835e-0002, + -1.54821591666137613928840890835174351674007764799e-0002, + +1.76308239242717268530498313416899188157165183405e-0002, + + /* GZi and TZi */ + +0.9382046279096824494097535615803269576988, /* GZ1 */ + +0.8856031944108887002788159005825887332080, /* GZ2 */ + +0.9367814114636523216188468970808378497426, /* GZ3 */ + -0.3517214357852935791015625, /* TZ1 */ + +0.280530631542205810546875, /* TZ3 */ +}; + +#define P10 cr[0] +#define P11 cr[1] +#define P12 cr[2] +#define P13 cr[3] +#define P14 cr[4] +#define P15 cr[5] +#define P20 cr[6] +#define P21 cr[7] +#define P22 cr[8] +#define P23 cr[9] +#define P24 cr[10] +#define P25 cr[11] +#define P30 cr[12] +#define P31 cr[13] +#define P32 cr[14] +#define P33 cr[15] +#define P34 cr[16] +#define GZ1 cr[17] +#define GZ2 cr[18] +#define GZ3 cr[19] +#define TZ1 cr[20] +#define TZ3 cr[21] + +/* compute gamma(y) for y in GT1 = [1.0000, 1.2845] */ +static double +GT1(double y) { + double z, r; + + z = y * y; + r = TZ1 * y + z * ((P10 + y * P11 + z * P12) + (z * y) * (P13 + y * + P14 + z * P15)); + return (GZ1 + r); +} + +/* compute gamma(y) for y in GT2 = [1.2844, 1.6374] */ +static double +GT2(double y) { + double z; + + z = y * y; + return (GZ2 + z * ((P20 + y * P21 + z * P22) + (z * y) * (P23 + y * + P24 + z * P25))); +} + +/* compute gamma(y) for y in GT3 = [1.6373, 2.0000] */ +static double +GT3(double y) { +double z, r; + + z = y * y; + r = TZ3 * y + z * ((P30 + y * P31 + z * P32) + (z * 
y) * (P33 + y * + P34)); + return (GZ3 + r); +} + +/* INDENT OFF */ +static const double c[] = { ++1.0, ++2.0, ++0.5, ++1.0e-300, ++6.666717231848518054693623697539230e-0001, /* A1=T3[0] */ ++8.33333330959694065245736888749042811909994573178e-0002, /* GP[0] */ +-2.77765545601667179767706600890361535225507762168e-0003, /* GP[1] */ ++7.77830853479775281781085278324621033523037489883e-0004, /* GP[2] */ ++4.18938533204672741744150788368695779923320328369e-0001, /* hln2pi */ ++2.16608493924982901946e-02, /* ln2_32 */ ++4.61662413084468283841e+01, /* invln2_32 */ ++5.00004103388988968841156421415669985414073453720e-0001, /* Et1 */ ++1.66667656752800761782778277828110208108687545908e-0001, /* Et2 */ +}; + +#define one c[0] +#define two c[1] +#define half c[2] +#define tiny c[3] +#define A1 c[4] +#define GP0 c[5] +#define GP1 c[6] +#define GP2 c[7] +#define hln2pi c[8] +#define ln2_32 c[9] +#define invln2_32 c[10] +#define Et1 c[11] +#define Et2 c[12] + +/* S[j] = 2**(j/32.) for the final computation of exp(w) */ +static const double S[] = { ++1.00000000000000000000e+00, /* 3FF0000000000000 */ ++1.02189714865411662714e+00, /* 3FF059B0D3158574 */ ++1.04427378242741375480e+00, /* 3FF0B5586CF9890F */ ++1.06714040067682369717e+00, /* 3FF11301D0125B51 */ ++1.09050773266525768967e+00, /* 3FF172B83C7D517B */ ++1.11438674259589243221e+00, /* 3FF1D4873168B9AA */ ++1.13878863475669156458e+00, /* 3FF2387A6E756238 */ ++1.16372485877757747552e+00, /* 3FF29E9DF51FDEE1 */ ++1.18920711500272102690e+00, /* 3FF306FE0A31B715 */ ++1.21524735998046895524e+00, /* 3FF371A7373AA9CB */ ++1.24185781207348400201e+00, /* 3FF3DEA64C123422 */ ++1.26905095719173321989e+00, /* 3FF44E086061892D */ ++1.29683955465100964055e+00, /* 3FF4BFDAD5362A27 */ ++1.32523664315974132322e+00, /* 3FF5342B569D4F82 */ ++1.35425554693689265129e+00, /* 3FF5AB07DD485429 */ ++1.38390988196383202258e+00, /* 3FF6247EB03A5585 */ ++1.41421356237309514547e+00, /* 3FF6A09E667F3BCD */ ++1.44518080697704665027e+00, /* 
3FF71F75E8EC5F74 */ ++1.47682614593949934623e+00, /* 3FF7A11473EB0187 */ ++1.50916442759342284141e+00, /* 3FF82589994CCE13 */ ++1.54221082540794074411e+00, /* 3FF8ACE5422AA0DB */ ++1.57598084510788649659e+00, /* 3FF93737B0CDC5E5 */ ++1.61049033194925428347e+00, /* 3FF9C49182A3F090 */ ++1.64575547815396494578e+00, /* 3FFA5503B23E255D */ ++1.68179283050742900407e+00, /* 3FFAE89F995AD3AD */ ++1.71861929812247793414e+00, /* 3FFB7F76F2FB5E47 */ ++1.75625216037329945351e+00, /* 3FFC199BDD85529C */ ++1.79470907500310716820e+00, /* 3FFCB720DCEF9069 */ ++1.83400808640934243066e+00, /* 3FFD5818DCFBA487 */ ++1.87416763411029996256e+00, /* 3FFDFC97337B9B5F */ ++1.91520656139714740007e+00, /* 3FFEA4AFA2A490DA */ ++1.95714412417540017941e+00, /* 3FFF50765B6E4540 */ +}; +/* INDENT ON */ + +/* INDENT OFF */ +/* + * return tgammaf(x) in double for 8<x<=35.040096283... using Stirling's formula + * log(G(x)) ~= (x-.5)*(log(x)-1) + .5(log(2*pi)-1) + (1/x)*P(1/(x*x)) + */ +/* + * compute ss = log(x)-1 + * + * log(x) - 1 = T1(n) + T2(j) + T3(s), where x = 2**n * y, 1<=y<2, + * j=[64*y], z[j]=1+j/64+1/128, s = (y-z[j])/(y+z[j]), and + * T1(n-3) = n*log(2)-1, n=3,4,5 + * T2(j) = log(z[j]), + * T3(s) = 2s + A1*s^3 + * Note + * (1) Remez error for T3(s) is bounded by 2**(-35.8) + * (see mpremez/work/Log/tgamma_log_2_outr1) + */ + +static const double T1[] = { /* T1[j]=(j+3)*log(2)-1 */ ++1.079441541679835928251696364375e+00, ++1.772588722239781237668928485833e+00, ++2.465735902799726547086160607291e+00, +}; + +static const double T2[] = { /* T2[j]=log(1+j/64+1/128) */ ++7.782140442054948947462900061137e-03, ++2.316705928153437822879916096229e-02, ++3.831886430213659919375532512380e-02, ++5.324451451881228286587019378653e-02, ++6.795066190850774939456527777263e-02, ++8.244366921107459126816006866831e-02, ++9.672962645855111229557105648746e-02, ++1.108143663402901141948061693232e-01, ++1.247034785009572358634065153809e-01, ++1.384023228591191356853258736016e-01, 
++1.519160420258419750718034248969e-01, ++1.652495728953071628756114492772e-01, ++1.784076574728182971194002415109e-01, ++1.913948529996294546092988075613e-01, ++2.042155414286908915038203861962e-01, ++2.168739383006143596190895257443e-01, ++2.293741010648458299914807250461e-01, ++2.417199368871451681443075159135e-01, ++2.539152099809634441373232979066e-01, ++2.659635484971379413391259265375e-01, ++2.778684510034563061863500329234e-01, ++2.896332925830426768788930555257e-01, ++3.012613305781617810128755382338e-01, ++3.127557100038968883862465596883e-01, ++3.241194686542119760906707604350e-01, ++3.353555419211378302571795798142e-01, ++3.464667673462085809184621884258e-01, ++3.574558889218037742260094901409e-01, ++3.683255611587076530482301540504e-01, ++3.790783529349694583908533456310e-01, ++3.897167511400252133704636040035e-01, ++4.002431641270127069293251019951e-01, ++4.106599249852683859343062031758e-01, ++4.209692946441296361288671615068e-01, ++4.311734648183713408591724789556e-01, ++4.412745608048752294894964416613e-01, ++4.512746441394585851446923830790e-01, ++4.611757151221701663679999255979e-01, ++4.709797152187910125468978560564e-01, ++4.806885293457519076766184554480e-01, ++4.903039880451938381503461596457e-01, ++4.998278695564493298213314152470e-01, ++5.092619017898079468040749192283e-01, ++5.186077642080456321529769963648e-01, ++5.278670896208423851138922177783e-01, ++5.370414658968836545667292441538e-01, ++5.461324375981356503823972092312e-01, ++5.551415075405015927154803595159e-01, ++5.640701382848029660713842900902e-01, ++5.729197535617855090927567266263e-01, ++5.816917396346224825206107537254e-01, ++5.903874466021763746419167081236e-01, ++5.990081896460833993816000244617e-01, ++6.075552502245417955010851527911e-01, ++6.160298772155140196475659281967e-01, ++6.244332880118935010425387440547e-01, ++6.327666695710378295457864685036e-01, ++6.410311794209312910556013344054e-01, ++6.492279466251098188908399699053e-01, ++6.573580727083600301418900232459e-01, 
++6.654226325450904489500926100067e-01, ++6.734226752121667202979603888010e-01, ++6.813592248079030689480715595681e-01, ++6.892332812388089803249143378146e-01, +}; +/* INDENT ON */ +/* + * large_gam(x): gamma(x) for large x. The lookup T1[m - 3] needs a binary + * exponent m of at least 3, and tgammaf() only calls this with arguments + * of 8 or more. + * + * 1. Split x = 2**m * y with y in [1,2), pick z from the leading mantissa + * bits of y, and form ss = log(x)-1 from T1[], T2[] and a short series in + * u = (y-z)/(y+z). + * 2. Form w from ss, hln2pi and a short series in 1/x. + * 3. Exponentiate w: remove k*ln2_32, multiply S[k & 0x1f] by a short + * polynomial in the remainder, and add k >> 5 to the exponent field. + * + * T1, T2, S, A1, GP0-GP2, Et1, Et2 and the scalar constants used here are + * defined earlier in this file. + */ +static double +large_gam(double x) { + double ss, zz, z, t1, t2, w, y, u; + unsigned lx; + int k, ix, j, m; + + ix = __HI(x); /* word holding the exponent and leading mantissa bits */ + lx = __LO(x); + m = (ix >> 20) - 0x3ff; /* exponent of x, range:3-5 */ + ix = (ix & 0x000fffff) | 0x3ff00000; /* y = scale x to [1,2] */ + __HI(y) = ix; + __LO(y) = lx; + __HI(z) = (ix & 0xffffc000) | 0x2000; /* z[j]=1+j/64+1/128 */ + __LO(z) = 0; + j = (ix >> 14) & 0x3f; /* top six mantissa bits index T2[] */ + t1 = y + z; + t2 = y - z; + u = t2 / t1; + ss = T1[m - 3] + T2[j] + u * (two + A1 * (u * u)); + /* ss = log(x)-1 */ + /* + * compute ww = (x-.5)*(log(x)-1) + .5*(log(2pi)-1) + (1/x)*P(1/x^2) + * where ss = log(x) - 1 + */ + z = one / x; + zz = z * z; + w = ((x - half) * ss + hln2pi) + z * (GP0 + zz * GP1 + (zz * zz) * GP2); + k = (int) (w * invln2_32 + half); /* add half, then truncate */ + + /* compute the exponential of w */ + j = k & 0x1f; + m = k >> 5; + z = w - (double) k *ln2_32; /* what is left of w after removing k*ln2_32 */ + zz = S[j] * (one + z + (z * z) * (Et1 + z * Et2)); + __HI(zz) += m << 20; /* add m to the exponent field: multiply by 2**m */ + return (zz); +} +/* INDENT OFF */ +/* + * kpsin(x) = sin(pi*x)/pi + * = x + ks[0]*x^3 + ks[1]*x^5 + ks[2]*x^7 + ks[3]*x^9 + * + * tgammaf() only passes arguments between 0 and 0.3183... + */ +static const double ks[] = { +-1.64493404985645811354476665052005342839447790544e+0000, ++8.11740794458351064092797249069438269367389272270e-0001, +-1.90703144603551216933075809162889536878854055202e-0001, ++2.55742333994264563281155312271481108635575331201e-0002, +}; +/* INDENT ON */ + +static double +kpsin(double x) { + double z; + + z = x * x; /* the series is x times an even polynomial in z */ + return (x + (x * z) * ((ks[0] + z * ks[1]) + (z * z) * (ks[2] + z * + ks[3]))); +} + +/* INDENT OFF */ +/* + * kpcos(x) = cos(pi*x)/pi + * = kc[0] + kc[1]*x^2 + kc[2]*x^4 + kc[3]*x^6 + */ +static const double kc[] = { ++3.18309886183790671537767526745028724068919291480e-0001, +-1.57079581447762568199467875065854538626594937791e+0000, ++1.29183528092558692844073004029568674027807393862e+0000, 
+-4.20232949771307685981015914425195471602739075537e-0001, +}; +/* INDENT ON */ +/* + * kpcos(x): the even polynomial kc[0] + kc[1]*z + kc[2]*z^2 + kc[3]*z^3 in + * z = x*x. tgammaf() calls it as kpcos(0.5 - z) when the fractional part z + * lies in (0.3183..., 0.6816...]. + */ +static double +kpcos(double x) { + double z; + + z = x * x; + return (kc[0] + z * (kc[1] + z * kc[2] + (z * z) * kc[3])); +} + +/* INDENT OFF */ +static const double +t0z1 = 0.134861805732790769689793935774652917006, +t0z2 = 0.461632144968362341262659542325721328468, +t0z3 = 0.819773101100500601787868704921606996312; + /* 1.134861805732790769689793935774652917006 */ +/* INDENT ON */ + +/* + * gamma(x+i) for 0 <= x < 1 and i = 0..7 + * + * yy = gamma(x+1) comes from GT1, GT2 or GT3 (defined earlier in this file), + * selected by x and called with x shifted by the matching t0z constant. + * The switch then multiplies yy by the factors (x+1) ... (x+i-1), or divides + * by x when i is 0. The switch has no default, so any other i returns the + * 0.0 that rr is initialized with. + */ +static double +gam_n(int i, double x) { + double rr = 0.0L, yy; + double z1, z2; + + /* compute yy = gamma(x+1) */ + if (x > 0.2845) { + if (x > 0.6374) + yy = GT3(x - t0z3); + else + yy = GT2(x - t0z2); + } else + yy = GT1(x - t0z1); + + /* compute gamma(x+i) = (x+i-1)*...*(x+1)*yy for 0<i<8; i == 0 divides */ + switch (i) { + case 0: /* yy/x */ + rr = yy / x; + break; + case 1: /* yy */ + rr = yy; + break; + case 2: /* (x+1)*yy */ + rr = (x + one) * yy; + break; + case 3: /* (x+2)*(x+1)*yy */ + rr = (x + one) * (x + two) * yy; + break; + + case 4: /* (x+1)*(x+3)*(x+2)*yy */ + rr = (x + one) * (x + two) * ((x + 3.0) * yy); + break; + case 5: /* ((x+1)*(x+4)*(x+2)*(x+3))*yy */ + z1 = (x + two) * (x + 3.0) * yy; + z2 = (x + one) * (x + 4.0); + rr = z1 * z2; + break; + case 6: /* ((x+1)*(x+2)*(x+3)*(x+4)*(x+5))*yy */ + z1 = (x + two) * (x + 3.0); + z2 = (x + 5.0) * yy; + rr = z1 * (z1 - two) * z2; /* z1 - two supplies the (x+1)*(x+4) factors */ + break; + case 7: /* ((x+1)*(x+2)*(x+3)*(x+4)*(x+5)*(x+6))*yy */ + z1 = (x + two) * (x + 3.0); + z2 = (x + 5.0) * (x + 6.0) * yy; + rr = z1 * (z1 - two) * z2; /* same (x+1)*(x+4) identity as case 6 */ + break; + } + return (rr); +} +/* + * tgammaf(xf): gamma function for float, evaluated in double. + * + * The bit pattern of xf is read into hx (ix is hx without the sign bit) and + * the cases are taken in this order: + * ix < 0x33800000 return 1/xf + * Inf or NaN return xf*xf, or xf*0 when the sign bit is set + * hx > 0x420C290F return x/tiny (overflow) + * x >= 8 large_gam(x) + * 0 < x < 8 gam_n(i, x - i) with i = (int) xf + * negative integer return 0/0 + * ix > 0x4224000B return -tiny or tiny (underflow) + * other negative x 1/(ss*ww), see the comments near the end + */ +float +tgammaf(float xf) { + float zf; + double ss, ww; + double x, y, z; + int i, j, k, ix, hx, xk; + + hx = *(int *) &xf; /* raw bit pattern of xf */ + ix = hx & 0x7fffffff; + + x = (double) xf; + if (ix < 0x33800000) + return (1.0F / xf); /* |x| < 2**-24 */ + + if (ix >= 0x7f800000) + return (xf * ((hx < 0)? 0.0F : xf)); /* +-Inf or NaN */ + + if (hx > 0x420C290F) /* x > 35.040096283... 
overflow */ + return (float)(x / tiny); + + if (hx >= 0x41000000) /* x >= 8 */ + return ((float) large_gam(x)); + + if (hx > 0) { /* 0 < x < 8 */ + i = (int) xf; /* integer part selects the gam_n case */ + return ((float) gam_n(i, x - (double) i)); + } + + /* negative x */ + /* INDENT OFF */ + /* + * compute xk = + * -2 ... x is an even int (-inf is considered even) + * -1 ... x is an odd int + * +0 ... x is not an int but chopped to an even int + * +1 ... x is not an int but chopped to an odd int + * + * Only xk < 0 and xk == 0 are tested afterwards. + * NOTE(review): every ix above 0x4b000000 (2**23) is reported as -2 + * although odd integers exist below 2**24; this looks harmless because + * -1 and -2 are handled alike -- confirm. + */ + /* INDENT ON */ + xk = 0; + if (ix >= 0x4b000000) { + if (ix > 0x4b000000) + xk = -2; + else + xk = -2 + (ix & 1); + } else if (ix >= 0x3f800000) { + k = (ix >> 23) - 0x7f; /* unbiased exponent, 0..22 in this branch */ + j = ix >> (23 - k); + if ((j << (23 - k)) == ix) /* no bits set below the units place */ + xk = -2 + (j & 1); + else + xk = j & 1; + } + if (xk < 0) { + /* 0/0 invalid NaN, ideally gamma(-n)= (-1)**(n+1) * inf */ + zf = xf - xf; + return (zf / zf); + } + + /* negative underflow threshold */ + if (ix > 0x4224000B) { /* x < -(41+11ulp) */ + if (xk == 0) + z = -tiny; + else + z = tiny; + return ((float)z); + } + + /* INDENT OFF */ + /* now compute gamma(x) by -1/((sin(pi*y)/pi)*gamma(1+y)), y = -x */ + /* + * First compute ss = -sin(pi*y)/pi , so that + * gamma(x) = 1/(ss*gamma(1+y)) + */ + /* INDENT ON */ + y = -x; + j = (int) y; + z = y - (double) j; /* fractional part of y; the kernel chosen below + * always gets an argument of magnitude at most 0.3183... + */ + if (z > 0.3183098861837906715377675) + if (z > 0.6816901138162093284622325) + ss = kpsin(one - z); + else + ss = kpcos(0.5 - z); + else + ss = kpsin(z); + if (xk == 0) + ss = -ss; + + /* Then compute ww = gamma(1+y) */ + if (j < 7) + ww = gam_n(j + 1, z); /* j + 1 <= 7 stays within the cases of gam_n */ + else + ww = large_gam(y + one); + + /* return 1/(ss*ww) */ + return ((float) (one / (ww * ss))); +} diff --git a/usr/src/lib/libm/common/m9x/tgammal.c b/usr/src/lib/libm/common/m9x/tgammal.c new file mode 100644 index 0000000000..9901532e8e --- /dev/null +++ b/usr/src/lib/libm/common/m9x/tgammal.c @@ -0,0 +1,1166 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). 
+ * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#if defined(ELFOBJ) +#pragma weak tgammal = __tgammal +#endif + +#include "libm.h" +#include <sys/isa_defs.h> + +#if defined(_BIG_ENDIAN) +#define H0_WORD(x) ((unsigned *) &x)[0] +#define H3_WORD(x) ((unsigned *) &x)[3] +#define CHOPPED(x) (long double) ((double) (x)) +#else +#define H0_WORD(x) ((((int *) &x)[2] << 16) | \ + (0x0000ffff & (((unsigned *) &x)[1] >> 15))) +#define H3_WORD(x) ((unsigned *) &x)[0] +#define CHOPPED(x) (long double) ((float) (x)) +#endif + +struct LDouble { + long double h, l; +}; + +/* INDENT OFF */ +/* Primary interval GTi() */ +static const long double P1[] = { + +0.709086836199777919037185741507610124611513720557L, + +4.45754781206489035827915969367354835667391606951e-0001L, + +3.21049298735832382311662273882632210062918153852e-0002L, + -5.71296796342106617651765245858289197369688864350e-0003L, + +6.04666892891998977081619174969855831606965352773e-0003L, + +8.99106186996888711939627812174765258822658645168e-0004L, + -6.96496846144407741431207008527018441810175568949e-0005L, + +1.52597046118984020814225409300131445070213882429e-0005L, + 
+5.68521076168495673844711465407432189190681541547e-0007L, + +3.30749673519634895220582062520286565610418952979e-0008L, +}; +static const long double Q1[] = { + +1.0+0000L, + +1.35806511721671070408570853537257079579490650668e+0000L, + +2.97567810153429553405327140096063086994072952961e-0001L, + -1.52956835982588571502954372821681851681118097870e-0001L, + -2.88248519561420109768781615289082053597954521218e-0002L, + +1.03475311719937405219789948456313936302378395955e-0002L, + +4.12310203243891222368965360124391297374822742313e-0004L, + -3.12653708152290867248931925120380729518332507388e-0004L, + +2.36672170850409745237358105667757760527014332458e-0005L, +}; +static const long double P2[] = { + +0.428486815855585429730209907810650135255270600668084114L, + +2.62768479103809762805691743305424077975230551176e-0001L, + +3.81187532685392297608310837995193946591425896150e-0002L, + +3.00063075891811043820666846129131255948527925381e-0003L, + +2.47315407812279164228398470797498649142513408654e-0003L, + +3.62838199917848372586173483147214880464782938664e-0004L, + +3.43991105975492623982725644046473030098172692423e-0006L, + +4.56902151569603272237014240794257659159045432895e-0006L, + +2.13734755837595695602045100675540011352948958453e-0007L, + +9.74123440547918230781670266967882492234877125358e-0009L, +}; +static const long double Q2[] = { + +1.0L, + +9.18284118632506842664645516830761489700556179701e-0001L, + -6.41430858837830766045202076965923776189154874947e-0003L, + -1.24400885809771073213345747437964149775410921376e-0001L, + +4.69803798146251757538856567522481979624746875964e-0003L, + +7.18309447069495315914284705109868696262662082731e-0003L, + -8.75812626987894695112722600697653425786166399105e-0004L, + -1.23539972377769277995959339188431498626674835169e-0004L, + +3.10019017590151598732360097849672925448587547746e-0005L, + -1.77260223349332617658921874288026777465782364070e-0006L, +}; +static const long double P3[] = { + 
+0.3824094797345675048502747661075355640070439388902L, + +3.42198093076618495415854906335908427159833377774e-0001L, + +9.63828189500585568303961406863153237440702754858e-0002L, + +8.76069421042696384852462044188520252156846768667e-0003L, + +1.86477890389161491224872014149309015261897537488e-0003L, + +8.16871354540309895879974742853701311541286944191e-0004L, + +6.83783483674600322518695090864659381650125625216e-0005L, + -1.10168269719261574708565935172719209272190828456e-0006L, + +9.66243228508380420159234853278906717065629721016e-0007L, + +2.31858885579177250541163820671121664974334728142e-0008L, +}; +static const long double Q3[] = { + +1.0L, + +8.25479821168813634632437430090376252512793067339e-0001L, + -1.62251363073937769739639623669295110346015576320e-0002L, + -1.10621286905916732758745130629426559691187579852e-0001L, + +3.48309693970985612644446415789230015515365291459e-0003L, + +6.73553737487488333032431261131289672347043401328e-0003L, + -7.63222008393372630162743587811004613050245128051e-0004L, + -1.35792670669190631476784768961953711773073251336e-0004L, + +3.19610150954223587006220730065608156460205690618e-0005L, + -1.82096553862822346610109522015129585693354348322e-0006L, +}; + +static const long double +#if defined(__x86) +GZ1_h = 0.938204627909682449364570100414084663498215377L, +GZ1_l = 4.518346116624229420055327632718530617227944106e-20L, +GZ2_h = 0.885603194410888700264725126309883762587560340L, +GZ2_l = 1.409077427270497062039119290776508217077297169e-20L, +GZ3_h = 0.936781411463652321613537060640553022494714241L, +GZ3_l = 5.309836440284827247897772963887219035221996813e-21L, +#else +GZ1_h = 0.938204627909682449409753561580326910854647031L, +GZ1_l = 4.684412162199460089642452580902345976446297037e-35L, +GZ2_h = 0.885603194410888700278815900582588658192658794L, +GZ2_l = 7.501529273890253789219935569758713534641074860e-35L, +GZ3_h = 0.936781411463652321618846897080837818855399840L, +GZ3_l = 3.088721217404784363585591914529361687403776917e-35L, +#endif 
+TZ1 = -0.3517214357852935791015625L, +TZ3 = 0.280530631542205810546875L; +/* INDENT ON */ + +/* INDENT OFF */ +/* + * compute gamma(y=yh+yl) for y in GT1 = [1.0000, 1.2845] + * ...assume yh got 53 or 24(i386) significant bits + * + * Evaluates GZ1 + TZ1*y + y*y*P1(y)/Q1(y); P1 (degree 9) and Q1 (degree 8) + * are run together in one Horner loop. The sum is returned as r.h, which is + * rounded through CHOPPED(), plus the remainder r.l. + */ +/* INDENT ON */ +static struct LDouble +GT1(long double yh, long double yl) { + long double t3, t4, y; + int i; + struct LDouble r; + + y = yh + yl; + for (t4 = Q1[8], t3 = P1[8] + y * P1[9], i = 7; i >= 0; i--) { + t4 = t4 * y + Q1[i]; + t3 = t3 * y + P1[i]; + } + t3 = (y * y) * t3 / t4; + t3 += (TZ1 * yl + GZ1_l); /* gather the small terms first */ + t4 = TZ1 * yh; + r.h = CHOPPED((t4 + GZ1_h + t3)); /* leading part */ + t3 += (t4 - (r.h - GZ1_h)); + r.l = t3; /* what the leading part left out */ + return (r); +} + +/* INDENT OFF */ +/* + * compute gamma(y=yh+yl) for y in GT2 = [1.2844, 1.6374] + * ...assume yh got 53 significant bits + * + * Evaluates GZ2 + y*y*P2(y)/Q2(y) with P2 and Q2 (both degree 9) in one + * Horner loop; unlike GT1 and GT3 there is no term linear in y. The sum is + * returned as r.h, rounded through CHOPPED(), plus the remainder r.l. + */ +/* INDENT ON */ +static struct LDouble +GT2(long double yh, long double yl) { + long double t3, t4, y; + int i; + struct LDouble r; + + y = yh + yl; + for (t4 = Q2[9], t3 = P2[9], i = 8; i >= 0; i--) { + t4 = t4 * y + Q2[i]; + t3 = t3 * y + P2[i]; + } + t3 = GZ2_l + (y * y) * t3 / t4; + r.h = CHOPPED((GZ2_h + t3)); /* leading part */ + r.l = t3 - (r.h - GZ2_h); /* what the leading part left out */ + return (r); +} + +/* INDENT OFF */ +/* + * compute gamma(y=yh+yl) for y in GT3 = [1.6373, 2.0000] + * ...assume yh got 53 significant bits + * + * Evaluates GZ3 + TZ3*y + y*y*P3(y)/Q3(y) with P3 and Q3 (both degree 9) in + * one Horner loop. The sum is returned as r.h, rounded through CHOPPED(), + * plus the remainder r.l. + */ +/* INDENT ON */ +static struct LDouble +GT3(long double yh, long double yl) { + long double t3, t4, y; + int i; + struct LDouble r; + + y = yh + yl; + for (t4 = Q3[9], t3 = P3[9], i = 8; i >= 0; i--) { + t4 = t4 * y + Q3[i]; + t3 = t3 * y + P3[i]; + } + t3 = (y * y) * t3 / t4; + t3 += (TZ3 * yl + GZ3_l); /* gather the small terms first */ + t4 = TZ3 * yh; + r.h = CHOPPED((t4 + GZ3_h + t3)); /* leading part */ + t3 += (t4 - (r.h - GZ3_h)); + r.l = t3; /* what the leading part left out */ + return (r); +} + +/* INDENT OFF */ +/* Hex value of GP[0] should be 3FB55555 55555555 */ +static const long double GP[] = { + +0.083333333333333333333333333333333172839171301L, + -2.77777777777777777777777777492501211999399424104e-0003L, + +7.93650793650793650793635650541638236350020883243e-0004L, 
-5.95238095238095238057299772679324503339241961704e-0004L, + +8.41750841750841696138422987977683524926142600321e-0004L, + -1.91752691752686682825032547823699662178842123308e-0003L, + +6.41025641022403480921891559356473451161279359322e-0003L, + -2.95506535798414019189819587455577003732808185071e-0002L, + +1.79644367229970031486079180060923073476568732136e-0001L, + -1.39243086487274662174562872567057200255649290646e+0000L, + +1.34025874044417962188677816477842265259608269775e+0001L, + -1.56803713480127469414495545399982508700748274318e+0002L, + +2.18739841656201561694927630335099313968924493891e+0003L, + -3.55249848644100338419187038090925410976237921269e+0004L, + +6.43464880437835286216768959439484376449179576452e+0005L, + -1.20459154385577014992600342782821389605893904624e+0007L, + +2.09263249637351298563934942349749718491071093210e+0008L, + -2.96247483183169219343745316433899599834685703457e+0009L, + +2.88984933605896033154727626086506756972327292981e+0010L, + -1.40960434146030007732838382416230610302678063984e+0011L, /* 19 */ +}; + +static const long double T3[] = { + +0.666666666666666666666666666666666634567834260213L, /* T3[0] */ + +0.400000000000000000000000000040853636176634934140L, /* T3[1] */ + +0.285714285714285714285696975252753987869020263448L, /* T3[2] */ + +0.222222222222222225593221101192317258554772129875L, /* T3[3] */ + +0.181818181817850192105847183461778186703779262916L, /* T3[4] */ + +0.153846169861348633757101285952333369222567014596L, /* T3[5] */ + +0.133033462889260193922261296772841229985047571265L, /* T3[6] */ +}; + +static const long double c[] = { +0.0L, +1.0L, +2.0L, +0.5L, +1.0e-4930L, /* tiny */ +4.18937683105468750000e-01L, /* hln2pim1_h */ +8.50099203991780329736405617639861397473637783412817152e-07L, /* hln2pim1_l */ +0.418938533204672741780329736405617639861397473637783412817152L, /* hln2pim1 */ +2.16608493865351192653179168701171875e-02L, /* ln2_32hi */ +5.96317165397058692545083025235937919875797669127130e-12L, /* ln2_32lo */ 
+46.16624130844682903551758979206054839765267053289554989233L, /* invln2_32 */ +#if defined(__x86) +1.7555483429044629170023839037639845628291e+03L, /* overflow */ +#else +1.7555483429044629170038892160702032034177e+03L, /* overflow */ +#endif +}; + +#define zero c[0] +#define one c[1] +#define two c[2] +#define half c[3] +#define tiny c[4] +#define hln2pim1_h c[5] +#define hln2pim1_l c[6] +#define hln2pim1 c[7] +#define ln2_32hi c[8] +#define ln2_32lo c[9] +#define invln2_32 c[10] +#define overflow c[11] + +/* + * |exp(r) - (1+r+Et0*r^2+...+Et10*r^12)| <= 2^(-128.88) for |r|<=ln2/64 + */ +static const long double Et[] = { + +5.0000000000000000000e-1L, + +1.66666666666666666666666666666828835166292152466e-0001L, + +4.16666666666666666666666666666693398646592712189e-0002L, + +8.33333333333333333333331748774512601775591115951e-0003L, + +1.38888888888888888888888845356011511394764753997e-0003L, + +1.98412698412698413237140350092993252684198882102e-0004L, + +2.48015873015873016080222025357442659895814371694e-0005L, + +2.75573192239028921114572986441972140933432317798e-0006L, + +2.75573192239448470555548102895526369739856219317e-0007L, + +2.50521677867683935940853997995937600214167232477e-0008L, + +2.08767928899010367374984448513685566514152147362e-0009L, +}; + +/* + * long double precision coefficients for computing log(x)-1 in tgamma. + * See "algorithm" for details + * + * log(x) - 1 = T1(n) + T2(j) + T3(s), where x = 2**n * y, 1<=y<2, + * j=[64*y], z[j]=1+j/64+1/128, s = (y-z[j])/(y+z[j]), and + * T1(n) = T1[2n,2n+1] = n*log(2)-1, + * T2(j) = T2[2j,2j+1] = log(z[j]), + * T3(s) = 2s + T3[0]s^3 + T3[1]s^5 + T3[2]s^7 + ... + T3[6]s^15 + * Note + * (1) the leading entries are truncated to 24 binary point. 
+ * (2) Remez error for T3(s) is bounded by 2**(-136.54) + */ +static const long double T1[] = { +-1.000000000000000000000000000000000000000000e+00L, + +0.000000000000000000000000000000000000000000e+00L, +-3.068528175354003906250000000000000000000000e-01L, +-1.904654299957767878541823431924500011926579e-09L, + +3.862943053245544433593750000000000000000000e-01L, + +5.579533617547508924291635313615100141107647e-08L, + +1.079441487789154052734375000000000000000000e+00L, + +5.389068187551732136437452970422650211661470e-08L, + +1.772588670253753662109375000000000000000000e+00L, + +5.198602757555955348583270627230200282215294e-08L, + +2.465735852718353271484375000000000000000000e+00L, + +5.008137327560178560729088284037750352769117e-08L, + +3.158883035182952880859375000000000000000000e+00L, + +4.817671897564401772874905940845299849351090e-08L, + +3.852030217647552490234375000000000000000000e+00L, + +4.627206467568624985020723597652849919904913e-08L, + +4.545177400112152099609375000000000000000000e+00L, + +4.436741037572848197166541254460399990458737e-08L, + +5.238324582576751708984375000000000000000000e+00L, + +4.246275607577071409312358911267950061012560e-08L, + +5.931471765041351318359375000000000000000000e+00L, + +4.055810177581294621458176568075500131566384e-08L, +}; + +/* + * T2[2i,2i+1] = log(1+i/64+1/128) + */ +static const long double T2[] = { + +7.7821016311645507812500000000000000000000e-03L, + +3.8810890398166212900061136763678127453570e-08L, + +2.3167014122009277343750000000000000000000e-02L, + +4.5159525100885049160962289916579411752759e-08L, + +3.8318812847137451171875000000000000000000e-02L, + +5.1454999148021880325123797290345960518164e-08L, + +5.3244471549987792968750000000000000000000e-02L, + +4.2968824489897120193786528776939573415076e-08L, + +6.7950606346130371093750000000000000000000e-02L, + +5.5562377378300815277772629414034632394030e-08L, + +8.2443654537200927734375000000000000000000e-02L, + +1.4673873663533785068668307805914095366600e-08L, + 
+9.6729576587677001953125000000000000000000e-02L, + +4.9870874110342446056487463437015041543346e-08L, + +1.1081433296203613281250000000000000000000e-01L, + +3.3378253981382306169323211928098474801099e-08L, + +1.2470346689224243164062500000000000000000e-01L, + +1.1608714804222781515380863268491613205318e-08L, + +1.3840228319168090820312500000000000000000e-01L, + +3.9667438227482200873601649187393160823607e-08L, + +1.5191602706909179687500000000000000000000e-01L, + +1.4956750178196803424896884511327584958252e-08L, + +1.6524952650070190429687500000000000000000e-01L, + +4.6394605258578736449277240313729237989366e-08L, + +1.7840760946273803710937500000000000000000e-01L, + +4.8010080260010025241510941968354682199540e-08L, + +1.9139480590820312500000000000000000000000e-01L, + +4.7091426329609298807561308873447039132856e-08L, + +2.0421552658081054687500000000000000000000e-01L, + +1.4847880344628820386196239272213742113867e-08L, + +2.1687388420104980468750000000000000000000e-01L, + +5.4099564554931589525744347498478964801484e-08L, + +2.2937405109405517578125000000000000000000e-01L, + +4.9970790654210230725046139871550961365282e-08L, + +2.4171990156173706054687500000000000000000e-01L, + +3.5325408107597432515913513900103385655073e-08L, + +2.5391519069671630859375000000000000000000e-01L, + +1.9284247135543573297906606667466299224747e-08L, + +2.6596349477767944335937500000000000000000e-01L, + +5.3719458497979750926537543389268821141517e-08L, + +2.7786844968795776367187500000000000000000e-01L, + +1.3154985425144750329234012330820349974537e-09L, + +2.8963327407836914062500000000000000000000e-01L, + +1.8504673536253893055525668970003860369760e-08L, + +3.0126130580902099609375000000000000000000e-01L, + +2.4769140784919125538233755492657352680723e-08L, + +3.1275570392608642578125000000000000000000e-01L, + +6.0778104626049965596883190321597861455475e-09L, + +3.2411944866180419921875000000000000000000e-01L, + +1.9992407776871920760434987352182336158873e-08L, + 
+3.3535552024841308593750000000000000000000e-01L, + +2.1672724744319679579814166199074433006807e-08L, + +3.4646672010421752929687500000000000000000e-01L, + +4.7241991051621587188425772950711830538414e-08L, + +3.5745584964752197265625000000000000000000e-01L, + +3.9274281801569759490140904474434669956562e-08L, + +3.6832553148269653320312500000000000000000e-01L, + +2.9676011119845105154050398826897178765758e-08L, + +3.7907832860946655273437500000000000000000e-01L, + +2.4325502905656478345631019858881408009210e-08L, + +3.8971674442291259765625000000000000000000e-01L, + +6.7171126157142136040035208670510556529487e-09L, + +4.0024316310882568359375000000000000000000e-01L, + +1.0181870233355751019951311700799406124957e-09L, + +4.1065990924835205078125000000000000000000e-01L, + +1.5736916335153056203175822787661567534220e-08L, + +4.2096924781799316406250000000000000000000e-01L, + +4.6826136472066367161506795972449857268707e-08L, + +4.3117344379425048828125000000000000000000e-01L, + +2.1024120852577922478955594998480144051225e-08L, + +4.4127452373504638671875000000000000000000e-01L, + +3.7069828842770746441661301225362605528786e-08L, + +4.5127463340759277343750000000000000000000e-01L, + +1.0731865811707192383079012478685922879010e-08L, + +4.6117568016052246093750000000000000000000e-01L, + +3.4961647705430499925597855358603099030515e-08L, + +4.7097969055175781250000000000000000000000e-01L, + +2.4667033200046897856056359251373510964634e-08L, + +4.8068851232528686523437500000000000000000e-01L, + +1.7020465042442243455448011551208861216878e-08L, + +4.9030393362045288085937500000000000000000e-01L, + +5.4424740957290971159645746860530583309571e-08L, + +4.9982786178588867187500000000000000000000e-01L, + +7.7705606579463314152470441415126573566105e-09L, + +5.0926184654235839843750000000000000000000e-01L, + +5.5247449548366574919228323824878565745713e-08L, + +5.1860773563385009765625000000000000000000e-01L, + +2.8574195534496726996364798698556235730848e-08L, + 
+5.2786707878112792968750000000000000000000e-01L, + +1.0839714455426392217778300963558522088193e-08L, + +5.3704142570495605468750000000000000000000e-01L, + +4.0191927599879229244153832299023744345999e-08L, + +5.4613238573074340820312500000000000000000e-01L, + +5.1867392242179272209231209163864971792889e-08L, + +5.5514144897460937500000000000000000000000e-01L, + +5.8565892217715480359515904050170125743178e-08L, + +5.6407010555267333984375000000000000000000e-01L, + +3.2732129626227634290090190711817681692354e-08L, + +5.7291972637176513671875000000000000000000e-01L, + +2.7190020372374006726626261068626400393936e-08L, + +5.8169168233871459960937500000000000000000e-01L, + +5.7295907882911235753725372340709967597394e-08L, + +5.9038740396499633789062500000000000000000e-01L, + +4.2637180036751291708123598757577783615014e-08L, + +5.9900814294815063476562500000000000000000e-01L, + +4.6697932764615975024461651502060474048774e-08L, + +6.0755521059036254882812500000000000000000e-01L, + +3.9634179246672960152791125371893149820625e-08L, + +6.1602985858917236328125000000000000000000e-01L, + +1.8626341656366315928196700650292529688219e-08L, + +6.2443327903747558593750000000000000000000e-01L, + +8.9744179151050387440546731199093039879228e-09L, + +6.3276666402816772460937500000000000000000e-01L, + +5.5428701049364114685035797584887586099726e-09L, + +6.4103114604949951171875000000000000000000e-01L, + +3.3371431779336851334405392546708949047361e-08L, + +6.4922791719436645507812500000000000000000e-01L, + +2.9430743363812714969905311122271269100885e-08L, + +6.5735805034637451171875000000000000000000e-01L, + +2.2361985518423140023245936165514147093250e-08L, + +6.6542261838912963867187500000000000000000e-01L, + +1.4155960810278217610006660181148303091649e-08L, + +6.7342263460159301757812500000000000000000e-01L, + +4.0610573702719835388801017264750843477878e-08L, + +6.8135917186737060546875000000000000000000e-01L, + +5.2940532463479321559568089441735584156689e-08L, + 
+6.8923324346542358398437500000000000000000e-01L, + +3.7773385396340539337814603903232796216537e-08L, +}; + +/* + * S[j],S_trail[j] = 2**(j/32.) for the final computation of exp(t+w) + */ +static const long double S[] = { +#if defined(__x86) + +1.0000000000000000000000000e+00L, + +1.0218971486541166782081522e+00L, + +1.0442737824274138402382006e+00L, + +1.0671404006768236181297224e+00L, + +1.0905077326652576591003302e+00L, + +1.1143867425958925362894369e+00L, + +1.1387886347566916536971221e+00L, + +1.1637248587775775137938619e+00L, + +1.1892071150027210666875674e+00L, + +1.2152473599804688780476325e+00L, + +1.2418578120734840485256747e+00L, + +1.2690509571917332224885722e+00L, + +1.2968395546510096659215822e+00L, + +1.3252366431597412945939118e+00L, + +1.3542555469368927282668852e+00L, + +1.3839098819638319548151403e+00L, + +1.4142135623730950487637881e+00L, + +1.4451808069770466200253470e+00L, + +1.4768261459394993113155431e+00L, + +1.5091644275934227397133885e+00L, + +1.5422108254079408235859630e+00L, + +1.5759808451078864864006862e+00L, + +1.6104903319492543080837174e+00L, + +1.6457554781539648445110730e+00L, + +1.6817928305074290860378350e+00L, + +1.7186192981224779156032914e+00L, + +1.7562521603732994831094730e+00L, + +1.7947090750031071864148413e+00L, + +1.8340080864093424633989166e+00L, + +1.8741676341102999013002103e+00L, + +1.9152065613971472938202589e+00L, + +1.9571441241754002689657438e+00L, +#else + +1.00000000000000000000000000000000000e+00L, + +1.02189714865411667823448013478329942e+00L, + +1.04427378242741384032196647873992910e+00L, + +1.06714040067682361816952112099280918e+00L, + +1.09050773266525765920701065576070789e+00L, + +1.11438674259589253630881295691960313e+00L, + +1.13878863475669165370383028384151134e+00L, + +1.16372485877757751381357359909218536e+00L, + +1.18920711500272106671749997056047593e+00L, + +1.21524735998046887811652025133879836e+00L, + +1.24185781207348404859367746872659561e+00L, + +1.26905095719173322255441908103233805e+00L, + 
+1.29683955465100966593375411779245118e+00L, + +1.32523664315974129462953709549872168e+00L, + +1.35425554693689272829801474014070273e+00L, + +1.38390988196383195487265952726519287e+00L, + +1.41421356237309504880168872420969798e+00L, + +1.44518080697704662003700624147167095e+00L, + +1.47682614593949931138690748037404985e+00L, + +1.50916442759342273976601955103319352e+00L, + +1.54221082540794082361229186209073479e+00L, + +1.57598084510788648645527016018190504e+00L, + +1.61049033194925430817952066735740067e+00L, + +1.64575547815396484451875672472582254e+00L, + +1.68179283050742908606225095246642969e+00L, + +1.71861929812247791562934437645631244e+00L, + +1.75625216037329948311216061937531314e+00L, + +1.79470907500310718642770324212778174e+00L, + +1.83400808640934246348708318958828892e+00L, + +1.87416763411029990132999894995444645e+00L, + +1.91520656139714729387261127029583086e+00L, + +1.95714412417540026901832225162687149e+00L, +#endif +}; +static const long double S_trail[] = { +#if defined(__x86) + +0.0000000000000000000000000e+00L, + +2.6327965667180882569382524e-20L, + +8.3765863521895191129661899e-20L, + +3.9798705777454504249209575e-20L, + +1.0668046596651558640993042e-19L, + +1.9376009847285360448117114e-20L, + +6.7081819456112953751277576e-21L, + +1.9711680502629186462729727e-20L, + +2.9932584438449523689104569e-20L, + +6.8887754153039109411061914e-20L, + +6.8002718741225378942847820e-20L, + +6.5846917376975403439742349e-20L, + +1.2171958727511372194876001e-20L, + +3.5625253228704087115438260e-20L, + +3.1129551559077560956309179e-20L, + +5.7519192396164779846216492e-20L, + +3.7900651177865141593101239e-20L, + +1.1659262405698741798080115e-20L, + +7.1364385105284695967172478e-20L, + +5.2631003710812203588788949e-20L, + +2.6328853788732632868460580e-20L, + +5.4583950085438242788190141e-20L, + +9.5803254376938269960718656e-20L, + +7.6837733983874245823512279e-21L, + +2.4415965910835093824202087e-20L, + +2.6052966871016580981769728e-20L, + 
+2.6876456344632553875309579e-21L, + +1.2861930155613700201703279e-20L, + +8.8166633394037485606572294e-20L, + +2.9788615389580190940837037e-20L, + +5.2352341619805098677422139e-20L, + +5.2578463064010463732242363e-20L, +#else + +0.00000000000000000000000000000000000e+00L, + +1.80506787420330954745573333054573786e-35L, +-9.37452029228042742195756741973083214e-35L, +-1.59696844729275877071290963023149997e-35L, + +9.11249341012502297851168610167248666e-35L, +-6.50422820697854828723037477525938871e-35L, +-8.14846884452585113732569176748815532e-35L, +-5.06621457672180031337233074514290335e-35L, +-1.35983097468881697374987563824591912e-35L, + +9.49742763556319647030771056643324660e-35L, +-3.28317052317699860161506596533391526e-36L, +-5.01723570938719041029018653045842895e-35L, +-2.39147479768910917162283430160264014e-35L, +-8.35057135763390881529889073794408385e-36L, + +7.03675688907326504242173719067187644e-35L, +-5.18248485306464645753689301856695619e-35L, + +9.42224254862183206569211673639406488e-35L, +-3.96750082539886230916730613021641828e-35L, + +7.14352899156330061452327361509276724e-35L, + +1.15987125286798512424651783410044433e-35L, + +4.69693347835811549530973921320187447e-35L, +-3.38651317599500471079924198499981917e-35L, +-8.58731877429824706886865593510387445e-35L, +-9.60595154874935050318549936224606909e-35L, + +9.60973393212801278450755869714178581e-35L, + +6.37839792144002843924476144978084855e-35L, + +7.79243078569586424945646112516927770e-35L, + +7.36133776758845652413193083663393220e-35L, +-6.47299514791334723003521457561217053e-35L, + +8.58747441795369869427879806229522962e-35L, + +2.37181542282517483569165122830269098e-35L, +-3.02689168209611877300459737342190031e-37L, +#endif +}; +/* INDENT ON */ + +/* INDENT OFF */ +/* + * return tgamma(x) scaled by 2**-m for 8<x<=171.62... 
using Stirling's formula + * log(G(x)) ~= (x-.5)*(log(x)-1) + .5(log(2*pi)-1) + (1/x)*P(1/(x*x)) + * = L1 + L2 + L3, + * + * The value is returned as the pair zz.h + zz.l, and the power of two that + * is still to be applied is stored through m. + * NOTE(review): the 171.62... bound reads like the double precision limit; + * the exponent range noted in the body (3-10) suggests a wider range for + * this long double version -- confirm. + */ +/* INDENT ON */ +static struct LDouble +large_gam(long double x, int *m) { + long double z, t1, t2, t3, z2, t5, w, y, u, r, v; + long double t24 = 16777216.0L, p24 = 1.0L / 16777216.0L; + int n2, j2, k, ix, j, i; + struct LDouble zz; + long double u2, ss_h, ss_l, r_h, w_h, w_l, t4; + +/* INDENT OFF */ +/* + * compute ss = ss.h+ss.l = log(x)-1 (see tgamma_log.h for details) + * + * log(x) - 1 = T1(n) + T2(j) + T3(s), where x = 2**n * y, 1<=y<2, + * j=[64*y], z[j]=1+j/64+1/128, s = (y-z[j])/(y+z[j]), and + * T1(n) = T1[2n,2n+1] = n*log(2)-1, + * T2(j) = T2[2j,2j+1] = log(z[j]), + * T3(s) = 2s + T3[0]s^3 + T3[1]s^5 + ... + T3[6]s^15 + * Note + * (1) the leading entries are truncated to 24 binary point. + * (2) Remez error for T3(s) is bounded by 2**(-72.4) + * NOTE(review): the comment above the T1 table quotes 2**(-136.54) + * for the same bound -- confirm which figure is current. + * 2**(-24) + * _________V___________________ + * T1(n): |_________|___________________| + * _______ ______________________ + * T2(j): |_______|______________________| + * ____ _______________________ + * 2s: |____|_______________________| + * __________________________ + * + T3(s)-2s: |__________________________| + * ------------------------------------------- + * [leading] + [Trailing] + */ + /* INDENT ON */ + ix = H0_WORD(x); + n2 = (ix >> 16) - 0x3fff; /* exponent of x, range:3-10 */ + y = scalbnl(x, -n2); /* y = scale x to [1,2] */ + n2 += n2; /* 2n */ + j = (ix >> 10) & 0x3f; /* j */ + z = 1.0078125L + (long double) j * 0.015625L; /* z[j]=1+j/64+1/128 */ + j2 = j + j; + t1 = y + z; + t2 = y - z; + r = one / t1; + u = r * t2; /* u = (y-z)/(y+z) */ + t1 = CHOPPED(t1); + t4 = T2[j2 + 1] + T1[n2 + 1]; /* sum of the trailing table entries */ + z2 = u * u; + k = H0_WORD(u) & 0x7fffffff; + t3 = T2[j2] + T1[n2]; /* sum of the leading table entries */ + for (t5 = T3[6], i = 5; i >= 0; i--) + t5 = z2 * t5 + T3[i]; /* Horner: T3[0] + T3[1]*z2 + ... + T3[6]*z2^6 */ + if ((k >> 16) < 0x3fec) { /* |u|<2**-19 */ + t2 = t4 + u * (two + z2 * t5); + } else { + t5 = t4 + (u * z2) * t5; + u2 = u + u; + v = (long double) ((int) (u2 * 
t24)) * p24; /* v = 2u chopped to a multiple of 2**-24 */ + t2 = t5 + r * ((two * t2 - v * t1) - v * (y - (t1 - z))); + t3 += v; + } + ss_h = CHOPPED((t2 + t3)); + ss_l = t2 - (ss_h - t3); +/* INDENT OFF */ +/* + * compute ww = (x-.5)*(log(x)-1) + .5*(log(2pi)-1) + (1/x)*P(1/x^2) + * where ss = log(x) - 1 is already in extra precision + */ + /* INDENT ON */ + z = one / x; + r = x - half; + r_h = CHOPPED((r)); + w_h = r_h * ss_h + hln2pim1_h; + z2 = z * z; + w = (r - r_h) * ss_h + r * ss_l; + t1 = GP[19]; + for (i = 18; i > 0; i--) + t1 = z2 * t1 + GP[i]; /* Horner over GP[19]..GP[1]; GP[0] is added below */ + w += hln2pim1_l; + w_l = z * (GP[0] + z2 * t1) + w; + k = (int) ((w_h + w_l) * invln2_32 + half); + + /* compute the exponential of w_h+w_l */ + + j = k & 0x1f; + *m = k >> 5; /* power of two left for the caller to apply */ + t3 = (long double) k; + + /* perform w - k*ln2_32 (represent as w_h - w_l) */ + t1 = w_h - t3 * ln2_32hi; + t2 = t3 * ln2_32lo; + w = t2 - w_l; + w_h = t1 - w; + w_l = w - (t1 - w_h); + + /* compute exp(w_h-w_l) */ + z = w_h - w_l; + for (t1 = Et[10], i = 9; i >= 0; i--) + t1 = z * t1 + Et[i]; + t3 = w_h - (w_l - (z * z) * t1); /* t3 = expm1(z) */ + zz.l = S_trail[j] * (one + t3) + S[j] * t3; + zz.h = S[j]; /* leading part is the table value 2**(j/32) */ + return (zz); +} + +/* INDENT OFF */ +/* + * kpsin(x)= sin(pi*x)/pi + * 3 5 7 9 11 27 + * = x+ks[0]*x +ks[1]*x +ks[2]*x +ks[3]*x +ks[4]*x + ... 
+ ks[12]*x + */ +static const long double ks[] = { + -1.64493406684822643647241516664602518705158902870e+0000L, + +8.11742425283353643637002772405874238094995726160e-0001L, + -1.90751824122084213696472111835337366232282723933e-0001L, + +2.61478478176548005046532613563241288115395517084e-0002L, + -2.34608103545582363750893072647117829448016479971e-0003L, + +1.48428793031071003684606647212534027556262040158e-0004L, + -6.97587366165638046518462722252768122615952898698e-0006L, + +2.53121740413702536928659271747187500934840057929e-0007L, + -7.30471182221385990397683641695766121301933621956e-0009L, + +1.71653847451163495739958249695549313987973589884e-0010L, + -3.34813314714560776122245796929054813458341420565e-0012L, + +5.50724992262622033449487808306969135431411753047e-0014L, + -7.67678132753577998601234393215802221104236979928e-0016L, +}; +/* INDENT ON */ + +/* + * assume x is not tiny and positive + */ +static struct LDouble +kpsin(long double x) { + long double z, t1, t2; + struct LDouble xx; + int i; + + z = x * x; + xx.h = x; + for (t2 = ks[12], i = 11; i > 0; i--) + t2 = z * t2 + ks[i]; + t1 = z * x; + t2 *= z * t1; + xx.l = t1 * ks[0] + t2; + return (xx); +} + +/* INDENT OFF */ +/* + * kpcos(x)= cos(pi*x)/pi + * 2 4 6 8 10 12 + * = 1/pi +kc[0]*x +kc[1]*x +kc[2]*x +kc[3]*x +kc[4]*x +kc[5]*x + * + * 2 4 6 8 10 22 + * = 1/pi - pi/2*x +kc[0]*x +kc[1]*x +kc[2]*x +kc[3]*x +...+kc[9]*x + * + * -pi/2*x*x = (npi_2_h + npi_2_l) * (x_f+x_l)*(x_f+x_l) + * = npi_2_h*(x_f+x_l)*(x_f+x_l) + npi_2_l*x*x + * = npi_2_h*x_f*x_f + npi_2_h*(x*x-x_f*x_f) + npi_2_l*x*x + * = npi_2_h*x_f*x_f + npi_2_h*(x+x_f)*(x-x_f) + npi_2_l*x*x + * Here x_f = (long double) (float)x + * Note that pi/2(in hex) = + * 1.921FB54442D18469898CC51701B839A252049C1114CF98E804177D4C76273644A29 + * npi_2_h = -pi/2 chopped to 25 bits = -1.921FB50000000000000000000000000 = + * -1.570796310901641845703125000000000 and + * npi_2_l = + * -0.0000004442D18469898CC51701B839A252049C1114CF98E804177D4C76273644A29 = + * 
-.0000000158932547735281966916397514420985846996875529104874722961539 = + * -1.5893254773528196691639751442098584699687552910487472296153e-8 + * 1/pi(in hex) = + * .517CC1B727220A94FE13ABE8FA9A6EE06DB14ACC9E21C820FF28B1D5EF5DE2B + * will be splitted into: + * one_pi_h = 1/pi chopped to 48 bits = .517CC1B727220000000000... and + * one_pi_l = .0000000000000A94FE13ABE8FA9A6EE06DB14ACC9E21C820FF28B1D5EF5DE2B + */ + +static const long double +#if defined(__x86) +one_pi_h = 0.3183098861481994390487670898437500L, /* 31 bits */ +one_pi_l = 3.559123248900043690127872406891929148e-11L, +#else +one_pi_h = 0.31830988618379052468299050815403461456298828125L, +one_pi_l = 1.46854777018590994109505931010230912897495334688117e-16L, +#endif +npi_2_h = -1.570796310901641845703125000000000L, +npi_2_l = -1.5893254773528196691639751442098584699687552910e-8L; + +static const long double kc[] = { + +1.29192819501249250731151312779548918765320728489e+0000L, + -4.25027339979557573976029596929319207009444090366e-0001L, + +7.49080661650990096109672954618317623888421628613e-0002L, + -8.21458866111282287985539464173976555436050215120e-0003L, + +6.14202578809529228503205255165761204750211603402e-0004L, + -3.33073432691149607007217330302595267179545908740e-0005L, + +1.36970959047832085796809745461530865597993680204e-0006L, + -4.41780774262583514450246512727201806217271097336e-0008L, + +1.14741409212381858820016567664488123478660705759e-0009L, + -2.44261236114707374558437500654381006300502749632e-0011L, +}; +/* INDENT ON */ + +/* + * assume x is not tiny and positive + */ +static struct LDouble +kpcos(long double x) { + long double z, t1, t2, t3, t4, x4, x8; + int i; + struct LDouble xx; + + z = x * x; + xx.h = one_pi_h; + t1 = (long double) ((float) x); + x4 = z * z; + t2 = npi_2_l * z + npi_2_h * (x + t1) * (x - t1); + for (i = 8, t3 = kc[9]; i >= 0; i--) + t3 = z * t3 + kc[i]; + t3 = one_pi_l + x4 * t3; + t4 = t1 * t1 * npi_2_h; + x8 = t2 + t3; + xx.l = x8 + t4; + return (xx); +} + +/* INDENT 
OFF */ +static const long double + /* 0.13486180573279076968979393577465291700642511139552429398233 */ +#if defined(__x86) +t0z1 = 0.1348618057327907696779385054997035808810L, +t0z1_l = 1.1855430274949336125392717150257379614654e-20L, +#else +t0z1 = 0.1348618057327907696897939357746529168654L, +t0z1_l = 1.4102088588676879418739164486159514674310e-37L, +#endif + /* 0.46163214496836234126265954232572132846819620400644635129599 */ +#if defined(__x86) +t0z2 = 0.4616321449683623412538115843295472018326L, +t0z2_l = 8.84795799617412663558532305039261747030640e-21L, +#else +t0z2 = 0.46163214496836234126265954232572132343318L, +t0z2_l = 5.03501162329616380465302666480916271611101e-36L, +#endif + /* 0.81977310110050060178786870492160699631174407846245179119586 */ +#if defined(__x86) +t0z3 = 0.81977310110050060178773362329351925836817L, +t0z3_l = 1.350816280877379435658077052534574556256230e-22L +#else +t0z3 = 0.8197731011005006017878687049216069516957449L, +t0z3_l = 4.461599916947014419045492615933551648857380e-35L +#endif +; +/* INDENT ON */ + +/* + * gamma(x+i) for 0 <= x < 1 + */ +static struct LDouble +gam_n(int i, long double x) { + struct LDouble rr = {0.0L, 0.0L}, yy; + long double r1, r2, t2, z, xh, xl, yh, yl, zh, z1, z2, zl, x5, wh, wl; + + /* compute yy = gamma(x+1) */ + if (x > 0.2845L) { + if (x > 0.6374L) { + r1 = x - t0z3; + r2 = CHOPPED((r1 - t0z3_l)); + t2 = r1 - r2; + yy = GT3(r2, t2 - t0z3_l); + } else { + r1 = x - t0z2; + r2 = CHOPPED((r1 - t0z2_l)); + t2 = r1 - r2; + yy = GT2(r2, t2 - t0z2_l); + } + } else { + r1 = x - t0z1; + r2 = CHOPPED((r1 - t0z1_l)); + t2 = r1 - r2; + yy = GT1(r2, t2 - t0z1_l); + } + /* compute gamma(x+i) = (x+i-1)*...*(x+1)*yy, 0<i<8 */ + switch (i) { + case 0: /* yy/x */ + r1 = one / x; + xh = CHOPPED((x)); /* x is not tiny */ + rr.h = CHOPPED(((yy.h + yy.l) * r1)); + rr.l = r1 * (yy.h - rr.h * xh) - ((r1 * rr.h) * (x - xh) - + r1 * yy.l); + break; + case 1: /* yy */ + rr.h = yy.h; + rr.l = yy.l; + break; + case 2: /* (x+1)*yy */ 
+ z = x + one; /* may not be exact */ + zh = CHOPPED((z)); + rr.h = zh * yy.h; + rr.l = z * yy.l + (x - (zh - one)) * yy.h; + break; + case 3: /* (x+2)*(x+1)*yy */ + z1 = x + one; + z2 = x + 2.0L; + z = z1 * z2; + xh = CHOPPED((z)); + zh = CHOPPED((z1)); + xl = (x - (zh - one)) * (z2 + zh) - (xh - zh * (zh + one)); + + rr.h = xh * yy.h; + rr.l = z * yy.l + xl * yy.h; + break; + + case 4: /* (x+1)*(x+3)*(x+2)*yy */ + z1 = x + 2.0L; + z2 = (x + one) * (x + 3.0L); + zh = CHOPPED(z1); + zl = x - (zh - 2.0L); + xh = CHOPPED(z2); + xl = zl * (zh + z1) - (xh - (zh * zh - one)); + + /* wh+wl=(x+2)*yy */ + wh = CHOPPED((z1 * (yy.h + yy.l))); + wl = (zl * yy.h + z1 * yy.l) - (wh - zh * yy.h); + + rr.h = xh * wh; + rr.l = z2 * wl + xl * wh; + + break; + case 5: /* ((x+1)*(x+4)*(x+2)*(x+3))*yy */ + z1 = x + 2.0L; + z2 = x + 3.0L; + z = z1 * z2; + zh = CHOPPED((z1)); + yh = CHOPPED((z)); + yl = (x - (zh - 2.0L)) * (z2 + zh) - (yh - zh * (zh + one)); + z2 = z - 2.0L; + z *= z2; + xh = CHOPPED((z)); + xl = yl * (z2 + yh) - (xh - yh * (yh - 2.0L)); + rr.h = xh * yy.h; + rr.l = z * yy.l + xl * yy.h; + break; + case 6: /* ((x+1)*(x+2)*(x+3)*(x+4)*(x+5))*yy */ + z1 = x + 2.0L; + z2 = x + 3.0L; + z = z1 * z2; + zh = CHOPPED((z1)); + yh = CHOPPED((z)); + z1 = x - (zh - 2.0L); + yl = z1 * (z2 + zh) - (yh - zh * (zh + one)); + z2 = z - 2.0L; + x5 = x + 5.0L; + z *= z2; + xh = CHOPPED(z); + zh += 3.0; + xl = yl * (z2 + yh) - (xh - yh * (yh - 2.0L)); + /* xh+xl=(x+1)*...*(x+4) */ + /* wh+wl=(x+5)*yy */ + wh = CHOPPED((x5 * (yy.h + yy.l))); + wl = (z1 * yy.h + x5 * yy.l) - (wh - zh * yy.h); + rr.h = wh * xh; + rr.l = z * wl + xl * wh; + break; + case 7: /* ((x+1)*(x+2)*(x+3)*(x+4)*(x+5)*(x+6))*yy */ + z1 = x + 3.0L; + z2 = x + 4.0L; + z = z2 * z1; + zh = CHOPPED((z1)); + yh = CHOPPED((z)); /* yh+yl = (x+3)(x+4) */ + yl = (x - (zh - 3.0L)) * (z2 + zh) - (yh - (zh * (zh + one))); + z1 = x + 6.0L; + z2 = z - 2.0L; /* z2 = (x+2)*(x+5) */ + z *= z2; + xh = CHOPPED((z)); + xl = yl * (z2 + yh) - 
(xh - yh * (yh - 2.0L)); + /* xh+xl=(x+2)*...*(x+5) */ + /* wh+wl=(x+1)(x+6)*yy */ + z2 -= 4.0L; /* z2 = (x+1)(x+6) */ + wh = CHOPPED((z2 * (yy.h + yy.l))); + wl = (z2 * yy.l + yl * yy.h) - (wh - (yh - 6.0L) * yy.h); + rr.h = wh * xh; + rr.l = z * wl + xl * wh; + } + return (rr); +} + +long double +tgammal(long double x) { + struct LDouble ss, ww; + long double t, t1, t2, t3, t4, t5, w, y, z, z1, z2, z3, z5; + int i, j, m, ix, hx, xk; + unsigned lx; + + hx = H0_WORD(x); + lx = H3_WORD(x); + ix = hx & 0x7fffffff; + y = x; + if (ix < 0x3f8e0000) { /* x < 2**-113 */ + return (one / x); + } + if (ix >= 0x7fff0000) + return (x * ((hx < 0)? zero : x)); /* Inf or NaN */ + if (x > overflow) /* overflow threshold */ + return (x * 1.0e4932L); + if (hx >= 0x40020000) { /* x >= 8 */ + ww = large_gam(x, &m); + w = ww.h + ww.l; + return (scalbnl(w, m)); + } + + if (hx > 0) { /* 0 < x < 8 */ + i = (int) x; + ww = gam_n(i, x - (long double) i); + return (ww.h + ww.l); + } + /* INDENT OFF */ + /* negative x */ + /* + * compute xk = + * -2 ... x is an even int (-inf is considered an even #) + * -1 ... x is an odd int + * +0 ... x is not an int but chopped to an even int + * +1 ... x is not an int but chopped to an odd int + */ + /* INDENT ON */ + xk = 0; +#if defined(__x86) + if (ix >= 0x403e0000) { /* x >= 2**63 } */ + if (ix >= 0x403f0000) + xk = -2; + else + xk = -2 + (lx & 1); +#else + if (ix >= 0x406f0000) { /* x >= 2**112 */ + if (ix >= 0x40700000) + xk = -2; + else + xk = -2 + (lx & 1); +#endif + } else if (ix >= 0x3fff0000) { + w = -x; + t1 = floorl(w); + t2 = t1 * half; + t3 = floorl(t2); + if (t1 == w) { + if (t2 == t3) + xk = -2; + else + xk = -1; + } else { + if (t2 == t3) + xk = 0; + else + xk = 1; + } + } + + if (xk < 0) { + /* return NaN. 
Ideally gamma(-n)= (-1)**(n+1) * inf */ + return (x - x) / (x - x); + } + + /* + * negative underflow thresold -(1774+9ulp) + */ + if (x < -1774.0000000000000000000000000000017749370L) { + z = tiny / x; + if (xk == 1) + z = -z; + return (z * tiny); + } + + /* INDENT OFF */ + /* + * now compute gamma(x) by -1/((sin(pi*y)/pi)*gamma(1+y)), y = -x + */ + /* + * First compute ss = -sin(pi*y)/pi so that + * gamma(x) = 1/(ss*gamma(1+y)) + */ + /* INDENT ON */ + y = -x; + j = (int) y; + z = y - (long double) j; + if (z > 0.3183098861837906715377675L) + if (z > 0.6816901138162093284622325L) + ss = kpsin(one - z); + else + ss = kpcos(0.5L - z); + else + ss = kpsin(z); + if (xk == 0) { + ss.h = -ss.h; + ss.l = -ss.l; + } + + /* Then compute ww = gamma(1+y), note that result scale to 2**m */ + m = 0; + if (j < 7) { + ww = gam_n(j + 1, z); + } else { + w = y + one; + if ((lx & 1) == 0) { /* y+1 exact (note that y<184) */ + ww = large_gam(w, &m); + } else { + t = w - one; + if (t == y) { /* y+one exact */ + ww = large_gam(w, &m); + } else { /* use y*gamma(y) */ + if (j == 7) + ww = gam_n(j, z); + else + ww = large_gam(y, &m); + t4 = ww.h + ww.l; + t1 = CHOPPED((y)); + t2 = CHOPPED((t4)); + /* t4 will not be too large */ + ww.l = y * (ww.l - (t2 - ww.h)) + (y - t1) * t2; + ww.h = t1 * t2; + } + } + } + + /* compute 1/(ss*ww) */ + t3 = ss.h + ss.l; + t4 = ww.h + ww.l; + t1 = CHOPPED((t3)); + t2 = CHOPPED((t4)); + z1 = ss.l - (t1 - ss.h); /* (t1,z1) = ss */ + z2 = ww.l - (t2 - ww.h); /* (t2,z2) = ww */ + t3 = t3 * t4; /* t3 = ss*ww */ + z3 = one / t3; /* z3 = 1/(ss*ww) */ + t5 = t1 * t2; + z5 = z1 * t4 + t1 * z2; /* (t5,z5) = ss*ww */ + t1 = CHOPPED((t3)); /* (t1,z1) = ss*ww */ + z1 = z5 - (t1 - t5); + t2 = CHOPPED((z3)); /* leading 1/(ss*ww) */ + z2 = z3 * (t2 * z1 - (one - t2 * t1)); + z = t2 - z2; + + return (scalbnl(z, -m)); +} diff --git a/usr/src/lib/libm/common/m9x/trunc.c b/usr/src/lib/libm/common/m9x/trunc.c new file mode 100644 index 0000000000..381000bbd8 --- /dev/null 
+++ b/usr/src/lib/libm/common/m9x/trunc.c @@ -0,0 +1,70 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#if defined(ELFOBJ) +#pragma weak trunc = __trunc +#endif + +#include "libm.h" + +double +trunc(double x) { + union { + unsigned i[2]; + double d; + } xx; + unsigned hx, sx, i; + + xx.d = x; + hx = xx.i[HIWORD] & ~0x80000000; + sx = xx.i[HIWORD] & 0x80000000; + if (hx < 0x43300000) { /* |x| < 2^52 */ + if (hx < 0x3ff00000) /* |x| < 1 */ + return (sx ? -0.0 : 0.0); + + /* chop x at the integer bit */ + if (hx < 0x41300000) { + i = 1 << (0x412 - (hx >> 20)); + xx.i[HIWORD] &= ~(i | (i - 1)); + xx.i[LOWORD] = 0; + } else { + i = 1 << (0x432 - (hx >> 20)); + xx.i[LOWORD] &= ~(i | (i - 1)); + } + return (xx.d); + } else if (hx < 0x7ff00000) + return (x); + else +#if defined(FPADD_TRAPS_INCOMPLETE_ON_NAN) + return (hx >= 0x7ff80000 ? 
x : x + x); + /* assumes sparc-like QNaN */ +#else + return (x + x); +#endif +} diff --git a/usr/src/lib/libm/common/m9x/truncf.c b/usr/src/lib/libm/common/m9x/truncf.c new file mode 100644 index 0000000000..a81b16c09e --- /dev/null +++ b/usr/src/lib/libm/common/m9x/truncf.c @@ -0,0 +1,63 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#if defined(ELFOBJ) +#pragma weak truncf = __truncf +#endif + +#include "libm.h" + +float +truncf(float x) { + union { + unsigned i; + float f; + } xx; + unsigned hx, sx, i; + + xx.f = x; + hx = xx.i & ~0x80000000; + sx = xx.i & 0x80000000; + if (hx < 0x4b000000) { /* |x| < 2^23 */ + if (hx < 0x3f800000) /* |x| < 1 */ + return (sx ? -0.0F : 0.0F); + + /* chop x at the integer bit */ + i = 1 << (0x95 - (hx >> 23)); + xx.i &= ~((i << 1) - 1); + return (xx.f); + } else if (hx < 0x7f800000) /* |x| is integral */ + return (x); + else +#if defined(FPADD_TRAPS_INCOMPLETE_ON_NAN) + return (hx > 0x7f800000 ? 
x * x : x + x); +#else + return (x + x); +#endif +} diff --git a/usr/src/lib/libm/common/m9x/truncl.c b/usr/src/lib/libm/common/m9x/truncl.c new file mode 100644 index 0000000000..29f1d6a3a0 --- /dev/null +++ b/usr/src/lib/libm/common/m9x/truncl.c @@ -0,0 +1,110 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#if defined(ELFOBJ) +#pragma weak truncl = __truncl +#endif + +#include "libm.h" + +#if defined(__sparc) +long double +truncl(long double x) { + union { + unsigned i[4]; + long double q; + } xx; + unsigned hx, sx; + int j; + + xx.q = x; + sx = xx.i[0] & 0x80000000; + hx = xx.i[0] & ~0x80000000; + + /* handle trivial cases */ + if (hx >= 0x406f0000) /* |x| >= 2^112 + ... or x is nan */ + return (hx >= 0x7fff0000 ? x + x : x); + + /* handle |x| < 1 */ + if (hx < 0x3fff0000) + return (sx ? 
-0.0L : 0.0L); + + j = 0x406f - (hx >> 16); /* 1 <= j <= 112 */ + xx.i[0] = hx; + if (j >= 96) { /* 96 <= j <= 112 */ + xx.i[0] &= ~((1 << (j - 96)) - 1); + xx.i[1] = xx.i[2] = xx.i[3] = 0; + } else if (j >= 64) { /* 64 <= j <= 95 */ + xx.i[1] &= ~((1 << (j - 64)) - 1); + xx.i[2] = xx.i[3] = 0; + } else if (j >= 32) { /* 32 <= j <= 63 */ + xx.i[2] &= ~((1 << (j - 32)) - 1); + xx.i[3] = 0; + } else /* 1 <= j <= 31 */ + xx.i[3] &= ~((1 << j) - 1); + + /* negate result if need be */ + if (sx) + xx.i[0] |= 0x80000000; + return (xx.q); +} +#elif defined(__x86) +long double +truncl(long double x) { + union { + unsigned i[3]; + long double e; + } xx; + int ex, sx, i; + + xx.e = x; + ex = xx.i[2] & 0x7fff; + sx = xx.i[2] & 0x8000; + if (ex < 0x403e) { /* |x| < 2^63 */ + if (ex < 0x3fff) /* |x| < 1 */ + return (sx ? -0.0L : 0.0L); + + /* chop x at the integer bit */ + if (ex < 0x401e) { + i = 1 << (0x401d - ex); + xx.i[1] &= ~(i | (i - 1)); + xx.i[0] = 0; + } else { + i = 1 << (0x403d - ex); + xx.i[0] &= ~(i | (i - 1)); + } + return (xx.e); + } else if (ex < 0x7fff) /* x is integral */ + return (x); + else /* inf or nan */ + return (x + x); +} +#else +#error Unknown architecture +#endif /* defined(__sparc) || defined(__x86) */ diff --git a/usr/src/lib/libm/common/mapfile-vers b/usr/src/lib/libm/common/mapfile-vers new file mode 100644 index 0000000000..d2f0f69deb --- /dev/null +++ b/usr/src/lib/libm/common/mapfile-vers @@ -0,0 +1,769 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. 
+# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# Copyright 2011 Nexenta Systems, Inc. All rights reserved. +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# Interface definition for libm.so.2 +# +# For information regarding the establishment of versioned definitions see: +# The Linker and Libraries Manual (version 2.5 or greater) +# This is part of the Developers Guide in the Answerbook. Specifically refer +# to Chapter 2 under section "Defining Additional Symbols" through section +# "Reducing Symbol Scope", and Chapter 5 "Versioning". +# +# For specific rules for the modification (evolution) of these version +# definitions see: +# psarc_1995_14: Integration of Scoped Libraries +# (/shared/sac/PSARC/1995/014) +# Policy for Shared Library Version Names and Interface Definitions +# (/shared/ON/general_docs/scoping-rules.ps) + +# +# MAPFILE HEADER START +# +# WARNING: STOP NOW. DO NOT MODIFY THIS FILE. +# Object versioning must comply with the rules detailed in +# +# usr/src/lib/README.mapfiles +# +# You should not be making modifications here until you've read the most current +# copy of that file. If you need help, contact a gatekeeper for guidance. 
+# +# MAPFILE HEADER END +# + +$mapfile_version 2 + +$if _ELF32 +$add lf64 +$endif +$if _sparc && _ELF32 +$add sparc32 +$endif +$if _sparc && _ELF64 +$add sparcv9 +$endif +$if _x86 && _ELF32 +$add i386 +$endif +$if _x86 && _ELF64 +$add amd64 +$endif + +SYMBOL_VERSION SUNW_1.3 { + global: + __isnanf; #LSARC/2003/658 + __isnanl; #LSARC/2003/658 + _isnan { FLAGS = NODYNSORT }; #LSARC/2003/658 + _isnand { FLAGS = NODYNSORT }; #LSARC/2003/658 + _isnanf { FLAGS = NODYNSORT }; #LSARC/2003/658 + _logb { FLAGS = NODYNSORT }; #LSARC/2003/658 + _modf { FLAGS = NODYNSORT }; #LSARC/2003/658 + _modff { FLAGS = NODYNSORT }; #LSARC/2003/658 + _nextafter { FLAGS = NODYNSORT }; #LSARC/2003/658 + _scalb { FLAGS = NODYNSORT }; #LSARC/2003/658 + isnand { FLAGS = NODYNSORT }; #LSARC/2003/658 + isnanf; #LSARC/2003/658 + isnanl; #LSARC/2003/658 +} SUNW_1.2; + +SYMBOL_VERSION SUNW_1.2 { + global: + __acoshf; + __acoshl; + __asinhf; + __asinhl; + __atanhf; + __atanhl; + __cabs; + __cabsf; + __cabsl; + __cacos; + __cacosf; + __cacosh; + __cacoshf; + __cacoshl; + __cacosl; + __carg; + __cargf; + __cargl; + __casin; + __casinf; + __casinh; + __casinhf; + __casinhl; + __casinl; + __catan; + __catanf; + __catanh; + __catanhf; + __catanhl; + __catanl; + __cbrtf; + __cbrtl; + __ccos; + __ccosf; + __ccosh; + __ccoshf; + __ccoshl; + __ccosl; + __cexp; + __cexpf; + __cexpl; + __cimag; + __cimagf; + __cimagl; + __clog; + __clogf; + __clogl; + __conj; + __conjf; + __conjl; + __copysignf; + __copysignl; + __cpow; + __cpowf; + __cpowl; + __cproj; + __cprojf; + __cprojl; + __creal; + __crealf; + __creall; + __csin; + __csinf; + __csinh; + __csinhf; + __csinhl; + __csinl; + __csqrt; + __csqrtf; + __csqrtl; + __ctan; + __ctanf; + __ctanh; + __ctanhf; + __ctanhl; + __ctanl; + __erfcf; + __erfcl; + __erff; + __erfl; + __exp2; + __exp2f; + __exp2l; + __expm1f; + __expm1l; + __fdim; + __fdimf; + __fdiml; + __feclearexcept; + __fegetenv; + __fegetexceptflag; +$if _x86 + __fegetprec; #LSARC/1996/175 +$endif + 
__fegetround; + __feholdexcept; + __fenv_dfl_env; #LSARC/1996/175 + __feraiseexcept; + __fesetenv; + __fesetexceptflag; +$if _x86 + __fesetprec; #LSARC/1996/175 +$endif + __fesetround; + __fetestexcept; + __feupdateenv; + __fex_get_handling; #LSARC/1996/175 + __fex_get_log; #LSARC/1996/175 + __fex_get_log_depth; #LSARC/1996/175 + __fex_getexcepthandler; #LSARC/1996/175 + __fex_log_entry; #LSARC/1996/175 + __fex_merge_flags; #LSARC/1996/175 + __fex_set_handling; #LSARC/1996/175 + __fex_set_log; #LSARC/1996/175 + __fex_set_log_depth; #LSARC/1996/175 + __fex_setexcepthandler; #LSARC/1996/175 + __fma; + __fmaf; + __fmal; + __fmax; + __fmaxf; + __fmaxl; + __fmin; + __fminf; + __fminl; + __frexp; + __gammaf; #LSARC/2003/279 + __gammaf_r; #LSARC/2003/279 + __gammal; #LSARC/2003/279 + __gammal_r; #LSARC/2003/279 + __hypotf; + __hypotl; + __ilogbf; + __ilogbl; + __j0f; #LSARC/2003/279 + __j0l; #LSARC/2003/279 + __j1f; #LSARC/2003/279 + __j1l; #LSARC/2003/279 + __jnf; #LSARC/2003/279 + __jnl; #LSARC/2003/279 + __ldexp; + __lgammaf; + __lgammaf_r; #LSARC/2003/279 + __lgammal; + __lgammal_r; #LSARC/2003/279 + __llrint; + __llrintf; + __llrintl; + __llround; + __llroundf; + __llroundl; + __log1pf; + __log1pl; + __log2; + __log2f; + __log2l; + __logbf; + __logbl; +$if amd64 || sparcv9 + __lrint { FLAGS = NODYNSORT }; + __lrintf { FLAGS = NODYNSORT }; + __lrintl { FLAGS = NODYNSORT }; + __lround { FLAGS = NODYNSORT }; + __lroundf { FLAGS = NODYNSORT }; + __lroundl { FLAGS = NODYNSORT }; +$else + __lrint; + __lrintf; + __lrintl; + __lround; + __lroundf; + __lroundl; +$endif + __modf; + __nan; + __nanf; + __nanl; + __nearbyint; + __nearbyintf; + __nearbyintl; + __nextafterf; + __nextafterl; + __nexttoward; + __nexttowardf; + __nexttowardl; + __remainderf; + __remainderl; + __remquo; + __remquof; + __remquol; + __rintf; + __rintl; + __round; + __roundf; + __roundl; + __scalbf; #LSARC/2003/279 + __scalbl; #LSARC/2003/279 + __scalbln; + __scalblnf; + __scalblnl; + __scalbnf; + 
__scalbnl; + __signgamf; #LSARC/2003/279 + __signgaml; #LSARC/2003/279 + __significandf; #LSARC/2003/279 + __significandl; #LSARC/2003/279 + __sincos; #LSARC/2003/279 + __sincosf; #LSARC/2003/279 + __sincosl; #LSARC/2003/279 + __tgamma; + __tgammaf; + __tgammal; + __trunc; + __truncf; + __truncl; + __xpg6 { FLAGS = NODIRECT }; #private contract with libc group + __y0f; #LSARC/2003/279 + __y0l; #LSARC/2003/279 + __y1f; #LSARC/2003/279 + __y1l; #LSARC/2003/279 + __ynf; #LSARC/2003/279 + __ynl; #LSARC/2003/279 + acosf; + acoshf; + acoshl; + acosl; + asinf; + asinhf; + asinhl; + asinl; + atan2f; + atan2l; + atanf; + atanhf; + atanhl; + atanl; + cabs; + cabsf; + cabsl; + cacos; + cacosf; + cacosh; + cacoshf; + cacoshl; + cacosl; + carg; + cargf; + cargl; + casin; + casinf; + casinh; + casinhf; + casinhl; + casinl; + catan; + catanf; + catanh; + catanhf; + catanhl; + catanl; + cbrtf; + cbrtl; + ccos; + ccosf; + ccosh; + ccoshf; + ccoshl; + ccosl; + ceilf; + ceill; + cexp; + cexpf; + cexpl; + cimag; + cimagf; + cimagl; + clog; + clogf; + clogl; + conj; + conjf; + conjl; + copysignf; + copysignl; + cosf; + coshf; + coshl; + cosl; + cpow; + cpowf; + cpowl; + cproj; + cprojf; + cprojl; + creal; + crealf; + creall; + csin; + csinf; + csinh; + csinhf; + csinhl; + csinl; + csqrt; + csqrtf; + csqrtl; + ctan; + ctanf; + ctanh; + ctanhf; + ctanhl; + ctanl; + erfcf; + erfcl; + erff; + erfl; + exp2; + exp2f; + exp2l; + expf; + expl; + expm1f; + expm1l; + fabsf; + fabsl; + fdim; + fdimf; + fdiml; + feclearexcept; + fegetenv; + fegetexceptflag; +$if _x86 + fegetprec; #LSARC/1996/175 +$endif + fegetround; + feholdexcept; + feraiseexcept; + fesetenv; + fesetexceptflag; +$if _x86 + fesetprec; #LSARC/1996/175 +$endif + fesetround; + fetestexcept; + feupdateenv; + fex_get_handling; #LSARC/1996/175 + fex_get_log; #LSARC/1996/175 + fex_get_log_depth; #LSARC/1996/175 + fex_getexcepthandler; #LSARC/1996/175 + fex_log_entry; #LSARC/1996/175 + fex_merge_flags; #LSARC/1996/175 + fex_set_handling; 
#LSARC/1996/175 + fex_set_log; #LSARC/1996/175 + fex_set_log_depth; #LSARC/1996/175 + fex_setexcepthandler; #LSARC/1996/175 + floorf; + floorl; + fma; + fmaf; + fmal; + fmax; + fmaxf; + fmaxl; + fmin; + fminf; + fminl; + fmodf; + fmodl; + frexp; + frexpf; + frexpl; + gammaf; #LSARC/2003/279 + gammaf_r; #LSARC/2003/279 + gammal; #LSARC/2003/279 + gammal_r; #LSARC/2003/279 + hypotf; + hypotl; + ilogbf; + ilogbl; + j0f; #LSARC/2003/279 + j0l; #LSARC/2003/279 + j1f; #LSARC/2003/279 + j1l; #LSARC/2003/279 + jnf; #LSARC/2003/279 + jnl; #LSARC/2003/279 + ldexp; + ldexpf; + ldexpl; + lgammaf; + lgammaf_r; #LSARC/2003/279 + lgammal; + lgammal_r; #LSARC/2003/279 +$if amd64 || sparcv9 + llrint { FLAGS = NODYNSORT }; + llrintf { FLAGS = NODYNSORT }; + llrintl { FLAGS = NODYNSORT }; + llround { FLAGS = NODYNSORT }; + llroundf { FLAGS = NODYNSORT }; + llroundl { FLAGS = NODYNSORT }; +$else + llrint; + llrintf; + llrintl; + llround; + llroundf; + llroundl; +$endif + log10f; + log10l; + log1pf; + log1pl; + log2; + log2f; + log2l; + logbf; + logbl; + logf; + logl; + lrint; + lrintf; + lrintl; + lround; + lroundf; + lroundl; + modf; + modff; + modfl; + nan; + nanf; + nanl; + nearbyint; + nearbyintf; + nearbyintl; + nextafterf; + nextafterl; + nexttoward; + nexttowardf; + nexttowardl; + powf; + powl; + remainderf; + remainderl; + remquo; + remquof; + remquol; + rintf; + rintl; + round; + roundf; + roundl; + scalbf; #LSARC/2003/279 + scalbl; #LSARC/2003/279 + scalbln; + scalblnf; + scalblnl; + scalbnf; + scalbnl; + signgamf; #LSARC/2003/279 + signgaml; #LSARC/2003/279 + significandf; #LSARC/2003/279 + significandl; #LSARC/2003/279 + sincos; #LSARC/2003/279 + sincosf; #LSARC/2003/279 + sincosl; #LSARC/2003/279 + sinf; + sinhf; + sinhl; + sinl; + sqrtf; + sqrtl; + tanf; + tanhf; + tanhl; + tanl; + tgamma; + tgammaf; + tgammal; + trunc; + truncf; + truncl; + y0f; #LSARC/2003/279 + y0l; #LSARC/2003/279 + y1f; #LSARC/2003/279 + y1l; #LSARC/2003/279 + ynf; #LSARC/2003/279 + ynl; 
#LSARC/2003/279 +} SUNW_1.1.1; + +SYMBOL_VERSION SUNW_1.1.1 { + global: + __acosf; + __acosl; + __asinf; + __asinl; + __atan2f; + __atan2l; + __atanf; + __atanl; + __ceilf; + __ceill; + __cosf; + __coshf; + __coshl; + __cosl; + __expf; + __expl; + __fabsf; + __fabsl; + __floorf; + __floorl; + __fmodf; + __fmodl; + __frexpf; + __frexpl; + __ldexpf; + __ldexpl; + __log10f; + __log10l; + __logf; + __logl; + __modff; + __modfl; + __powf; + __powl; + __sinf; + __sinhf; + __sinhl; + __sinl; + __sqrtf; + __sqrtl; + __tanf; + __tanhf; + __tanhl; + __tanl; +} SUNW_1.1; + +SYMBOL_VERSION SUNW_1.1 { + global: + __acos; + __acosh; + __asin; + __asinh; + __atan; + __atan2; + __atanh; + __cbrt; + __ceil; + __copysign; + __cos; + __cosh; + __erf; + __erfc; + __exp; + __expm1; + __fabs; + __floor; + __fmod; + __gamma; + __gamma_r; + __hypot; + __ilogb; + __isnan; + __j0; + __j1; + __jn; + __lgamma; + __lgamma_r; + __log; + __log10; + __log1p; + __logb; + __nextafter; + __pow; + __remainder; + __rint; + __scalb; + __scalbn; + __signgam; + __significand; + __sin; + __sinh; + __sqrt; + __tan; + __tanh; + __y0; + __y1; + __yn; + acos; + acosh; + asin; + asinh; + atan; + atan2; + atanh; + cbrt; + ceil; + copysign; + cos; + cosh; + erf; + erfc; + exp; + expm1; + fabs; + floor; + fmod; + gamma; + gamma_r; + hypot; + ilogb; + isnan; + j0; + j1; + jn; + lgamma; + lgamma_r; + log; + log10; + log1p; + logb; + matherr; + nextafter; + pow; + remainder; + rint; + scalb; + scalbn; + signgam; + significand; + sin; + sinh; + sqrt; + tan; + tanh; + y0; + y1; + yn; +}; + +$if amd64 || sparcv9 +SYMBOL_VERSION SUNWprivate_1.2 { +$else +SYMBOL_VERSION SUNWprivate_1.3 { +$endif + global: + __libm_mt_fex_sync; # -lmtsk + __mt_fex_sync; # -lmtsk +$if amd64 || sparcv9 +} SUNWprivate_1.1; +$else +} SUNWprivate_1.2; + +SYMBOL_VERSION SUNWprivate_1.2 { + global: + __libm_errno; # SC3.0.1 -lmopt +} SUNWprivate_1.1; +$endif + +SYMBOL_VERSION SUNWprivate_1.1 { + global: + _lib_version; + __libm__rem_pio2; + 
__libm__rem_pio2m; + # anything else is local + local: + # For symbols with multiple names, move the less preferred + # names out of .SUNW_dynsymsort + feclearexcept96 { FLAGS = NODYNSORT }; + feraiseexcept96 { FLAGS = NODYNSORT }; + fetestexcept96 { FLAGS = NODYNSORT }; + fegetexceptflag96 { FLAGS = NODYNSORT }; + fesetexceptflag96 { FLAGS = NODYNSORT }; + feupdateenv96 { FLAGS = NODYNSORT }; + fegetenv96 { FLAGS = NODYNSORT }; + fesetenv96 { FLAGS = NODYNSORT }; + fegetround96 { FLAGS = NODYNSORT }; + *; # symbols not mentioned in this file are scoped out +}; diff --git a/usr/src/lib/libm/i386/Makefile b/usr/src/lib/libm/i386/Makefile new file mode 100644 index 0000000000..0a710fb63a --- /dev/null +++ b/usr/src/lib/libm/i386/Makefile @@ -0,0 +1,28 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2011 Nexenta Systems, Inc. All rights reserved. +# + +TARGET_ARCH= i386 +include ../Makefile.com + +# +# Without this option GCC will place floats into x87 (or wider) floating point +# registers, ending up with better-than-ieee precision. +# +CFLAGS += -_gcc=-ffloat-store +$(OBJS_M9XSSE) := CFLAGS += -xarch=sse2 + +install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT) + +include ../Makefile.targ diff --git a/usr/src/lib/libm/i386/src/__reduction.s b/usr/src/lib/libm/i386/src/__reduction.s new file mode 100644 index 0000000000..8c1e9f2334 --- /dev/null +++ b/usr/src/lib/libm/i386/src/__reduction.s @@ -0,0 +1,90 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). 
+ * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .file "__reduction.s" + +/ +/ After argument reduction which returns n: +/ n mod 4 sin(x) cos(x) tan(x) +/ ---------------------------------------------------------- +/ 0 S C S/C +/ 1 C -S -C/S +/ 2 -S -C S/C +/ 3 -C S -C/S +/ ---------------------------------------------------------- + +#include "libm.h" +#include "libm_synonyms.h" +#include "libm_protos.h" +#undef fabs + + ENTRY(__reduction) +#ifndef PIC + movl 12(%esp),%eax / load the high part of arg +#else + movl 16(%esp),%eax / load the high part of arg +#endif + andl $0x7fffffff,%eax / clear sign + cmpl $0x3fe921fb,%eax / Is |x| < pi/4 (= 0x3fe921fb54...) ? + jbe .L0 + cmpl $0x7ff00000,%eax / Is arg a NaN or an Inf ?
+ jb .L1 +.L0: +#ifndef PIC + fldl 8(%esp) / push arg +#else + fldl 12(%esp) / push arg +#endif + fwait + movl $0,%eax / set n = 0 + ret +.L1: + pushl %ebp + movl %esp,%ebp + subl $16,%esp + PIC_SETUP(1) + leal -16(%ebp),%eax / address of y[0] + pushl %eax +#ifndef PIC + pushl 16(%ebp) + pushl 12(%ebp) +#else + pushl 20(%ebp) + pushl 16(%ebp) +#endif + call PIC_F(__rem_pio2) / call __rem_pio2(x,&y) + fldl -8(%ebp) / y[1] + fldl -16(%ebp) / y[0], y[1] + faddp %st,%st(1) / y[0]+y[1] round-to-extended + addl $12,%esp / drop the 3 arg words only (4*3); leave frees y[] + andl $3,%eax + PIC_WRAPUP + leave + ret + .align 4 + SET_SIZE(__reduction) diff --git a/usr/src/lib/libm/i386/src/acos.s b/usr/src/lib/libm/i386/src/acos.s new file mode 100644 index 0000000000..7305bd7e63 --- /dev/null +++ b/usr/src/lib/libm/i386/src/acos.s @@ -0,0 +1,88 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms.
+ */ + + .file "acos.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(acos,function) +#include "libm_synonyms.h" +#include "libm_protos.h" + +#undef fabs + + ENTRY(acos) + fldl 4(%esp) / push x + fld1 / push 1 + fld %st(1) / x , 1 , x + fabs / |x| , 1 , x + fucomp + fstsw %ax + sahf + ja .ERR + fadd %st(1),%st / 1+x,x + fldz + fucomp + fstsw %ax + sahf + jp .L1 + jne .L1 + / x is -1 + fstp %st(0) / -1 + fstp %st(0) / empty NPX stack + fldpi + ret +.L1: + fxch %st(1) / x,1+x + fld1 / 1,x,1+x + fsubp %st,%st(1) / 1-x,1+x + fdivp %st,%st(1) / (1-x)/(1+x) + fsqrt + fld1 / 1,sqrt((1-x)/(1+x)) + fpatan + fadd %st(0),%st + ret + +.ERR: + / |x| > 1 + pushl %ebp + movl %esp,%ebp + PIC_SETUP(1) + fstp %st(0) / x + fstp %st(0) / empty NPX stack + pushl $1 + pushl 12(%ebp) / high x + pushl 8(%ebp) / low x + pushl 12(%ebp) / high x + pushl 8(%ebp) / low x + call PIC_F(_SVID_libm_err) / report SVID result/error + addl $20,%esp + PIC_WRAPUP + leave + ret + .align 4 + SET_SIZE(acos) diff --git a/usr/src/lib/libm/i386/src/acosf.s b/usr/src/lib/libm/i386/src/acosf.s new file mode 100644 index 0000000000..58b4328262 --- /dev/null +++ b/usr/src/lib/libm/i386/src/acosf.s @@ -0,0 +1,78 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .file "acosf.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(acosf,function) +#include "libm_synonyms.h" +#include "libm_protos.h" + +#undef fabs + + ENTRY(acosf) + flds 4(%esp) / push x + fld1 / push 1 + fld %st(1) / x , 1 , x + fabs / |x| , 1 , x + fucomp + fstsw %ax + sahf + ja .ERR + fadd %st(1),%st / 1+x,x + fldz + fucomp + fstsw %ax + sahf + jp .L1 + jne .L1 + / x is -1 + fstp %st(0) / x + fstp %st(0) / empty NPX stack + fldpi + ret +.L1: + fxch %st(1) / x,1+x + fld1 / 1,x,1+x + fsubp %st,%st(1) / 1-x,1+x + fdivp %st,%st(1) / (1-x)/(1+x) + fsqrt + fld1 / 1,sqrt((1-x)/(1+x)) + fpatan + fadd %st(0),%st + ret + +.ERR: + / |x| > 1 + fstp %st(0) / x + fstp %st(0) / empty NPX stack + fldz + fdiv %st(0),%st / 0/0 + ret + .align 4 + SET_SIZE(acosf) diff --git a/usr/src/lib/libm/i386/src/acosl.s b/usr/src/lib/libm/i386/src/acosl.s new file mode 100644 index 0000000000..e552f645fb --- /dev/null +++ b/usr/src/lib/libm/i386/src/acosl.s @@ -0,0 +1,75 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .file "acosl.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(acosl,function) +#include "libm_synonyms.h" + +#undef fabs + + ENTRY(acosl) + fldt 4(%esp) / push x + fld1 / push 1 + fld %st(1) / x , 1 , x + fabs / |x| , 1 , x + fucomp + fstsw %ax + sahf + ja 9f + fadd %st(1),%st / 1+x,x + fldz + fucomp + fstsw %ax + sahf + jp .L1 + jne .L1 + / x is -1 + fstp %st(0) / -1 + fstp %st(0) / empty NPX stack + fldpi + ret +.L1: + fxch %st(1) / x,1+x + fld1 / 1,x,1+x + fsubp %st,%st(1) / 1-x,1+x + fdivp %st,%st(1) / (1-x)/(1+x) + fsqrt + fld1 / 1,sqrt((1-x)/(1+x)) + fpatan + fadd %st(0),%st + ret +9: + / |x| > 1 + fstp %st(0) / x + fsub %st,%st(0) / +/-0 or NaN+invalid + fdiv %st,%st(0) / NaN+invalid or NaN + ret + .align 4 + SET_SIZE(acosl) diff --git a/usr/src/lib/libm/i386/src/asin.s b/usr/src/lib/libm/i386/src/asin.s new file mode 100644 index 0000000000..0db6b45e41 --- /dev/null +++ b/usr/src/lib/libm/i386/src/asin.s @@ -0,0 +1,73 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .file "asin.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(asin,function) +#include "libm_synonyms.h" +#include "libm_protos.h" + +#undef fabs + + ENTRY(asin) + fldl 4(%esp) / push x + fld1 / push 1 + fld %st(1) / x , 1 , x + fabs / |x| , 1 , x + fucomp + fstsw %ax + sahf + ja .ERR + fadd %st(1),%st / 1+x,x + fld1 / 1,1+x,x + fsub %st(2),%st / 1-x,1+x,x + fmulp %st,%st(1) / (1-x)*(1+x),x + fsqrt / sqrt((1-x)*(1+x)),x + fpatan / atan(x/sqrt((1-x)*(1+x))) + ret + +.ERR: + / |x| > 1 + pushl %ebp + movl %esp,%ebp + PIC_SETUP(1) + fstp %st(0) / x + fstp %st(0) / empty NPX stack + pushl $2 + pushl 12(%ebp) / high x + pushl 8(%ebp) / low x + pushl 12(%ebp) / high x + pushl 8(%ebp) / low x + call PIC_F(_SVID_libm_err) / report SVID result/error + addl $20,%esp + PIC_WRAPUP + leave + ret + .align 4 + SET_SIZE(asin) diff --git a/usr/src/lib/libm/i386/src/asinf.s b/usr/src/lib/libm/i386/src/asinf.s new file mode 100644 index 0000000000..a2a7c2c0ee --- /dev/null +++ b/usr/src/lib/libm/i386/src/asinf.s @@ -0,0 +1,63 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License.
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .file "asinf.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(asinf,function) +#include "libm_synonyms.h" +#include "libm_protos.h" + +#undef fabs + + ENTRY(asinf) + flds 4(%esp) / push x + fld1 / push 1 + fld %st(1) / x , 1 , x + fabs / |x| , 1 , x + fucomp + fstsw %ax + sahf + ja .ERR + fadd %st(1),%st / 1+x,x + fld1 / 1,1+x,x + fsub %st(2),%st / 1-x,1+x,x + fmulp %st,%st(1) / (1-x)*(1+x),x + fsqrt / sqrt((1-x)*(1+x)),x + fpatan / atan(x/sqrt((1-x)*(1+x))) + ret + +.ERR: + / |x| > 1 + fstp %st(0) / x + fstp %st(0) / empty NPX stack + fldz + fdiv %st(0),%st / 0/0 + ret + .align 4 + SET_SIZE(asinf) diff --git a/usr/src/lib/libm/i386/src/asinl.s b/usr/src/lib/libm/i386/src/asinl.s new file mode 100644 index 0000000000..96d2047fcb --- /dev/null +++ b/usr/src/lib/libm/i386/src/asinl.s @@ -0,0 +1,60 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .file "asinl.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(asinl,function) +#include "libm_synonyms.h" + +#undef fabs + + ENTRY(asinl) + fldt 4(%esp) / push x + fld1 / push 1 + fld %st(1) / x , 1 , x + fabs / |x| , 1 , x + fucomp + fstsw %ax + sahf + ja 9f + fadd %st(1),%st / 1+x,x + fld1 / 1,1+x,x + fsub %st(2),%st / 1-x,1+x,x + fmulp %st,%st(1) / (1-x)*(1+x),x + fsqrt / sqrt((1-x)*(1+x)),x + fpatan / atan(x/sqrt((1-x)*(1+x))) + ret +9: + / |x| > 1 + fstp %st(0) / x + fsub %st,%st(0) / +/-0 or NaN+invalid + fdiv %st,%st(0) / NaN+invalid or NaN + ret + .align 4 + SET_SIZE(asinl) diff --git a/usr/src/lib/libm/i386/src/atan.s b/usr/src/lib/libm/i386/src/atan.s new file mode 100644 index 0000000000..a48c5c75a0 --- /dev/null +++ b/usr/src/lib/libm/i386/src/atan.s @@ -0,0 +1,41 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .file "atan.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(atan,function) +#include "libm_synonyms.h" + + ENTRY(atan) + fldl 4(%esp) / push arg + fld1 / push 1.0 + fpatan / atan(arg/1.0) + ret + .align 4 + SET_SIZE(atan) diff --git a/usr/src/lib/libm/i386/src/atan2.s b/usr/src/lib/libm/i386/src/atan2.s new file mode 100644 index 0000000000..6287f13537 --- /dev/null +++ b/usr/src/lib/libm/i386/src/atan2.s @@ -0,0 +1,70 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + + .file "atan2.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(atan2,function) +#include "libm_synonyms.h" +#include "libm_protos.h" + + ENTRY(atan2) + movl 4(%esp),%eax / low part of y + movl 12(%esp),%ecx / low part of x + orl %eax,%ecx + jz .maybe_0s + + / not both x and y are 0's +1: + fldl 4(%esp) / push y + fldl 12(%esp) / push x + fpatan / return atan2(y,x) + ret + +.maybe_0s: + movl 8(%esp),%eax / high part of y + movl 16(%esp),%ecx / high part of x + orl %eax,%ecx + andl $0x7fffffff,%ecx / clear sign + jnz 1b + / both x and y are 0's + pushl %ebp + movl %esp,%ebp + PIC_SETUP(1) + pushl $3 + pushl 12(%ebp) / high y + pushl 8(%ebp) / low y + pushl 20(%ebp) / high x + pushl 16(%ebp) / low x + call PIC_F(_SVID_libm_err) / report SVID result/error + addl $20,%esp + PIC_WRAPUP + leave + ret + .align 4 + SET_SIZE(atan2) diff --git a/usr/src/lib/libm/i386/src/atan2f.s b/usr/src/lib/libm/i386/src/atan2f.s new file mode 100644 index 0000000000..e45afcc264 --- /dev/null +++ b/usr/src/lib/libm/i386/src/atan2f.s @@ -0,0 +1,42 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 
+ */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .file "atan2f.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(atan2f,function) +#include "libm_synonyms.h" +#include "libm_protos.h" + + ENTRY(atan2f) + flds 4(%esp) / push y + flds 8(%esp) / push x + fpatan / return atan2(y,x) + ret + .align 4 + SET_SIZE(atan2f) diff --git a/usr/src/lib/libm/i386/src/atan2l.s b/usr/src/lib/libm/i386/src/atan2l.s new file mode 100644 index 0000000000..7720ddedef --- /dev/null +++ b/usr/src/lib/libm/i386/src/atan2l.s @@ -0,0 +1,41 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + + .file "atan2l.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(atan2l,function) +#include "libm_synonyms.h" + + ENTRY(atan2l) + fldt 4(%esp) / push y + fldt 16(%esp) / push x + fpatan / return atan2(y,x) + ret + .align 4 + SET_SIZE(atan2l) diff --git a/usr/src/lib/libm/i386/src/atanl.s b/usr/src/lib/libm/i386/src/atanl.s new file mode 100644 index 0000000000..f4712344f3 --- /dev/null +++ b/usr/src/lib/libm/i386/src/atanl.s @@ -0,0 +1,41 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + + .file "atanl.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(atanl,function) +#include "libm_synonyms.h" + + ENTRY(atanl) + fldt 4(%esp) / push arg + fld1 / push 1.0 + fpatan / atan(arg/1.0) + ret + .align 4 + SET_SIZE(atanl) diff --git a/usr/src/lib/libm/i386/src/ceil.s b/usr/src/lib/libm/i386/src/ceil.s new file mode 100644 index 0000000000..12a0c6b125 --- /dev/null +++ b/usr/src/lib/libm/i386/src/ceil.s @@ -0,0 +1,56 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + + .file "ceil.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(ceil,function) +#include "libm_synonyms.h" + + ENTRY(ceil) + subl $8,%esp + fstcw (%esp) / save caller's CW + fldl 12(%esp) / push x + movw (%esp),%cx + orw $0x0c00,%cx + xorw $0x0400,%cx / RC field (bits 10-11) = 10b + movw %cx,4(%esp) + fldcw 4(%esp) / set RD = up + frndint + fstcw 4(%esp) / fetch current CW + movw 4(%esp),%dx + andw $0xf3ff,%dx / clear RC field of current CW + movw (%esp),%cx + andw $0x0c00,%cx / keep only caller's RC field + orw %dx,%cx + movw %cx,(%esp) + fldcw (%esp) / restore RD + addl $8,%esp + ret + .align 4 + SET_SIZE(ceil) diff --git a/usr/src/lib/libm/i386/src/copysign.s b/usr/src/lib/libm/i386/src/copysign.s new file mode 100644 index 0000000000..933eaaafe1 --- /dev/null +++ b/usr/src/lib/libm/i386/src/copysign.s @@ -0,0 +1,51 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms.
+ */ + + .file "copysign.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(copysign,function) +#include "libm_synonyms.h" + + ENTRY(copysign) + movl 8(%esp),%eax / eax <-- hi_32(x) + movl 16(%esp),%ecx / ecx <-- hi_32(y) + andl $0x7fffffff,%eax / eax <-- hi_32(abs(x)) + andl $0x80000000,%ecx / ecx[31] <-- sign_bit(y) + orl %ecx,%eax / eax <-- hi_32(copysign(x,y)) + movl 4(%esp),%ecx / ecx <-- lo_32(x) + / = lo_32(copysign(x,y)) + subl $8,%esp / set up loading dock for result + movl %ecx,(%esp) / copy lo_32(result) to loading dock + movl %eax,4(%esp) / copy hi_32(result) to loading dock + fldl (%esp) / load copysign(x,y) + fwait / in case fldl causes exception + addl $8,%esp / restore stack-pointer for return + ret + .align 4 + SET_SIZE(copysign) diff --git a/usr/src/lib/libm/i386/src/copysignf.s b/usr/src/lib/libm/i386/src/copysignf.s new file mode 100644 index 0000000000..e3a217beeb --- /dev/null +++ b/usr/src/lib/libm/i386/src/copysignf.s @@ -0,0 +1,48 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + + .file "copysignf.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(copysignf,function) +#include "libm_synonyms.h" + + ENTRY(copysignf) + movl 4(%esp),%eax / eax <-- x + movl 8(%esp),%ecx / ecx <-- y + andl $0x7fffffff,%eax / eax <-- abs(x) + andl $0x80000000,%ecx / ecx[31] <-- sign_bit(y) + orl %ecx,%eax / eax <-- copysign(x,y) + subl $4,%esp / set up loading dock for result + movl %eax,(%esp) / copy result to loading dock + flds (%esp) / load copysign(x,y) + fwait / in case flds causes exception + addl $4,%esp / restore stack-pointer for return + ret + .align 4 + SET_SIZE(copysignf) diff --git a/usr/src/lib/libm/i386/src/copysignl.s b/usr/src/lib/libm/i386/src/copysignl.s new file mode 100644 index 0000000000..944117ee41 --- /dev/null +++ b/usr/src/lib/libm/i386/src/copysignl.s @@ -0,0 +1,54 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms.
+ */ + + .file "copysignl.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(copysignl,function) +#include "libm_synonyms.h" + + ENTRY(copysignl) + movl 12(%esp),%eax / sign and bexp of x + movl 24(%esp),%ecx / sign and bexp of y + andl $0x00007fff,%eax / eax <-- bexp(x) + andl $0x00008000,%ecx / ecx <-- sign(y) + orl %ecx,%eax / eax <-- bexp(x) with sign(y) + movl 8(%esp),%ecx / ecx <-- hi_32(sgnfcnd(x)) + movl 4(%esp),%edx / edx <-- lo_32(sgnfcnd(x)) + subl $12,%esp / set up loading dock for result + movl %edx,(%esp) / copy lo_32(result's sgnfcnd) + / to loading dock + movl %ecx,4(%esp) / copy hi_32(result's sgnfcnd) + / to loading dock + movl %eax,8(%esp) / copy sign&bexp(result) + / to loading dock + fldt (%esp) / load copysign(x,y) + addl $12,%esp / restore stack-pointer for return + ret + .align 4 + SET_SIZE(copysignl) diff --git a/usr/src/lib/libm/i386/src/cos.s b/usr/src/lib/libm/i386/src/cos.s new file mode 100644 index 0000000000..44862f2bc9 --- /dev/null +++ b/usr/src/lib/libm/i386/src/cos.s @@ -0,0 +1,59 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. 
All rights reserved. + * Use is subject to license terms. + */ + + .file "cos.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(cos,function) +#include "libm_synonyms.h" +#include "libm_protos.h" + + ENTRY(cos) + PIC_SETUP(1) + call PIC_F(__reduction) + PIC_WRAPUP + cmpl $1,%eax + jl .cos0 + je .cos1 + cmpl $2,%eax + je .cos2 + fsin + ret +.cos2: + fcos + fchs + ret +.cos1: + fsin + fchs + ret +.cos0: + fcos + ret + .align 4 + SET_SIZE(cos) diff --git a/usr/src/lib/libm/i386/src/exp.s b/usr/src/lib/libm/i386/src/exp.s new file mode 100644 index 0000000000..f901f511e6 --- /dev/null +++ b/usr/src/lib/libm/i386/src/exp.s @@ -0,0 +1,156 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .file "exp.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(exp,function) +#include "libm_synonyms.h" +#include "libm_protos.h" + + ENTRY(exp) + movl 8(%esp),%ecx / ecx <-- hi_32(x) + andl $0x7fffffff,%ecx / ecx <-- hi_32(|x|) + cmpl $0x3fe62e42,%ecx / Is |x| < ln(2)? + jb .shortcut / If so, take a shortcut. 
+ je .check_tail / |x| may be only slightly < ln(2) + cmpl $0x7ff00000,%ecx / hi_32(|x|) >= hi_32(INF)? + jae .not_finite / if so, x is not finite +.finite_non_special: / Here, ln(2) < |x| < INF + fldl 4(%esp) / push x + subl $8,%esp + /// overhead of RP save/restore; 63/15 + fstcw (%esp) /// ; 15/3 + movw (%esp),%ax /// ; 4/1 + movw %ax,4(%esp) /// save old RP; 2/1 + orw $0x0300,%ax /// force 64-bit RP; 2/1 + movw %ax,(%esp) /// ; 2/1 + fldcw (%esp) /// ; 19/4 + fldl2e / push log2e }not for xtndd_dbl + fmulp %st,%st(1) / z = x*log2e }not for xtndd_dbl + fld %st(0) / duplicate stack top + frndint / [z],z + fucom / This and the next 3 instructions + fstsw %ax / add 10 clocks to runtime of the + sahf / main branch, but save about 265 + je .z_integral / upon detection of integral z. + / [z] != z, compute exp(x) + fxch / z,[z] + fsub %st(1),%st / z-[z],[z] + f2xm1 / 2**(z-[z])-1,[z] + fld1 / 1,2**(z-[z])-1,[z] + faddp %st,%st(1) / 2**(z-[z]) ,[z] +.merge: + fscale / exp(x) ,[z] + fstp %st(1) + fstcw (%esp) / restore old RP, keep other CW bits + movw (%esp),%dx + andw $0xfcff,%dx + movw 4(%esp),%cx + andw $0x0300,%cx + orw %dx,%cx + movw %cx,(%esp) + fldcw (%esp) /// restore old RP; 19/4 + fstpl (%esp) / round to double + fldl (%esp) / exp(x) rounded to double + fxam / determine class of exp(x) + add $8,%esp + fstsw %ax / store status in ax + andw $0x4500,%ax + cmpw $0x0500,%ax + je .overflow + cmpw $0x4000,%ax + je .underflow + ret + +.overflow: + fstp %st(0) / stack empty + push %ebp + mov %esp,%ebp + PIC_SETUP(1) + pushl $6 + jmp .error + +.underflow: + fstp %st(0) / stack empty + push %ebp + mov %esp,%ebp + PIC_SETUP(2) + pushl $7 + +.error: + pushl 12(%ebp) / high x + pushl 8(%ebp) / low x + pushl 12(%ebp) / high x + pushl 8(%ebp) / low x + call PIC_F(_SVID_libm_err) + addl $20,%esp + PIC_WRAPUP + leave + ret + +.z_integral: / here, z is integral + fstp %st(0) / ,z + fld1 / 1,z + jmp .merge + +.check_tail: + movl 4(%esp),%edx / edx <-- lo_32(x) + cmpl $0xfefa39ef,%edx / Is |x| slightly < 
ln(2)? + ja .finite_non_special / branch if |x| slightly > ln(2) +.shortcut: + / Here, |x| < ln(2), so |z| = |x*log2(e)| < 1, + / whence z is in f2xm1's domain. + fldl 4(%esp) / push x + fldl2e / push log2e }not for xtndd_dbl + fmulp %st,%st(1) / z = x*log2e }not for xtndd_dbl + f2xm1 / 2**(x*log2(e))-1 = e**x - 1 + fld1 / 1,2**(z)-1 + faddp %st,%st(1) / 2**(z) = e**x + ret + +.not_finite: + / Here, flags still have settings from execution of + / cmpl $0x7ff00000,%ecx / hi_32(|x|) > hi_32(INF)? + ja .NaN_or_pinf / if not, x may be +/- INF + movl 4(%esp),%edx / edx <-- lo_32(x) + cmpl $0,%edx / lo_32(x) = 0? + jne .NaN_or_pinf / if not, x is NaN + movl 8(%esp),%eax / eax <-- hi_32(x) + andl $0x80000000,%eax / here, x is infinite, but +/-? + jz .NaN_or_pinf / branch if x = +INF + fldz / Here, x = -inf, so return 0 + ret + +.NaN_or_pinf: + / Here, x = NaN or +inf, so load x and return immediately. + fldl 4(%esp) + fwait + ret + .align 4 + SET_SIZE(exp) diff --git a/usr/src/lib/libm/i386/src/exp10.s b/usr/src/lib/libm/i386/src/exp10.s new file mode 100644 index 0000000000..28fe4b48f7 --- /dev/null +++ b/usr/src/lib/libm/i386/src/exp10.s @@ -0,0 +1,133 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .file "exp10.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(exp10,function) +#include "libm_synonyms.h" + + ENTRY(exp10) + movl 8(%esp),%ecx / ecx <-- hi_32(x) + andl $0x7fffffff,%ecx / ecx <-- hi_32(|x|) + cmpl $0x3fd34413,%ecx / Is |x| < log10(2)? + jb .shortcut / If so, take a shortcut. + je .check_tail / maybe |x| only slightly < log10(2) + cmpl $0x7ff00000,%ecx / hi_32(|x|) >= hi_32(INF)? + jae .not_finite / if so, x is not finite +.finite_non_special: / Here, log10(2) < |x| < INF + fldl 4(%esp) / push x (=arg) + + subl $8,%esp / save RP and set round-to-64-bits + fstcw (%esp) + movw (%esp),%ax + movw %ax,4(%esp) + orw $0x0300,%ax + movw %ax,(%esp) + fldcw (%esp) + + fldl2t / push log2(10) }NOT for xtndd_dbl + fmulp %st,%st(1) / z = x*log2(10) }NOT for xtndd_dbl + fld %st(0) / duplicate stack top + frndint / [z],z + fucom / z integral? 
+ fstsw %ax + sahf + je .z_integral / branch if z integral + fxch / z, [z] + fsub %st(1),%st / z-[z], [z] + f2xm1 / 2**(z-[z])-1, [z] + fld1 / 1,2**(z-[z])-1, [z] + faddp %st,%st(1) / 2**(z-[z]), [z] + fscale / 2**z = 10**(arg), [z] + fstp %st(1) + + fstcw (%esp) / restore old RP + movw (%esp),%dx + andw $0xfcff,%dx + movw 4(%esp),%cx + andw $0x0300,%cx + orw %dx,%cx + movw %cx,(%esp) + fldcw (%esp) + add $8,%esp + + ret + +.z_integral: / here, z is integral + fstp %st(0) / ,z + fld1 / 1 = 2**0, z + fscale / 2**(0 + z) = 2**z = 10**(arg), z + fstp %st(1) / 10**(arg) + + fstcw (%esp) / restore old RP + movw (%esp),%dx + andw $0xfcff,%dx + movw 4(%esp),%cx + andw $0x0300,%cx + orw %dx,%cx + movw %cx,(%esp) + fldcw (%esp) + add $8,%esp + + ret + +.check_tail: + movl 4(%esp),%edx / edx <-- lo_32(x) + cmpl $0x509f79fe,%edx / Is |x| slightly > log10(2)? + ja .finite_non_special / branch if |x| slightly > log10(2) +.shortcut: + / Here, |x| < log10(2), so |z| = |x*log2(10)| < 1 + / whence z is in f2xm1's domain. + fldl 4(%esp) / push x (=arg) + fldl2t / push log2(10) }NOT for xtndd_dbl + fmulp %st,%st(1) / z = x*log2(10) }NOT for xtndd_dbl + f2xm1 / 2**z - 1 + fld1 / 1,2**z - 1 + faddp %st,%st(1) / 2**z = 10**x + ret + +.not_finite: + cmpl $0x7ff00000,%ecx / hi_32(|x|) > hi_32(INF)? + ja .NaN_or_pinf / if so, x is NaN + movl 4(%esp),%edx / edx <-- lo_32(x) + cmpl $0,%edx / lo_32(x) = 0? + jne .NaN_or_pinf / if not, x is NaN + movl 8(%esp),%eax / eax <-- hi_32(x) + andl $0x80000000,%eax / here, x is infinite, but +/-? + jz .NaN_or_pinf / branch if x = +INF + fldz / Here, x = -inf, so return 0 + ret + +.NaN_or_pinf: + / Here, x = NaN or +inf, so load x and return immediately. 
+ fldl 4(%esp) + fwait + ret + .align 4 + SET_SIZE(exp10) diff --git a/usr/src/lib/libm/i386/src/exp10f.s b/usr/src/lib/libm/i386/src/exp10f.s new file mode 100644 index 0000000000..7c2fba095b --- /dev/null +++ b/usr/src/lib/libm/i386/src/exp10f.s @@ -0,0 +1,123 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .file "exp10f.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(exp10f,function) +#include "libm_synonyms.h" + + ENTRY(exp10f) + movl 4(%esp),%ecx / ecx <-- x + andl $0x7fffffff,%ecx / ecx <-- |x| + cmpl $0x3e9a209a,%ecx / Is |x| < log10(2)? + jbe .shortcut / If so, take a shortcut. + cmpl $0x7f800000,%ecx / |x| >= INF? 
+ jae .not_finite / if so, x is not finite + flds 4(%esp) / push x (=arg) + + subl $8,%esp / save RP and set round-to-64-bits + fstcw (%esp) + movw (%esp),%ax + movw %ax,4(%esp) + orw $0x0300,%ax + movw %ax,(%esp) + fldcw (%esp) + + fldl2t / push log2(10) }NOT for xtndd_dbl + fmulp %st,%st(1) / z = x*log2(10) }NOT for xtndd_dbl + fld %st(0) / duplicate stack top + frndint / [z],z + fucom / z integral? + fstsw %ax + sahf + je .z_integral / branch if z integral + fxch / z, [z] + fsub %st(1),%st / z-[z], [z] + f2xm1 / 2**(z-[z])-1, [z] + fld1 / 1,2**(z-[z])-1, [z] + faddp %st,%st(1) / 2**(z-[z]), [z] + fscale / 2**z = 10**(arg), [z] + fstp %st(1) + + fstcw (%esp) / restore old RP + movw (%esp),%dx + andw $0xfcff,%dx + movw 4(%esp),%cx + andw $0x0300,%cx + orw %dx,%cx + movw %cx,(%esp) + fldcw (%esp) + add $8,%esp + + ret + +.z_integral: / here, z is integral + fstp %st(0) / ,z + fld1 / 1 = 2**0, z + fscale / 2**(0 + z) = 2**z = 10**(arg), z + fstp %st(1) / 10**(arg) + + fstcw (%esp) / restore old RP + movw (%esp),%dx + andw $0xfcff,%dx + movw 4(%esp),%cx + andw $0x0300,%cx + orw %dx,%cx + movw %cx,(%esp) + fldcw (%esp) + add $8,%esp + + ret + +.shortcut: + / Here, |x| < log10(2), so |z| = |x*log2(10)| < 1 + / whence z is in f2xm1's domain. + flds 4(%esp) / push x (=arg) + fldl2t / push log2(10) }NOT for xtndd_dbl + fmulp %st,%st(1) / z = x*log2(10) }NOT for xtndd_dbl + f2xm1 / 2**z - 1 + fld1 / 1,2**z - 1 + faddp %st,%st(1) / 2**z = 10**x + ret + +.not_finite: + ja .NaN_or_pinf / branch if x is NaN + movl 4(%esp),%eax / eax <-- x + andl $0x80000000,%eax / here, x is infinite, but +/-? + jz .NaN_or_pinf / branch if x = +INF + fldz / Here, x = -inf, so return 0 + ret + +.NaN_or_pinf: + / Here, x = NaN or +inf, so load x and return immediately. 
+ flds 4(%esp) + fwait + ret + .align 4 + SET_SIZE(exp10f) diff --git a/usr/src/lib/libm/i386/src/exp10l.s b/usr/src/lib/libm/i386/src/exp10l.s new file mode 100644 index 0000000000..228652b89c --- /dev/null +++ b/usr/src/lib/libm/i386/src/exp10l.s @@ -0,0 +1,115 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .file "exp10l.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(exp10l,function) +#include "libm_synonyms.h" + + .data + .align 4 +lt2_hi: .long 0xfbd00000, 0x9a209a84, 0x00003ffd +lt2_lo: .long 0x653f4837, 0x8677076a, 0x0000bfc9 + + ENTRY(exp10l) + movl 12(%esp),%ecx / cx <--sign&bexp(x) + andl $0x00007fff,%ecx / ecx <-- zero_xtnd(bexp(x)) + cmpl $0x00003ffd,%ecx / Is |x| < log10(2)? + jb .shortcut / If so, take a shortcut. + je .check_tail / maybe |x| only slightly < log10(2) + cmpl $0x00007fff,%ecx / bexp(|x|) = bexp(INF)? + je .not_finite / if so, x is not finite + cmpl $0x0000400e,%ecx / |x| < 32768 = 2^15? 
+ jb .finite_non_special / if so, proceed with argument reduction + fldt 4(%esp) / x + fld1 / 1, x + jmp 1f +.finite_non_special: / Here, log10(2) < |x| < 2^15 + fldt 4(%esp) / x + fld %st(0) / x, x + fldl2t / log2(10), x, x + fmulp / z := x*log2(10), x + frndint / [z], x + fst %st(2) / [z], x, [z] + PIC_SETUP(1) + fldt PIC_L(lt2_hi) / lt2_hi, [z], x, [z] + fmulp / [z]*lt2_hi, x, [z] + fsubrp %st,%st(1) / x-[z]*lt2_hi, [z] + fldt PIC_L(lt2_lo) / lt2_lo, x-[z]*lt2_hi, [z] + PIC_WRAPUP + fmul %st(2),%st / [z]*lt2_lo, x-[z]*lt2_hi, [z] + fsubrp %st,%st(1) / r := x-[z]*log10(2), [z] + fldl2t / log2(10), r, [z] + fmulp / f := r*log2(10), [z] + f2xm1 / 2^f-1,[z] + fld1 / 1, 2^f-1, [z] + faddp %st,%st(1) / 2^f, [z] +1: + fscale / 10^x, [z] + fstp %st(1) + ret + +.check_tail: + movl 8(%esp),%ecx / ecx <-- hi_32(sgnfcnd(x)) + cmpl $0x9a209a84,%ecx / Is |x| < log10(2)? + ja .finite_non_special + jb .shortcut + movl 4(%esp),%edx / edx <-- lo_32(sgnfcnd(x)) + cmpl $0xfbcff798,%edx / Is |x| slightly > log10(2)? + ja .finite_non_special / branch if |x| slightly > log10(2) +.shortcut: + / Here, |x| < log10(2), so |z| = |x/log10(2)| < 1 + / whence z is in f2xm1's domain. + fldt 4(%esp) / x + fldl2t / log2(10), x + fmulp / z := x*log2(10) + f2xm1 / 2^z-1 + fld1 / 1, 2^z-1 + faddp %st,%st(1) / 10^x + ret + +.not_finite: + movl 8(%esp),%ecx / ecx <-- hi_32(sgnfcnd(x)) + cmpl $0x80000000,%ecx / hi_32(sgnfcnd(x)) = hi_32(sgnfcnd(INF))? + jne .NaN_or_pinf / if not, x is NaN or unsupp. + movl 4(%esp),%edx / edx <-- lo_32(sgnfcnd(x)) + cmpl $0,%edx / lo_32(sgnfcnd(x)) = 0? + jne .NaN_or_pinf / if not, x is NaN + movl 12(%esp),%eax / ax <-- sign&bexp((x)) + andl $0x00008000,%eax / here, x is infinite, but +/-? + jz .NaN_or_pinf / branch if x = +INF + fldz / Here, x = -inf, so return 0 + ret + +.NaN_or_pinf: + / Here, x = NaN or +inf, so load x and return immediately. 
+ fldt 4(%esp) + ret + .align 4 + SET_SIZE(exp10l) diff --git a/usr/src/lib/libm/i386/src/exp2.s b/usr/src/lib/libm/i386/src/exp2.s new file mode 100644 index 0000000000..7d0bc8f85d --- /dev/null +++ b/usr/src/lib/libm/i386/src/exp2.s @@ -0,0 +1,98 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .file "exp2.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(exp2,function) +#include "libm_synonyms.h" + + ENTRY(exp2) + movl 8(%esp),%ecx / ecx <-- hi_32(x) + andl $0x7fffffff,%ecx / ecx <-- hi_32(|x|) + cmpl $0x3ff00000,%ecx / Is |x| < 1? + jb .shortcut / If so, take a shortcut. + je .check_tail / |x| may be slightly > 1 + cmpl $0x7ff00000,%ecx / hi_32(|x|) >= hi_32(INF)? + jae .not_finite / if so, x is not finite +.finite_non_special: / Here, 1 < |x| < INF + fldl 4(%esp) / push arg + fld %st(0) / duplicate stack top + frndint / [x],x + fucom / x integral? 
+ fstsw %ax + sahf + je .x_integral / branch if x integral + fxch / x, [x] + fsub %st(1),%st / x-[x], [x] + f2xm1 / 2**(x-[x])-1, [x] + fld1 / 1,2**(x-[x])-1, [x] + faddp %st,%st(1) / 2**(x-[x]), [x] + fscale / 2**x = 2**(arg), [x] + fstp %st(1) + ret + +.x_integral: + fstp %st(0) / ,x + fld1 / 1 = 2**0, x + fscale / 2**(0 + x) = 2**x, x + fstp %st(1) / 2**x + ret + +.check_tail: + movl 4(%esp),%edx / edx <-- lo_32(x) + cmpl $0x00000000,%edx / Is |x| slightly > 1? + ja .finite_non_special / branch if |x| slightly > 1 +.shortcut: + / Here, |x| <= 1, + / whence x is in f2xm1's domain. + fldl 4(%esp) / push x + f2xm1 / 2**x - 1 + fld1 / 1,2**x - 1 + faddp %st,%st(1) / 2**x + ret + +.not_finite: + cmpl $0x7ff00000,%ecx / hi_32(|x|) > hi_32(INF)? + ja .NaN_or_pinf / if so, x is NaN + movl 4(%esp),%edx / edx <-- lo_32(x) + cmpl $0,%edx / lo_32(x) = 0? + jne .NaN_or_pinf / if not, x is NaN + movl 8(%esp),%eax / eax <-- hi_32(x) + andl $0x80000000,%eax / here, x is infinite, but +/-? + jz .NaN_or_pinf / branch if x = +INF + fldz / Here, x = -inf, so return 0 + ret + +.NaN_or_pinf: + / Here, x = NaN or +inf, so load x and return immediately. + fldl 4(%esp) + fwait + ret + .align 4 + SET_SIZE(exp2) diff --git a/usr/src/lib/libm/i386/src/exp2f.s b/usr/src/lib/libm/i386/src/exp2f.s new file mode 100644 index 0000000000..34b3d7fb8a --- /dev/null +++ b/usr/src/lib/libm/i386/src/exp2f.s @@ -0,0 +1,88 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .file "exp2f.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(exp2f,function) +#include "libm_synonyms.h" + + ENTRY(exp2f) + movl 4(%esp),%ecx / ecx <-- x + andl $0x7fffffff,%ecx / ecx <-- |x| + cmpl $0x3f800000,%ecx / Is |x| <= 1? + jbe .shortcut / If so, take a shortcut. + cmpl $0x7f800000,%ecx / |x| >= INF? + jae .not_finite / if so, x is not finite + flds 4(%esp) / push arg + fld %st(0) / duplicate stack top + frndint / [x],x + fucom / x integral? + fstsw %ax + sahf + je .x_integral / branch if x integral + fxch / x, [x] + fsub %st(1),%st / x-[x], [x] + f2xm1 / 2**(x-[x])-1, [x] + fld1 / 1,2**(x-[x])-1, [x] + faddp %st,%st(1) / 2**(x-[x]), [x] + fscale / 2**x = 2**(arg), [x] + fstp %st(1) + ret + +.x_integral: / here, x is integral + fstp %st(0) / ,x + fld1 / 1 = 2**0, x + fscale / 2**(0 + x) = 2**x, x + fstp %st(1) / 2**x + ret + +.shortcut: + / Here, |x| <= 1, + / whence x is in f2xm1's domain. + flds 4(%esp) / push x + f2xm1 / 2**x - 1 + fld1 / 1,2**x - 1 + faddp %st,%st(1) / 2**x + ret + +.not_finite: + ja .NaN_or_pinf / branch if x is NaN + movl 4(%esp),%eax / eax <-- x + andl $0x80000000,%eax / here, x is infinite, but +/-? + jz .NaN_or_pinf / branch if x = +INF + fldz / Here, x = -inf, so return 0 + ret + +.NaN_or_pinf: + / Here, x = NaN or +inf, so load x and return immediately. 
+ flds 4(%esp) + fwait + ret + .align 4 + SET_SIZE(exp2f) diff --git a/usr/src/lib/libm/i386/src/exp2l.s b/usr/src/lib/libm/i386/src/exp2l.s new file mode 100644 index 0000000000..b4ba784cbc --- /dev/null +++ b/usr/src/lib/libm/i386/src/exp2l.s @@ -0,0 +1,101 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .file "exp2l.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(exp2l,function) +#include "libm_synonyms.h" + + ENTRY(exp2l) + movl 12(%esp),%ecx / cx <--sign&bexp(x) + andl $0x00007fff,%ecx / ecx <-- zero_xtnd(bexp(x)) + cmpl $0x00003fff,%ecx / Is |x| <= 1? + jb .shortcut / If so, take a shortcut. + je .check_tail / |x| may be slightly > 1 + cmpl $0x00007fff,%ecx / bexp(|x|) = bexp(INF)? + je .not_finite / if so, x is not finite +.finite_non_special: / Here, 1 < |x| < INF + fldt 4(%esp) / push arg + fld %st(0) / duplicate stack top + frndint / [x],x + fucom / x integral? 
+ fnstsw %ax + sahf + je .x_integral / branch if x integral + fxch / x, [x] + fsub %st(1),%st / x-[x], [x] + f2xm1 / 2**(x-[x])-1, [x] + fld1 / 1,2**(x-[x])-1, [x] + faddp %st,%st(1) / 2**(x-[x]), [x] + fscale / 2**x = 2**(arg), [x] + fstp %st(1) + ret + +.x_integral: + fstp %st(0) / ,x + fld1 / 1 = 2**0, x + fscale / 2**(0 + x) = 2**x, x + fstp %st(1) / 2**x + ret + +.check_tail: + movl 8(%esp),%ecx / ecx <-- hi_32(sgnfcnd(x)) + cmpl $0x80000000,%ecx / Is |x| <= 1? + ja .finite_non_special + movl 4(%esp),%edx / edx <-- lo_32(sgnfcnd(x)) + cmpl $0x00000000,%edx / Is |x| slightly > 1? + ja .finite_non_special / branch if |x| slightly > 1 +.shortcut: + / Here, |x| <= 1, + / whence x is in f2xm1's domain. + fldt 4(%esp) / push x + f2xm1 / 2**x - 1 + fld1 / 1,2**x - 1 + faddp %st,%st(1) / 2**x + ret + +.not_finite: + movl 8(%esp),%ecx / ecx <-- hi_32(sgnfcnd(x)) + cmpl $0x80000000,%ecx / hi_32(|x|) = hi_32(INF)? + jne .NaN_or_pinf / if not, x is NaN + movl 4(%esp),%edx / edx <-- lo_32(x) + cmpl $0,%edx / lo_32(x) = 0? + jne .NaN_or_pinf / if not, x is NaN + movl 12(%esp),%eax / ax <-- sign&bexp((x)) + andl $0x00008000,%eax / here, x is infinite, but +/-? + jz .NaN_or_pinf / branch if x = +INF + fldz / Here, x = -inf, so return 0 + ret + +.NaN_or_pinf: + / Here, x = NaN or +inf, so load x and return immediately. + fldt 4(%esp) + ret + .align 4 + SET_SIZE(exp2l) diff --git a/usr/src/lib/libm/i386/src/expl.s b/usr/src/lib/libm/i386/src/expl.s new file mode 100644 index 0000000000..22736b962c --- /dev/null +++ b/usr/src/lib/libm/i386/src/expl.s @@ -0,0 +1,124 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .file "expl.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(expl,function) +#include "libm_synonyms.h" + + .data + .align 4 +ln2_hi: .long 0xd1d00000, 0xb17217f7, 0x00003ffe +ln2_lo: .long 0x4c67fc0d, 0x8654361c, 0x0000bfce + + ENTRY(expl) + movl 12(%esp),%ecx / cx <--sign&bexp(x) + andl $0x7fff,%ecx / ecx <-- zero_xtnd(bexp(x)) + cmpl $0x3ffe,%ecx / Is |x| < 0.5? + jb 2f / If so, see which shortcut to take + je .check_tail / More checking if 0.5 <= |x| < 1 + cmpl $0x00007fff,%ecx / bexp(|x|) = bexp(INF)? + je .not_finite / if so, x is not finite + cmpl $0x0000400e,%ecx / |x| < 32768 = 2^15? 
+ jb .finite_non_special / if so, proceed with argument reduction + fldt 4(%esp) / x + fld1 / 1, x + jmp 1f +.finite_non_special: / Here, ln(2) < |x| < 2^15 + fldt 4(%esp) / x + fld %st(0) / x, x + fldl2e / log2(e), x, x + fmulp / z := x*log2(e), x + frndint / [z], x + fst %st(2) / [z], x, [z] + PIC_SETUP(1) + fldt PIC_L(ln2_hi) / ln2_hi, [z], x, [z] + fmulp / [z]*ln2_hi, x, [z] + fsubrp %st,%st(1) / x-[z]*ln2_hi, [z] + fldt PIC_L(ln2_lo) / ln2_lo, x-[z]*ln2_hi, [z] + PIC_WRAPUP + fmul %st(2),%st / [z]*ln2_lo, x-[z]*ln2_hi, [z] + fsubrp %st,%st(1) / r := x-[z]*ln(2), [z] + fldl2e / log2(e), r, [z] + fmulp / f := r*log2(e), [z] + f2xm1 / 2^f-1,[z] + fld1 / 1, 2^f-1, [z] + faddp %st,%st(1) / 2^f, [z] +1: + fscale / e^x, [z] + fstp %st(1) + ret + +2: / Here, |x| < 0.5 + cmpl $0x3fbe,%ecx / Is |x| >= 2^-65? + jae .shortcut / If so, take a shortcut + fldt 4(%esp) / x + fld1 / 1, x + faddp %st,%st(1) / 1+x (for inexact & directed rounding) + ret + +.check_tail: + movl 8(%esp),%ecx / ecx <-- hi_32(sgnfcnd(x)) + cmpl $0xb17217f7,%ecx / Is |x| < ln(2)? + ja .finite_non_special + jb .shortcut + movl 4(%esp),%edx / edx <-- lo_32(x) + cmpl $0xd1cf79ab,%edx / Is |x| slightly < ln(2)? + ja .finite_non_special / branch if |x| slightly > ln(2) +.shortcut: + / Here, |x| < ln(2), so |z| = |x/ln(2)| < 1, + / whence z is in f2xm1's domain. + fldt 4(%esp) / x + fldl2e / log2(e), x + fmulp / x*log2(e) + f2xm1 / 2^(x*log2(e))-1 = e^x-1 + fld1 / 1, e^x-1 + faddp %st,%st(1) / e^x + ret + +.not_finite: + movl 8(%esp),%ecx / ecx <-- hi_32(sgnfcnd(x)) + cmpl $0x80000000,%ecx / hi_32(|x|) = hi_32(INF)? + jne .NaN_or_pinf / if not, x is NaN + movl 4(%esp),%edx / edx <-- lo_32(x) + cmpl $0,%edx / lo_32(x) = 0? + jne .NaN_or_pinf / if not, x is NaN + movl 12(%esp),%eax / ax <-- sign&bexp((x)) + andl $0x00008000,%eax / here, x is infinite, but +/-? 
+ jz .NaN_or_pinf / branch if x = +INF + fldz / Here, x = -inf, so return 0 + ret + +.NaN_or_pinf: + / Here, x = NaN or +inf, so load x and return immediately. + fldt 4(%esp) + fadd %st(0),%st / quiet SNaN + ret + .align 4 + SET_SIZE(expl) diff --git a/usr/src/lib/libm/i386/src/expm1.s b/usr/src/lib/libm/i386/src/expm1.s new file mode 100644 index 0000000000..bca8dfe46e --- /dev/null +++ b/usr/src/lib/libm/i386/src/expm1.s @@ -0,0 +1,130 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .file "expm1.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(expm1,function) +#include "libm_synonyms.h" + + .data + .align 4 +.mhundred: .float -100.0 + + ENTRY(expm1) + movl 8(%esp),%ecx / ecx <-- hi_32(x) + andl $0x7fffffff,%ecx / ecx <-- hi_32(|x|) + cmpl $0x3fe62e42,%ecx / Is |x| < ln(2)? + jb .shortcut / If so, take a shortcut. + je .check_tail / |x| may be only slightly < ln(2) + cmpl $0x7ff00000,%ecx / hi_32(|x|) >= hi_32(INF)? 
+ jae .not_finite / if so, x is not finite +.finite_non_special: / Here, ln(2) < |x| < INF + fldl 4(%esp) / push x + + subl $8,%esp / save RP and set round-to-64-bits + fstcw (%esp) + movw (%esp),%ax + movw %ax,4(%esp) + orw $0x0300,%ax + movw %ax,(%esp) + fldcw (%esp) + + fldl2e / push log2e }not for xtndd_dbl + fmulp %st,%st(1) / z = x*log2e }not for xtndd_dbl + fld %st(0) / duplicate stack top + frndint / [z],z + / [z] != 0, compute exp(x) and then subtract one to get expm1(x) + fxch / z,[z] + fsub %st(1),%st / z-[z],[z] + f2xm1 / 2**(z-[z])-1,[z] + / avoid spurious underflow when scaling to compute exp(x) + PIC_SETUP(1) + flds PIC_L(.mhundred) + PIC_WRAPUP + fucom %st(2) / if -100 !< [z], then use -100 + fstsw %ax + sahf + jb .got_int_part + fxch %st(2) +.got_int_part: + fstp %st(0) / 2**(z-[z])-1,max([z],-100) + fld1 / 1,2**(z-[z])-1,max([z],-100) + faddp %st,%st(1) / 2**(z-[z]) ,max([z],-100) + fscale / exp(x) ,max([z],-100) + fld1 / 1,exp(x) ,max([z],-100) + fxch / exp(x),1 ,max([z],-100) + fsubp %st,%st(1) / exp(x)-1 ,max([z],-100) + fstp %st(1) + + fstcw (%esp) / restore old RP + movw (%esp),%dx + andw $0xfcff,%dx + movw 4(%esp),%cx + andw $0x0300,%cx + orw %dx,%cx + movw %cx,(%esp) + fldcw (%esp) + add $8,%esp + + ret + +.check_tail: + movl 4(%esp),%edx / edx <-- lo_32(x) + cmpl $0xfefa39ef,%edx / Is |x| slightly < ln(2)? + ja .finite_non_special / branch if |x| slightly > ln(2) +.shortcut: + / Here, |x| < ln(2), so |z| = |x*log2(e)| < 1, + / whence z is in f2xm1's domain. + fldl 4(%esp) / push x + fldl2e / push log2e }not for xtndd_dbl + fmulp %st,%st(1) / z = x*log2e }not for xtndd_dbl + f2xm1 / 2**(x*log2(e))-1 = e**x - 1 + ret + +.not_finite: + / Here, flags still have settings from execution of + / cmpl $0x7ff00000,%ecx / hi_32(|x|) > hi_32(INF)? + ja .NaN_or_pinf / if not, x may be +/- INF + movl 4(%esp),%edx / edx <-- lo_32(x) + cmpl $0,%edx / lo_32(x) = 0? 
+ jne .NaN_or_pinf / if not, x is NaN + movl 8(%esp),%eax / eax <-- hi_32(x) + andl $0x80000000,%eax / here, x is infinite, but +/-? + jz .NaN_or_pinf / branch if x = +INF + fld1 / Here, x = -inf, so return -1 + fchs + ret + +.NaN_or_pinf: + / Here, x = NaN or +inf, so load x and return immediately. + fldl 4(%esp) + fwait + ret + .align 4 + SET_SIZE(expm1) diff --git a/usr/src/lib/libm/i386/src/expm1f.s b/usr/src/lib/libm/i386/src/expm1f.s new file mode 100644 index 0000000000..08abd37b2c --- /dev/null +++ b/usr/src/lib/libm/i386/src/expm1f.s @@ -0,0 +1,153 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .file "expm1f.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(expm1f,function) +#include "libm_synonyms.h" + + .data + .align 4 +.mhundred: .float -100.0 + + ENTRY(expm1f) + movl 4(%esp),%ecx / ecx <-- x + andl $0x7fffffff,%ecx / ecx <-- |x| + cmpl $0x3f317217,%ecx / Is |x| < ln(2)? + jbe .shortcut / If so, take a shortcut. + cmpl $0x7f800000,%ecx / |x| >= INF? 
+ jae .not_finite / if so, x is not finite + flds 4(%esp) / push x + + subl $8,%esp / save RP and set round-to-64-bits + fstcw (%esp) + movw (%esp),%ax + movw %ax,4(%esp) + orw $0x0300,%ax + movw %ax,(%esp) + fldcw (%esp) + + fldl2e / push log2e }not for xtndd_dbl + fmulp %st,%st(1) / z = x*log2e }not for xtndd_dbl + fld %st(0) / duplicate stack top + frndint / [z],z + fucom / This and the next 3 instructions + fstsw %ax / add 10 clocks to runtime of the + sahf / main branch, but save about 265 + je .z_integral / upon detection of integral z. + / [z] != 0, compute exp(x) and then subtract one to get expm1(x) + fxch / z,[z] + fsub %st(1),%st / z-[z],[z] + f2xm1 / 2**(z-[z])-1,[z] + / avoid spurious underflow when scaling to compute exp(x) + PIC_SETUP(1) + flds PIC_L(.mhundred) + PIC_WRAPUP + fucom %st(2) / if -100 !< [z], then use -100 + fstsw %ax + sahf + jb .got_int_part + fxch %st(2) +.got_int_part: + fstp %st(0) / 2**(z-[z])-1,max([z],-100) + fld1 / 1,2**(z-[z])-1,max([z],-100) + faddp %st,%st(1) / 2**(z-[z]) ,max([z],-100) + fscale / exp(x) ,max([z],-100) + fld1 / 1,exp(x) ,max([z],-100) + fsubrp %st,%st(1) / exp(x)-1 ,max([z],-100) + fstp %st(1) + + fstcw (%esp) / restore old RP + movw (%esp),%dx + andw $0xfcff,%dx + movw 4(%esp),%cx + andw $0x0300,%cx + orw %dx,%cx + movw %cx,(%esp) + fldcw (%esp) + add $8,%esp + + ret + +.z_integral: / here, z is integral + fstp %st(0) / ,z + / avoid spurious underflow when scaling to compute exp(x) + PIC_SETUP(2) + flds PIC_L(.mhundred) + PIC_WRAPUP + fucom %st(1) / if -100 !< [z], then use -100 + fstsw %ax + sahf + jb .scale_wont_ovfl + fxch %st(1) +.scale_wont_ovfl: + fstp %st(0) / max([z],-100) + fld1 / 1,max([z],-100) + fscale / exp(x) ,max([z],-100) + fld1 / 1,exp(x) ,max([z],-100) + fsubrp %st,%st(1) / exp(x)-1 ,max([z],-100) + fstp %st(1) + + fstcw (%esp) / restore old RP + movw (%esp),%dx + andw $0xfcff,%dx + movw 4(%esp),%cx + andw $0x0300,%cx + orw %dx,%cx + movw %cx,(%esp) + fldcw (%esp) + add $8,%esp + + ret + 
+.shortcut: + / Here, |x| < ln(2), so |z| = |x*log2(e)| < 1, + / whence z is in f2xm1's domain. + flds 4(%esp) / push x + fldl2e / push log2e }not for xtndd_dbl + fmulp %st,%st(1) / z = x*log2e }not for xtndd_dbl + f2xm1 / 2**(x*log2(e))-1 = e**x - 1 + ret + +.not_finite: + ja .NaN_or_pinf / branch if x is NaN + movl 4(%esp),%eax / eax <-- x + andl $0x80000000,%eax / here, x is infinite, but +/-? + jz .NaN_or_pinf / branch if x = +INF + fld1 / Here, x = -inf, so return -1 + fchs + ret + +.NaN_or_pinf: + / Here, x = NaN or +inf, so load x and return immediately. + flds 4(%esp) + fwait + ret + .align 4 + SET_SIZE(expm1f) diff --git a/usr/src/lib/libm/i386/src/expm1l.s b/usr/src/lib/libm/i386/src/expm1l.s new file mode 100644 index 0000000000..2ece2d59de --- /dev/null +++ b/usr/src/lib/libm/i386/src/expm1l.s @@ -0,0 +1,123 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + + .file "expm1l.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(expm1l,function) +#include "libm_synonyms.h" + + .data + .align 4 +ln2_hi: .long 0xd1d00000, 0xb17217f7, 0x00003ffe +ln2_lo: .long 0x4c67fc0d, 0x8654361c, 0x0000bfce + + ENTRY(expm1l) + movl 12(%esp),%ecx / cx <--sign&bexp(x) + movl %ecx,%eax / ax <--sign&bexp(x) + andl $0x00007fff,%ecx / ecx <-- zero_xtnd(bexp(x)) + cmpl $0x00003ffe,%ecx / Is |x| < ln(2)? + jb .shortcut / If so, take a shortcut. + je .check_tail / |x| may be only slightly < ln(2) + cmpl $0x00007fff,%ecx / bexp(|x|) = bexp(INF)? + je .not_finite / if so, x is not finite + andl $0x0000ffff,%eax / eax <-- sign&bexp(x) + cmpl $0x0000c006,%eax / x <= -128? + jae 1f / if so, simply return -1 + cmpl $0x0000400d,%ecx / |x| < 16384 = 2^14? + jb .finite_non_special / if so, proceed with argument reduction + fldt 4(%esp) / x >= 16384; x + fld1 / 1, x + fscale / +Inf, x + fstp %st(1) / +Inf + ret + +.finite_non_special: / -128 < x < -ln(2) || ln(2) < x < 2^14 + fldt 4(%esp) / x + fld %st(0) / x, x + fldl2e / log2(e), x, x + fmulp / z := x*log2(e), x + frndint / [z], x + fst %st(2) / [z], x, [z] + PIC_SETUP(1) + fldt PIC_L(ln2_hi) / ln2_hi, [z], x, [z] + fmulp / [z]*ln2_hi, x, [z] + fsubrp %st,%st(1) / x-[z]*ln2_hi, [z] + fldt PIC_L(ln2_lo) / ln2_lo, x-[z]*ln2_hi, [z] + PIC_WRAPUP + fmul %st(2),%st / [z]*ln2_lo, x-[z]*ln2_hi, [z] + fsubrp %st,%st(1) / r := x-[z]*ln(2), [z] + fldl2e / log2(e), r, [z] + fmulp / f := r*log2(e), [z] + f2xm1 / 2^f-1,[z] + fld1 / 1, 2^f-1, [z] + faddp %st,%st(1) / 2^f, [z] + fscale / e^x, [z] + fstp %st(1) / e^x + fld1 / 1, e^x + fsubrp %st,%st(1) / e^x-1 + ret + +.check_tail: + movl 8(%esp),%ecx / ecx <-- hi_32(sgnfcnd(x)) + cmpl $0xb17217f7,%ecx / Is |x| < ln(2)? + ja .finite_non_special + jb .shortcut + movl 4(%esp),%edx / edx <-- lo_32(x) + cmpl $0xd1cf79ab,%edx / Is |x| slightly < ln(2)? 
+ ja .finite_non_special / branch if |x| slightly > ln(2) +.shortcut: + / Here, |x| < ln(2), so |z| = |x/ln(2)| < 1, + / whence z is in f2xm1's domain. + fldt 4(%esp) / x + fldl2e / log2(e), x + fmulp / z := x*log2(e) + f2xm1 / 2^(x*log2(e))-1 = e^x-1 + ret + +.not_finite: + movl 8(%esp),%ecx / ecx <-- hi_32(sgnfcnd(x)) + cmpl $0x80000000,%ecx / hi_32(|x|) = hi_32(INF)? + jne .NaN_or_pinf / if not, x is NaN + movl 4(%esp),%edx / edx <-- lo_32(x) + cmpl $0,%edx / lo_32(x) = 0? + jne .NaN_or_pinf / if not, x is NaN + movl 12(%esp),%eax / ax <-- sign&bexp((x)) + andl $0x00008000,%eax / here, x is infinite, but +/-? + jz .NaN_or_pinf / branch if x = +INF +1: + fld1 / Here, x = -inf, so return -1 + fchs + ret + +.NaN_or_pinf: + / Here, x = NaN or +inf, so load x and return immediately. + fldt 4(%esp) + ret + .align 4 + SET_SIZE(expm1l) diff --git a/usr/src/lib/libm/i386/src/fabs.s b/usr/src/lib/libm/i386/src/fabs.s new file mode 100644 index 0000000000..b2fc32f9e2 --- /dev/null +++ b/usr/src/lib/libm/i386/src/fabs.s @@ -0,0 +1,41 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 
+ */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .file "fabs.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(fabs,function) +#include "libm_synonyms.h" + + ENTRY(fabs) + fldl 4(%esp) / push x (double argument) +#undef fabs + fabs / x87 fabs insn: |x| (the #undef above presumably guards against a fabs macro) + ret / |x| is left in %st(0) + .align 4 + SET_SIZE(fabs) diff --git a/usr/src/lib/libm/i386/src/fabsf.s b/usr/src/lib/libm/i386/src/fabsf.s new file mode 100644 index 0000000000..e89180e4e6 --- /dev/null +++ b/usr/src/lib/libm/i386/src/fabsf.s @@ -0,0 +1,42 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + + .file "fabsf.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(fabsf,function) +#include "libm_synonyms.h" + + ENTRY(fabsf) + flds 4(%esp) / push x (float argument) +#undef fabs + fabs / x87 fabs insn: |x| (the #undef above presumably guards against a fabs macro) + ret / |x| is left in %st(0) + .align 4 + SET_SIZE(fabsf) + diff --git a/usr/src/lib/libm/i386/src/fabsl.s b/usr/src/lib/libm/i386/src/fabsl.s new file mode 100644 index 0000000000..d21f3d3a37 --- /dev/null +++ b/usr/src/lib/libm/i386/src/fabsl.s @@ -0,0 +1,42 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .file "fabsl.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(fabsl,function) +#include "libm_synonyms.h" + + ENTRY(fabsl) + fldt 4(%esp) / push x (extended-precision argument) +#undef fabs + fabs / x87 fabs insn: |x| (the #undef above presumably guards against a fabs macro) + ret / |x| is left in %st(0) + .align 4 + SET_SIZE(fabsl) + diff --git a/usr/src/lib/libm/i386/src/finitef.s b/usr/src/lib/libm/i386/src/finitef.s new file mode 100644 index 0000000000..70e119cc37 --- /dev/null +++ b/usr/src/lib/libm/i386/src/finitef.s @@ -0,0 +1,44 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). 
+ * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .file "finitef.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(finitef,function) +#include "libm_synonyms.h" + + ENTRY(finitef) + movl 4(%esp),%eax / eax <-- bit pattern of x (float) + notl %eax / not(bexp) = 0 iff bexp = all 1's + andl $0x7f800000,%eax / keep exponent field; ZF <-- 1 iff not(bexp) = 0 + jz .done / bexp all 1's (x is INF or NaN): return the 0 already in eax + movl $1,%eax / x is finite: eax was nonzero (only %ax was 0); ansi needs %eax = 1 +.done: + ret / eax = 1 if x is finite, else 0 + .align 4 + SET_SIZE(finitef) diff --git a/usr/src/lib/libm/i386/src/finitel.s b/usr/src/lib/libm/i386/src/finitel.s new file mode 100644 index 0000000000..3b3e792d59 --- /dev/null +++ b/usr/src/lib/libm/i386/src/finitel.s @@ -0,0 +1,56 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .file "finitel.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(finitel,function) +#include "libm_synonyms.h" + + ENTRY(finitel) + movl 12(%esp),%eax / %ax <-- sign&bexp(x) + testl $0x80000000,8(%esp) / ZF = 1 iff hi_32(sgnfcnd(x))'s msb = 0 + jz .chk_denormal_or_0 / msb clear: only bexp = 0 is a supported encoding + notl %eax / not(bexp) = 0 iff bexp = all 1's + andl $0x00007fff,%eax / ZF <-- 1 iff not(bexp) = 0 + jz .done / bexp all 1's (x is INF or NaN): return the 0 already in eax + movl $1,%eax / x is finite; ansi needs %eax = 1 +.done: + ret + +.chk_denormal_or_0: + andl $0x00007fff,%eax / ZF <-- 1 iff bexp = 0 iff denormal or 0 + jnz .unsupported / jump if arg has unsupported format + movl $1,%eax / 0 or denormal is finite; ansi needs %eax = 1 + ret + +.unsupported: + movl $0,%eax / unsupported format does not represent + ret / a finite number + .align 4 + SET_SIZE(finitel) diff --git a/usr/src/lib/libm/i386/src/floor.s b/usr/src/lib/libm/i386/src/floor.s new file mode 100644 index 0000000000..c03b68bc7f --- /dev/null +++ b/usr/src/lib/libm/i386/src/floor.s @@ -0,0 +1,56 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .file "floor.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(floor,function) +#include "libm_synonyms.h" + + ENTRY(floor) + subl $8,%esp / scratch: saved CW at (%esp), working CW at 4(%esp) + fstcw (%esp) / save the caller's control word + fldl 12(%esp) / push x (arg offset is 4 plus the 8 just reserved) + movw (%esp),%cx / cx <-- caller's CW + orw $0x0c00,%cx / set both rounding bits (CW bits 10-11) ... + xorw $0x0800,%cx / ... then clear bit 11, leaving the field = 01 + movw %cx,4(%esp) + fldcw 4(%esp) / set RD = down + frndint / round x to an integer in that direction + fstcw 4(%esp) / re-read the current CW (a store, not yet the restore) + movw 4(%esp),%dx + andw $0xf3ff,%dx / dx <-- current CW with the rounding bits cleared + movw (%esp),%cx + andw $0x0c00,%cx / cx <-- caller's rounding bits only + orw %dx,%cx / current CW with the caller's RD merged back in + movw %cx,(%esp) + fldcw (%esp) / restore RD + addl $8,%esp / release the scratch area + ret / rounded value is left in %st(0) + .align 4 + SET_SIZE(floor) diff --git a/usr/src/lib/libm/i386/src/floorl.s b/usr/src/lib/libm/i386/src/floorl.s new file mode 100644 index 0000000000..d9dabfb87c --- /dev/null +++ b/usr/src/lib/libm/i386/src/floorl.s @@ -0,0 +1,81 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .file "floorl.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(ceill,function) +LIBM_ANSI_PRAGMA_WEAK(floorl,function) +#include "libm_synonyms.h" + + ENTRY(ceill) + subl $8,%esp / scratch: saved CW at (%esp), working CW at 4(%esp) + fstcw (%esp) / save the caller's control word + fldt 12(%esp) / push x (arg offset is 4 plus the 8 just reserved) + movw (%esp),%cx / cx <-- caller's CW + orw $0x0c00,%cx / set both rounding bits (CW bits 10-11) ... + xorw $0x0400,%cx / ... then clear bit 10, leaving the field = 10 + movw %cx,4(%esp) + fldcw 4(%esp) / set RD = up + frndint / round x to an integer in that direction + fstcw 4(%esp) / re-read the current CW (a store, not yet the restore) + movw 4(%esp),%dx + andw $0xf3ff,%dx / dx <-- current CW with the rounding bits cleared + movw (%esp),%cx + andw $0x0c00,%cx / cx <-- caller's rounding bits only + orw %dx,%cx / current CW with the caller's RD merged back in + movw %cx,(%esp) + fldcw (%esp) / restore RD + addl $8,%esp / release the scratch area + ret / rounded value is left in %st(0) + .align 4 + SET_SIZE(ceill) + + + ENTRY(floorl) + subl $8,%esp / scratch: saved CW at (%esp), working CW at 4(%esp) + fstcw (%esp) / save the caller's control word + fldt 12(%esp) / push x (arg offset is 4 plus the 8 just reserved) + movw (%esp),%cx / cx <-- caller's CW + orw $0x0c00,%cx / set both rounding bits (CW bits 10-11) ... + xorw $0x0800,%cx / ... then clear bit 11, leaving the field = 01 + movw %cx,4(%esp) + fldcw 4(%esp) / set RD = down + frndint / round x to an integer in that direction + fstcw 4(%esp) / re-read the current CW (a store, not yet the restore) + movw 4(%esp),%dx + andw $0xf3ff,%dx / dx <-- current CW with the rounding bits cleared + movw (%esp),%cx + andw $0x0c00,%cx / cx <-- caller's rounding bits only + orw %dx,%cx / current CW with the caller's RD merged back in + movw %cx,(%esp) + fldcw (%esp) / restore RD + addl $8,%esp / release the scratch area + ret / rounded value is left in %st(0) + .align 4 + SET_SIZE(floorl) diff --git a/usr/src/lib/libm/i386/src/fmod.s b/usr/src/lib/libm/i386/src/fmod.s new file mode 100644 index 0000000000..0c8a048994 --- /dev/null +++ b/usr/src/lib/libm/i386/src/fmod.s @@ -0,0 +1,66 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .file "fmod.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(fmod,function) +#include "libm_synonyms.h" +#include "libm_protos.h" + + ENTRY(fmod) + movl 16(%esp),%eax / eax <-- hi_32(y) + andl $0x7fffffff,%eax / eax <-- hi_32(|y|) + orl 12(%esp),%eax / eax <-- lo_32(y)|hi_32(|y|) + je .zero / all bits of |y| are 0: y = +/-0, report the error + + fldl 12(%esp) / load arg y + fldl 4(%esp) / load arg x +.mod_loop: + fprem / partial fmod + fstsw %ax / store status word + andw $0x400,%ax / isolate C2 (0x400): set while reduction is incomplete + jne .mod_loop / while incomplete, do fprem again + fstp %st(1) / drop y, leaving the remainder in %st(0) + ret +.zero: + pushl %ebp + movl %esp,%ebp / frame: x at 8(%ebp), y at 16(%ebp) + PIC_SETUP(1) + pushl $27 / case 27 in _SVID_libm_err + pushl 20(%ebp) / pass y: high word (pushed before x, so y is the 2nd argument) + pushl 16(%ebp) / low y + pushl 12(%ebp) / pass x: high word (1st argument) + pushl 8(%ebp) / low x + call PIC_F(_SVID_libm_err) + addl $20,%esp / pop the five words pushed above + PIC_WRAPUP + leave + ret / returns whatever _SVID_libm_err left as its result + .align 4 + SET_SIZE(fmod) diff --git a/usr/src/lib/libm/i386/src/fmodf.s b/usr/src/lib/libm/i386/src/fmodf.s new file mode 100644 index 0000000000..b4dbe3053c --- /dev/null +++ b/usr/src/lib/libm/i386/src/fmodf.s @@ -0,0 +1,46 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .file "fmodf.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(fmodf,function) +#include "libm_synonyms.h" + + ENTRY(fmodf) + flds 8(%esp) / load arg y + flds 4(%esp) / load arg x (stack is now x, y) +.mod_loop: + fprem / partial fmod + fstsw %ax / store status word + andw $0x400,%ax / isolate C2 (0x400): set while reduction is incomplete + jne .mod_loop / while incomplete, do fprem again + fstp %st(1) / drop y, leaving the remainder in %st(0) + ret / note: no y = 0 special case here, unlike fmod + .align 4 + SET_SIZE(fmodf) diff --git a/usr/src/lib/libm/i386/src/fmodl.s b/usr/src/lib/libm/i386/src/fmodl.s new file mode 100644 index 0000000000..c4c5849e4b --- /dev/null +++ b/usr/src/lib/libm/i386/src/fmodl.s @@ -0,0 +1,46 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .file "fmodl.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(fmodl,function) +#include "libm_synonyms.h" + + ENTRY(fmodl) + fldt 16(%esp) / load arg y (x occupies the 12 bytes from 4(%esp)) + fldt 4(%esp) / load arg x (stack is now x, y) +.mod_loop: + fprem / partial fmod + fstsw %ax / store status word + andw $0x400,%ax / isolate C2 (0x400): set while reduction is incomplete + jne .mod_loop / while incomplete, do fprem again + fstp %st(1) / drop y, leaving the remainder in %st(0) + ret / note: no y = 0 special case here, unlike fmod + .align 4 + SET_SIZE(fmodl) diff --git a/usr/src/lib/libm/i386/src/hypot.s b/usr/src/lib/libm/i386/src/hypot.s new file mode 100644 index 0000000000..0687a9f4fe --- /dev/null +++ b/usr/src/lib/libm/i386/src/hypot.s @@ -0,0 +1,138 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. 
All rights reserved. + * Use is subject to license terms. + */ + + .file "hypot.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(hypot,function) +#include "libm_synonyms.h" +#include "libm_protos.h" + +#undef fabs + + .data + .align 4 +inf: + .long 0x7f800000 / +INF in single format; loaded with flds for the overflow test + + ENTRY(hypot) + movl 8(%esp),%eax / eax <-- hi_32(x) + andl $0x7fffffff,%eax / eax <-- hi_32(|x|) + jz .x_maybe_0 / hi_32(|x|) = 0: x is +/-0 or denormal, check lo_32(x) + subl $0x7ff00000,%eax / eax <-- hi_32(|x|) - hi_32(INF) + jz .x_maybe_inf / hi words match: x is +/-INF or NaN, check lo_32(x) +.check_y: + movl 16(%esp),%eax / eax <-- hi_32(y) + andl $0x7fffffff,%eax / eax <-- hi_32(|y|) + jz .y_maybe_0 / hi_32(|y|) = 0: y is +/-0 or denormal, check lo_32(y) + subl $0x7ff00000,%eax / eax <-- hi_32(|y|) - hi_32(INF) + jz .y_maybe_inf / hi words match: y is +/-INF or NaN, check lo_32(y) +.do_hypot: + fldl 12(%esp) / ,y + fmul %st(0),%st / ,y*y + fldl 4(%esp) / x,y*y + fmul %st(0),%st / x*x,y*y + faddp %st,%st(1) / x*x+y*y + fsqrt / sqrt(x*x+y*y) + subl $8,%esp / 8-byte slot used only to round the result + fstpl (%esp) / round to double + fldl (%esp) / sqrt(x*x+y*y) rounded to double + PIC_SETUP(1) + flds PIC_L(inf) / inf , sqrt(x*x+y*y) + PIC_WRAPUP + addl $8,%esp / release the rounding slot + fucomp / compare inf with the result, then pop inf + fstsw %ax / store status in %ax + sahf / 80387 flags in %ah to 80386 flags + jz .maybe_ovflw / ZF set: result equals inf, or compare was unordered + ret / result is neither INF nor NaN + +.maybe_ovflw: + jnp .ovflw / PF clear: ordered compare, so the result is +INF + ret / PF set: unordered, the result is NaN + +.ovflw: + / overflow occurred + fstp %st(0) / stack empty + pushl %ebp + movl %esp,%ebp / frame: x at 8(%ebp), y at 16(%ebp) + PIC_SETUP(2) + pushl $4 / error case number handed to _SVID_libm_err + pushl 20(%ebp) / high y + pushl 16(%ebp) / low y + pushl 12(%ebp) / high x + pushl 8(%ebp) / low x + call PIC_F(_SVID_libm_err) + addl $20,%esp / pop the five words pushed above + PIC_WRAPUP + leave + ret + +.x_maybe_0: + movl 4(%esp),%ecx / ecx <-- lo_32(x) + orl %ecx,%eax / is x = +/-0? + jnz .check_y / branch if x is denormal + / x = +/-0, so return |y| + fldl 12(%esp) + fabs + ret + +.x_maybe_inf: + movl 4(%esp),%ecx / ecx <-- lo_32(x) + orl %ecx,%eax / is x = +/-INF? + jnz .check_y / branch if x is NaN + / push&pop y in case y is a SNaN + fldl 12(%esp) + fstp %st(0) + / x = +/-INF, so return |x| + fldl 4(%esp) + fabs + ret + +.y_maybe_0: + movl 12(%esp),%ecx / ecx <-- lo_32(y) + orl %ecx,%eax / is y = +/-0? 
+ jnz .do_hypot / branch if y is denormal + / y = +/-0, so return |x| + fldl 4(%esp) + fabs + ret + +.y_maybe_inf: + movl 12(%esp),%ecx / ecx <-- lo_32(y) + orl %ecx,%eax / is y = +/-INF? + jnz .do_hypot / branch if y is NaN + / push&pop x in case x is a SNaN + fldl 4(%esp) + fstp %st(0) + / y = +/-INF, so return |y| + fldl 12(%esp) + fabs + ret + .align 4 + SET_SIZE(hypot) diff --git a/usr/src/lib/libm/i386/src/hypotf.s b/usr/src/lib/libm/i386/src/hypotf.s new file mode 100644 index 0000000000..068b75ea87 --- /dev/null +++ b/usr/src/lib/libm/i386/src/hypotf.s @@ -0,0 +1,70 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */
+
+	.file "hypotf.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(hypotf,function)
+#include "libm_synonyms.h"
+#include "libm_protos.h"
+
+#undef fabs
+
+	ENTRY(hypotf)
+	movl	4(%esp),%eax		/ eax <-- x
+	andl	$0x7fffffff,%eax	/ eax <-- |x|
+	jz	.return_abs_y		/ if x = +/-0, return |y|
+	subl	$0x7f800000,%eax	/ eax <-- |x| - INF
+	jz	.return_abs_x		/ if x = +/-INF, return |x|
+	movl	8(%esp),%eax		/ eax <-- y
+	andl	$0x7fffffff,%eax	/ eax <-- |y|
+	jz	.return_abs_x		/ if y = +/-0, return |x|
+	subl	$0x7f800000,%eax	/ eax <-- |y| - INF
+.return_abs_y:
+	flds	8(%esp)			/ y (flds leaves EFLAGS untouched)
+	jz	.take_abs		/ ZF from andl/subl above: x = +/-0 or y = +/-INF, return |y|
+	fmul	%st(0),%st		/ y*y
+	flds	4(%esp)			/ x,y*y
+	fmul	%st(0),%st		/ x*x,y*y
+	faddp	%st,%st(1)		/ x*x+y*y
+	fsqrt				/ sqrt(x*x+y*y)
+	subl	$4,%esp
+	fstps	(%esp)			/ round to single
+	flds	(%esp)			/ reload the single-rounded result
+	fwait
+	addl	$4,%esp
+	ret
+
+.return_abs_x:
+	/ returns |x|
+	flds	4(%esp)
+.take_abs:
+	fabs
+	ret
+	.align	4
+	SET_SIZE(hypotf)
diff --git a/usr/src/lib/libm/i386/src/ieee_funcl.s b/usr/src/lib/libm/i386/src/ieee_funcl.s
new file mode 100644
index 0000000000..60a22178e1
--- /dev/null
+++ b/usr/src/lib/libm/i386/src/ieee_funcl.s
@@ -0,0 +1,122 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+	.file "ieee_funcl.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(isinfl,function)
+LIBM_ANSI_PRAGMA_WEAK(isnormall,function)
+LIBM_ANSI_PRAGMA_WEAK(issubnormall,function)
+LIBM_ANSI_PRAGMA_WEAK(iszerol,function)
+LIBM_ANSI_PRAGMA_WEAK(signbitl,function)
+#include "libm_synonyms.h"
+
+	ENTRY(isinfl)
+	movl	12(%esp),%eax		/ ax <-- sign and bexp of x
+	notl	%eax			/ ax[0..14] <-- not(bexp(x))
+	andl	$0x00007fff,%eax	/ eax <-- zero_xtnd(not(bexp(x)))
+	jz	.L6			/ jump iff bexp(x) = 0x7fff
+	movl	$0,%eax
+.not_inf:
+	ret
+
+.L6:					/ here, (eax) = 0
+	movl	8(%esp),%ecx		/ ecx <-- hi_32(sgnfcnd(x))
+	xorl	$0x80000000,%ecx	/ handle unsupported implicitly
+	orl	4(%esp), %ecx		/ ecx = 0 iff sgnfcnd(x) = 0x80000000 00000000
+	jnz	.not_inf		/ any other significand: return 0
+	movl	$1,%eax
+	ret
+	.align	4
+	SET_SIZE(isinfl)
+
+	ENTRY(isnormall)
+					/ TRUE iff (x is finite, but
+					/ neither subnormal nor zero)
+					/ iff (msb(sgnfcnd(x) /= 0
+					/ & 0 < bexp(x) < 0x7fff)
+	movl	8(%esp),%eax		/ eax <-- hi_32(sgnfcnd(x))
+	andl	$0x80000000,%eax	/ eax[31] <-- msb(sgnfcnd(x)),
+					/ rest_of(eax) <-- 0
+	jz	.L8			/ jump iff msb(sgnfcnd(x)) = 0
+	movl	12(%esp),%eax		/ ax <-- sign and bexp of x
+	notl	%eax			/ ax[0..14] <-- not(bexp(x))
+	andl	$0x00007fff,%eax	/ eax <-- zero_xtnd(not(bexp(x)))
+	jz	.L8			/ jump iff bexp(x) = 0x7fff
+	xorl	$0x00007fff,%eax	/ treat pseudo-denormal as subnormal
+	jz	.L8			/ jump iff bexp(x) = 0
+	movl	$1,%eax
+.L8:
+	ret				/ every jump to .L8 arrives with (eax) = 0
+	.align	4
+	SET_SIZE(isnormall)
+
+	ENTRY(issubnormall)
+					/ TRUE iff (bexp(x) = 0 &
+					/ msb(sgnfcnd(x)) = 0 & frac(x) /= 0)
+	movl	8(%esp),%eax		/ eax <-- hi_32(sgnfcnd(x))
+	testl	$0x80000000,%eax	/ eax[31] = msb(sgnfcnd(x));
+					/ set ZF if it's 0.
+	jz	.may_be_subnorm		/ jump iff msb(sgnfcnd(x)) = 0
+.not_subnorm:
+	movl	$0,%eax
+.quicker_out:
+	ret
+.may_be_subnorm:
+	testl	$0x00007fff,12(%esp)	/ set ZF iff bexp(x) = 0
+	jnz	.not_subnorm		/ jump iff bexp(x) /= 0
+	orl	4(%esp),%eax		/ (eax) = 0 iff sgnfcnd(x) = 0
+	jz	.quicker_out		/ x = +/-0: return the 0 already in eax
+	movl	$1,%eax
+	ret
+	.align	4
+	SET_SIZE(issubnormall)
+
+	ENTRY(iszerol)
+	movl	12(%esp),%eax		/ ax <-- sign and bexp of x
+	andl	$0x00007fff,%eax	/ eax <-- zero_xtnd(bexp(x))
+	jz	.may_be_zero		/ jump iff bexp(x) = 0
+.not_zero:
+	movl	$0,%eax
+	ret
+.may_be_zero:				/ here, (eax) = 0
+	orl	8(%esp),%eax		/ is hi_32(sgnfcnd(x)) = 0?
+	jnz	.not_zero		/ jump iff hi_32(sgnfcnd(x)) /= 0
+	orl	4(%esp),%eax		/ is lo_32(sgnfcnd(x)) = 0?
+	jnz	.not_zero		/ jump iff lo_32(sgnfcnd(x)) /= 0
+	movl	$1,%eax
+	ret
+	.align	4
+	SET_SIZE(iszerol)
+
+	ENTRY(signbitl)
+	movl	10(%esp),%eax		/ eax[31] <-- sign_bit(x)
+	shrl	$31,%eax		/ eax <-- zero_xtnd(sign_bit(x))
+	ret
+	.align	4
+	SET_SIZE(signbitl)
diff --git a/usr/src/lib/libm/i386/src/ilogb.s b/usr/src/lib/libm/i386/src/ilogb.s
new file mode 100644
index 0000000000..6d4215fb03
--- /dev/null
+++ b/usr/src/lib/libm/i386/src/ilogb.s
@@ -0,0 +1,86 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+	.file "ilogb.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(ilogb,function)
+#include "libm_synonyms.h"
+#include "xpg6.h"
+
+	.data
+	.align	8
+two52:	.long	0x0,0x43300000		/ 2**52
+
+	ENTRY(ilogb)
+	movl	8(%esp),%eax		/ eax <-- hi_32(x)
+	andl	$0x7fffffff,%eax	/ eax <-- hi_32(abs(x))
+	testl	$0x7ff00000,%eax	/ is bexp(x) 0?
+	jz	.bexp_0			/ jump if x is 0 or subnormal
+					/ biased exponent is non-zero
+	cmpl	$0x7ff00000,%eax	/ is bexp(x) 0x7ff?
+	jae	.bexp_all_1		/ jump if x is NaN or Inf
+	shrl	$20,%eax		/ eax <-- bexp(x)
+	subl	$1023,%eax		/ unbias exponent by 1023
+	ret
+
+.bexp_all_1:
+	movl	$0x7fffffff,%eax	/ x is NaN or inf, so return 0x7fffffff
+	jmp	0f			/ join the __xpg6 check below
+
+.bexp_0:
+	orl	4(%esp),%eax		/ test whether x is 0
+	jnz	.ilogb_subnorm		/ jump iff x is subnormal
+	movl	$0x80000001,%eax	/ x is +/-0, so return 0x80000001
+0:
+	PIC_SETUP(0)
+	PIC_G_LOAD(movzwl,__xpg6,ecx)
+	PIC_WRAPUP
+	andl	$_C99SUSv3_ilogb_0InfNaN_raises_invalid,%ecx
+	cmpl	$0,%ecx
+	je	1f			/ flag clear in __xpg6: return quietly
+	fldz
+	fdivp	%st,%st(0)		/ raise invalid as per SUSv3
+1:
+	ret
+
+.ilogb_subnorm:				/ subnormal input
+	fldl	4(%esp)			/ push x
+	PIC_SETUP(1)
+	fmull	PIC_L(two52)		/ x*2**52
+	PIC_WRAPUP
+	subl	$8,%esp			/ set up storage area
+	fstpl	(%esp)			/ store x*2**52 in storage area
+	movl	$0x7ff00000,%eax	/ eax <-- double bexp mask (hi word)
+	andl	4(%esp),%eax		/ eax[20..30] <-- bexp(x*2**52)
+	shrl	$20,%eax		/ extract exponent of x*2**52
+	subl	$1075,%eax		/ unbias it by 1075 (= 1023 + 52)
+	addl	$8,%esp
+	ret
+	.align	4
+	SET_SIZE(ilogb)
diff --git a/usr/src/lib/libm/i386/src/ilogbf.s b/usr/src/lib/libm/i386/src/ilogbf.s
new file mode 100644
index 0000000000..927c4e9943
--- /dev/null
+++ 
b/usr/src/lib/libm/i386/src/ilogbf.s
@@ -0,0 +1,91 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+	.file "ilogbf.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(ilogbf,function)
+#include "libm_synonyms.h"
+#include "xpg6.h"
+
+	.data
+	.align	8
+two23:	.long	0x4b000000		/ 2**23
+
+	ENTRY(ilogbf)
+	movl	4(%esp),%eax		/ eax <-- x
+	testl	$0x7f800000,%eax	/ is bexp(x) 0?
+	jz	.bexp_0			/ jump if x is 0 or subnormal
+					/ here, biased exponent is non-zero
+	andl	$0x7fffffff,%eax	/ eax <-- abs(x)
+	cmpl	$0x7f800000,%eax	/ is bexp(x) 0xff?
+	jae	.bexp_all_1		/ jump if x is NaN or Inf
+	shrl	$23,%eax		/ eax <-- zero_xtnd(bexp(x))
+	subl	$127,%eax		/ unbias exponent by 127
+	ret
+
+.bexp_all_1:
+	movl	$0x7fffffff,%eax	/ x is NaN or inf, so return 0x7fffffff
+	jmp	0f			/ join the __xpg6 check below
+
+.bexp_0:
+	andl	$0x7fffffff,%eax	/ eax <-- abs(x), and
+					/ ZF = 1 iff x = 0.0
+	jnz	.ilogb_subnorm		/ jump iff x is subnormal
+	movl	$0x80000001,%eax	/ x is +/-0, so return 0x80000001
+0:
+	PIC_SETUP(0)
+	PIC_G_LOAD(movzwl,__xpg6,ecx)
+	PIC_WRAPUP
+	andl	$_C99SUSv3_ilogb_0InfNaN_raises_invalid,%ecx
+	cmpl	$0,%ecx
+	je	1f			/ flag clear in __xpg6: return quietly
+	fldz
+	fdivp	%st,%st(0)		/ raise invalid as per SUSv3
+1:
+	ret
+
+.ilogb_subnorm:				/ subnormal input
+	flds	4(%esp)			/ push x
+	PIC_SETUP(1)
+	fmuls	PIC_L(two23)		/ x*2**23; rebias x by 127+23,
+					/ instead of 127
+	PIC_WRAPUP
+	subl	$4,%esp			/ set up storage area
+	fstps	(%esp)			/ store x*2**23 in storage area
+	fwait				/ (shouldn't raise exception, but
+					/ just in case)
+	movl	$0x7f800000,%eax	/ eax <-- single_bexp_mask
+	andl	(%esp),%eax		/ eax[23..30] <-- bexp(x*2**23),
+					/ rest_of(eax) <-- 0
+	shrl	$23,%eax		/ eax <-- zero_xtnd(bexp(x*2**23))
+	subl	$150,%eax		/ unbias rebiased x by 150 (= 127 + 23)
+	addl	$4,%esp			/ restore stack for caller
+	ret
+	.align	4
+	SET_SIZE(ilogbf)
diff --git a/usr/src/lib/libm/i386/src/ilogbl.s b/usr/src/lib/libm/i386/src/ilogbl.s
new file mode 100644
index 0000000000..d2a6c0160b
--- /dev/null
+++ b/usr/src/lib/libm/i386/src/ilogbl.s
@@ -0,0 +1,87 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+	.file "ilogbl.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(ilogbl,function)
+#include "libm_synonyms.h"
+#include "xpg6.h"
+
+	.data
+	.align	8
+two63:	.long	0x0,0x43d00000		/ 2**62 (bexp 0x43d = 1023 + 62), despite the label
+
+	ENTRY(ilogbl)
+	movl	12(%esp),%eax		/ eax <-- sign and bexp of x
+	andl	$0x00007fff,%eax	/ eax <-- bexp(x)
+	jz	.bexp_0			/ jump iff x is 0 or subnormal
+					/ here, biased exponent is non-zero
+	testl	$0x80000000,8(%esp)	/ test msb of hi_32(sgnfcnd(x))
+	jz	.ilogbl_not_finite	/ jump if unsupported format
+	cmpl	$0x00007fff,%eax	/ is bexp(x) 0x7fff?
+	je	.ilogbl_not_finite	/ jump if x is NaN or inf
+	subl	$16383,%eax		/ unbias exponent by 16383 = 0x3fff
+	ret
+
+.ilogbl_not_finite:
+	movl	$0x7fffffff,%eax	/ x is NaN/inf/unsup
+	jmp	0f			/ join the __xpg6 check below
+
+.bexp_0:
+	movl	8(%esp),%eax		/ eax <-- hi_32(sgnfcnd(x))
+	orl	4(%esp),%eax		/ test whether x is 0
+	jnz	.ilogbl_subnorm		/ jump iff x is subnormal
+	movl	$0x80000001,%eax	/ x is +/-0, so return 0x80000001
+0:
+	PIC_SETUP(0)
+	PIC_G_LOAD(movzwl,__xpg6,ecx)
+	PIC_WRAPUP
+	andl	$_C99SUSv3_ilogb_0InfNaN_raises_invalid,%ecx
+	cmpl	$0,%ecx
+	je	1f			/ flag clear in __xpg6: return quietly
+	fldz
+	fdivp	%st,%st(0)		/ raise invalid as per SUSv3
+1:
+	ret
+
+
+.ilogbl_subnorm:			/ subnormal or pseudo-denormal input
+	fldt	4(%esp)			/ push x, setting D-flag
+	PIC_SETUP(1)
+	fmull	PIC_L(two63)		/ x*2**62
+	PIC_WRAPUP
+	subl	$12,%esp
+	fstpt	(%esp)
+	movl	$0x00007fff,%eax
+	andl	8(%esp),%eax		/ eax <-- bexp of x*2**62 (sign masked off)
+	subl	$16445,%eax		/ unbias it by 16445 (= 16,383 + 62)
+	addl	$12,%esp
+	ret
+	.align	4
+	SET_SIZE(ilogbl)
diff --git a/usr/src/lib/libm/i386/src/isnan.s b/usr/src/lib/libm/i386/src/isnan.s
new file mode 100644
index 0000000000..1f343862b3
--- /dev/null
+++ b/usr/src/lib/libm/i386/src/isnan.s
@@ -0,0 +1,64 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+	.file "isnan.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(isnan,function)
+	.weak _isnan
+	.type _isnan,@function
+_isnan	= __isnan
+	.weak _isnand
+	.type _isnand,@function
+_isnand	= __isnan
+	.weak isnand
+	.type isnand,@function
+isnand	= __isnan
+#include "libm_synonyms.h"
+
+	ENTRY(isnan)
+	movl	8(%esp),%eax		/ eax <-- hi_32(x)
+	andl	$0x7fffffff,%eax	/ eax <-- hi_32(abs(x))
+	subl	$0x7ff00000,%eax	/ weed out finite values
+	jae	.nan_or_inf		/ no jump if arg. is finite
+	movl	$0,%eax			/ ansi needs (eax) = 0
+	ret
+.nan_or_inf:
+	ja	.got_nan		/ no jump if arg. may be infinite;
+					/ let nan waste time
+					/ (eax) = 0 here
+	testl	$0xffffffff,4(%esp)	/ ZF <-- 1 iff lo_frac. = 0
+					/ iff arg. is infinite
+	jnz	.got_nan		/ no jump if arg. is infinite;
+	ret
+.got_nan:
+	movl	$1,%eax			/ %eax may be nonzero here (ja path);
+					/ return exactly 1 to indicate TRUE
+	ret
+	.align	4
+	SET_SIZE(isnan)
diff --git a/usr/src/lib/libm/i386/src/isnanf.s b/usr/src/lib/libm/i386/src/isnanf.s
new file mode 100644
index 0000000000..d2316ed50d
--- /dev/null
+++ b/usr/src/lib/libm/i386/src/isnanf.s
@@ -0,0 +1,55 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+	.file "isnanf.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(isnanf,function)
+	.weak _isnanf
+	.type _isnanf,@function
+_isnanf	= __isnanf
+#include "libm_synonyms.h"
+
+	ENTRY(isnanf)
+	movl	4(%esp),%eax		/ eax <-- x
+	andl	$0x7fffffff,%eax	/ eax <-- abs(x)
+	subl	$0x7f800000,%eax	/ ZF <-- 1 iff x is infinite
+	jae	.nan_or_inf		/ no jump iff arg. is finite
+	movl	$0,%eax
+	ret
+.nan_or_inf:
+	jnz	.got_nan		/ no jump if arg. infinite;
+					/ let nan waste time
+	ret				/ %eax = 0 here
+.got_nan:
+	movl	$1,%eax			/ %eax = abs(x) - INF > 0 here;
+					/ return exactly 1 to indicate TRUE
+	ret
+	.align	4
+	SET_SIZE(isnanf)
+
diff --git a/usr/src/lib/libm/i386/src/isnanl.s b/usr/src/lib/libm/i386/src/isnanl.s
new file mode 100644
index 0000000000..00374c8198
--- /dev/null
+++ b/usr/src/lib/libm/i386/src/isnanl.s
@@ -0,0 +1,56 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+	.file "isnanl.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(isnanl,function)
+#include "libm_synonyms.h"
+
+	ENTRY(isnanl)
+	movl	12(%esp),%eax		/ ax <-- sign bit and exp
+	andl	$0x00007fff,%eax	/ eax <-- zero_xtnd(exp)
+	jz	.not_nan		/ jump if exp is all 0
+	xorl	$0x00007fff,%eax	/ eax = 0 iff exp is all 1
+	jz	.nan_or_inf		/ jump if exp is all 1
+	testl	$0x80000000,8(%esp)	/ test leading bit of significand
+	jz	.got_nan		/ jump if leading bit is 0
+	movl	$0,%eax
+.not_nan:
+	ret
+.nan_or_inf:				/ note that %eax = 0 from before
+	cmpl	$0x80000000,8(%esp)	/ what is first half of significand?
+	jnz	.got_nan		/ jump if not equal to 0x80000000
+	testl	$0xffffffff,4(%esp)	/ is second half of significand 0?
+	jnz	.got_nan		/ jump if not equal to 0
+	ret
+.got_nan:
+	movl	$1,%eax
+	ret
+	.align	4
+	SET_SIZE(isnanl)
diff --git a/usr/src/lib/libm/i386/src/libm_inlines.h b/usr/src/lib/libm/i386/src/libm_inlines.h
new file mode 100644
index 0000000000..e57ecbaded
--- /dev/null
+++ b/usr/src/lib/libm/i386/src/libm_inlines.h
@@ -0,0 +1,351 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * Copyright 2011, Richard Lowe
+ */
+
+/* Functions in this file are duplicated in locallibm.il. 
Keep them in sync */
+
+#ifndef _LIBM_INLINES_H
+#define _LIBM_INLINES_H
+
+#ifdef __GNUC__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/types.h>
+#include <sys/ieeefp.h>
+
+#define _LO_WORD(x) ((uint32_t *)&x)[0]
+#define _HI_WORD(x) ((uint32_t *)&x)[1]
+#define _HIER_WORD(x) ((uint32_t *)&x)[2]
+
+extern __inline__ double
+__inline_sqrt(double a)
+{
+	double ret;
+
+	__asm__ __volatile__("fsqrt\n\t" : "=t" (ret) : "0" (a) : "cc");
+	return (ret);
+}
+
+extern __inline__ double
+__ieee754_sqrt(double a)
+{
+	return (__inline_sqrt(a));
+}
+
+extern __inline__ float
+__inline_sqrtf(float a)
+{
+	float ret;
+
+	__asm__ __volatile__("fsqrt\n\t" : "=t" (ret) : "0" (a) : "cc");
+	return (ret);
+}
+
+extern __inline__ double
+__inline_rint(double a)
+{
+	__asm__ __volatile__(
+	    "andl $0x7fffffff,%1\n\t"
+	    "cmpl $0x43300000,%1\n\t"
+	    "jae 1f\n\t"
+	    "frndint\n\t"
+	    "1: fwait\n\t"
+	    : "+t" (a), "+&r" (_HI_WORD(a))
+	    :
+	    : "cc");
+
+	return (a);
+}
+
+/*
+ * 00 - 24 bits
+ * 01 - reserved
+ * 10 - 53 bits
+ * 11 - 64 bits
+ */
+extern __inline__ int
+__swapRP(int i)
+{
+	int ret;
+	uint16_t cw;
+
+	__asm__ __volatile__("fstcw %0\n\t" : "=m" (cw));
+
+	ret = (cw >> 8) & 0x3;
+	cw = (cw & 0xfcff) | ((i & 0x3) << 8);
+
+	__asm__ __volatile__("fldcw %0\n\t" : : "m" (cw));
+
+	return (ret);
+}
+
+/*
+ * 00 - Round to nearest, with even preferred
+ * 01 - Round down
+ * 10 - Round up
+ * 11 - Chop
+ */
+extern __inline__ enum fp_direction_type
+__swap87RD(enum fp_direction_type i)
+{
+	int ret;
+	uint16_t cw;
+
+	__asm__ __volatile__("fstcw %0\n\t" : "=m" (cw));
+
+	ret = (cw >> 10) & 0x3;
+	cw = (cw & 0xf3ff) | ((i & 0x3) << 10);
+
+	__asm__ __volatile__("fldcw %0\n\t" : : "m" (cw));
+
+	return (ret);
+}
+
+extern __inline__ double
+ceil(double d)
+{
+	/*
+	 * Let's set a Rounding Control (RC) bits from x87 FPU Control Word
+	 * to fp_positive and save old bits in rd.
+	 */
+	short rd = __swap87RD(fp_positive);
+
+	/*
+	 * The FRNDINT instruction returns a floating-point value that is the
+	 * integral value closest to the source value in the direction of the
+	 * rounding mode specified in the RC field of the x87 FPU control word.
+	 *
+	 * Rounds the source value in the ST(0) register to the nearest
+	 * integral value, depending on the current rounding mode
+	 * (setting of the RC field of the FPU control word),
+	 * and stores the result in ST(0).
+	 */
+	__asm__ __volatile__("frndint" : "+t" (d) : : "cc");
+
+	/* restore old RC bits */
+	__swap87RD(rd);
+
+	return (d);
+}
+
+extern __inline__ double
+copysign(double d1, double d2)
+{
+	__asm__ __volatile__(
+	    "andl $0x7fffffff,%0\n\t"	/* %0 <-- hi_32(abs(d)) */
+	    "andl $0x80000000,%1\n\t"	/* %1[31] <-- sign_bit(d2) */
+	    "orl %1,%0\n\t"		/* %0 <-- hi_32(copysign(x,y)) */
+	    : "+&r" (_HI_WORD(d1)), "+r" (_HI_WORD(d2))
+	    :
+	    : "cc");
+
+	return (d1);
+}
+
+extern __inline__ double
+fabs(double d)
+{
+	__asm__ __volatile__("fabs\n\t" : "+t" (d) : : "cc");
+	return (d);
+}
+
+extern __inline__ float
+fabsf(float d)
+{
+	__asm__ __volatile__("fabs\n\t" : "+t" (d) : : "cc");
+	return (d);
+}
+
+extern __inline__ long double
+fabsl(long double d)
+{
+	__asm__ __volatile__("fabs\n\t" : "+t" (d) : : "cc");
+	return (d);
+}
+
+extern __inline__ int
+finite(double d)
+{
+	int ret = _HI_WORD(d);
+
+	__asm__ __volatile__(
+	    "notl %0\n\t"
+	    "andl $0x7ff00000,%0\n\t"
+	    "negl %0\n\t"
+	    "shrl $31,%0\n\t"
+	    : "+r" (ret)
+	    :
+	    : "cc");
+	return (ret);
+}
+
+extern __inline__ double
+floor(double d)
+{
+	short rd = __swap87RD(fp_negative);
+
+	__asm__ __volatile__("frndint" : "+t" (d), "+r" (rd) : : "cc");
+	__swap87RD(rd);
+
+	return (d);
+}
+
+/*
+ * branchless __isnan; %0 must start as lo_32(x) (lx), %1 as hi_32(x)
+ * ((0x7ff00000-[((lx|-lx)>>31)&1]|ahx)>>31)&1 = 1 iff x is NaN
+ */
+extern __inline__ int
+isnan(double d)
+{
+	uint32_t lo = _LO_WORD(d), hi = _HI_WORD(d);
+
+	__asm__ __volatile__(
+	    "movl %0,%%ecx\n\t"
+	    "negl %%ecx\n\t"			/* ecx <-- -lo_32(x) */
+	    "orl %%ecx,%0\n\t"
+	    "shrl $31,%0\n\t"			/* 1 iff lx != 0 */
+	    "andl $0x7fffffff,%1\n\t"	/* %1 <-- hi_32(abs(x)) */
+	    "orl %1,%0\n\t"
+	    "subl $0x7ff00000,%0\n\t"
+	    "negl %0\n\t"
+	    "shrl $31,%0\n\t"
+	    : "+r" (lo), "+r" (hi)	/* both are modified by the asm */
+	    :
+	    : "ecx", "cc");
+
+	return (lo);
+}
+
+extern __inline__ int
+isnanf(float f)
+{
+	__asm__ __volatile__(
+	    "andl $0x7fffffff,%0\n\t"
+	    "negl %0\n\t"
+	    "addl $0x7f800000,%0\n\t"
+	    "shrl $31,%0\n\t"
+	    : "+r" (*(uint32_t *)&f)	/* operate on the bit pattern of f */
+	    :
+	    : "cc");
+
+	return (*(uint32_t *)&f);	/* integer 0 or 1, not a float value */
+}
+
+extern __inline__ double
+rint(double a) {
+	return (__inline_rint(a));
+}
+
+extern __inline__ double
+scalbn(double d, int n)
+{
+	double dummy;
+
+	__asm__ __volatile__(
+	    "fildl %2\n\t"	/* Convert N to extended */
+	    "fxch\n\t"
+	    "fscale\n\t"
+	    : "+t" (d), "=u" (dummy)
+	    : "m" (n)
+	    : "cc");
+
+	return (d);
+}
+
+extern __inline__ int
+signbit(double d)
+{
+	return (_HI_WORD(d) >> 31);
+}
+
+extern __inline__ int
+signbitf(float f)
+{
+	return ((*(uint32_t *)&f) >> 31);
+}
+
+extern __inline__ double
+sqrt(double d)
+{
+	return (__inline_sqrt(d));
+}
+
+extern __inline__ float
+sqrtf(float f)
+{
+	return (__inline_sqrtf(f));
+}
+
+extern __inline__ long double
+sqrtl(long double ld)
+{
+	__asm__ __volatile__("fsqrt" : "+t" (ld) : : "cc");
+	return (ld);
+}
+
+extern __inline__ int
+isnanl(long double ld)
+{
+	int ret = _HIER_WORD(ld);
+
+	__asm__ __volatile__(
+	    "andl $0x00007fff,%0\n\t"
+	    "jz 1f\n\t"		/* jump if exp is all 0 */
+	    "xorl $0x00007fff,%0\n\t"
+	    "jz 2f\n\t"		/* jump if exp is all 1 */
+	    "testl $0x80000000,%1\n\t"
+	    "jz 3f\n\t"		/* jump if leading bit is 0 */
+	    "xorl %0,%0\n\t"
+	    "jmp 1f\n\t"
+	    "2:\n\t"		/* note that %0 = 0 from before */
+	    "cmpl $0x80000000,%1\n\t"	/* what is first half of significand? */
+	    "jnz 3f\n\t"	/* jump if not equal to 0x80000000 */
+	    "testl $0xffffffff,%2\n\t"	/* is second half of significand 0? */
+	    "jnz 3f\n\t"	/* jump if not equal to 0 */
+	    "jmp 1f\n\t"
+	    "3:\n\t"
+	    "movl $1,%0\n\t"
+	    "1:\n\t"
+	    : "+&r" (ret)
+	    : "r" (_HI_WORD(ld)), "r" (_LO_WORD(ld))
+	    : "cc");
+
+	return (ret);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __GNUC__ */
+
+#endif /* _LIBM_INLINES_H */
diff --git a/usr/src/lib/libm/i386/src/llrint.s b/usr/src/lib/libm/i386/src/llrint.s
new file mode 100644
index 0000000000..c3c27f9c83
--- /dev/null
+++ b/usr/src/lib/libm/i386/src/llrint.s
@@ -0,0 +1,46 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+	.file "llrint.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(llrint,function)
+#include "libm_synonyms.h"
+
+	ENTRY(llrint)
+	movl	%esp,%ecx		/ ecx <-- esp at entry (argument base)
+	subl	$8,%esp			/ reserve 8 bytes of scratch below it
+	fldl	4(%ecx)			/ load x
+	fistpll	-8(%ecx)		/ [x]: store as 64-bit int per current rounding mode, pop
+	fwait
+	movl	-8(%ecx),%eax		/ eax <-- lo_32(result)
+	movl	-4(%ecx),%edx		/ edx <-- hi_32(result)
+	addl	$8,%esp			/ release scratch space
+	ret
+	.align	4
+	SET_SIZE(llrint)
diff --git a/usr/src/lib/libm/i386/src/llrintf.s b/usr/src/lib/libm/i386/src/llrintf.s
new file mode 100644
index 0000000000..e4656cbbd0
--- /dev/null
+++ b/usr/src/lib/libm/i386/src/llrintf.s
@@ -0,0 +1,46 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+	.file "llrintf.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(llrintf,function)
+#include "libm_synonyms.h"
+
+	ENTRY(llrintf)
+	movl	%esp,%ecx		/ ecx <-- esp at entry (argument base)
+	subl	$8,%esp			/ reserve 8 bytes of scratch below it
+	flds	4(%ecx)			/ load x
+	fistpll	-8(%ecx)		/ [x]: store as 64-bit int per current rounding mode, pop
+	fwait
+	movl	-8(%ecx),%eax		/ eax <-- lo_32(result)
+	movl	-4(%ecx),%edx		/ edx <-- hi_32(result)
+	addl	$8,%esp			/ release scratch space
+	ret
+	.align	4
+	SET_SIZE(llrintf)
diff --git a/usr/src/lib/libm/i386/src/llrintl.s b/usr/src/lib/libm/i386/src/llrintl.s
new file mode 100644
index 0000000000..500fc472ca
--- /dev/null
+++ b/usr/src/lib/libm/i386/src/llrintl.s
@@ -0,0 +1,46 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+	.file "llrintl.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(llrintl,function)
+#include "libm_synonyms.h"
+
+	ENTRY(llrintl)
+	movl	%esp,%ecx		/ ecx <-- esp at entry (argument base)
+	subl	$8,%esp			/ reserve 8 bytes of scratch below it
+	fldt	4(%ecx)			/ load x
+	fistpll	-8(%ecx)		/ [x]: store as 64-bit int per current rounding mode, pop
+	fwait
+	movl	-8(%ecx),%eax		/ eax <-- lo_32(result)
+	movl	-4(%ecx),%edx		/ edx <-- hi_32(result)
+	addl	$8,%esp			/ release scratch space
+	ret
+	.align	4
+	SET_SIZE(llrintl)
diff --git a/usr/src/lib/libm/i386/src/locallibm.il b/usr/src/lib/libm/i386/src/locallibm.il
new file mode 100644
index 0000000000..ca79724f86
--- /dev/null
+++ b/usr/src/lib/libm/i386/src/locallibm.il
@@ -0,0 +1,370 @@
+/
+/ CDDL HEADER START
+/
+/ The contents of this file are subject to the terms of the
+/ Common Development and Distribution License (the "License").
+/ You may not use this file except in compliance with the License.
+/
+/ You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+/ or http://www.opensolaris.org/os/licensing.
+/ See the License for the specific language governing permissions
+/ and limitations under the License.
+/
+/ When distributing Covered Code, this CDDL HEADER in each
+/ file and the License file at usr/src/OPENSOLARIS.LICENSE.
+/ If applicable, add the following below this CDDL HEADER, with the
+/ fields enclosed by brackets "[]" replaced with your own identifying
+/ information: Portions Copyright [yyyy] [name of copyright owner]
+/
+/ CDDL HEADER END
+/
+/ Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+/
+/ Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+/ Use is subject to license terms.
+/
+
+/ Portions of this file are duplicated as GCC inline assembly in
+/ libm_inlines.h. Keep them in sync.
+ + .inline __ieee754_sqrt,0 + fldl (%esp) / load double x + fsqrt / st = sqrt(x) + .end + + .inline __inline_rint,0 + fldl (%esp) / load double x + movl 4(%esp),%eax / eax <-- hi_32(x) + andl $0x7fffffff,%eax / eax <-- hi_32(abs(x)) + cmpl $0x43300000,%eax / is abs(x) >= 2**52, inf or NaN? + jae 1f / yes: skip the rounding + frndint / round to integer per the current RD +1: + fwait / in case we jumped around the frndint + .end + + .inline __inline_sqrtf,0 + flds (%esp) / load float x + fsqrt / st = sqrt(x) + .end + + .inline __inline_sqrt,0 + fldl (%esp) / load double x + fsqrt / st = sqrt(x) + .end + + .inline __inline_fstsw,0 + fstsw %ax / ax <-- x87 status word + .end + +/ +/ 00 - 24 bits +/ 01 - reserved +/ 10 - 53 bits +/ 11 - 64 bits +/ + .inline __swapRP,0 + subl $4,%esp / scratch slot for the control word + fstcw (%esp) / store the current cw + movw (%esp),%ax / ax <-- old cw + movw %ax,%cx + andw $0xfcff,%cx / cx <-- old cw with bits 8-9 (precision) cleared + movl 4(%esp),%edx /// edx <-- requested precision code + andl $0x3,%edx + shlw $8,%dx / line it up with bits 8-9 + orw %dx,%cx + movl %ecx,(%esp) + fldcw (%esp) / install the new cw + shrw $8,%ax + andl $0x3,%eax / return the previous precision code + addl $4,%esp + .end + +/ +/ 00 - Round to nearest, with even preferred +/ 01 - Round down +/ 10 - Round up +/ 11 - Chop +/ + .inline __swap87RD,0 + subl $4,%esp / scratch slot for the control word + fstcw (%esp) / store the current cw + movw (%esp),%ax / ax <-- old cw + movw %ax,%cx + andw $0xf3ff,%cx / cx <-- old cw with bits 10-11 (RD) cleared + movl 4(%esp),%edx / edx <-- requested RD code + andl $0x3,%edx + shlw $10,%dx / line it up with bits 10-11 + orw %dx,%cx + movl %ecx,(%esp) + fldcw (%esp) / install the new cw + shrw $10,%ax + andl $0x3,%eax / return the previous RD code + addl $4,%esp + .end + +/ +/ Convert Top-of-Stack to long +/ + .inline __xtol,0 + subl $8,%esp / 8 bytes of stack space + fstcw 2(%esp) / byte[2:3] = old_cw + movw 2(%esp),%ax + andw $0xf3ff,%ax + orw $0x0c00,%ax / RD set to Chop + movw %ax,(%esp) / byte[0:1] = new_cw + fldcw (%esp) / set new_cw + fistpl 4(%esp) / byte[4:7] = converted long + fstcw (%esp) / restore old RD + movw (%esp),%ax + andw $0xf3ff,%ax / ax <-- current cw with RD cleared + movw 2(%esp),%dx + andw $0x0c00,%dx / dx <-- RD field of old_cw + orw %ax,%dx + movw %dx,2(%esp) + fldcw 2(%esp) / reload cw with the original RD + movl 4(%esp),%eax / return the converted long + addl $8,%esp + .end + + .inline __ceil,0 + subl $8,%esp + fstcw (%esp) / (%esp) = old cw + fldl 8(%esp) /// load x + movw (%esp),%cx + orw $0x0c00,%cx + xorw $0x0400,%cx / RD field = 10 (round up) + movw %cx,4(%esp) + fldcw 4(%esp) / set RD = up + frndint / st = x rounded up to an integer + fstcw 4(%esp) / restore RD + movw 4(%esp),%dx + andw $0xf3ff,%dx / dx <-- current cw with RD cleared + movw (%esp),%cx + andw $0x0c00,%cx / cx <-- RD field of old cw + orw %dx,%cx + movw %cx,(%esp) + fldcw (%esp) / reload cw with the original RD + addl $8,%esp + .end + + .inline
__copysign,0 + movl 4(%esp),%eax /// eax <-- hi_32(x) + movl 12(%esp),%ecx /// ecx <-- hi_32(y) + andl $0x7fffffff,%eax / eax <-- hi_32(abs(x)) + andl $0x80000000,%ecx / ecx[31] <-- sign_bit(y) + orl %ecx,%eax / eax <-- hi_32(__copysign(x,y)) + movl (%esp),%ecx /// ecx <-- lo_32(x) + / = lo_32(__copysign(x,y)) + subl $8,%esp / set up loading dock for result + movl %ecx,(%esp) / copy lo_32(result) to loading dock + movl %eax,4(%esp) / copy hi_32(result) to loading dock + fldl (%esp) / load __copysign(x,y) + fwait / in case fldl causes exception + addl $8,%esp / restore stack-pointer + .end + + .inline __d_sqrt_,0 + movl (%esp),%eax / eax <-- pointer argument + fldl (%eax) / load the double it points to + fsqrt / st = square root of that value + .end + + .inline __fabs,0 + fldl (%esp) /// load double x + fabs / st = abs(x) + .end + + .inline __fabsf,0 + flds (%esp) / load float x + fabs / st = abs(x) + .end + + .inline __fabsl,0 + fldt (%esp) / load long double x + fabs / st = abs(x) + .end + +/ +/ branchless _finite +/ + .inline _finite,0 + movl 4(%esp),%eax /// eax <-- hi_32(x) + notl %eax / not(bexp) = 0 iff bexp = all 1's + andl $0x7ff00000,%eax / eax <-- not(bexp) in bits 20..30, rest 0 + negl %eax / bit 31 set iff eax was nonzero + shrl $31,%eax / return 1 iff bexp is not all ones + .end + + .inline __floor,0 + subl $8,%esp + fstcw (%esp) / (%esp) = old cw + fldl 8(%esp) /// load x + movw (%esp),%cx + orw $0x0c00,%cx + xorw $0x0800,%cx / RD field = 01 (round down) + movw %cx,4(%esp) + fldcw 4(%esp) / set RD = down + frndint / st = x rounded down to an integer + fstcw 4(%esp) / restore RD + movw 4(%esp),%dx + andw $0xf3ff,%dx / dx <-- current cw with RD cleared + movw (%esp),%cx + andw $0x0c00,%cx / cx <-- RD field of old cw + orw %dx,%cx + movw %cx,(%esp) + fldcw (%esp) / restore RD + addl $8,%esp + .end + + .inline __isnanf,0 + movl (%esp),%eax / eax <-- bits of float x + andl $0x7fffffff,%eax / eax <-- bits of abs(x) + negl %eax + addl $0x7f800000,%eax / eax <-- 0x7f800000 - bits of abs(x) + shrl $31,%eax / negative iff abs(x) is above inf, i.e. x is NaN + .end + + + .inline __isnormal,0 + / TRUE iff (x is _finite, but + / neither subnormal nor +/-0) + / iff (0 < bexp(x) < 0x7ff) + movl 4(%esp),%eax / eax <-- hi_32(x) + andl $0x7ff00000,%eax / eax[20..30] <-- bexp(x), + / rest_of(eax) <-- 0 + pushfl + popl %ecx / bit 6 (ZF) of ecx <-- (bexp(x) = 0) + subl $0x7ff00000,%eax + pushfl + popl %eax / bit 6 (ZF) of eax <-- (bexp(x) = 0x7ff) + orl %ecx,%eax / bit 6 set iff bexp(x) is 0 or 0x7ff + andl $0x40,%eax + xorl $0x40,%eax / invert: bit 6 set iff 0 < bexp(x) < 0x7ff + shrl $6,%eax / return that bit + .end + + .inline __issubnormal,0 + / TRUE iff
(bexp(x) = 0 and + / frac(x) /= 0) + movl $0,%eax + movl 4(%esp),%ecx / ecx <-- hi_32(x) + andl $0x7fffffff,%ecx / ecx <-- hi_32(abs(x)) + cmpl $0x00100000,%ecx / is bexp(x) = 0? + adcl $0,%eax / eax <-- 1 iff bexp(x) = 0 (carry from cmpl) + orl (%esp),%ecx / = 0 iff sgnfcnd(x) = 0 + / iff x = +/- 0.0 here + pushfl + popl %ecx / bit 6 (ZF) of ecx set iff x = +/- 0.0 + andl $0x40,%ecx + xorl $0x40,%ecx / invert: bit 6 set iff x is nonzero + shrl $6,%ecx + andl %ecx,%eax / TRUE iff bexp(x) = 0 and x is nonzero + .end + + .inline __iszero,0 + movl 4(%esp),%eax / eax <-- hi_32(x) + andl $0x7fffffff,%eax / eax <-- hi_32(abs(x)) + orl (%esp),%eax / = 0 iff x = +/- 0.0 + pushfl + popl %eax / bit 6 (ZF) of eax set iff x = +/- 0.0 + andl $0x40,%eax + shrl $6,%eax / return that bit + .end + + .inline __r_sqrt_,0 + movl (%esp),%eax / eax <-- pointer argument + flds (%eax) / load the float it points to + fsqrt / st = square root of that value + .end + + .inline __rint,0 + fldl (%esp) / load double x + movl 4(%esp),%eax / eax <-- hi_32(x) + andl $0x7fffffff,%eax / eax <-- hi_32(abs(x)) + cmpl $0x43300000,%eax / is abs(x) >= 2**52, inf or NaN? + jae 1f / yes: skip the rounding + frndint / round to integer per the current RD +1: + fwait / in case we jumped around frndint + .end + + .inline __scalbn,0 + fildl 8(%esp) /// convert N to extended + fldl (%esp) /// push x + fscale / st = x * 2**N + fstp %st(1) / discard N, keep the result + .end + + .inline __signbit,0 + movl 4(%esp),%eax /// high part of x + shrl $31,%eax / return the sign bit + .end + + .inline __signbitf,0 + movl (%esp),%eax / eax <-- bits of float x + shrl $31,%eax / return the sign bit + .end + + .inline __sqrt,0 + fldl (%esp) / load double x + fsqrt / st = sqrt(x) + .end + + .inline __sqrtf,0 + flds (%esp) / load float x + fsqrt / st = sqrt(x) + .end + + .inline __sqrtl,0 + fldt (%esp) / load long double x + fsqrt / st = sqrt(x) + .end + + .inline __isnanl,0 + movl 8(%esp),%eax / ax <-- sign bit and __exp + andl $0x00007fff,%eax / eax <-- __exp + jz 1f / jump if __exp is all 0 + xorl $0x00007fff,%eax + jz 2f / jump if __exp is all 1 + testl $0x80000000,4(%esp) + jz 3f / jump if leading bit is 0 + movl $0,%eax / leading bit is 1: return 0 + jmp 1f +2: / note that %eax = 0 from before + cmpl $0x80000000,4(%esp) / what is first half of __significand? + jnz 3f / jump if not equal to 0x80000000 + testl $0xffffffff,(%esp) / is second half of __significand 0?
+ jnz 3f / jump if not equal to 0 + jmp 1f +3: + movl $1,%eax / return 1 +1: + .end + + .inline __f95_signf,0 + sub $4,%esp / scratch slot for the result + mov 4(%esp),%edx / edx <-- first pointer argument + mov (%edx),%eax / eax <-- bits of the float it points to + and $0x7fffffff,%eax / clear its sign bit + mov 8(%esp),%edx / edx <-- second pointer argument + mov (%edx),%ecx / ecx <-- bits of the float it points to + and $0x80000000,%ecx / keep only its sign bit + or %ecx,%eax / magnitude of the first, sign of the second + mov %eax,(%esp) + flds (%esp) / load the combined value as the result + add $4,%esp + .end + + .inline __f95_sign,0 + mov (%esp),%edx / edx <-- first pointer argument + fldl (%edx) / load the double it points to + fabs / clear its sign + mov 4(%esp),%edx / edx <-- second pointer argument + mov 4(%edx),%eax / eax <-- hi_32 of the double it points to + test %eax,%eax + jns 1f / sign bit clear: keep the positive value + fchs / sign bit set: negate +1: + .end + diff --git a/usr/src/lib/libm/i386/src/log.s b/usr/src/lib/libm/i386/src/log.s new file mode 100644 index 0000000000..0d72806019 --- /dev/null +++ b/usr/src/lib/libm/i386/src/log.s @@ -0,0 +1,96 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms.
+ */ + + .file "log.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(log,function) +#include "libm_synonyms.h" +#include "libm_protos.h" + + ENTRY(log) + fldln2 / loge(2) + movl 8(%esp),%eax / eax <-- hi_32(x) + testl $0x80000000,%eax / sign bit of x set? + jnz .maybe_0_or_less / yes: x is negative, -0 or a NaN with signbit = 1 + testl $0x7fffffff,%eax / any other bit of hi_32(x) set? + jz .maybe_0 / no: x is +0 or a +denormal with hi_32(x) = 0 + fldl 4(%esp) / arg, loge(2) + fyl2x / loge(2)*log2(arg); ln(arg) + ret + +.maybe_0: + movl 4(%esp),%ecx / ecx <-- lo_32(x) + cmpl $0,%ecx + je .zero / no branch if x is +denormal +.neg_nan_reentry: + fldl 4(%esp) / arg, loge(2) + fyl2x / loge(2)*log2(arg); ln(arg) + ret + +.zero_or_less: + / x =< 0 + testl $0x7fffffff,%eax / any bit of hi_32(abs(x)) set? + jnz .less_than_0 / yes: x < 0 + movl 4(%esp),%ecx / ecx <-- lo_32(x) + cmpl $0,%ecx + jne .less_than_0 / branch if x is -denormal +.zero: + / x = +/-0 + pushl %ebp + movl %esp,%ebp / frame pointer, so x is at 8(%ebp) + PIC_SETUP(1) + pushl $16 / last argument to _SVID_libm_err for the x = +/-0 case + jmp .merge + +.maybe_0_or_less: + cmpl $0xfff00000,%eax / -INF below hi_32(x)? + ja .neg_nan_reentry / hi_32(x) above that of -INF: NaN with signbit = 1 + jb .zero_or_less / hi_32(x) below that of -INF: finite x =< 0 + movl 4(%esp),%ecx / ecx <-- lo_32(x) + cmpl $0,%ecx / is x NaN or -INF? + jne .neg_nan_reentry / branch if x is NaN with signbit = 1 + / x = -INF +.less_than_0: + pushl %ebp + movl %esp,%ebp / frame pointer, so x is at 8(%ebp) + PIC_SETUP(2) + pushl $17 / last argument to _SVID_libm_err for the x < 0 case +.merge: + fstp %st(0) / stack empty + pushl 12(%ebp) / push x (high then low word) as the second argument + pushl 8(%ebp) + pushl 12(%ebp) / push x again as the first argument + pushl 8(%ebp) + call PIC_F(_SVID_libm_err) + addl $20,%esp / drop the 20 bytes of arguments + PIC_WRAPUP + leave + ret + .align 4 + SET_SIZE(log) diff --git a/usr/src/lib/libm/i386/src/log10.s b/usr/src/lib/libm/i386/src/log10.s new file mode 100644 index 0000000000..65f9620f55 --- /dev/null +++ b/usr/src/lib/libm/i386/src/log10.s @@ -0,0 +1,96 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .file "log10.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(log10,function) +#include "libm_synonyms.h" +#include "libm_protos.h" + + ENTRY(log10) + fldlg2 / log10(2) + movl 8(%esp),%eax / eax <-- hi_32(x) + testl $0x80000000,%eax / sign bit of x set? + jnz .maybe_0_or_less / yes: x is negative, -0 or a NaN with signbit = 1 + testl $0x7fffffff,%eax / any other bit of hi_32(x) set? + jz .maybe_0 / no: x is +0 or a +denormal with hi_32(x) = 0 + fldl 4(%esp) / arg, log10(2) + fyl2x / log10(2)*log2(arg); log10(arg) + ret + +.maybe_0: + movl 4(%esp),%ecx / ecx <-- lo_32(x) + cmpl $0,%ecx + je .zero / no branch if x is +denormal +.neg_nan_reentry: + fldl 4(%esp) / arg, log10(2) + fyl2x / log10(2)*log2(arg); log10(arg) + ret + +.zero_or_less: + / x =< 0 + testl $0x7fffffff,%eax / any bit of hi_32(abs(x)) set? + jnz .less_than_0 / yes: x < 0 + movl 4(%esp),%ecx / ecx <-- lo_32(x) + cmpl $0,%ecx + jne .less_than_0 / branch if x is -denormal +.zero: + / x = +/-0 + pushl %ebp + movl %esp,%ebp / frame pointer, so x is at 8(%ebp) + PIC_SETUP(1) + pushl $18 / last argument to _SVID_libm_err for the x = +/-0 case + jmp .merge + +.maybe_0_or_less: + cmpl $0xfff00000,%eax / -INF below hi_32(x)? + ja .neg_nan_reentry / hi_32(x) above that of -INF: NaN with signbit = 1 + jb .zero_or_less / hi_32(x) below that of -INF: finite x =< 0 + movl 4(%esp),%ecx / ecx <-- lo_32(x) + cmpl $0,%ecx / is x NaN or -INF?
+ jne .neg_nan_reentry / branch if x is NaN with signbit = 1 + / x = -INF +.less_than_0: + pushl %ebp + movl %esp,%ebp / frame pointer, so x is at 8(%ebp) + PIC_SETUP(2) + pushl $19 / last argument to _SVID_libm_err for the x < 0 case +.merge: + fstp %st(0) / stack empty + pushl 12(%ebp) / push x (high then low word) as the second argument + pushl 8(%ebp) + pushl 12(%ebp) / push x again as the first argument + pushl 8(%ebp) + call PIC_F(_SVID_libm_err) + addl $20,%esp / drop the 20 bytes of arguments + PIC_WRAPUP + leave + ret + .align 4 + SET_SIZE(log10) diff --git a/usr/src/lib/libm/i386/src/log10f.s b/usr/src/lib/libm/i386/src/log10f.s new file mode 100644 index 0000000000..5b69b547ed --- /dev/null +++ b/usr/src/lib/libm/i386/src/log10f.s @@ -0,0 +1,42 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms.
+ */ + + .file "log10f.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(log10f,function) +#include "libm_synonyms.h" +#include "libm_protos.h" + + ENTRY(log10f) + fldlg2 / push log10(2), the multiplier for fyl2x + flds 4(%esp) / st = arg, st(1) = log10(2) + fyl2x / st = log10(arg) = log10(2)*log2(arg) + ret / result is left in st + .align 4 + SET_SIZE(log10f) diff --git a/usr/src/lib/libm/i386/src/log10l.s b/usr/src/lib/libm/i386/src/log10l.s new file mode 100644 index 0000000000..6a86fd9968 --- /dev/null +++ b/usr/src/lib/libm/i386/src/log10l.s @@ -0,0 +1,41 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms.
+ */ + + .file "log10l.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(log10l,function) +#include "libm_synonyms.h" + + ENTRY(log10l) + fldlg2 / push log10(2), the multiplier for fyl2x + fldt 4(%esp) / st = arg, st(1) = log10(2) + fyl2x / st = log10(arg) = log10(2)*log2(arg) + ret / result is left in st + .align 4 + SET_SIZE(log10l) diff --git a/usr/src/lib/libm/i386/src/log2.s b/usr/src/lib/libm/i386/src/log2.s new file mode 100644 index 0000000000..32424aa7b5 --- /dev/null +++ b/usr/src/lib/libm/i386/src/log2.s @@ -0,0 +1,41 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms.
+ */ + + .file "log2.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(log2,function) +#include "libm_synonyms.h" + + ENTRY(log2) + fld1 / push 1.0, the multiplier for fyl2x + fldl 4(%esp) / push x + fyl2x / st = 1.0*log2(arg) + ret / result is left in st + .align 4 + SET_SIZE(log2) diff --git a/usr/src/lib/libm/i386/src/log2f.s b/usr/src/lib/libm/i386/src/log2f.s new file mode 100644 index 0000000000..67f78ef095 --- /dev/null +++ b/usr/src/lib/libm/i386/src/log2f.s @@ -0,0 +1,41 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms.
+ */ + + .file "log2f.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(log2f,function) +#include "libm_synonyms.h" + + ENTRY(log2f) + fld1 / push 1.0, the multiplier for fyl2x + flds 4(%esp) / push x + fyl2x / st = 1.0*log2(arg) + ret / result is left in st + .align 4 + SET_SIZE(log2f) diff --git a/usr/src/lib/libm/i386/src/log2l.s b/usr/src/lib/libm/i386/src/log2l.s new file mode 100644 index 0000000000..1181f50bbd --- /dev/null +++ b/usr/src/lib/libm/i386/src/log2l.s @@ -0,0 +1,41 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms.
+ */ + + .file "log2l.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(log2l,function) +#include "libm_synonyms.h" + + ENTRY(log2l) + fld1 / push 1.0, the multiplier for fyl2x + fldt 4(%esp) / push x + fyl2x / st = 1.0*log2(arg) + ret / result is left in st + .align 4 + SET_SIZE(log2l) diff --git a/usr/src/lib/libm/i386/src/logl.s b/usr/src/lib/libm/i386/src/logl.s new file mode 100644 index 0000000000..1eef76f5ca --- /dev/null +++ b/usr/src/lib/libm/i386/src/logl.s @@ -0,0 +1,41 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms.
+ */ + + .file "logl.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(logl,function) +#include "libm_synonyms.h" + + ENTRY(logl) + fldln2 / push loge(2), the multiplier for fyl2x + fldt 4(%esp) / st = arg, st(1) = loge(2) + fyl2x / st = ln(arg) = loge(2)*log2(arg) + ret / result is left in st + .align 4 + SET_SIZE(logl) diff --git a/usr/src/lib/libm/i386/src/lrint.s b/usr/src/lib/libm/i386/src/lrint.s new file mode 100644 index 0000000000..f34c5b6f4a --- /dev/null +++ b/usr/src/lib/libm/i386/src/lrint.s @@ -0,0 +1,45 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms.
+ */ + + .file "lrint.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(lrint,function) +#include "libm_synonyms.h" + + ENTRY(lrint) + movl %esp,%ecx / lrint(x): ecx <-- esp at entry, so x is at 4(%ecx) + subl $8,%esp / reserve scratch space at -8(%ecx) + fldl 4(%ecx) / load x + fistpl -8(%ecx) / store [x], rounded per the current RD, as a 32-bit integer and pop + fwait / let any pending x87 exception be raised before the result is read + movl -8(%ecx),%eax / return [x] + addl $8,%esp / release the scratch space + ret + .align 4 + SET_SIZE(lrint) diff --git a/usr/src/lib/libm/i386/src/lrintf.s b/usr/src/lib/libm/i386/src/lrintf.s new file mode 100644 index 0000000000..e2fb9d2827 --- /dev/null +++ b/usr/src/lib/libm/i386/src/lrintf.s @@ -0,0 +1,45 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms.
+ */ + + .file "lrintf.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(lrintf,function) +#include "libm_synonyms.h" + + ENTRY(lrintf) + movl %esp,%ecx / lrintf(x): ecx <-- esp at entry, so x is at 4(%ecx) + subl $8,%esp / reserve scratch space at -8(%ecx) + flds 4(%ecx) / load x + fistpl -8(%ecx) / store [x], rounded per the current RD, as a 32-bit integer and pop + fwait / let any pending x87 exception be raised before the result is read + movl -8(%ecx),%eax / return [x] + addl $8,%esp / release the scratch space + ret + .align 4 + SET_SIZE(lrintf) diff --git a/usr/src/lib/libm/i386/src/lrintl.s b/usr/src/lib/libm/i386/src/lrintl.s new file mode 100644 index 0000000000..c78afbdd70 --- /dev/null +++ b/usr/src/lib/libm/i386/src/lrintl.s @@ -0,0 +1,45 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms.
+ */ + + .file "lrintl.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(lrintl,function) +#include "libm_synonyms.h" + + ENTRY(lrintl) + movl %esp,%ecx / lrintl(x): ecx <-- esp at entry, so x is at 4(%ecx) + subl $8,%esp / reserve scratch space at -8(%ecx) + fldt 4(%ecx) / load x + fistpl -8(%ecx) / store [x], rounded per the current RD, as a 32-bit integer and pop + fwait / let any pending x87 exception be raised before the result is read + movl -8(%ecx),%eax / return [x] + addl $8,%esp / release the scratch space + ret + .align 4 + SET_SIZE(lrintl) diff --git a/usr/src/lib/libm/i386/src/lround.s b/usr/src/lib/libm/i386/src/lround.s new file mode 100644 index 0000000000..dbafe5ea93 --- /dev/null +++ b/usr/src/lib/libm/i386/src/lround.s @@ -0,0 +1,97 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms.
+ */ + + .file "lround.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(lround,function) +#include "libm_synonyms.h" +#undef fabs + + .section .rodata + .align 4 +.Lhalf: .float 0.5 + + ENTRY(lround) + movl %esp,%ecx / lround(x): ecx <-- esp at entry, so x is at 4(%ecx) + subl $8,%esp / scratch: -8(%ecx) = caller cw (later the result), -4(%ecx) = working cw + fstcw -8(%ecx) / save the caller cw + fldl 4(%ecx) / x + movw -8(%ecx),%dx + andw $0xf3ff,%dx / clear the RD field (bits 10-11) + movw %dx,-4(%ecx) + fldcw -4(%ecx) / set RD = to_nearest + fld %st(0) / x,x + frndint / [x],x + fstcw -4(%ecx) + movw -4(%ecx),%dx + andw $0xf3ff,%dx / dx <-- current cw with RD cleared + movw -8(%ecx),%ax + andw $0x0c00,%ax / ax <-- RD field of the caller cw + orw %dx,%ax + movw %ax,-8(%ecx) + fldcw -8(%ecx) / restore RD + fucom / check if x is already an integer + fstsw %ax + sahf + jp 0f / unordered: x is NaN + je 0f / [x] = x + fxch / x,[x] + fsub %st(1),%st / x-[x],[x] + fabs / |x-[x]|,[x] + PIC_SETUP(1) + fcoms PIC_L(.Lhalf) / compare |x-[x]| with 0.5 + PIC_WRAPUP + fnstsw %ax + sahf + jae 2f / if |x-[x]| = 0.5 goto halfway, + / most cases will not take branch. +0: + fstp %st(0) / pop the top entry, leaving the value to convert +1: + fistpl -8(%ecx) / convert to a 32-bit integer and pop + fwait + movl -8(%ecx),%eax / return the converted value + addl $8,%esp / release the scratch space + ret +2: + / x = n+0.5, recompute lround(x) as x+sign(x)*0.5 + fldl 4(%ecx) / x, 0.5, [x] + movl 8(%ecx),%eax / high part of x + andl $0x80000000,%eax / isolate the sign bit of x + jnz 3f / sign bit set: x is negative + faddp / x+0.5,[x] + fstp %st(1) / x+0.5 + jmp 1b +3: + / here, x is negative, so return x-0.5 + fsubp %st,%st(1) / x-0.5,[x] + fstp %st(1) / x-0.5 + jmp 1b + .align 4 + SET_SIZE(lround) diff --git a/usr/src/lib/libm/i386/src/lroundl.s b/usr/src/lib/libm/i386/src/lroundl.s new file mode 100644 index 0000000000..8c5f2205ee --- /dev/null +++ b/usr/src/lib/libm/i386/src/lroundl.s @@ -0,0 +1,97 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License.
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .file "lroundl.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(lroundl,function) +#include "libm_synonyms.h" +#undef fabs + + .section .rodata + .align 4 +.Lhalf: .float 0.5 + + ENTRY(lroundl) + movl %esp,%ecx / lroundl(x): ecx <-- esp at entry, so x is at 4(%ecx) + subl $8,%esp / scratch: -8(%ecx) = caller cw (later the result), -4(%ecx) = working cw + fstcw -8(%ecx) / save the caller cw + fldt 4(%ecx) / x + movw -8(%ecx),%dx + andw $0xf3ff,%dx / clear the RD field (bits 10-11) + movw %dx,-4(%ecx) + fldcw -4(%ecx) / set RD = to_nearest + fld %st(0) / x,x + frndint / [x],x + fstcw -4(%ecx) + movw -4(%ecx),%dx + andw $0xf3ff,%dx / dx <-- current cw with RD cleared + movw -8(%ecx),%ax + andw $0x0c00,%ax / ax <-- RD field of the caller cw + orw %dx,%ax + movw %ax,-8(%ecx) + fldcw -8(%ecx) / restore RD + fucom / check if x is already an integer + fstsw %ax + sahf + jp 0f / unordered: x is NaN + je 0f / [x] = x + fxch / x,[x] + fsub %st(1),%st / x-[x],[x] + fabs / |x-[x]|,[x] + PIC_SETUP(1) + fcoms PIC_L(.Lhalf) / compare |x-[x]| with 0.5 + PIC_WRAPUP + fnstsw %ax + sahf + jae 2f / if |x-[x]| = 0.5 goto halfway, + / most cases will not take branch.
+0: + fstp %st(0) / pop the top entry, leaving the value to convert +1: + fistpl -8(%ecx) / convert to a 32-bit integer and pop + fwait + movl -8(%ecx),%eax / return the converted value + addl $8,%esp / release the scratch space + ret +2: + / x = n+0.5, recompute lroundl(x) as x+sign(x)*0.5 + fldt 4(%ecx) / x, 0.5, [x] + movw 12(%ecx),%ax / sign+exp part of x + andw $0x8000,%ax / look at sign bit + jnz 3f / sign bit set: x is negative + faddp / x+0.5,[x] + fstp %st(1) / x+0.5 + jmp 1b +3: + / here, x is negative, so return x-0.5 + fsubp %st,%st(1) / x-0.5,[x] + fstp %st(1) / x-0.5 + jmp 1b + .align 4 + SET_SIZE(lroundl) diff --git a/usr/src/lib/libm/i386/src/nextafter.s b/usr/src/lib/libm/i386/src/nextafter.s new file mode 100644 index 0000000000..b04bc0dd8a --- /dev/null +++ b/usr/src/lib/libm/i386/src/nextafter.s @@ -0,0 +1,134 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms.
+ */ + + .file "nextafter.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(nextafter,function) + .weak _nextafter + .type _nextafter,@function +_nextafter = __nextafter +#include "libm_synonyms.h" +#include "libm_protos.h" + + .data + .align 8 +Fmin: .long 0x1,0x0 +ftmp: .long 0,0 /// scratch: the underflow path stores its product here + + + ENTRY(nextafter) + pushl %ebp + movl %esp,%ebp / x is at 8(%ebp), y is at 16(%ebp) + fldl 16(%ebp) / y + subl $8,%esp / -8(%ebp) will hold the result z + fldl 8(%ebp) / load x + fucom / x : y + fstsw %ax + sahf + jp .NaN / unordered: x or y is NaN + je .equal / x == y + fstp %st(1) / x + ja .bigger + / x < y + ftst / x : 0 + movl $1,%ecx /// Fmin + movl %ecx,-8(%ebp) / provisional z = Fmin, used when x == 0 + movl $0,%ecx /// Fmin+4 + movl %ecx,-4(%ebp) + fnstsw %ax + sahf + je .final / x == 0: z = Fmin + ja .addulp / x > 0: step the bit pattern up + jb .subulp / x < 0: step the bit pattern down +.bigger: + / x > y + ftst / x : 0 + movl $1,%ecx /// Fmin + movl %ecx,-8(%ebp) / provisional z = -Fmin, used when x == 0 + movl $0,%ecx /// Fmin+4 + xorl $0x80000000,%ecx / set the sign bit + movl %ecx,-4(%ebp) + fnstsw %ax + sahf + je .final / x == 0: z = -Fmin + jb .addulp / x < 0: step the bit pattern up +.subulp: + movl 8(%ebp),%eax / low x + movl 12(%ebp),%ecx / high x + subl $1,%eax / low x - ulp + movl %eax,-8(%ebp) + sbbl $0x0,%ecx / propagate the borrow into high x + movl %ecx,-4(%ebp) + jmp .final +.addulp: + movl 8(%ebp),%eax / low x + movl 12(%ebp),%ecx / high x + addl $1,%eax / low x + ulp + movl %eax,-8(%ebp) + adcl $0x0,%ecx / propagate the carry into high x + movl %ecx,-4(%ebp) + +.final: + fstp %st(0) / pop x + fldl -8(%ebp) / load z + andl $0x7ff00000,%ecx / ecx <-- bexp(z), taken from hi_32(z) + jz .underflow / bexp(z) = 0: z is zero or subnormal + cmpl $0x7ff00000,%ecx + je .overflow / bexp(z) all ones: z stepped past the largest finite value + jmp .return +.overflow: + PIC_SETUP(1) + pushl $46 / last argument to _SVID_libm_err for the overflow case + fstp %st(0) / stack empty + pushl -4(%ebp) / push z (high then low word) as the second argument + pushl -8(%ebp) + pushl -4(%ebp) / push z again as the first argument + pushl -8(%ebp) + call PIC_F(_SVID_libm_err) + addl $20,%esp / drop the 20 bytes of arguments + PIC_WRAPUP + jmp .return +.underflow: + PIC_SETUP(2) + fldl PIC_L(Fmin) / Fmin, z + fmul %st(0),%st / Fmin*Fmin, z + fstpl PIC_L(ftmp) / create underflow signal + PIC_WRAPUP + jmp .return +.equal: + fstp %st(0) / C99 says to return y when x == y + jmp .return +.NaN: + faddp %st,%st(1) / x+y +.return: + fwait + leave + ret + .align 4 + SET_SIZE(nextafter) diff --git a/usr/src/lib/libm/i386/src/nextafterf.s b/usr/src/lib/libm/i386/src/nextafterf.s new file mode 100644 index 0000000000..9cbb36166e --- /dev/null +++
b/usr/src/lib/libm/i386/src/nextafterf.s @@ -0,0 +1,114 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */
+
+	.file "nextafterf.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(nextafterf,function)
+#include "libm_synonyms.h"
+
+	.data
+	.align 4
+Fmaxf: .long 0x7f7fffff / bit pattern of the largest finite float
+Fminf: .long 0x1 / bit pattern of the smallest positive subnormal float
+ftmpf: .long 0 / scratch target of the flag-raising stores below
+/ nextafterf(x,y): x is at 8(%ebp), y at 12(%ebp).  x == y returns y, an
+/ unordered pair returns x+y, otherwise the bits of x move one ulp toward y.
+	ENTRY(nextafterf)
+	pushl %ebp
+	movl %esp,%ebp
+	movl $0,%eax /// upper half of %eax must be initialized
+	flds 12(%ebp) / y
+	subl $4,%esp / scratch float at -4(%ebp) receives the result bits
+	flds 8(%ebp) / x, y
+	fucom / x : y
+	fstsw %ax
+	sahf
+	jp .NaN / unordered: x or y is a NaN
+	je .equal
+	fstp %st(1) / x
+	ja .bigger
+	/ x < y
+	ftst / x : 0
+	movl $0x1,-4(%ebp) / -4(%ebp) contains Fminf
+	fnstsw %ax
+	sahf
+	je .final / x == 0: result is Fminf; %eax holds only the status word
+	ja .addulp / x > 0: one ulp away from zero
+	jb .subulp / x < 0: one ulp toward zero
+.bigger:
+	/ x > y
+	ftst / x : 0
+	movl $0x80000001,-4(%ebp) / -4(%ebp) contains -Fminf
+	fnstsw %ax
+	sahf
+	je .final / x == 0: result is -Fminf; %eax holds only the status word
+	jb .addulp / x < 0: one ulp away from zero
+.subulp:
+	movl 8(%ebp),%eax / x
+	subl $1,%eax / x - ulp
+	movl %eax,-4(%ebp)
+	jmp .final
+.addulp:
+	movl 8(%ebp),%eax / x
+	addl $1,%eax / x + ulp
+	movl %eax,-4(%ebp)
+
+.final:
+	fstp %st(0) / empty
+	flds -4(%ebp) / z
+	andl $0x7f800000,%eax / exponent field; 0 when %eax is just the status word
+	jz .underflow / zero exponent: z is zero or subnormal
+	cmpl $0x7f800000,%eax
+	je .overflow / all-ones exponent: z is infinite
+	jmp .return
+.overflow:
+	PIC_SETUP(1)
+	flds PIC_L(Fmaxf) / Fmaxf, z
+	fmul %st(0),%st / Fmaxf*Fmaxf, z
+	fstps PIC_L(ftmpf) / z & create overflow signal (store to single overflows)
+	PIC_WRAPUP
+	jmp .return
+.underflow:
+	PIC_SETUP(2)
+	flds PIC_L(Fminf) / Fminf, z
+	fmul %st(0),%st / Fminf*Fminf, z
+	fstps PIC_L(ftmpf) / z & create underflow signal (store to single underflows)
+	PIC_WRAPUP
+	jmp .return
+.equal:
+	fstp %st(0) / C99 says to return y when x == y
+	jmp .return
+.NaN:
+	faddp %st,%st(1) / x+y
+.return:
+	fwait
+	leave
+	ret
+	.align 4
+	SET_SIZE(nextafterf)
diff --git a/usr/src/lib/libm/i386/src/nextafterl.s b/usr/src/lib/libm/i386/src/nextafterl.s
new file mode 100644
index 0000000000..cdf8647039
--- /dev/null
+++ b/usr/src/lib/libm/i386/src/nextafterl.s
@@ -0,0 +1,186 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+	.file "nextafterl.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(nextafterl,function)
+#include "libm_synonyms.h"
+
+	.section .rodata
+	.align 4
+.LFmaxl: .long 0xffffffff,0xffffffff,0x00007ffe / largest finite long double
+.LFminl: .long 0x1,0x0,0x0 / smallest positive subnormal long double
+/ nextafterl(x,y): x is at 8(%ebp), y at 20(%ebp).  x == y returns y, an
+/ unordered pair returns x+y, otherwise the bits of x move one ulp toward y.
+	ENTRY(nextafterl)
+	pushl %ebp
+	movl %esp,%ebp
+	fldt 20(%ebp) / y
+	subl $12,%esp / scratch long double at -12(%ebp) receives the result bits
+	fldt 8(%ebp) / load x
+	fucom / x : y
+	fstsw %ax
+	sahf
+	jp .LNaN / unordered: x or y is a NaN
+	je .Lequal
+	fstp %st(1) / x
+	ja .Lbigger
+	/ x < y
+	ftst / x : 0
+	movl $1,-12(%ebp) /// -12(%ebp) contains Fminl
+	movl $0,-8(%ebp)
+	movl $0,%ecx /// final needs this
+	movl %ecx,-4(%ebp)
+	fnstsw %ax
+	sahf
+	je .Lfinal / x == 0: result is the +Fminl just stored
+	ja .Laddulp / x > 0: one ulp away from zero
+	jb .Lsubulp / x < 0: one ulp toward zero
+.Lbigger:
+	/ x > y
+	ftst / x : 0
+	movl $1,-12(%ebp) /// -12(%ebp) contains -Fminl
+	movl $0,-8(%ebp)
+	movl $0x00008000,%ecx /// final needs this
+	movl %ecx,-4(%ebp)
+	fnstsw %ax
+	sahf
+	je .Lfinal / x == 0: result is the -Fminl just stored
+	jb .Laddulp / x < 0: one ulp away from zero
+.Lsubulp:
+	movl 12(%ebp),%edx / high word of significand of x
+	movl 16(%ebp),%ecx / x's exponent
+	andl $0x0000ffff,%ecx / keep only the sign and exponent bits
+	movl %edx,%eax
+	not %eax
+	andl $0x80000000,%eax / look at explicit leading bit
+	orl %ecx,%eax
+	andl $0x80007fff,%eax / zero iff leading bit is set and exponent field is 0
+	jnz .Lnot_pseudonormal / zero value implies pseudonormal
+	addl $1,%ecx / if pseudonormal, turn into equivalent normal
+.Lnot_pseudonormal:
+	movl 8(%ebp),%eax / low x
+	subl $1,%eax / low x - ulp
+	movl %eax,-12(%ebp)
+	cmpl $0xffffffff,%eax / this means low x was 0
+	jz .Lborrow
+	movl %edx,-8(%ebp)
+	movl %ecx,-4(%ebp)
+	jmp .Lfinal
+.Lborrow:
+	cmpl $0x80000000,%edx / look at high x
+	je .Lsecond_borrow / high x has only the leading bit: borrow from exponent
+	subl $1,%edx
+	movl %edx,-8(%ebp)
+	movl %ecx,-4(%ebp)
+	jmp .Lfinal
+.Lsecond_borrow:
+	movl %ecx,%eax
+	andl $0x7fff,%eax / look at exp x without sign bit
+	cmpl $1,%eax
+	jbe .Lsubnormal_result / exp > 1 ==> result will be normal
+	movl $0xffffffff,-8(%ebp)
+	subl $1,%ecx
+	movl %ecx,-4(%ebp)
+	jmp .Lfinal
+.Lsubnormal_result:
+	movl $0x7fffffff,-8(%ebp) / leading bit cleared, all fraction bits set
+	movl %ecx,%eax
+	andl $0x8000,%eax / look at sign bit
+	jz .Lpositive
+	movl $0x8000,%ecx / negative sign, zero exponent
+	movl %ecx,-4(%ebp)
+	jmp .Lfinal
+.Lpositive:
+	movl $0,%ecx / positive sign, zero exponent
+	movl %ecx,-4(%ebp)
+	jmp .Lfinal
+.Laddulp:
+	movl 12(%ebp),%edx / high x
+	movl 16(%ebp),%ecx / x's exponent
+	andl $0x0000ffff,%ecx / keep only the sign and exponent bits
+	movl %edx,%eax
+	not %eax
+	andl $0x80000000,%eax / look at explicit leading bit
+	orl %ecx,%eax
+	andl $0x80007fff,%eax / zero iff leading bit is set and exponent field is 0
+	jnz .Lnot_pseudonormal_2 / zero value implies pseudonormal
+	addl $1,%ecx / if pseudonormal, turn into equivalent normal
+.Lnot_pseudonormal_2:
+	movl 8(%ebp),%eax / low x
+	addl $1,%eax / low x + ulp
+	movl %eax,-12(%ebp)
+	jz .Lcarry / jump if the content of %eax is 0
+	movl %edx,-8(%ebp)
+	movl %ecx,-4(%ebp)
+	jmp .Lfinal
+.Lcarry:
+	movl %edx,%eax
+	andl $0x7fffffff,%eax
+	cmpl $0x7fffffff,%eax / look at high x
+	je .Lsecond_carry / all fraction bits of high x set: carry into exponent
+	addl $1,%edx
+	movl %edx,-8(%ebp)
+	movl %ecx,-4(%ebp)
+	jmp .Lfinal
+.Lsecond_carry:
+	movl $0x80000000,-8(%ebp)
+	addl $1,%ecx
+	movl %ecx,-4(%ebp)
+.Lfinal:
+	fstp %st(0) / drop x
+	fldt -12(%ebp) / z, the new value
+	andl $0x00007fff,%ecx / exponent field of z
+	jz .Lunderflow / zero exponent: z is zero or subnormal
+	cmpw $0x7fff,%cx
+	je .Loverflow / all-ones exponent: z is infinite
+	jmp .Lreturn
+.Loverflow:
+	PIC_SETUP(1)
+	fldt PIC_L(.LFmaxl) / Fmaxl, z
+	PIC_WRAPUP
+	fmulp %st,%st(0) / create overflow signal
+	jmp .Lreturn
+.Lunderflow:
+	PIC_SETUP(2)
+	fldt PIC_L(.LFminl) / Fminl, z
+	PIC_WRAPUP
+	fmulp %st,%st(0) / create underflow signal
+	jmp .Lreturn
+.Lequal:
+	fstp %st(0) / C99 says to return y when x == y
+	jmp .Lreturn
+.LNaN:
+	faddp %st,%st(1) / x+y
+.Lreturn:
+	fwait
+	leave
+	ret
+	.align 4
+	SET_SIZE(nextafterl)
diff --git a/usr/src/lib/libm/i386/src/nexttowardl.s b/usr/src/lib/libm/i386/src/nexttowardl.s
new file mode 100644
index 0000000000..c39b33d04b
--- /dev/null
+++ b/usr/src/lib/libm/i386/src/nexttowardl.s
@@ -0,0 +1,186 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+	.file "nexttowardl.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(nexttowardl,function)
+#include "libm_synonyms.h"
+
+	.section .rodata
+	.align 4
+.LFmaxl: .long 0xffffffff,0xffffffff,0x00007ffe / largest finite long double
+.LFminl: .long 0x1,0x0,0x0 / smallest positive subnormal long double
+/ nexttowardl(x,y): x is at 8(%ebp), y at 20(%ebp).  x == y returns y, an
+/ unordered pair returns x+y, otherwise the bits of x move one ulp toward y.
+	ENTRY(nexttowardl)
+	pushl %ebp
+	movl %esp,%ebp
+	fldt 20(%ebp) / y
+	subl $12,%esp / scratch long double at -12(%ebp) receives the result bits
+	fldt 8(%ebp) / load x
+	fucom / x : y
+	fstsw %ax
+	sahf
+	jp .LNaN / unordered: x or y is a NaN
+	je .Lequal
+	fstp %st(1) / x
+	ja .Lbigger
+	/ x < y
+	ftst / x : 0
+	movl $1,-12(%ebp) /// -12(%ebp) contains Fminl
+	movl $0,-8(%ebp)
+	movl $0,%ecx /// final needs this
+	movl %ecx,-4(%ebp)
+	fnstsw %ax
+	sahf
+	je .Lfinal / x == 0: result is the +Fminl just stored
+	ja .Laddulp / x > 0: one ulp away from zero
+	jb .Lsubulp / x < 0: one ulp toward zero
+.Lbigger:
+	/ x > y
+	ftst / x : 0
+	movl $1,-12(%ebp) /// -12(%ebp) contains -Fminl
+	movl $0,-8(%ebp)
+	movl $0x00008000,%ecx /// final needs this
+	movl %ecx,-4(%ebp)
+	fnstsw %ax
+	sahf
+	je .Lfinal / x == 0: result is the -Fminl just stored
+	jb .Laddulp / x < 0: one ulp away from zero
+.Lsubulp:
+	movl 12(%ebp),%edx / high word of significand of x
+	movl 16(%ebp),%ecx / x's exponent
+	andl $0x0000ffff,%ecx / keep only the sign and exponent bits
+	movl %edx,%eax
+	not %eax
+	andl $0x80000000,%eax / look at explicit leading bit
+	orl %ecx,%eax
+	andl $0x80007fff,%eax / zero iff leading bit is set and exponent field is 0
+	jnz .Lnot_pseudonormal / zero value implies pseudonormal
+	addl $1,%ecx / if pseudonormal, turn into equivalent normal
+.Lnot_pseudonormal:
+	movl 8(%ebp),%eax / low x
+	subl $1,%eax / low x - ulp
+	movl %eax,-12(%ebp)
+	cmpl $0xffffffff,%eax / this means low x was 0
+	jz .Lborrow
+	movl %edx,-8(%ebp)
+	movl %ecx,-4(%ebp)
+	jmp .Lfinal
+.Lborrow:
+	cmpl $0x80000000,%edx / look at high x
+	je .Lsecond_borrow / high x has only the leading bit: borrow from exponent
+	subl $1,%edx
+	movl %edx,-8(%ebp)
+	movl %ecx,-4(%ebp)
+	jmp .Lfinal
+.Lsecond_borrow:
+	movl %ecx,%eax
+	andl $0x7fff,%eax / look at exp x without sign bit
+	cmpl $1,%eax
+	jbe .Lsubnormal_result / exp > 1 ==> result will be normal
+	movl $0xffffffff,-8(%ebp)
+	subl $1,%ecx
+	movl %ecx,-4(%ebp)
+	jmp .Lfinal
+.Lsubnormal_result:
+	movl $0x7fffffff,-8(%ebp) / leading bit cleared, all fraction bits set
+	movl %ecx,%eax
+	andl $0x8000,%eax / look at sign bit
+	jz .Lpositive
+	movl $0x8000,%ecx / negative sign, zero exponent
+	movl %ecx,-4(%ebp)
+	jmp .Lfinal
+.Lpositive:
+	movl $0,%ecx / positive sign, zero exponent
+	movl %ecx,-4(%ebp)
+	jmp .Lfinal
+.Laddulp:
+	movl 12(%ebp),%edx / high x
+	movl 16(%ebp),%ecx / x's exponent
+	andl $0x0000ffff,%ecx / keep only the sign and exponent bits
+	movl %edx,%eax
+	not %eax
+	andl $0x80000000,%eax / look at explicit leading bit
+	orl %ecx,%eax
+	andl $0x80007fff,%eax / zero iff leading bit is set and exponent field is 0
+	jnz .Lnot_pseudonormal_2 / zero value implies pseudonormal
+	addl $1,%ecx / if pseudonormal, turn into equivalent normal
+.Lnot_pseudonormal_2:
+	movl 8(%ebp),%eax / low x
+	addl $1,%eax / low x + ulp
+	movl %eax,-12(%ebp)
+	jz .Lcarry / jump if the content of %eax is 0
+	movl %edx,-8(%ebp)
+	movl %ecx,-4(%ebp)
+	jmp .Lfinal
+.Lcarry:
+	movl %edx,%eax
+	andl $0x7fffffff,%eax
+	cmpl $0x7fffffff,%eax / look at high x
+	je .Lsecond_carry / all fraction bits of high x set: carry into exponent
+	addl $1,%edx
+	movl %edx,-8(%ebp)
+	movl %ecx,-4(%ebp)
+	jmp .Lfinal
+.Lsecond_carry:
+	movl $0x80000000,-8(%ebp)
+	addl $1,%ecx
+	movl %ecx,-4(%ebp)
+.Lfinal:
+	fstp %st(0) / drop x
+	fldt -12(%ebp) / z, the new value
+	andl $0x00007fff,%ecx / exponent field of z
+	jz .Lunderflow / zero exponent: z is zero or subnormal
+	cmpw $0x7fff,%cx
+	je .Loverflow / all-ones exponent: z is infinite
+	jmp .Lreturn
+.Loverflow:
+	PIC_SETUP(1)
+	fldt PIC_L(.LFmaxl) / Fmaxl, z
+	PIC_WRAPUP
+	fmulp %st,%st(0) / create overflow signal
+	jmp .Lreturn
+.Lunderflow:
+	PIC_SETUP(2)
+	fldt PIC_L(.LFminl) / Fminl, z
+	PIC_WRAPUP
+	fmulp %st,%st(0) / create underflow signal
+	jmp .Lreturn
+.Lequal:
+	fstp %st(0) / C99 says to return y when x == y
+	jmp .Lreturn
+.LNaN:
+	faddp %st,%st(1) / x+y
+.Lreturn:
+	fwait
+	leave
+	ret
+	.align 4
+	SET_SIZE(nexttowardl)
diff --git a/usr/src/lib/libm/i386/src/pow.s b/usr/src/lib/libm/i386/src/pow.s
new file mode 100644
index 0000000000..c539a7791d
--- /dev/null
+++ b/usr/src/lib/libm/i386/src/pow.s
@@ -0,0 +1,473 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+	.file "pow.s"
+
+/ Note: 0^NaN should not signal "invalid" but this implementation
+/ does because y is placed on the NPX stack.
+
+/ Special cases:
+/
+/ x ** 0 is 1 _SVID_libm_err if x is 0 or NaN
+/ 1 ** y is 1 (C99)
+/ x ** NaN is NaN
+/ NaN ** y (except 0) is NaN
+/ x ** 1 is x
+/ +-(|x| > 1) ** +inf is +inf
+/ +-(|x| > 1) ** -inf is +0
+/ +-(|x| < 1) ** +inf is +0
+/ +-(|x| < 1) ** -inf is +inf
+/ (-1) ** +-inf is +1 (C99)
+/ +0 ** +y (except 0, NaN) is +0
+/ -0 ** +y (except 0, NaN, odd int) is +0
+/ -0 ** +y (odd int) is -0
+/ +-0 ** -y (except 0, NaN) _SVID_libm_err
+/ +inf ** +y (except 0, NaN) is +inf
+/ +inf ** -y (except 0, NaN) is +0
+/ -inf ** +-y (except 0, NaN) is -0 ** -+y (NO z flag)
+/ x ** -1 is 1/x
+/ x ** 2 is x*x
+/ -x ** y (an integer) is (-1)**(y) * (+x)**(y)
+/ x ** y (x negative & y not integer) _SVID_libm_err
+/ if x and y are finite and x**y = 0 _SVID_libm_err (underflow)
+/ if x and y are finite and x**y = inf _SVID_libm_err (overflow)
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(pow,function)
+#include "libm_synonyms.h"
+#include "libm_protos.h"
+#include "xpg6.h"
+
+#undef fabs
+
+	.data
+	.align 4
+negzero:
+	.float -0.0
+one:
+	.float 1.0
+negone:
+	.float -1.0
+two:
+	.float 2.0
+Snan:
+	.long 0x7f800001
+pinfinity:
+	.long 0x7f800000
+ninfinity:
+	.long 0xff800000
+/ pow(x,y): x is at 8(%ebp), y at 16(%ebp).  The fxam class codes are kept in
+/ %dh (x) and %dl (y) while .pow_main works through the cases listed above.
+	ENTRY(pow)
+	pushl %ebp
+	movl %esp,%ebp
+	PIC_SETUP(1)
+
+	fldl 8(%ebp) / x
+	fxam / determine class of x
+	fnstsw %ax / store status in %ax
+	movb %ah,%dh / %dh <- condition code of x
+
+	fldl 16(%ebp) / y , x
+	fxam / determine class of y
+	fnstsw %ax / store status in %ax
+	movb %ah,%dl / %dl <- condition code of y
+
+	call .pow_main /// LOCAL
+	PIC_WRAPUP
+	leave
+	ret
+
+.pow_main:
+	/ x ** 0 is 1 unless x is 0 or a NaN
+	movb %dl,%cl
+	andb $0x45,%cl
+	cmpb $0x40,%cl / C3=1 C2=0 C1=? C0=0 when +-0
+	jne 1f
+	movb %dh,%cl
+	andb $0x45,%cl
+	cmpb $0x40,%cl / C3=1 C2=0 C1=? C0=0 when +-0
+	jne 2f
+	/ 0^0
+	pushl $20
+	jmp .SVIDerr / SVID error handler
+2:
+	cmpb $0x01,%cl /// C3=0 C2=0 C1=? C0=1 when +-NaN
+	jne 2f
+	/ NaN^0
+	pushl $42
+	jmp .SVIDerr
+2:
+	/ (not 0 or NaN)^0
+	fstp %st(0) / x
+	fstp %st(0) / stack empty
+	fld1 / 1
+	ret
+
+1: / y is not zero
+	PIC_G_LOAD(movzwl,__xpg6,eax)
+	andl $_C99SUSv3_pow_treats_Inf_as_an_even_int,%eax
+	cmpl $0,%eax
+	je 1f
+
+	/ C99: 1 ** anything is 1
+	fld1 / 1, y, x
+	fucomp %st(2) / y, x
+	fnstsw %ax / store status in %ax
+	sahf / 80387 flags in %ax to 80386 flags
+	jp 1f / so that pow(NaN1,NaN2) returns NaN2
+	jne 1f
+	fstp %st(0) / x
+	ret
+
+1:
+	/ x ** NaN is NaN
+	movb %dl,%cl
+	andb $0x45,%cl
+	cmpb $0x01,%cl / C3=0 C2=0 C1=? C0=1 when +-NaN
+	jne 1f
+	fstp %st(1) / y
+	ret
+
+1: / y is not NaN
+	/ NaN ** y (except 0) is NaN
+	movb %dh,%cl
+	andb $0x45,%cl
+	cmpb $0x01,%cl / C3=0 C2=0 C1=? C0=1 when +-NaN
+	jne 1f
+	fstp %st(0) / x
+	ret
+
+1: / x is not NaN
+	/ x ** 1 is x
+	fcoms PIC_L(one) / y , x
+	fnstsw %ax / store status in %ax
+	sahf / 80387 flags in %ax to 80386 flags
+	jne 1f
+	fstp %st(0) / x
+	ret
+
+1: / y is not 1
+	/ +-(|x| > 1) ** +inf is +inf
+	/ +-(|x| > 1) ** -inf is +0
+	/ +-(|x| < 1) ** +inf is +0
+	/ +-(|x| < 1) ** -inf is +inf
+	/ +-(|x| = 1) ** +-inf is NaN, or +1 when the __xpg6 C99 bit is set
+	movb %dl,%cl
+	andb $0x47,%cl
+	cmpb $0x05,%cl / C3=0 C2=1 C1=0 C0=1 when +inf
+	je .yispinf
+	cmpb $0x07,%cl / C3=0 C2=1 C1=1 C0=1 when -inf
+	je .yisninf
+
+	/ +0 ** +y (except 0, NaN) is +0
+	/ -0 ** +y (except 0, NaN, odd int) is +0
+	/ +0 ** -y (except 0, NaN) _SVID_libm_err (case 23)
+	/ -0 ** -y (except 0, NaN) _SVID_libm_err (case 23)
+	/ -0 ** +y (odd int) is - (+0 ** y)
+	movb %dh,%cl
+	andb $0x47,%cl
+	cmpb $0x40,%cl / C3=1 C2=0 C1=0 C0=0 when +0
+	je .xispzero
+	cmpb $0x42,%cl / C3=1 C2=0 C1=1 C0=0 when -0
+	je .xisnzero
+
+	/ +inf ** +y (except 0, NaN) is +inf
+	/ +inf ** -y (except 0, NaN) is +0
+	/ -inf ** +-y (except 0, NaN) is -0 ** -+y (NO z flag)
+	movb %dh,%cl
+	andb $0x47,%cl
+	cmpb $0x05,%cl / C3=0 C2=1 C1=0 C0=1 when +inf
+	je .xispinf
+	cmpb $0x07,%cl / C3=0 C2=1 C1=1 C0=1 when -inf
+	je .xisninf
+
+	/ x ** -1 is 1/x
+	fcoms PIC_L(negone) / y , x
+	fnstsw %ax / store status in %ax
+	sahf / 80387 flags in %ax to 80386 flags
+	jne 1f
+	fld %st(1) / x , y , x
+	fdivrs PIC_L(one) / 1/x , y , x
+	jmp .signok / check for over/underflow
+
+1: / y is not -1
+	/ x ** 2 is x*x
+	fcoms PIC_L(two) / y , x
+	fnstsw %ax / store status in %ax
+	sahf / 80387 flags in %ax to 80386 flags
+	jne 1f
+	fld %st(1) / x , y , x
+	fld %st(0) / x , x , y , x
+	fmulp / x^2 , y , x
+	jmp .signok / check for over/underflow
+
+1: / y is not 2
+	/ make copies of x & y
+	fld %st(1) / x , y , x
+	fld %st(1) / y , x , y , x
+
+	/ -x ** y (an integer) is (-1)**(y) * (+x)**(y)
+	/ x ** y (x negative & y not integer) _SVID_libm_err (case 24)
+	movl $0,%ecx / track whether to flip sign of result
+	fld %st(1) / x , y , x , y , x
+	ftst / compare %st(0) with 0
+	fnstsw %ax / store status in %ax
+	sahf / 80387 flags in %ax to 80386 flags
+	fstp %st(0) / y , x , y , x
+	ja .merge / x > 0
+	/ x < 0
+	call .y_is_int
+	cmpl $0,%ecx
+	jne 1f
+	/ x < 0, y is non-integral
+	fstp %st(0) / x , y , x
+	fstp %st(0) / y , x
+	pushl $24
+	jmp .SVIDerr / SVID error handler
+
+1: / x < 0 & y = int
+	fxch / x , y , y , x
+	fchs / px = -x , y , y , x
+	fxch / y , px , y , x
+.merge:
+	/ px > 0
+	fxch / px , y , y , x
+
+	/ x**y = exp(y*ln(x))
+	fyl2x / t=y*log2(px) , y , x
+	fld %st(0) / t , t , y , x
+	frndint / [t] , t , y , x
+	fxch / t , [t] , y , x
+	fucom
+	fnstsw %ax / store status in %ax
+	sahf / 80387 flags in %ax to 80386 flags
+	je 1f / t is integral
+	fsub %st(1),%st / t-[t] , [t] , y , x
+	f2xm1 / 2**(t-[t])-1 , [t] , y , x
+	fadds PIC_L(one) / 2**(t-[t]) , [t] , y , x
+	fscale / 2**t = px**y , [t] , y , x
+	jmp 2f
+1:
+	fstp %st(0) / t=[t] , y , x
+	fld1 / 1 , t , y , x
+	fscale / 1*2**t = x**y , t , y , x
+2:
+	fstp %st(1) / x**y , y , x
+	cmpl $1,%ecx
+	jne .signok
+	fchs / change sign since x<0 & y = odd int
+.signok:
+	subl $8,%esp
+	fstpl (%esp) / round to double precision
+	fldl (%esp) / place result on NPX stack
+	addl $8,%esp
+
+	fxam / determine class of x**y
+	fnstsw %ax / store status in %ax
+	andw $0x4500,%ax / keep C3 (0x4000), C2 (0x0400) and C0 (0x0100)
+	/ check for overflow
+	cmpw $0x0500,%ax / C3=0 C2=1 C1=? C0=1 when +-inf
+	jne 1f
+	/ x^y overflows
+	fstp %st(0) / y , x
+	pushl $21
+	jmp .SVIDerr
+1:
+	/ check for underflow
+	cmpw $0x4000,%ax / C3=1 C2=0 C1=? C0=0 when +-0
+	jne 1f
+	/ x^y underflows
+	fstp %st(0) / y , x
+	pushl $22
+	jmp .SVIDerr
+1:
+	fstp %st(2) / y , x**y
+	fstp %st(0) / x**y
+	ret
+
+/ ------------------------------------------------------------------------
+
+.xispinf:
+	ftst / compare %st(0) with 0
+	fnstsw %ax / store status in %ax
+	sahf / 80387 flags in %ax to 80386 flags
+	ja .retpinf / y > 0
+	jmp .retpzero / y < 0
+
+.xisninf:
+	/ -inf ** +-y is -0 ** -+y
+	fchs / -y , x
+	flds PIC_L(negzero) / -0 , -y , x
+	fstp %st(2) / -y , -0
+	jmp .xisnzero
+
+.yispinf:
+	fld %st(1) / x , y , x
+	fabs / |x| , y , x
+	fcomps PIC_L(one) / y , x
+	fnstsw %ax / store status in %ax
+	sahf / 80387 flags in %ax to 80386 flags
+	je .retponeorinvalid / |x| == 1
+	ja .retpinf / |x| > 1
+	jmp .retpzero / |x| < 1
+
+.yisninf:
+	fld %st(1) / x , y , x
+	fabs / |x| , y , x
+	fcomps PIC_L(one) / y , x
+	fnstsw %ax / store status in %ax
+	sahf / 80387 flags in %ax to 80386 flags
+	je .retponeorinvalid / |x| == 1
+	ja .retpzero / |x| > 1
+	jmp .retpinf / |x| < 1
+
+.xispzero:
+	/ y cannot be 0 or NaN ; stack has y , x
+	ftst / compare %st(0) with 0
+	fnstsw %ax / store status in %ax
+	sahf / 80387 flags in %ax to 80386 flags
+	ja .retpzero / y > 0
+	/ x = +0 & y < 0
+	jmp .SVIDzerotoneg
+
+.xisnzero:
+	/ y cannot be 0 or NaN ; stack has y , x
+	call .y_is_int
+	cmpl $1,%ecx
+	jne 1f / y is not an odd integer
+	/ y is an odd integer
+	ftst / compare %st(0) with 0
+	fnstsw %ax / store status in %ax
+	sahf / 80387 flags in %ax to 80386 flags
+	ja .retnzero / y > 0
+	/ x = -0 & y < 0 (odd int) _SVID_libm_err (case 23)
+	/ x = -inf & y != 0 or NaN return -inf (NO z flag)
+	movb %dh,%cl
+	andb $0x45,%cl
+	cmpb $0x05,%cl / C3=0 C2=1 C1=? C0=1 when +-inf
+	jne .SVIDzerotoneg
+	fstp %st(0) / x
+	fstp %st(0) / stack empty
+	flds PIC_L(ninfinity) / -inf
+	ret
+
+1: / y is not an odd integer
+	ftst / compare %st(0) with 0
+	fnstsw %ax / store status in %ax
+	sahf / 80387 flags in %ax to 80386 flags
+	ja .retpzero / y > 0
+	/ x = -0 & y < 0 (not odd int) _SVID_libm_err (case 23)
+	/ x = -inf & y not 0 or NaN return +inf (NO z flag)
+	movb %dh,%cl
+	andb $0x45,%cl
+	cmpb $0x05,%cl / C3=0 C2=1 C1=? C0=1 when +-inf
+	jne .SVIDzerotoneg
+	jmp .retpinf / return +inf (NO z flag)
+
+.retpzero:
+	fstp %st(0) / x
+	fstp %st(0) / stack empty
+	fldz / +0
+	ret
+
+.retnzero:
+	fstp %st(0) / x
+	fstp %st(0) / stack empty
+	flds PIC_L(negzero) / -0
+	ret
+
+.retponeorinvalid:
+	PIC_G_LOAD(movzwl,__xpg6,eax)
+	andl $_C99SUSv3_pow_treats_Inf_as_an_even_int,%eax
+	cmpl $0,%eax
+	je 1f
+	fstp %st(0) / x
+	fstp %st(0) / stack empty
+	fld1 / 1
+	ret
+
+1:
+	fstp %st(0) / x
+	fstp %st(0) / stack empty
+	flds PIC_L(Snan) / Q NaN (i flag)
+	fwait
+	ret
+
+.retpinf:
+	fstp %st(0) / x
+	fstp %st(0) / stack empty
+	flds PIC_L(pinfinity) / +inf
+	ret
+
+.SVIDzerotoneg:
+	pushl $23
+.SVIDerr:
+	/ At this point the fp stack contains y , x and the number
+	/ of the error case has been pushed on the memory stack.
+	subl $16,%esp
+	fstpl 8(%esp) / push y
+	fstpl (%esp) / push x; NPX stack empty
+	call PIC_F(_SVID_libm_err) / report result/error according to SVID
+	addl $20,%esp / discard x, y and the error case number
+	ret
+
+/ Set %ecx to 2 if y is an even integer, 1 if y is an odd integer,
+/ 0 otherwise. Assume y is not zero. Do not raise inexact or modify
+/ %edx.
+.y_is_int:
+	movl 20(%ebp),%eax / high word of y
+	andl $0x7fffffff,%eax / |y|
+	cmpl $0x43400000,%eax
+	jae 1f / |y| >= 2^53, an even int
+	cmpl $0x3ff00000,%eax
+	jb 2f / |y| < 1, can't be an int
+	movl %eax,%ecx
+	sarl $20,%ecx
+	subl $0x433,%ecx
+	negl %ecx / 52 - unbiased exponent of y
+	movl 16(%ebp),%eax / low word of y
+	bsfl %eax,%eax / index of least sig. 1 bit
+	jne 3f / jump if 1 bit found
+	movl 20(%ebp),%eax
+	bsfl %eax,%eax
+	addl $32,%eax / 32 + index of least sig. 1 bit
+3:
+	cmpl %ecx,%eax
+	jb 2f / a set bit lies below the units position: not an int
+	ja 1f / units bit is clear: even int
+	movl $1,%ecx
+	ret
+1:
+	movl $2,%ecx
+	ret
+2:
+	xorl %ecx,%ecx
+	ret
+	.align 4
+	SET_SIZE(pow)
diff --git a/usr/src/lib/libm/i386/src/powf.s b/usr/src/lib/libm/i386/src/powf.s
new file mode 100644
index 0000000000..e45b9d4694
--- /dev/null
+++ b/usr/src/lib/libm/i386/src/powf.s
@@ -0,0 +1,443 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+	.file "powf.s"
+
+/ Note: 0^SNaN should not signal "invalid" but this implementation
+/ does because y is placed on the NPX stack.
+
+/ Special cases:
+/
+/ x ** 0 is 1
+/ 1 ** y is 1 (C99)
+/ x ** NaN is NaN
+/ NaN ** y (except 0) is NaN
+/ x ** 1 is x
+/ +-(|x| > 1) ** +inf is +inf
+/ +-(|x| > 1) ** -inf is +0
+/ +-(|x| < 1) ** +inf is +0
+/ +-(|x| < 1) ** -inf is +inf
+/ (-1) ** +-inf is +1 (C99)
+/ +0 ** +y (except 0, NaN) is +0
+/ -0 ** +y (except 0, NaN, odd int) is +0
+/ +0 ** -y (except 0, NaN) is +inf (z flag)
+/ -0 ** -y (except 0, NaN, odd int) is +inf (z flag)
+/ -0 ** y (odd int) is - (+0 ** y)
+/ +inf ** +y (except 0, NaN) is +inf
+/ +inf ** -y (except 0, NaN) is +0
+/ -inf ** +-y (except 0, NaN) is -0 ** -+y (NO z flag)
+/ x ** -1 is 1/x
+/ x ** 2 is x*x
+/ -x ** y (an integer) is (-1)**(y) * (+x)**(y)
+/ x ** y (x negative & y not integer) is NaN (i flag)
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(powf,function)
+#include "libm_synonyms.h"
+#include "libm_protos.h"
+#include "xpg6.h"
+
+#undef fabs
+
+	.data
+	.align 4
+negzero:
+	.float -0.0
+half:
+	.float 0.5
+one:
+	.float 1.0
+negone:
+	.float -1.0
+two:
+	.float 2.0
+Snan:
+	.long 0x7f800001
+pinfinity:
+	.long 0x7f800000
+ninfinity:
+	.long 0xff800000
+/ powf(x,y): x is at 8(%ebp), y at 12(%ebp).  The fxam class codes are kept in
+/ %dh (x) and %dl (y) while .pow_main works through the cases listed above.
+	ENTRY(powf)
+	pushl %ebp
+	movl %esp,%ebp
+	PIC_SETUP(1)
+
+	flds 8(%ebp) / x
+	fxam / determine class of x
+	fnstsw %ax / store status in %ax
+	movb %ah,%dh / %dh <- condition code of x
+
+	flds 12(%ebp) / y , x
+	fxam / determine class of y
+	fnstsw %ax / store status in %ax
+	movb %ah,%dl / %dl <- condition code of y
+
+	call .pow_main /// LOCAL
+	PIC_WRAPUP
+	leave
+	ret
+
+.pow_main:
+	/ x ** 0 is 1
+	movb %dl,%cl
+	andb $0x45,%cl
+	cmpb $0x40,%cl / C3=1 C2=0 C1=? C0=0 when +-0
+	jne 1f
+	fstp %st(0) / x
+	fstp %st(0) / stack empty
+	fld1 / 1
+	ret
+
+1: / y is not zero
+	PIC_G_LOAD(movzwl,__xpg6,eax)
+	andl $_C99SUSv3_pow_treats_Inf_as_an_even_int,%eax
+	cmpl $0,%eax
+	je 1f
+
+	/ C99: 1 ** anything is 1
+	fld1 / 1, y, x
+	fucomp %st(2) / y, x
+	fnstsw %ax / store status in %ax
+	sahf / 80387 flags in %ax to 80386 flags
+	jp 1f / so that pow(NaN1,NaN2) returns NaN2
+	jne 1f
+	fstp %st(0) / x
+	ret
+
+1:
+	/ x ** NaN is NaN
+	movb %dl,%cl
+	andb $0x45,%cl
+	cmpb $0x01,%cl / C3=0 C2=0 C1=? C0=1 when +-NaN
+	jne 1f
+	fstp %st(1) / y
+	ret
+
+1: / y is not NaN
+	/ NaN ** y (except 0) is NaN
+	movb %dh,%cl
+	andb $0x45,%cl
+	cmpb $0x01,%cl / C3=0 C2=0 C1=? C0=1 when +-NaN
+	jne 1f
+	fstp %st(0) / x
+	ret
+
+1: / x is not NaN
+	/ x ** 1 is x
+	fcoms PIC_L(one) / y , x
+	fnstsw %ax / store status in %ax
+	sahf / 80387 flags in %ax to 80386 flags
+	jne 1f
+	fstp %st(0) / x
+	ret
+
+1: / y is not 1
+	/ +-(|x| > 1) ** +inf is +inf
+	/ +-(|x| > 1) ** -inf is +0
+	/ +-(|x| < 1) ** +inf is +0
+	/ +-(|x| < 1) ** -inf is +inf
+	/ +-(|x| = 1) ** +-inf is NaN, or +1 when the __xpg6 C99 bit is set
+	movb %dl,%cl
+	andb $0x47,%cl
+	cmpb $0x05,%cl / C3=0 C2=1 C1=0 C0=1 when +inf
+	je .yispinf
+	cmpb $0x07,%cl / C3=0 C2=1 C1=1 C0=1 when -inf
+	je .yisninf
+
+	/ +0 ** +y (except 0, NaN) is +0
+	/ -0 ** +y (except 0, NaN, odd int) is +0
+	/ +0 ** -y (except 0, NaN) is +inf (z flag)
+	/ -0 ** -y (except 0, NaN, odd int) is +inf (z flag)
+	/ -0 ** y (odd int) is - (+0 ** y)
+	movb %dh,%cl
+	andb $0x47,%cl
+	cmpb $0x40,%cl / C3=1 C2=0 C1=0 C0=0 when +0
+	je .xispzero
+	cmpb $0x42,%cl / C3=1 C2=0 C1=1 C0=0 when -0
+	je .xisnzero
+
+	/ +inf ** +y (except 0, NaN) is +inf
+	/ +inf ** -y (except 0, NaN) is +0
+	/ -inf ** +-y (except 0, NaN) is -0 ** -+y (NO z flag)
+	movb %dh,%cl
+	andb $0x47,%cl
+	cmpb $0x05,%cl / C3=0 C2=1 C1=0 C0=1 when +inf
+	je .xispinf
+	cmpb $0x07,%cl / C3=0 C2=1 C1=1 C0=1 when -inf
+	je .xisninf
+
+	/ x ** -1 is 1/x
+	fcoms PIC_L(negone) / y , x
+	fnstsw %ax / store status in %ax
+	sahf / 80387 flags in %ax to 80386 flags
+	jne 1f
+	fld %st(1) / x , y , x
+	fdivrs PIC_L(one) / 1/x , y , x
+	jmp .signok / round to single and return
+
+1: / y is not -1
+	/ x ** 2 is square(x)
+	fcoms PIC_L(two) / y , x
+	fnstsw %ax / store status in %ax
+	sahf / 80387 flags in %ax to 80386 flags
+	jne 1f
+	fld %st(1) / x , y , x
+	fld %st(0) / x , x , y , x
+	fmulp / x^2 , y , x
+	jmp .signok / round to single and return
+
+1: / y is not 2
+	/ x ** 1/2 is sqrt(x)
+	fcoms PIC_L(half) / y , x
+	fnstsw %ax / store status in %ax
+	sahf / 80387 flags in %ax to 80386 flags
+	jne 1f
+	fld %st(1) / x , y , x
+	fsqrt / sqrt(x) , y , x
+	jmp .signok / round to single and return
+
+1: / y is not 1/2
+	/ make copies of x & y
+	fld %st(1) / x , y , x
+	fld %st(1) / y , x , y , x
+
+	/ -x ** y (an integer) is (-1)**(y) * (+x)**(y)
+	/ x ** y (x negative & y not integer) is NaN
+	movl $0,%ecx / track whether to flip sign of result
+	fld %st(1) / x , y , x , y , x
+	ftst / compare %st(0) with 0
+	fnstsw %ax / store status in %ax
+	sahf / 80387 flags in %ax to 80386 flags
+	fstp %st(0) / y , x , y , x
+	ja .merge / x > 0
+	/ x < 0
+	call .y_is_int
+	cmpl $0,%ecx
+	jne 1f
+	/ x < 0 & y != int so x**y = NaN (i flag)
+	fstp %st(0) / x , y , x
+	fstp %st(0) / y , x
+	fstp %st(0) / x
+	fstp %st(0) / stack empty
+	fldz
+	fdiv %st,%st(0) / 0/0
+	ret
+
+1: / x < 0 & y = int
+	fxch / x , y , y , x
+	fchs / px = -x , y , y , x
+	fxch / y , px , y , x
+.merge:
+	/ px > 0
+	fxch / px , y , y , x
+
+	/ x**y = exp(y*ln(x))
+	fyl2x / t=y*log2(px) , y , x
+	fld %st(0) / t , t , y , x
+	frndint / [t] , t , y , x
+	fxch / t , [t] , y , x
+	fucom
+	fnstsw %ax / store status in %ax
+	sahf / 80387 flags in %ax to 80386 flags
+	je 1f / t is integral
+	fsub %st(1),%st / t-[t] , [t] , y , x
+	f2xm1 / 2**(t-[t])-1 , [t] , y , x
+	fadds PIC_L(one) / 2**(t-[t]) , [t] , y , x
+	fscale / 2**t = px**y , [t] , y , x
+	jmp 2f
+1:
+	fstp %st(0) / t=[t] , y , x
+	fld1 / 1 , t , y , x
+	fscale / 1*2**t = x**y , t , y , x
+2:
+	fstp %st(1) / x**y , y , x
+	cmpl $1,%ecx
+	jne .signok
+	fchs / change sign since x<0 & y = odd int
+.signok:
+	subl $4,%esp
+	fstps (%esp) / round to single precision
+	flds (%esp) / place result on NPX stack
+	addl $4,%esp
+	fstp %st(2) / y , x**y
+	fstp %st(0) / x**y
+	ret
+
+/ ------------------------------------------------------------------------
+
+.xispinf:
+	ftst / compare %st(0) with 0
+	fnstsw %ax / store status in %ax
+	sahf / 80387 flags in %ax to 80386 flags
+	ja .retpinf / y > 0
+	jmp .retpzero / y < 0
+
+.xisninf:
+	/ -inf ** +-y is -0 ** -+y
+	fchs / -y , x
+	flds PIC_L(negzero) / -0 , -y , x
+	fstp %st(2) / -y , -0
+	jmp .xisnzero
+
+.yispinf:
+	fld %st(1) / x , y , x
+	fabs / |x| , y , x
+	fcomps PIC_L(one) / y , x
+	fnstsw %ax / store status in %ax
+	sahf / 80387 flags in %ax to 80386 flags
+	je .retponeorinvalid / |x| == 1
+	ja .retpinf / |x| > 1
+	jmp .retpzero / |x| < 1
+
+.yisninf:
+	fld %st(1) / x , y , x
+	fabs / |x| , y , x
+	fcomps PIC_L(one) / y , x
+	fnstsw %ax / store status in %ax
+	sahf / 80387 flags in %ax to 80386 flags
+	je .retponeorinvalid / |x| == 1
+	ja .retpzero / |x| > 1
+	jmp .retpinf / |x| < 1
+
+.xispzero:
+	/ y cannot be 0 or NaN ; stack has y , x
+	ftst / compare %st(0) with 0
+	fnstsw %ax / store status in %ax
+	sahf / 80387 flags in %ax to 80386 flags
+	ja .retpzero / y > 0
+	/ x = +0 & y < 0 so x**y = +inf
+	jmp .retpinfzflag / ret +inf & z flag
+
+.xisnzero:
+	/ y cannot be 0 or NaN ; stack has y , x
+	call .y_is_int
+	cmpl $1,%ecx
+	jne 1f / y is not an odd integer
+	/ y is an odd integer
+	ftst / compare %st(0) with 0
+	fnstsw %ax / store status in %ax
+	sahf / 80387 flags in %ax to 80386 flags
+	ja .retnzero / y > 0
+	/ x = -0 & y < 0 (odd int) return -inf (z flag)
+	/ x = -inf & y != 0 or NaN return -inf (NO z flag)
+	movb %dh,%cl
+	andb $0x45,%cl
+	cmpb $0x05,%cl / C3=0 C2=1 C1=? C0=1 when +-inf
+	je 2f
+	fdiv %st,%st(1) / y / x, x (raise z flag)
+2:
+	fstp %st(0) / x
+	fstp %st(0) / stack empty
+	flds PIC_L(ninfinity) / -inf
+	ret
+
+1: / y is not an odd integer
+	ftst / compare %st(0) with 0
+	fnstsw %ax / store status in %ax
+	sahf / 80387 flags in %ax to 80386 flags
+	ja .retpzero / y > 0
+	/ x = -0 & y < 0 (not odd int) return +inf (z flag)
+	/ x = -inf & y not 0 or NaN return +inf (NO z flag)
+	movb %dh,%cl
+	andb $0x45,%cl
+	cmpb $0x05,%cl / C3=0 C2=1 C1=? C0=1 when +-inf
+	jne .retpinfzflag / ret +inf & divide-by-0 flag
+	jmp .retpinf / return +inf (NO z flag)
+
+.retpzero:
+	fstp %st(0) / x
+	fstp %st(0) / stack empty
+	fldz / +0
+	ret
+
+.retnzero:
+	fstp %st(0) / x
+	fstp %st(0) / stack empty
+	flds PIC_L(negzero) / -0
+	ret
+
+.retponeorinvalid:
+	PIC_G_LOAD(movzwl,__xpg6,eax)
+	andl $_C99SUSv3_pow_treats_Inf_as_an_even_int,%eax
+	cmpl $0,%eax
+	je 1f
+	fstp %st(0) / x
+	fstp %st(0) / stack empty
+	fld1 / 1
+	ret
+
+1:
+	fstp %st(0) / x
+	fstp %st(0) / stack empty
+	flds PIC_L(Snan) / Q NaN (i flag)
+	fwait
+	ret
+
+.retpinf:
+	fstp %st(0) / x
+	fstp %st(0) / stack empty
+	flds PIC_L(pinfinity) / +inf
+	ret
+
+.retpinfzflag:
+	fstp %st(0) / x
+	fstp %st(0) / stack empty
+	fldz
+	fdivrs PIC_L(one) / 1/0
+	ret
+
+/ Set %ecx to 2 if y is an even integer, 1 if y is an odd integer,
+/ 0 otherwise. Assume y is not zero. Do not raise inexact or modify
+/ %edx.
+.y_is_int:
+	movl 12(%ebp),%eax / bits of y
+	andl $0x7fffffff,%eax / |y|
+	cmpl $0x4b800000,%eax
+	jae 1f / |y| >= 2^24, an even int
+	cmpl $0x3f800000,%eax
+	jb 2f / |y| < 1, can't be an int
+	movl %eax,%ecx
+	sarl $23,%ecx
+	subl $150,%ecx
+	negl %ecx / 23 - unbiased exponent of y
+	bsfl %eax,%eax / index of least sig. 1 bit
+	cmpl %ecx,%eax
+	jb 2f / a set bit lies below the units position: not an int
+	ja 1f / units bit is clear: even int
+	movl $1,%ecx
+	ret
+1:
+	movl $2,%ecx
+	ret
+2:
+	xorl %ecx,%ecx
+	ret
+	.align 4
+	SET_SIZE(powf)
diff --git a/usr/src/lib/libm/i386/src/powl.s b/usr/src/lib/libm/i386/src/powl.s
new file mode 100644
index 0000000000..f20f0f4411
--- /dev/null
+++ b/usr/src/lib/libm/i386/src/powl.s
@@ -0,0 +1,440 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */ + + .file "powl.s" + +/ Special cases: +/ +/ x ** 0 is 1 +/ 1 ** y is 1 (C99) +/ x ** NaN is NaN +/ NaN ** y (except 0) is NaN +/ x ** 1 is x +/ +-(|x| > 1) ** +inf is +inf +/ +-(|x| > 1) ** -inf is +0 +/ +-(|x| < 1) ** +inf is +0 +/ +-(|x| < 1) ** -inf is +inf +/ (-1) ** +-inf is +1 (C99) +/ +0 ** +y (except 0, NaN) is +0 +/ -0 ** +y (except 0, NaN, odd int) is +0 +/ +0 ** -y (except 0, NaN) is +inf (z flag) +/ -0 ** -y (except 0, NaN, odd int) is +inf (z flag) +/ -0 ** y (odd int) is - (+0 ** x) /* i.e. - (+0 ** y) */ +/ +inf ** +y (except 0, NaN) is +inf +/ +inf ** -y (except 0, NaN) is +0 +/ -inf ** +-y (except 0, NaN) is -0 ** -+y (NO z flag) +/ x ** -1 is 1/x +/ x ** 2 is x*x +/ -x ** y (an integer) is (-1)**(y) * (+x)**(y) +/ x ** y (x negative & y not integer) is NaN (i flag) + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(powl,function) +#include "libm_synonyms.h" +#include "xpg6.h" + +#undef fabs + + .data + .align 4 +negzero: + .float -0.0 +half: + .float 0.5 +one: + .float 1.0 +negone: + .float -1.0 +two: + .float 2.0 +Snan: + .long 0x7f800001 +pinfinity: + .long 0x7f800000 +ninfinity: + .long 0xff800000 + /* powl(x, y): pushes x then y onto the x87 stack, saves the fxam condition byte of x in %dh and of y in %dl, and calls .pow_main, whose result is returned in %st(0). */ + + ENTRY(powl) + pushl %ebp + movl %esp,%ebp + PIC_SETUP(1) + + fldt 8(%ebp) / x + fxam / determine class of x + fnstsw %ax / store status in %ax + movb %ah,%dh / %dh <- condition code of x + + fldt 20(%ebp) / y , x + fxam / determine class of y + fnstsw %ax / store status in %ax + movb %ah,%dl / %dl <- condition code of y + + call .pow_main /// LOCAL + PIC_WRAPUP + leave + ret + /* .pow_main: entered with y , x on the x87 stack; tests the special cases in the order coded below (including y == 0.5 via fsqrt), otherwise computes 2 ** (y * log2(|x|)) and negates it when x < 0 and y is an odd integer. */ +.pow_main: + / x ** 0 is 1 + movb %dl,%cl + andb $0x45,%cl + cmpb $0x40,%cl / C3=1 C2=0 C1=? 
C0=0 when +-0 + jne 1f + fstp %st(0) / x + fstp %st(0) / stack empty + fld1 / 1 + ret + +1: / y is not zero + PIC_G_LOAD(movzwl,__xpg6,eax) + andl $_C99SUSv3_pow_treats_Inf_as_an_even_int,%eax + cmpl $0,%eax + je 1f + + / C99: 1 ** anything is 1 + fld1 / 1, y, x + fucomp %st(2) / y, x + fnstsw %ax / store status in %ax + sahf / 80387 flags in %ax to 80386 flags + jp 1f / so that pow(NaN1,NaN2) returns NaN2 + jne 1f + fstp %st(0) / x + ret + +1: + / x ** NaN is NaN + movb %dl,%cl + andb $0x45,%cl + cmpb $0x01,%cl / C3=0 C2=0 C1=? C0=1 when +-NaN + jne 1f + fstp %st(1) / y + ret + +1: / y is not NaN + / NaN ** y (except 0) is NaN + movb %dh,%cl + andb $0x45,%cl + cmpb $0x01,%cl / C3=0 C2=0 C1=? C0=1 when +-NaN + jne 1f + fstp %st(0) / x + ret + +1: / x is not NaN + / x ** 1 is x + fcoms PIC_L(one) / y , x + fnstsw %ax / store status in %ax + sahf / 80387 flags in %ax to 80386 flags + jne 1f + fstp %st(0) / x + ret + +1: / y is not 1 + / +-(|x| > 1) ** +inf is +inf + / +-(|x| > 1) ** -inf is +0 + / +-(|x| < 1) ** +inf is +0 + / +-(|x| < 1) ** -inf is +inf + / +-(|x| = 1) ** +-inf is NaN /* .retponeorinvalid returns 1 instead when the __xpg6 C99 bit is set */ + movb %dl,%cl + andb $0x47,%cl + cmpb $0x05,%cl / C3=0 C2=1 C1=0 C0=1 when +inf + je .yispinf + cmpb $0x07,%cl / C3=0 C2=1 C1=1 C0=1 when -inf + je .yisninf + + / +0 ** +y (except 0, NaN) is +0 + / -0 ** +y (except 0, NaN, odd int) is +0 + / +0 ** -y (except 0, NaN) is +inf (z flag) + / -0 ** -y (except 0, NaN, odd int) is +inf (z flag) + / -0 ** y (odd int) is - (+0 ** x) /* i.e. - (+0 ** y) */ + movb %dh,%cl + andb $0x47,%cl + cmpb $0x40,%cl / C3=1 C2=0 C1=0 C0=0 when +0 + je .xispzero + cmpb $0x42,%cl / C3=1 C2=0 C1=1 C0=0 when -0 + je .xisnzero + + / +inf ** +y (except 0, NaN) is +inf + / +inf ** -y (except 0, NaN) is +0 + / -inf ** +-y (except 0, NaN) is -0 ** -+y (NO z flag) + movb %dh,%cl + andb $0x47,%cl + cmpb $0x05,%cl / C3=0 C2=1 C1=0 C0=1 when +inf + je .xispinf + cmpb $0x07,%cl / C3=0 C2=1 C1=1 C0=1 when -inf + je .xisninf + + / x ** -1 is 1/x + fcoms PIC_L(negone) / y , x + fnstsw %ax / store 
status in %ax + sahf / 80387 flags in %ax to 80386 flags + jne 1f + fld %st(1) / x , y , x + fdivrs PIC_L(one) / 1/x , y , x + jmp .signok / check for over/underflow + +1: / y is not -1 + / x ** 2 is x*x + fcoms PIC_L(two) / y , x + fnstsw %ax / store status in %ax + sahf / 80387 flags in %ax to 80386 flags + jne 1f + fld %st(1) / x , y , x + fld %st(0) / x , x , y , x + fmulp / x^2 , y , x + jmp .signok / check for over/underflow + +1: / y is not 2 + / x ** 1/2 is sqrt(x) + fcoms PIC_L(half) / y , x + fnstsw %ax / store status in %ax + sahf / 80387 flags in %ax to 80386 flags + jne 1f + fld %st(1) / x , y , x + fsqrt / sqrt(x) , y , x + jmp .signok / check for over/underflow + +1: / y is not 1/2 + / make copies of x & y + fld %st(1) / x , y , x + fld %st(1) / y , x , y , x + + / -x ** y (an integer) is (-1)**(y) * (+x)**(y) + / x ** y (x negative & y not integer) is NaN + movl $0,%ecx / track whether to flip sign of result + fld %st(1) / x , y , x , y , x + ftst / compare %st(0) with 0 + fnstsw %ax / store status in %ax + sahf / 80387 flags in %ax to 80386 flags + fstp %st(0) / y , x , y , x + ja .merge / x > 0 + / x < 0 + call .y_is_int + cmpl $0,%ecx + jne 1f + / x < 0 & y != int so x**y = NaN (i flag) + fstp %st(0) / x , y , x + fstp %st(0) / y , x + fstp %st(0) / x + fstp %st(0) / stack empty + fldz + fdiv %st,%st(0) / 0/0 + ret + +1: / x < 0 & y = int + fxch / x , y , y , x + fchs / px = -x , y , y , x + fxch / y , px , y , x +.merge: + / px > 0 + fxch / px , y , y , x + + / x**y = exp(y*ln(x)) + fyl2x / t=y*log2(px) , y , x + fld %st(0) / t , t , y , x + frndint / [t] , t , y , x + fxch / t , [t] , y , x + fucom + fnstsw %ax / store status in %ax + sahf / 80387 flags in %ax to 80386 flags + je 1f / t is integral + fsub %st(1),%st / t-[t] , [t] , y , x + f2xm1 / 2**(t-[t])-1 , [t] , y , x + fadds PIC_L(one) / 2**(t-[t]) , [t] , y , x + fscale / 2**t = px**y , [t] , y , x + jmp 2f +1: + fstp %st(0) / t=[t] , y , x + fld1 / 1 , t , y , x + fscale / 1*2**t = 
x**y , t , y , x +2: + fstp %st(1) / x**y , y , x + cmpl $1,%ecx + jne .signok + fchs / change sign since x<0 & y=-int /* reached only when %ecx == 1, i.e. y is an odd integer */ +.signok: + fstp %st(2) / y , x**y + fstp %st(0) / x**y + ret + +/ ------------------------------------------------------------------------ + +.xispinf: + ftst / compare %st(0) with 0 + fnstsw %ax / store status in %ax + sahf / 80387 flags in %ax to 80386 flags + ja .retpinf / y > 0 + jmp .retpzero / y < 0 + +.xisninf: + / -inf ** +-y is -0 ** -+y + fchs / -y , x + flds PIC_L(negzero) / -0 , -y , x + fstp %st(2) / -y , -0 + jmp .xisnzero + +.yispinf: + fld %st(1) / x , y , x + fabs / |x| , y , x + fcomps PIC_L(one) / y , x + fnstsw %ax / store status in %ax + sahf / 80387 flags in %ax to 80386 flags + je .retponeorinvalid / x == -1 C99 /* taken whenever |x| compares equal to 1 */ + ja .retpinf / |x| > 1 + jmp .retpzero / |x| < 1 + +.yisninf: + fld %st(1) / x , y , x + fabs / |x| , y , x + fcomps PIC_L(one) / y , x + fnstsw %ax / store status in %ax + sahf / 80387 flags in %ax to 80386 flags + je .retponeorinvalid / x == -1 C99 /* taken whenever |x| compares equal to 1 */ + ja .retpzero / |x| > 1 + jmp .retpinf / |x| < 1 + +.xispzero: + / y cannot be 0 or NaN ; stack has y , x + ftst / compare %st(0) with 0 + fnstsw %ax / store status in %ax + sahf / 80387 flags in %ax to 80386 flags + ja .retpzero / y > 0 + / x = +0 & y < 0 so x**y = +inf + jmp .retpinfzflag / ret +inf & z flag + +.xisnzero: + / y cannot be 0 or NaN ; stack has y , x + call .y_is_int + cmpl $1,%ecx + jne 1f / y is not an odd integer + / y is an odd integer + ftst / compare %st(0) with 0 + fnstsw %ax / store status in %ax + sahf / 80387 flags in %ax to 80386 flags + ja .retnzero / y > 0 + / x = -0 & y < 0 (odd int) return -inf (z flag) + / x = -inf & y != 0 or NaN return -inf (NO z flag) + movb %dh,%cl + andb $0x45,%cl + cmpb $0x05,%cl / C3=0 C2=1 C1=? 
C0=1 when +-inf + je 2f + fdiv %st,%st(1) / y / x, x (raise z flag) +2: + fstp %st(0) / x + fstp %st(0) / stack empty + flds PIC_L(ninfinity) / -inf + ret + +1: / y is not an odd integer + ftst / compare %st(0) with 0 + fnstsw %ax / store status in %ax + sahf / 80387 flags in %ax to 80386 flags + ja .retpzero / y > 0 + / x = -0 & y < 0 (not odd int) return +inf (z flag) + / x = -inf & y not 0 or NaN return +inf (NO z flag) + movb %dh,%cl + andb $0x45,%cl + cmpb $0x05,%cl / C3=0 C2=1 C1=? C0=1 when +-inf + jne .retpinfzflag / ret +inf & divide-by-0 flag + jmp .retpinf / return +inf (NO z flag) + +.retpzero: + fstp %st(0) / x + fstp %st(0) / stack empty + fldz / +0 + ret + +.retnzero: + fstp %st(0) / x + fstp %st(0) / stack empty + flds PIC_L(negzero) / -0 + ret + +.retponeorinvalid: /* returns 1 when the __xpg6 bit _C99SUSv3_pow_treats_Inf_as_an_even_int is set; otherwise loads the Snan constant */ + PIC_G_LOAD(movzwl,__xpg6,eax) + andl $_C99SUSv3_pow_treats_Inf_as_an_even_int,%eax + cmpl $0,%eax + je 1f + fstp %st(0) / x + fstp %st(0) / stack empty + fld1 / 1 + ret + +1: + fstp %st(0) / x + fstp %st(0) / stack empty + flds PIC_L(Snan) / Q NaN (i flag) + fwait + ret + +.retpinf: + fstp %st(0) / x + fstp %st(0) / stack empty + flds PIC_L(pinfinity) / +inf + ret + +.retpinfzflag: + fstp %st(0) / x + fstp %st(0) / stack empty + fldz + fdivrs PIC_L(one) / 1/0 + ret + +/ Set %ecx to 2 if y is an even integer, 1 if y is an odd integer, +/ 0 otherwise. Assume y is not zero. Do not raise inexact or modify +/ %edx. +.y_is_int: /* reads y from powl's frame: sign/exponent word at 28(%ebp), significand words at 20(%ebp) and 24(%ebp) */ + movl 28(%ebp),%eax + andl $0x7fff,%eax / exponent of y + cmpl $0x403f,%eax + jae 1f / |y| >= 2^64, an even int + cmpl $0x3fff,%eax + jb 2f / |y| < 1, can't be an int + movl %eax,%ecx + subl $0x403e,%ecx + negl %ecx / 63 - unbiased exponent of y + movl 20(%ebp),%eax + bsfl %eax,%eax / index of least sig. 1 bit + jne 3f / jump if 1 bit found + movl 24(%ebp),%eax + bsfl %eax,%eax + addl $32,%eax / 32 + index of least sig. 
1 bit +3: + cmpl %ecx,%eax + jb 2f + ja 1f + movl $1,%ecx + ret +1: + movl $2,%ecx + ret +2: + xorl %ecx,%ecx + ret + .align 4 + SET_SIZE(powl) diff --git a/usr/src/lib/libm/i386/src/remainder.s b/usr/src/lib/libm/i386/src/remainder.s new file mode 100644 index 0000000000..5b651c3b21 --- /dev/null +++ b/usr/src/lib/libm/i386/src/remainder.s @@ -0,0 +1,82 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + + .file "remainder.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(remainder,function) +#include "libm_synonyms.h" +#include "libm_protos.h" + /* remainder(x, y): double-precision remainder obtained by repeating fprem1 until status-word bit 0x400 (incomplete reduction) is clear; when neither operand is NaN and either y is zero or x is infinite, the case is handed to _SVID_libm_err(x, y, 28) instead. */ + ENTRY(remainder) + pushl %ebp + movl %esp,%ebp + fldl 16(%esp) / load arg y + fldl 8(%esp) / load arg x + fucom + fnstsw %ax + sahf + jp .rem_loop / if x or y is NaN, use fprem1 + + movl 20(%esp),%eax / eax <-- hi_32(y) + andl $0x7fffffff,%eax / eax <-- hi_32(|y|) + orl 16(%esp),%eax / eax <-- lo_32(y)|hi_32(|y|) + je .yzero_or_xinf + + movl 12(%esp),%eax / eax <-- hi_32(x) + andl $0x7fffffff,%eax / eax <-- hi_32(|x|) + cmpl $0x7ff00000,%eax + jne .rem_loop + cmpl $0,8(%esp) + je .yzero_or_xinf + +.rem_loop: + fprem1 / partial remainder + fstsw %ax / store status word + andw $0x400,%ax / check for incomplete reduction + jne .rem_loop / while incomplete, do fprem1 again + fstp %st(1) + leave + ret + +.yzero_or_xinf: /* discards both loaded operands, then pushes 28, y and x (20 bytes of arguments) for _SVID_libm_err */ + PIC_SETUP(1) + fstp %st(0) / x + fstp %st(0) / empty NPX stack + pushl $28 / case 28 in _SVID_libm_err + pushl 20(%ebp) / pass y + pushl 16(%ebp) + pushl 12(%ebp) / pass x + pushl 8(%ebp) + call PIC_F(_SVID_libm_err) + addl $20,%esp + PIC_WRAPUP + leave + ret + .align 4 + SET_SIZE(remainder) diff --git a/usr/src/lib/libm/i386/src/remainderf.s b/usr/src/lib/libm/i386/src/remainderf.s new file mode 100644 index 0000000000..2529903e58 --- /dev/null +++ b/usr/src/lib/libm/i386/src/remainderf.s @@ -0,0 +1,46 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .file "remainderf.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(remainderf,function) +#include "libm_synonyms.h" + /* remainderf(x, y): loads the single-precision arguments (y from 8(%esp), x from 4(%esp)), repeats fprem1 until status-word bit 0x400 is clear, then pops y so the remainder is left in %st(0). */ + ENTRY(remainderf) + flds 8(%esp) / load arg y + flds 4(%esp) / load arg x +.rem_loop: + fprem1 / partial remainder + fstsw %ax / store status word + andw $0x400,%ax / check whether reduction complete + jne .rem_loop / while reduction incomplete, do fprem1 + fstp %st(1) + ret + .align 4 + SET_SIZE(remainderf) diff --git a/usr/src/lib/libm/i386/src/remainderl.s b/usr/src/lib/libm/i386/src/remainderl.s new file mode 100644 index 0000000000..84191d4ff5 --- /dev/null +++ b/usr/src/lib/libm/i386/src/remainderl.s @@ -0,0 +1,46 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 
+ */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .file "remainderl.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(remainderl,function) +#include "libm_synonyms.h" + /* remainderl(x, y): loads the extended-precision arguments (y from 16(%esp), x from 4(%esp)), repeats fprem1 until status-word bit 0x400 is clear, then pops y so the remainder is left in %st(0). */ + ENTRY(remainderl) + fldt 16(%esp) / load arg y + fldt 4(%esp) / load arg x +.rem_loop: + fprem1 / partial remainder + fstsw %ax / store status word + andw $0x400,%ax / check whether reduction complete + jne .rem_loop / while reduction incomplete, do fprem1 + fstp %st(1) + ret + .align 4 + SET_SIZE(remainderl) diff --git a/usr/src/lib/libm/i386/src/remquo.s b/usr/src/lib/libm/i386/src/remquo.s new file mode 100644 index 0000000000..81c03b8451 --- /dev/null +++ b/usr/src/lib/libm/i386/src/remquo.s @@ -0,0 +1,70 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + + .file "remquo.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(remquo,function) +#include "libm_synonyms.h" +#include "libm_protos.h" + /* remquo(x, y, quo): reduces with fprem1, then assembles a 3-bit quotient value from the final status word (C0 becomes bit 2, C3 bit 1, C1 bit 0), negates it when the sign bits of x and y differ, and stores it through the pointer argument at 20(%esp); the remainder stays in %st(0). */ + ENTRY(remquo) + fldl 12(%esp) / load arg y + fldl 4(%esp) / load arg x +.Lrem_loop: + fprem1 / partial remainder + fstsw %ax / store status word + andw $0x400,%ax / check whether reduction complete + jne .Lrem_loop / while reduction incomplete, do fprem1 + fstsw %ax + fwait + fstp %st(1) + movw %ax,%dx + andw $0x4000,%dx / get C3 + sarw $13,%dx + movw %ax,%cx + andw $0x100,%cx / get C0 + sarw $6,%cx + addw %cx,%dx + andw $0x200,%ax / get C1 + sarw $9,%ax + addw %dx,%ax + cwtl + movl 8(%esp),%edx / sign and bexp of x + movl 16(%esp),%ecx / sign and bexp of y + andl $0x80000000,%edx / edx <- sign(x) + andl $0x80000000,%ecx / ecx <- sign(y) + cmpl %edx,%ecx + je .pos + negl %eax / negative n +.pos: + movl 20(%esp),%ecx + movl %eax,0(%ecx) / last 3 significant bits of quotient + ret + .align 4 + SET_SIZE(remquo) diff --git a/usr/src/lib/libm/i386/src/remquof.s b/usr/src/lib/libm/i386/src/remquof.s new file mode 100644 index 0000000000..787a7c1d11 --- /dev/null +++ b/usr/src/lib/libm/i386/src/remquof.s @@ -0,0 +1,70 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .file "remquof.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(remquof,function) +#include "libm_synonyms.h" +#include "libm_protos.h" + /* remquof(x, y, quo): single-precision variant; reduces with fprem1, assembles a 3-bit quotient value from the final status word (C0 becomes bit 2, C3 bit 1, C1 bit 0), negates it when the sign bits of x and y differ, and stores it through the pointer argument at 12(%esp). */ + ENTRY(remquof) + flds 8(%esp) / load arg y + flds 4(%esp) / load arg x +.Lremf_loop: + fprem1 / partial remainder + fstsw %ax / store status word + andw $0x400,%ax / check whether reduction complete + jne .Lremf_loop / while reduction incomplete, do fprem1 + fstsw %ax + fwait + fstp %st(1) + movw %ax,%dx + andw $0x4000,%dx / get C3 + sarw $13,%dx + movw %ax,%cx + andw $0x100,%cx / get C0 + sarw $6,%cx + addw %cx,%dx + andw $0x200,%ax / get C1 + sarw $9,%ax + addw %dx,%ax + cwtl + movl 4(%esp),%edx / sign and bexp of x + movl 8(%esp),%ecx / sign and bexp of y + andl $0x80000000,%edx / edx <- sign(x) + andl $0x80000000,%ecx / ecx <- sign(y) + cmpl %edx,%ecx + je .pos + negl %eax / negative n +.pos: + movl 12(%esp),%ecx + movl %eax,0(%ecx) / last 3 significant bits of quotient + ret + .align 4 + SET_SIZE(remquof) diff --git a/usr/src/lib/libm/i386/src/remquol.s b/usr/src/lib/libm/i386/src/remquol.s new file mode 100644 index 0000000000..86d0f40677 --- /dev/null +++ b/usr/src/lib/libm/i386/src/remquol.s @@ -0,0 +1,70 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .file "remquol.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(remquol,function) +#include "libm_synonyms.h" +#include "libm_protos.h" + /* remquol(x, y, quo): extended-precision variant; same status-word decoding as the double version, but the operand signs are taken with mask 0x00008000 from the words at 12(%esp) and 24(%esp), and the result is stored through the pointer argument at 28(%esp). */ + ENTRY(remquol) + fldt 16(%esp) / load arg y + fldt 4(%esp) / load arg x +.Lreml_loop: + fprem1 / partial remainder + fstsw %ax / store status word + andw $0x400,%ax / check whether reduction complete + jne .Lreml_loop / while reduction incomplete, do fprem1 + fstsw %ax + fwait + fstp %st(1) + movw %ax,%dx + andw $0x4000,%dx / get C3 + sarw $13,%dx + movw %ax,%cx + andw $0x100,%cx / get C0 + sarw $6,%cx + addw %cx,%dx + andw $0x200,%ax / get C1 + sarw $9,%ax + addw %dx,%ax + cwtl + movl 12(%esp),%edx / sign and bexp of x + movl 24(%esp),%ecx / sign and bexp of y + andl $0x00008000,%edx / edx <- sign(x) + andl $0x00008000,%ecx / ecx <- sign(y) + cmpl %edx,%ecx + je .pos + negl %eax / negative n +.pos: + movl 28(%esp),%ecx + movl %eax,0(%ecx) / last 3 significant bits of quotient + ret + .align 4 + SET_SIZE(remquol) diff --git a/usr/src/lib/libm/i386/src/rint.s b/usr/src/lib/libm/i386/src/rint.s new file mode 100644 index 0000000000..0395ee7ecb --- /dev/null +++ b/usr/src/lib/libm/i386/src/rint.s @@ -0,0 +1,46 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). 
+ * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .file "rint.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(rint,function) +#include "libm_synonyms.h" + /* rint(x): loads x and returns it as loaded when its high word with the sign bit cleared is at least 0x43300000 (2**52 and above, which also covers the infinity and NaN encodings); otherwise rounds it with frndint in the current rounding mode. */ + ENTRY(rint) + fldl 4(%esp) / load x + movl 8(%esp),%eax / eax <-- hi_32(x) + andl $0x7fffffff,%eax / eax <-- hi_32(|x|) + cmpl $0x43300000,%eax / is |x| >= 2**52? + jae .done / if so, branch (already integral) + frndint / [x], per rounding mode +.done: + fwait + ret + .align 4 + SET_SIZE(rint) diff --git a/usr/src/lib/libm/i386/src/rintf.s b/usr/src/lib/libm/i386/src/rintf.s new file mode 100644 index 0000000000..58eafe6c2b --- /dev/null +++ b/usr/src/lib/libm/i386/src/rintf.s @@ -0,0 +1,46 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .file "rintf.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(rintf,function) +#include "libm_synonyms.h" + /* rintf(x): loads x and returns it as loaded when its bit pattern with the sign bit cleared is at least 0x4b000000 (2**23 and above, which also covers the infinity and NaN encodings); otherwise rounds it with frndint in the current rounding mode. */ + ENTRY(rintf) + flds 4(%esp) / load x + movl 4(%esp),%eax / eax <-- x + andl $0x7fffffff,%eax / eax <-- |x| + cmpl $0x4b000000,%eax / is |x| >= 2**23? + jae .done / if so, branch (already integral) + frndint / [x], per rounding mode +.done: + fwait + ret + .align 4 + SET_SIZE(rintf) diff --git a/usr/src/lib/libm/i386/src/rintl.s b/usr/src/lib/libm/i386/src/rintl.s new file mode 100644 index 0000000000..c967d1a12d --- /dev/null +++ b/usr/src/lib/libm/i386/src/rintl.s @@ -0,0 +1,41 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .file "rintl.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(rintl,function) +#include "libm_synonyms.h" + /* rintl(x): loads the extended-precision argument from 4(%esp) and rounds it with frndint in the current rounding mode; unlike the double and float versions there is no large-magnitude shortcut. */ + ENTRY(rintl) + fldt 4(%esp) / load x + frndint / [x], per rounding mode + fwait + ret + .align 4 + SET_SIZE(rintl) diff --git a/usr/src/lib/libm/i386/src/rndintl.s b/usr/src/lib/libm/i386/src/rndintl.s new file mode 100644 index 0000000000..80a3676397 --- /dev/null +++ b/usr/src/lib/libm/i386/src/rndintl.s @@ -0,0 +1,150 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + + .file "rndintl.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(aintl,function) +LIBM_ANSI_PRAGMA_WEAK(irintl,function) +LIBM_ANSI_PRAGMA_WEAK(anintl,function) +LIBM_ANSI_PRAGMA_WEAK(nintl,function) +#include "libm_synonyms.h" +#undef fabs + /* aintl(x): rounds x toward zero by setting both rounding-control bits (0x0c00) in a copy of the x87 control word around frndint, then reloads a control word carrying the caller's original rounding bits. */ + ENTRY(aintl) + movl %esp,%eax + subl $8,%esp + fstcw -8(%eax) + fldt 4(%eax) + movw -8(%eax),%cx + orw $0x0c00,%cx + movw %cx,-4(%eax) + fldcw -4(%eax) / set RD = to_zero + frndint + fstcw -4(%eax) + movw -4(%eax),%dx + andw $0xf3ff,%dx + movw -8(%eax),%cx + andw $0x0c00,%cx + orw %dx,%cx + movw %cx,-8(%eax) + fldcw -8(%eax) / restore RD + addl $8,%esp + ret + .align 4 + SET_SIZE(aintl) + /* irintl(x): converts x to a 32-bit integer with fistpl into an 8-byte stack scratch area and returns that integer in %eax. */ + ENTRY(irintl) + movl %esp,%ecx + subl $8,%esp + fldt 4(%ecx) / load x + fistpl -8(%ecx) / [x] + fwait + movl -8(%ecx),%eax + addl $8,%esp + ret + .align 4 + SET_SIZE(irintl) + + .data + .align 4 +half: .float 0.5 + /* anintl(x): rounds with the rounding-control bits cleared (to nearest); if x was not already integral and |x-[x]| compares >= 0.5, the result is recomputed as x+0.5 when the sign bit of x is clear or x-0.5 when it is set, so ties go away from zero. nintl enters at .Lanintl. */ + ENTRY(anintl) +.Lanintl: + movl %esp,%ecx + subl $8,%esp + fstcw -8(%ecx) + fldt 4(%ecx) + movw -8(%ecx),%dx + andw $0xf3ff,%dx + movw %dx,-4(%ecx) + fldcw -4(%ecx) / set RD = to_nearest + fld %st(0) + frndint / [x],x + fstcw -4(%ecx) + movw -4(%ecx),%dx + andw $0xf3ff,%dx + movw -8(%ecx),%ax + andw $0x0c00,%ax + orw %dx,%ax + movw %ax,-8(%ecx) + fldcw -8(%ecx) / restore RD + fucom / check if x is already an integer + fstsw %ax + sahf + jp .L0 + je .L0 + fxch / x,[x] + fsub %st(1),%st / x-[x],[x] + fabs / |x-[x]|,[x] + PIC_SETUP(1) + fcoms PIC_L(half) + PIC_WRAPUP + fnstsw %ax + sahf + jae .halfway / if |x-[x]| = 0.5 goto halfway, + / most cases will not take branch. 
+.L0: + addl $8,%esp + fstp %st(0) + ret +.halfway: + / x = n+0.5, recompute anint(x) as x+sign(x)*0.5 + fldt 4(%ecx) / x, 0.5, [x] + movw 12(%ecx),%ax / sign+exp part of x + andw $0x8000,%ax / look at sign bit + jnz .x_neg + faddp + addl $8,%esp + fstp %st(1) + ret +.x_neg: + / here, x is negative, so return x-0.5 + fsubp %st,%st(1) / x-0.5,[x] + addl $8,%esp + fstp %st(1) + ret + .align 4 + SET_SIZE(anintl) + /* nintl(x): re-pushes the 12-byte argument, calls .Lanintl, then converts the rounded value to a 32-bit integer with fistpl and returns it in %eax. */ + ENTRY(nintl) + pushl %ebp + movl %esp,%ebp + subl $8,%esp + pushl 16(%ebp) + pushl 12(%ebp) + pushl 8(%ebp) + call .Lanintl /// LOCAL + fistpl -8(%ebp) + fwait + movl -8(%ebp),%eax + leave + ret + .align 4 + SET_SIZE(nintl) diff --git a/usr/src/lib/libm/i386/src/round.s b/usr/src/lib/libm/i386/src/round.s new file mode 100644 index 0000000000..32923221b3 --- /dev/null +++ b/usr/src/lib/libm/i386/src/round.s @@ -0,0 +1,95 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + + .file "round.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(round,function) +#include "libm_synonyms.h" +#undef fabs + + .section .rodata + .align 4 +.Lhalf: .float 0.5 +/* round(x): round the double x to the nearest integral value; when |x-[x]| compares as 0.5 the result is recomputed as x+sign(x)*0.5, i.e. halfway cases go away from zero. The caller's rounding field (mask 0x0c00) is restored right after frndint. Local labels: 0 = common exit, 2 = halfway case, 3 = halfway case with x negative. */ + ENTRY(round) + movl %esp,%ecx + subl $8,%esp + fstcw -8(%ecx) + fldl 4(%ecx) + movw -8(%ecx),%dx + andw $0xf3ff,%dx + movw %dx,-4(%ecx) + fldcw -4(%ecx) / set RD = to_nearest + fld %st(0) + frndint / [x],x + fstcw -4(%ecx) + movw -4(%ecx),%dx + andw $0xf3ff,%dx + movw -8(%ecx),%ax + andw $0x0c00,%ax + orw %dx,%ax + movw %ax,-8(%ecx) + fldcw -8(%ecx) / restore RD + fucom / check if x is already an integer + fstsw %ax + sahf + jp 0f + je 0f + fxch / x,[x] + fsub %st(1),%st / x-[x],[x] + fabs / |x-[x]|,[x] + PIC_SETUP(1) + fcoms PIC_L(.Lhalf) + PIC_WRAPUP + fnstsw %ax + sahf + jae 2f / if |x-[x]| = 0.5 goto halfway, + / most cases will not take branch. +0: + addl $8,%esp + fstp %st(0) + ret +2: + / x = n+0.5, recompute round(x) as x+sign(x)*0.5 + fldl 4(%ecx) / x, 0.5, [x] + movl 8(%ecx),%eax / high part of x + andl $0x80000000,%eax + jnz 3f + faddp + addl $8,%esp + fstp %st(1) + ret +3: + / here, x is negative, so return x-0.5 + fsubp %st,%st(1) / x-0.5,[x] + addl $8,%esp + fstp %st(1) + ret + .align 4 + SET_SIZE(round) diff --git a/usr/src/lib/libm/i386/src/roundl.s b/usr/src/lib/libm/i386/src/roundl.s new file mode 100644 index 0000000000..1db4133245 --- /dev/null +++ b/usr/src/lib/libm/i386/src/roundl.s @@ -0,0 +1,95 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .file "roundl.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(roundl,function) +#include "libm_synonyms.h" +#undef fabs + + .section .rodata + .align 4 +.Lhalf: .float 0.5 +/* roundl(x): round the long double x to the nearest integral value; when |x-[x]| compares as 0.5 the result is recomputed as x+sign(x)*0.5, i.e. halfway cases go away from zero. The caller's rounding field (mask 0x0c00) is restored right after frndint. Local labels: 0 = common exit, 2 = halfway case, 3 = halfway case with x negative. */ + ENTRY(roundl) + movl %esp,%ecx + subl $8,%esp + fstcw -8(%ecx) + fldt 4(%ecx) + movw -8(%ecx),%dx + andw $0xf3ff,%dx + movw %dx,-4(%ecx) + fldcw -4(%ecx) / set RD = to_nearest + fld %st(0) + frndint / [x],x + fstcw -4(%ecx) + movw -4(%ecx),%dx + andw $0xf3ff,%dx + movw -8(%ecx),%ax + andw $0x0c00,%ax + orw %dx,%ax + movw %ax,-8(%ecx) + fldcw -8(%ecx) / restore RD + fucom / check if x is already an integer + fstsw %ax + sahf + jp 0f + je 0f + fxch / x,[x] + fsub %st(1),%st / x-[x],[x] + fabs / |x-[x]|,[x] + PIC_SETUP(1) + fcoms PIC_L(.Lhalf) + PIC_WRAPUP + fnstsw %ax + sahf + jae 2f / if |x-[x]| = 0.5 goto halfway, + / most cases will not take branch. 
+0: + addl $8,%esp + fstp %st(0) + ret +2: + / x = n+0.5, recompute roundl(x) as x+sign(x)*0.5 + fldt 4(%ecx) / x, 0.5, [x] + movw 12(%ecx),%ax / sign+exp of x + andw $0x8000,%ax / look at sign bit + jnz 3f + faddp + addl $8,%esp + fstp %st(1) + ret +3: + / here, x is negative, so return x-0.5 + fsubp %st,%st(1) / x-0.5,[x] + addl $8,%esp + fstp %st(1) + ret + .align 4 + SET_SIZE(roundl) diff --git a/usr/src/lib/libm/i386/src/scalbln.s b/usr/src/lib/libm/i386/src/scalbln.s new file mode 100644 index 0000000000..62e016ab3c --- /dev/null +++ b/usr/src/lib/libm/i386/src/scalbln.s @@ -0,0 +1,42 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + + .file "scalbln.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(scalbln,function) +#include "libm_synonyms.h" +/* scalbln(x, n): scale the double x by a power of two with fscale. n is loaded from 12(%esp) as a 32-bit integer and converted on the x87 stack; fstp %st(1) then drops the converted n so only the result remains in %st(0). */ + ENTRY(scalbln) + fildl 12(%esp) / convert N to extended + fldl 4(%esp) / push x + fscale + fstp %st(1) + ret + .align 4 + SET_SIZE(scalbln) diff --git a/usr/src/lib/libm/i386/src/scalblnf.s b/usr/src/lib/libm/i386/src/scalblnf.s new file mode 100644 index 0000000000..a3d34bb404 --- /dev/null +++ b/usr/src/lib/libm/i386/src/scalblnf.s @@ -0,0 +1,42 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + + .file "scalblnf.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(scalblnf,function) +#include "libm_synonyms.h" +/* scalblnf(x, n): scale the float x by a power of two with fscale. n is loaded from 8(%esp) as a 32-bit integer and converted on the x87 stack; fstp %st(1) then drops the converted n so only the result remains in %st(0). */ + ENTRY(scalblnf) + fildl 8(%esp) / convert N to extended + flds 4(%esp) / push x + fscale + fstp %st(1) + ret + .align 4 + SET_SIZE(scalblnf) diff --git a/usr/src/lib/libm/i386/src/scalblnl.s b/usr/src/lib/libm/i386/src/scalblnl.s new file mode 100644 index 0000000000..b166e31efa --- /dev/null +++ b/usr/src/lib/libm/i386/src/scalblnl.s @@ -0,0 +1,43 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + + .file "scalblnl.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(scalblnl,function) +#include "libm_synonyms.h" +/* scalblnl(x, n): scale the long double x by a power of two with fscale. n is loaded from 16(%esp) as a 32-bit integer; fstp %st(1) then drops the converted n so only the result remains in %st(0). */ + ENTRY(scalblnl) + fildl 16(%esp) / convert 32-bit integer N + / to extended-double + fldt 4(%esp) / push x + fscale + fstp %st(1) + ret + .align 4 + SET_SIZE(scalblnl) diff --git a/usr/src/lib/libm/i386/src/scalbn.s b/usr/src/lib/libm/i386/src/scalbn.s new file mode 100644 index 0000000000..270e61d30e --- /dev/null +++ b/usr/src/lib/libm/i386/src/scalbn.s @@ -0,0 +1,42 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + + .file "scalbn.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(scalbn,function) +#include "libm_synonyms.h" +/* scalbn(x, n): scale the double x by a power of two with fscale. n is loaded from 12(%esp) as a 32-bit integer and converted on the x87 stack; fstp %st(1) then drops the converted n so only the result remains in %st(0). */ + ENTRY(scalbn) + fildl 12(%esp) / convert N to extended + fldl 4(%esp) / push x + fscale + fstp %st(1) + ret + .align 4 + SET_SIZE(scalbn) diff --git a/usr/src/lib/libm/i386/src/scalbnf.s b/usr/src/lib/libm/i386/src/scalbnf.s new file mode 100644 index 0000000000..525b832eee --- /dev/null +++ b/usr/src/lib/libm/i386/src/scalbnf.s @@ -0,0 +1,42 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + + .file "scalbnf.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(scalbnf,function) +#include "libm_synonyms.h" +/* scalbnf(x, n): scale the float x by a power of two with fscale. n is loaded from 8(%esp) as a 32-bit integer and converted on the x87 stack; fstp %st(1) then drops the converted n so only the result remains in %st(0). */ + ENTRY(scalbnf) + fildl 8(%esp) / convert N to extended + flds 4(%esp) / push x + fscale + fstp %st(1) + ret + .align 4 + SET_SIZE(scalbnf) diff --git a/usr/src/lib/libm/i386/src/scalbnl.s b/usr/src/lib/libm/i386/src/scalbnl.s new file mode 100644 index 0000000000..09dc73d997 --- /dev/null +++ b/usr/src/lib/libm/i386/src/scalbnl.s @@ -0,0 +1,43 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + + .file "scalbnl.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(scalbnl,function) +#include "libm_synonyms.h" +/* scalbnl(x, n): scale the long double x by a power of two with fscale. n is loaded from 16(%esp) as a 32-bit integer; fstp %st(1) then drops the converted n so only the result remains in %st(0). */ + ENTRY(scalbnl) + fildl 16(%esp) / convert 32-bit integer N + / to extended-double + fldt 4(%esp) / push x + fscale + fstp %st(1) + ret + .align 4 + SET_SIZE(scalbnl) diff --git a/usr/src/lib/libm/i386/src/sin.s b/usr/src/lib/libm/i386/src/sin.s new file mode 100644 index 0000000000..57919836d1 --- /dev/null +++ b/usr/src/lib/libm/i386/src/sin.s @@ -0,0 +1,59 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + + .file "sin.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(sin,function) +#include "libm_synonyms.h" +#include "libm_protos.h" +/* sin(x): call __reduction and dispatch on the value it returns in %eax: less than 1 -> fsin, 1 -> fcos, 2 -> fsin negated, anything else -> fcos negated. NOTE(review): presumably __reduction leaves the reduced argument in %st(0) and returns the quadrant number; confirm against its definition, which is not in this file. */ + ENTRY(sin) + PIC_SETUP(1) + call PIC_F(__reduction) + PIC_WRAPUP + cmpl $1,%eax + jl .sin0 + je .sin1 + cmpl $2,%eax + je .sin2 + fcos + fchs + ret +.sin2: + fsin + fchs + ret +.sin1: + fcos + ret +.sin0: + fsin + ret + .align 4 + SET_SIZE(sin) diff --git a/usr/src/lib/libm/i386/src/sincos.s b/usr/src/lib/libm/i386/src/sincos.s new file mode 100644 index 0000000000..14acff1581 --- /dev/null +++ b/usr/src/lib/libm/i386/src/sincos.s @@ -0,0 +1,82 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + + .file "sincos.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(sincos,function) +#include "libm_synonyms.h" +#include "libm_protos.h" +/* sincos(x, s, c): call __reduction, run fsincos, then store the two results as doubles through the pointers at 12(%esp) (sine) and 16(%esp) (cosine), swapping and/or negating them according to the value n returned in %eax (see the n=0..3 cases below). NOTE(review): presumably __reduction leaves the reduced argument in %st(0) and returns the quadrant number; confirm against its definition, which is not in this file. */ + ENTRY(sincos) + PIC_SETUP(1) + call PIC_F(__reduction) + PIC_WRAPUP + fsincos + cmpl $1,%eax + jl .sincos0 + je .sincos1 + cmpl $2,%eax + je .sincos2 + / n=3 + fchs + movl 12(%esp),%eax + fstpl 0(%eax) + movl 16(%esp),%eax + fstpl 0(%eax) + fwait + ret +.sincos2: + / n=2 + fchs + movl 16(%esp),%eax + fstpl 0(%eax) + fchs + movl 12(%esp),%eax + fstpl 0(%eax) + fwait + ret +.sincos1: + / n=1 + movl 12(%esp),%eax + fstpl 0(%eax) + fchs + movl 16(%esp),%eax + fstpl 0(%eax) + fwait + ret +.sincos0: + / n=0 + movl 16(%esp),%eax + fstpl 0(%eax) + movl 12(%esp),%eax + fstpl 0(%eax) + fwait + ret + .align 4 + SET_SIZE(sincos) diff --git a/usr/src/lib/libm/i386/src/sqrtl.s b/usr/src/lib/libm/i386/src/sqrtl.s new file mode 100644 index 0000000000..05636173d0 --- /dev/null +++ b/usr/src/lib/libm/i386/src/sqrtl.s @@ -0,0 +1,40 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + + .file "sqrtl.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(sqrtl,function) +#include "libm_synonyms.h" +/* sqrtl(x): load the long double argument from 4(%esp) and return its square root in %st(0) using the fsqrt instruction. */ + ENTRY(sqrtl) + fldt 4(%esp) + fsqrt + ret + .align 4 + SET_SIZE(sqrtl) diff --git a/usr/src/lib/libm/i386/src/tan.s b/usr/src/lib/libm/i386/src/tan.s new file mode 100644 index 0000000000..d3cdf08fc0 --- /dev/null +++ b/usr/src/lib/libm/i386/src/tan.s @@ -0,0 +1,52 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + + .file "tan.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(tan,function) +#include "libm_synonyms.h" +#include "libm_protos.h" +/* tan(x): call __reduction and test the low bit of the value it returns in %eax. Even (.tan1): fptan, then pop the extra value fptan pushes and return the tangent. Odd: divide the two fptan results and negate the quotient. NOTE(review): the odd case is presumably meant to yield -1/tan of the reduced argument; confirm the fdivp operand order under this assembler. The cmpl $0 is redundant because andl already sets ZF. */ + ENTRY(tan) + PIC_SETUP(1) + call PIC_F(__reduction) + PIC_WRAPUP + andl $1,%eax + cmpl $0,%eax + je .tan1 + fptan + fdivp %st,%st(1) + fchs + ret +.tan1: + fptan + fstp %st(0) + ret + .align 4 + SET_SIZE(tan) diff --git a/usr/src/lib/libm/i386/src/trunc.s b/usr/src/lib/libm/i386/src/trunc.s new file mode 100644 index 0000000000..06a6973cf0 --- /dev/null +++ b/usr/src/lib/libm/i386/src/trunc.s @@ -0,0 +1,56 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + + .file "trunc.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(trunc,function) +#include "libm_synonyms.h" +/* trunc(x): return the double x rounded toward zero to an integral value. The x87 control word is saved, its rounding field (mask 0x0c00) is forced to all ones around frndint, and the caller's rounding field is merged back in before returning. */ + ENTRY(trunc) + movl %esp,%eax + subl $8,%esp + fstcw -8(%eax) + fldl 4(%eax) + movw -8(%eax),%cx + orw $0x0c00,%cx + movw %cx,-4(%eax) + fldcw -4(%eax) / set RD = to_zero + frndint + fstcw -4(%eax) + movw -4(%eax),%dx + andw $0xf3ff,%dx + movw -8(%eax),%cx + andw $0x0c00,%cx + orw %dx,%cx + movw %cx,-8(%eax) + fldcw -8(%eax) / restore RD + addl $8,%esp + ret + .align 4 + SET_SIZE(trunc) diff --git a/usr/src/lib/libm/i386/src/truncl.s b/usr/src/lib/libm/i386/src/truncl.s new file mode 100644 index 0000000000..11881ccabc --- /dev/null +++ b/usr/src/lib/libm/i386/src/truncl.s @@ -0,0 +1,56 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + + .file "truncl.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(truncl,function) +#include "libm_synonyms.h" +/* truncl(x): return the long double x rounded toward zero to an integral value. The x87 control word is saved, its rounding field (mask 0x0c00) is forced to all ones around frndint, and the caller's rounding field is merged back in before returning. */ + ENTRY(truncl) + movl %esp,%eax + subl $8,%esp + fstcw -8(%eax) + fldt 4(%eax) + movw -8(%eax),%cx + orw $0x0c00,%cx + movw %cx,-4(%eax) + fldcw -4(%eax) / set RD = to_zero + frndint + fstcw -4(%eax) + movw -4(%eax),%dx + andw $0xf3ff,%dx + movw -8(%eax),%cx + andw $0x0c00,%cx + orw %dx,%cx + movw %cx,-8(%eax) + fldcw -8(%eax) / restore RD + addl $8,%esp + ret + .align 4 + SET_SIZE(truncl) diff --git a/usr/src/lib/libm/sparc/Makefile b/usr/src/lib/libm/sparc/Makefile new file mode 100644 index 0000000000..ddb6f2cc98 --- /dev/null +++ b/usr/src/lib/libm/sparc/Makefile @@ -0,0 +1,23 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2011 Nexenta Systems, Inc. All rights reserved. +# + +TARGET_ARCH= sparc +include ../Makefile.com + +CHIP = ultra + +install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT) + +include ../Makefile.targ diff --git a/usr/src/lib/libm/sparc/src/copysign.S b/usr/src/lib/libm/sparc/src/copysign.S new file mode 100644 index 0000000000..4cfae45627 --- /dev/null +++ b/usr/src/lib/libm/sparc/src/copysign.S @@ -0,0 +1,44 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .file "copysign.S" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(copysign,function) +#include "libm_synonyms.h" +/* copysign(x, y): integer-register implementation. Bit 31 of %o0 (sign word of x) is cleared and replaced with bit 31 of %o2 (sign word of y); %o3 is overwritten with the 0x80000000 mask. The %o0/%o1 pair is then stored to [%sp+0x48] and reloaded into %f0 in the delay slot of retl. */ + ENTRY(copysign) + sethi %hi(0x80000000),%o3 + andn %o0,%o3,%o0 + and %o2,%o3,%o2 + or %o2,%o0,%o0 + std %o0,[%sp+0x48] + retl + ldd [%sp+0x48],%f0 + + SET_SIZE(copysign) diff --git a/usr/src/lib/libm/sparc/src/fabs.S b/usr/src/lib/libm/sparc/src/fabs.S new file mode 100644 index 0000000000..34f1ad38d6 --- /dev/null +++ b/usr/src/lib/libm/sparc/src/fabs.S @@ -0,0 +1,48 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .file "fabs.S" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(fabs,function) +#include "libm_synonyms.h" +/* fabs(x): clear bit 31 of %o0 (the sign bit of x), store the %o0/%o1 pair to [%sp+0x48] and reload it into %f0 in the delay slot of retl. NOTE(review): six nops separate the store from the reload; the reason is not stated here, presumably a store-to-load timing concern on the target CPU. */ + ENTRY(fabs) + sethi %hi(0x80000000),%o2 + andn %o0,%o2,%o0 + std %o0,[%sp+0x48] + nop + nop + nop + nop + nop + nop + retl + ldd [%sp+0x48],%f0 + + SET_SIZE(fabs) diff --git a/usr/src/lib/libm/sparc/src/libm_inlines.h b/usr/src/lib/libm/sparc/src/libm_inlines.h new file mode 100644 index 0000000000..93c9aa2302 --- /dev/null +++ b/usr/src/lib/libm/sparc/src/libm_inlines.h @@ -0,0 +1,301 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * Copyright 2011, Richard Lowe. + */ + +/* Functions in this file are duplicated in locallibm.il. 
Keep them in sync */ + +#ifndef _LIBM_INLINES_H +#define _LIBM_INLINES_H + +#ifdef __GNUC__ + +#include <sys/types.h> +#include <sys/ieeefp.h> + +#ifdef __cplusplus +extern "C" { +#endif +/* Double-precision square root using a single fsqrtd instruction. */ +extern __inline__ double +__inline_sqrt(double d) +{ + double ret; + + __asm__ __volatile__("fsqrtd %1,%0\n\t" : "=e" (ret) : "e" (d)); + return (ret); +} +/* Single-precision square root using a single fsqrts instruction. */ +extern __inline__ float +__inline_sqrtf(float f) +{ + float ret; + + __asm__ __volatile__("fsqrts %1,%0\n\t" : "=f" (ret) : "f" (f)); + return (ret); +} +/* Classify a float from its bit pattern. The value returned is 0 (zero), 1 (subnormal), 2 (normal), 3 (infinity), 4 (quiet NaN) or 5 (signaling NaN), as annotated on each mov below. */ +extern __inline__ enum fp_class_type +fp_classf(float f) +{ + enum fp_class_type ret; + uint32_t tmp; + + /* XXX: Separate input and output */ + __asm__ __volatile__( + "sethi %%hi(0x80000000),%1\n\t" + "andncc %2,%1,%0\n\t" + "bne 1f\n\t" + "nop\n\t" + "mov 0,%0\n\t" + "ba 2f\n\t" /* x is 0 */ + "nop\n\t" + "1:\n\t" + "sethi %%hi(0x7f800000),%1\n\t" + "andcc %0,%1,%%g0\n\t" + "bne 1f\n\t" + "nop\n\t" + "mov 1,%0\n\t" + "ba 2f\n\t" /* x is subnormal */ + "nop\n\t" + "1:\n\t" + "cmp %0,%1\n\t" + "bge 1f\n\t" + "nop\n\t" + "mov 2,%0\n\t" + "ba 2f\n\t" /* x is normal */ + "nop\n\t" + "1:\n\t" + "bg 1f\n\t" + "nop\n\t" + "mov 3,%0\n\t" + "ba 2f\n\t" /* x is __infinity */ + "nop\n\t" + "1:\n\t" + "sethi %%hi(0x00400000),%1\n\t" + "andcc %0,%1,%%g0\n\t" + "mov 4,%0\n\t" /* x is quiet NaN */ + "bne 2f\n\t" + "nop\n\t" + "mov 5,%0\n\t" /* x is signaling NaN */ + "2:\n\t" + : "=r" (ret), "=&r" (tmp) + : "r" (f) + : "cc"); + return (ret); +} +/* Word 0 and word 1 of an object viewed as an array of uint32_t; fp_class() below treats word 0 of a double as the sign/exponent word. NOTE(review): neither the argument nor the expansion is parenthesized, so pass only a plain variable name (as fp_class does). */ +#define _HI_WORD(x) ((uint32_t *)&x)[0] +#define _LO_WORD(x) ((uint32_t *)&x)[1] +/* Classify a double from its two 32-bit words. The value returned is 0 (zero), 1 (subnormal), 2 (normal), 3 (infinity), 4 (quiet NaN) or 5 (signaling NaN), as annotated on each mov below. */ +extern __inline__ enum fp_class_type +fp_class(double d) +{ + enum fp_class_type ret; + uint32_t tmp; + + __asm__ __volatile__( + "sethi %%hi(0x80000000),%1\n\t" /* %1 gets 80000000 */ + "andn %2,%1,%0\n\t" /* %0 gets high word of abs(x) */ + "orcc %0,%3,%%g0\n\t" /* set cc as x is zero/nonzero */ + "bne 1f\n\t" /* branch if x is nonzero */ + "nop\n\t" + "mov 0,%0\n\t" + "ba 2f\n\t" /* x is 0 */ + "nop\n\t" + "1:\n\t" + "sethi %%hi(0x7ff00000),%1\n\t" /* %1 gets 
7ff00000 */ + "andcc %0,%1,%%g0\n\t" /* cc set by __exp field of x */ + "bne 1f\n\t" /* branch if normal or max __exp */ + "nop\n\t" + "mov 1,%0\n\t" + "ba 2f\n\t" /* x is subnormal */ + "nop\n\t" + "1:\n\t" + "cmp %0,%1\n\t" + "bge 1f\n\t" /* branch if x is max __exp */ + "nop\n\t" + "mov 2,%0\n\t" + "ba 2f\n\t" /* x is normal */ + "nop\n\t" + "1:\n\t" + "andn %0,%1,%0\n\t" /* %0 gets msw __significand field */ + "orcc %0,%3,%%g0\n\t" /* set cc by OR __significand */ + "bne 1f\n\t" /* Branch if __nan */ + "nop\n\t" + "mov 3,%0\n\t" + "ba 2f\n\t" /* x is __infinity */ + "nop\n\t" + "1:\n\t" + "sethi %%hi(0x00080000),%1\n\t" + "andcc %0,%1,%%g0\n\t" /* set cc by quiet/sig bit */ + "be 1f\n\t" /* Branch if signaling */ + "nop\n\t" + "mov 4,%0\n\t" /* x is quiet NaN */ + "ba 2f\n\t" + "nop\n\t" + "1:\n\t" + "mov 5,%0\n\t" /* x is signaling NaN */ + "2:\n\t" + : "=&r" (ret), "=&r" (tmp) + : "r" (_HI_WORD(d)), "r" (_LO_WORD(d)) + : "cc"); + + return (ret); +} +/* Replace the five %fsr bits selected by mask 0x3e0 (the aexc field, per the comment below) with the low five bits of i, and return the previous value of that field. */ +extern __inline__ int +__swapEX(int i) +{ + int ret; + uint32_t fsr; + uint32_t tmp1, tmp2; + + __asm__ __volatile__( + "and %4,0x1f,%2\n\t" /* tmp1 = %2 = %o1 */ + "sll %2,5,%2\n\t" /* shift input to aexc bit location */ + ".volatile\n\t" + "st %%fsr,%1\n\t" + "ld %1,%0\n\t" /* %0 = fsr */ + "andn %0,0x3e0,%3\n\t" /* tmp2 = %3 = %o2 */ + "or %2,%3,%2\n\t" /* %2 = new fsr */ + "st %2,%1\n\t" + "ld %1,%%fsr\n\t" + "srl %0,5,%0\n\t" + "and %0,0x1f,%0\n\t" /* %0 = ret = %o0 */ + ".nonvolatile\n\t" + : "=r" (ret), "=m" (fsr), "=r" (tmp1), "=r" (tmp2) + : "r" (i) + : "cc"); + + return (ret); +} + +/* + * On the SPARC, __swapRP is a no-op; always return 0 for backward + * compatibility + */ +/* ARGSUSED */ +extern __inline__ enum fp_precision_type +__swapRP(enum fp_precision_type i) +{ + return (0); +} +/* Replace the two %fsr rounding-direction bits (mask 0xc0000000) with the low two bits of d, and return the previous value of that field. */ +extern __inline__ enum fp_direction_type +__swapRD(enum fp_direction_type d) +{ + enum fp_direction_type ret; + uint32_t fsr; + uint32_t tmp1, tmp2, tmp3; + + __asm__ __volatile__( + "and %5,0x3,%0\n\t" + "sll 
%0,30,%2\n\t" /* shift input to RD bit location */ + ".volatile\n\t" + "st %%fsr,%1\n\t" + "ld %1,%0\n\t" /* %0 = fsr */ + "set 0xc0000000,%4\n\t" /* mask of rounding direction bits */ + "andn %0,%4,%3\n\t" + "or %2,%3,%2\n\t" /* %2 = new fsr */ + "st %2,%1\n\t" + "ld %1,%%fsr\n\t" + "srl %0,30,%0\n\t" + "and %0,0x3,%0\n\t" + ".nonvolatile\n\t" + : "=r" (ret), "=m" (fsr), "=r" (tmp1), "=r" (tmp2), "=r" (tmp3) + : "r" (d) + : "cc"); + + return (ret); +} +/* Replace the five %fsr trap-enable bits (mask 0x0f800000) with the low five bits of i, and return the previous value of that field. */ +extern __inline__ int +__swapTE(int i) +{ + int ret; + uint32_t fsr, tmp1, tmp2; + + __asm__ __volatile__( + "and %4,0x1f,%0\n\t" + "sll %0,23,%2\n\t" /* shift input to TEM bit location */ + ".volatile\n\t" + "st %%fsr,%1\n\t" + "ld %1,%0\n\t" /* %0 = fsr */ + "set 0x0f800000,%3\n\t" /* mask of TEM (Trap Enable Mode bits) */ + "andn %0,%3,%3\n\t" + "or %2,%3,%2\n\t" /* %2 = new fsr */ + "st %2,%1\n\t" + "ld %1,%%fsr\n\t" + "srl %0,23,%0\n\t" + "and %0,0x1f,%0\n\t" + ".nonvolatile\n\t" + : "=r" (ret), "=m" (fsr), "=r" (tmp1), "=r" (tmp2) + : "r" (i) + : "cc"); + + return (ret); +} +/* Inline sqrt() that forwards to __inline_sqrt(). */ +extern __inline__ double +sqrt(double d) +{ + return (__inline_sqrt(d)); +} +/* Inline sqrtf() that forwards to __inline_sqrtf(). */ +extern __inline__ float +sqrtf(float f) +{ + return (__inline_sqrtf(f)); +} +/* Inline fabs() using a single fabsd instruction. */ +extern __inline__ double +fabs(double d) +{ + double ret; + + __asm__ __volatile__("fabsd %1,%0\n\t" : "=e" (ret) : "e" (d)); + return (ret); +} +/* Inline fabsf() using a single fabss instruction. */ +extern __inline__ float +fabsf(float f) +{ + float ret; + + __asm__ __volatile__("fabss %1,%0\n\t" : "=f" (ret) : "f" (f)); + return (ret); +} + +#ifdef __cplusplus +} +#endif + +#endif /* __GNUC__ */ + +#endif /* _LIBM_INLINES_H */ diff --git a/usr/src/lib/libm/sparc/src/locallibm.il b/usr/src/lib/libm/sparc/src/locallibm.il new file mode 100644 index 0000000000..3822f5f92d --- /dev/null +++ b/usr/src/lib/libm/sparc/src/locallibm.il @@ -0,0 +1,2034 @@ +! +! CDDL HEADER START +! +! The contents of this file are subject to the terms of the +! Common Development and Distribution License (the "License"). +! 
You may not use this file except in compliance with the License. +! +! You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +! or http://www.opensolaris.org/os/licensing. +! See the License for the specific language governing permissions +! and limitations under the License. +! +! When distributing Covered Code, this CDDL HEADER in each +! file and the License file at usr/src/OPENSOLARIS.LICENSE. +! If applicable, add the following below this CDDL HEADER, with the +! fields enclosed by brackets "[]" replaced with your own identifying +! information: Portions Copyright [yyyy] [name of copyright owner] +! +! CDDL HEADER END +! +! Copyright 2011 Nexenta Systems, Inc. All rights reserved. +! +! Copyright 2006 Sun Microsystems, Inc. All rights reserved. +! Use is subject to license terms. +! + +! Portions of this file are duplicated as GCC inline assembly in +! libm_inlines.h. Keep them in sync. + + .inline __r_hypot_,2 + ld [%o0],%o4 + sethi 0x1fffff,%o5 + or %o5,1023,%o5 + and %o4,%o5,%o4 + sethi 0x1fe000,%o3 + cmp %o4,%o3 + ld [%o0],%f0 ! load result with first argument + bne 2f + nop + fabss %f0,%f0 + ld [%o1],%f1 + .volatile + fcmps %f0,%f1 ! generate invalid for Snan + .nonvolatile + nop + fba 5f + nop +2: + ld [%o1],%o4 + sethi 0x1fffff,%o5 + or %o5,1023,%o5 + and %o4,%o5,%o4 + sethi 0x1fe000,%o3 + cmp %o4,%o3 + bne 4f + nop + ld [%o1],%f0 ! second argument inf + fabss %f0,%f0 + ld [%o0],%f1 + .volatile + fcmps %f0,%f1 ! generate invalid for Snan + .nonvolatile + nop + fba 5f + nop +4: + ld [%o1],%f3 + fsmuld %f0,%f0,%f0 + fsmuld %f3,%f3,%f2 + faddd %f2,%f0,%f0 + fsqrtd %f0,%f0 + fdtos %f0,%f0 +5: + .end + + .inline __c_abs,1 + ld [%o0],%o4 + sethi 0x1fffff,%o5 + or %o5,1023,%o5 + and %o4,%o5,%o4 + sethi 0x1fe000,%o3 + cmp %o4,%o3 + ld [%o0],%f0 + bne 2f + nop + fabss %f0,%f0 + ld [%o0+4],%f1 + .volatile + fcmps %f0,%f1 ! 
generate invalid for Snan + .nonvolatile + nop + fba 5f + nop +2: + ld [%o0+4],%o4 + sethi 0x1fffff,%o5 + or %o5,1023,%o5 + and %o4,%o5,%o4 + sethi 0x1fe000,%o3 + cmp %o4,%o3 + bne 4f + nop + ld [%o0+4],%f0 + fabss %f0,%f0 + ld [%o0],%f1 + .volatile + fcmps %f0,%f1 ! generate invalid for Snan + .nonvolatile + nop + fba 5f + nop +! neither part is infinite: result = sqrt(re*re + im*im), evaluated in double +4: + ld [%o0+4],%f3 + fsmuld %f0,%f0,%f0 + fsmuld %f3,%f3,%f2 + faddd %f2,%f0,%f0 + fsqrtd %f0,%f0 + fdtos %f0,%f0 +5: + .end +!- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +! void +! __Fc_mult(c, a, b) +! complex *c, *a, *b; +! { + + .inline __Fc_mult,3 +! 21 c->real = (a->real * b->real) - (a->imag * b->imag) +! c->imag = (a->real * b->imag) + (a->imag * b->real) +! all four loads are issued before either store to c + ld [%o1+4],%f0 ! f0 = a->imag + ld [%o2+4],%f1 ! f1 = b->imag + ld [%o1],%f2 ! f2 = a->real + fsmuld %f0,%f1,%f4 ! f4 = (a->imag * b->imag) + ld [%o2],%f3 ! f3 = b->real + fsmuld %f2,%f1,%f6 ! f6 = a->real * b->imag + fsmuld %f2,%f3,%f8 ! f8 = a->real * b->real + fsmuld %f0,%f3,%f10 ! f10 = a->imag * b->real + fsubd %f8,%f4,%f0 ! f0 = ar*br - ai*bi + faddd %f6,%f10,%f2 ! f2 = ai*br + ar*bi + fdtos %f0,%f4 + fdtos %f2,%f6 + st %f4,[%o0] + st %f6,[%o0+4] + .end +! } +!- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +! void +! __Fc_div(c, a, b) +! complex *c, *a, *b; +! { + .inline __Fc_div,3 +! if every bit of b->imag below the sign is clear (b->imag is +-0), +! divide a->real and a->imag by b->real in single precision; +! otherwise take the general path at 1: + ld [%o2+4],%o3 + sethi %hi(0x7fffffff),%o4 + or %o4,%lo(0x7fffffff),%o4 ! [internal] + andcc %o3,%o4,%g0 + ld [%o2],%f6 ! f6 gets reb + bne 1f + nop + ld [%o1],%f0 + ld [%o2],%f1 + fdivs %f0,%f1,%f0 + st %f0,[%o0] + ld [%o1+4],%f3 + fdivs %f3,%f1,%f3 + st %f3,[%o0+4] + ba 2f + nop +1: ! [internal] + sethi %hi(0x3ff00000),%o4 + or %g0,0,%o5 + std %o4,[%sp+0x48] + ldd [%sp+0x48],%f8 + ld [%o2+4],%f10 ! f10 gets imb + fsmuld %f6,%f6,%f16 ! f16/17 gets reb**2 + ld [%o1+4],%f4 ! f4 gets ima + fsmuld %f10,%f10,%f12 ! f12/13 gets imb**2 + ld [%o1],%f19 ! f19 gets rea + fsmuld %f4,%f10,%f0 ! f0/f1 gets ima*imb + fsmuld %f19,%f6,%f2 ! 
f2/3 gets rea*reb + faddd %f12,%f16,%f12 ! f12/13 gets reb**2+imb**2 + fdivd %f8,%f12,%f12 ! f12/13 gets 1/(reb**2+imb**2) + faddd %f2,%f0,%f2 ! f2/3 gets rea*reb+ima*imb + fsmuld %f4,%f6,%f24 ! f24/5 gets ima*reb + fmuld %f2,%f12,%f2 ! f2/3 gets rec + fsmuld %f19,%f10,%f10 ! f10/11 gets rea*imb + fsubd %f24,%f10,%f10 ! f10/11 gets ima*reb-rea*imb + fmuld %f10,%f12,%f12 ! f12 gets imc + fdtos %f2,%f7 ! f7 gets rec + fdtos %f12,%f15 ! f15 gets imc + st %f7,[%o0] + st %f15,[%o0+4] +2: + .end +! } + + .inline .mul,2 + .volatile + smul %o0,%o1,%o0 + rd %y,%o1 + sra %o0,31,%o2 + cmp %o1,%o2 + .nonvolatile + .end + + .inline .umul,2 + .volatile + umul %o0,%o1,%o0 + rd %y,%o1 + tst %o1 + .nonvolatile + .end + + .inline .div,2 + sra %o0,31,%o4 ! extend sign + .volatile + wr %o4,%g0,%y + cmp %o1,0xffffffff ! is divisor -1? + be,a 1f ! if yes + .volatile + subcc %g0,%o0,%o0 ! simply negate dividend + nop ! RT620 FABs A.0/A.1 + sdiv %o0,%o1,%o0 ! o0 contains quotient a/b + .nonvolatile +1: + .end + + .inline .udiv,2 + .volatile + wr %g0,%g0,%y + nop + nop + nop + udiv %o0,%o1,%o0 ! o0 contains quotient a/b + .nonvolatile + .end + + .inline .rem,2 + sra %o0,31,%o4 ! extend sign + .volatile + wr %o4,%g0,%y + cmp %o1,0xffffffff ! is divisor -1? + be,a 1f ! if yes + .volatile + or %g0,%g0,%o0 ! simply return 0 + nop ! RT620 FABs A.0/A.1 + sdiv %o0,%o1,%o2 ! o2 contains quotient a/b + .nonvolatile + smul %o2,%o1,%o4 ! o4 contains q*b + sub %o0,%o4,%o0 ! o0 gets a-q*b +1: + .end + + .inline .urem,2 + .volatile + wr %g0,%g0,%y + nop + nop + nop + udiv %o0,%o1,%o2 ! o2 contains quotient a/b + .nonvolatile + umul %o2,%o1,%o4 ! o4 contains q*b + sub %o0,%o4,%o0 ! o0 gets a-q*b + .end + + .inline .div_o3,2 + sra %o0,31,%o4 ! extend sign + .volatile + wr %o4,%g0,%y + cmp %o1,0xffffffff ! is divisor -1? + be,a 1f ! if yes + .volatile + subcc %g0,%o0,%o0 ! simply negate dividend + mov %o0,%o3 ! o3 gets __remainder + sdiv %o0,%o1,%o0 ! 
o0 contains quotient a/b + .nonvolatile + smul %o0,%o1,%o4 ! o4 contains q*b + ba 2f + sub %o3,%o4,%o3 ! o3 gets a-q*b +1: + mov %g0,%o3 ! __remainder is 0 +2: + .end + + .inline .udiv_o3,2 + .volatile + wr %g0,%g0,%y + mov %o0,%o3 ! o3 gets __remainder + nop + nop + udiv %o0,%o1,%o0 ! o0 contains quotient a/b + .nonvolatile + umul %o0,%o1,%o4 ! o4 contains q*b + sub %o3,%o4,%o3 ! o3 gets a-q*b + .end + + .inline __ieee754_sqrt,2 + std %o0,[%sp+0x48] ! store to 8-aligned address + ldd [%sp+0x48],%f0 + fsqrtd %f0,%f0 + .end + + .inline __inline_sqrtf,1 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f0 + fsqrts %f0,%f0 + .end + + .inline __inline_sqrt,2 + std %o0,[%sp+0x48] ! store to 8-aligned address + ldd [%sp+0x48],%f0 + fsqrtd %f0,%f0 + .end + + .inline __sqrtf,1 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f0 + fsqrts %f0,%f0 + .end + + .inline __sqrt,2 + std %o0,[%sp+0x48] ! store to 8-aligned address + ldd [%sp+0x48],%f0 + fsqrtd %f0,%f0 + .end + + .inline __r_sqrt_,1 + ld [%o0],%f0 + fsqrts %f0,%f0 + .end + + .inline __d_sqrt_,1 + ld [%o0],%f0 + ld [%o0+4],%f1 + fsqrtd %f0,%f0 + .end + + .inline __ceil,2 + std %o0,[%sp+0x48] + sethi %hi(0x80000000),%o5 + andn %o0,%o5,%o2 + sethi %hi(0x43300000),%o3 + st %g0,[%sp+0x54] + subcc %o2,%o3,%g0 + bl 1f + nop + sethi %hi(0x3ff00000),%o2 + st %o2,[%sp+0x50] + ldd [%sp+0x48],%f0 + ldd [%sp+0x50],%f2 + fmuld %f0,%f2,%f0 + ba 4f + nop +1: + tst %o0 + st %o3,[%sp+0x50] + ldd [%sp+0x50],%f2 + bge 2f + nop + fnegs %f2,%f2 +2: + ldd [%sp+0x48],%f4 + faddd %f4,%f2,%f0 + fsubd %f0,%f2,%f0 + fcmpd %f0,%f4 + sethi %hi(0x3ff00000),%o2 + st %o2,[%sp+0x50] + and %o0,%o5,%o4 + fbge 3f + nop + ldd [%sp+0x50],%f4 + faddd %f0,%f4,%f0 +3: + st %f0,[%sp+0x48] + ld [%sp+0x48],%o3 + andn %o3,%o5,%o3 + or %o4,%o3,%o3 + st %o3,[%sp+0x48] + ld [%sp+0x48],%f0 +4: + .end + + .inline __floor,2 + std %o0,[%sp+0x48] + sethi %hi(0x80000000),%o5 + andn %o0,%o5,%o2 + sethi %hi(0x43300000),%o3 + st %g0,[%sp+0x54] + subcc %o2,%o3,%g0 + bl 1f + nop + sethi 
%hi(0x3ff00000),%o2 + st %o2,[%sp+0x50] + ldd [%sp+0x48],%f0 + ldd [%sp+0x50],%f2 + fmuld %f0,%f2,%f0 + ba 4f + nop +1: + tst %o0 + st %o3,[%sp+0x50] + ldd [%sp+0x50],%f2 + bge 2f + nop + fnegs %f2,%f2 +2: + ldd [%sp+0x48],%f4 + faddd %f4,%f2,%f0 + fsubd %f0,%f2,%f0 + fcmpd %f0,%f4 + sethi %hi(0x3ff00000),%o2 + st %o2,[%sp+0x50] + ldd [%sp+0x50],%f4 + and %o0,%o5,%o4 + fble 3f + nop + fsubd %f0,%f4,%f0 +3: + st %f0,[%sp+0x48] + ld [%sp+0x48],%o3 + andn %o3,%o5,%o3 + or %o4,%o3,%o3 + st %o3,[%sp+0x48] + ld [%sp+0x48],%f0 +4: + .end + + .inline __ilogb,2 + sethi %hi(0x7ff00000),%o4 + andcc %o4,%o0,%o2 + bne 1f + nop + sethi %hi(0x43500000),%o3 + std %o0,[%sp+0x48] + st %o3,[%sp+0x50] + st %g0,[%sp+0x54] + ldd [%sp+0x48],%f0 + ldd [%sp+0x50],%f2 + fmuld %f0,%f2,%f0 + sethi %hi(0x80000001),%o0 + or %o0,%lo(0x80000001),%o0 + st %f0,[%sp+0x48] + ld [%sp+0x48],%o2 + andcc %o2,%o4,%o2 + srl %o2,20,%o2 + be 2f + nop + sub %o2,0x435,%o0 + ba 2f + nop +1: + subcc %o4,%o2,%g0 + srl %o2,20,%o3 + bne 0f + nop + sethi %hi(0x7fffffff),%o0 + or %o0,%lo(0x7fffffff),%o0 + ba 2f + nop +0: + sub %o3,0x3ff,%o0 +2: + .end + + .inline __rint,2 + std %o0,[%sp+0x48] + sethi %hi(0x80000000),%o2 + andn %o0,%o2,%o2 + ldd [%sp+0x48],%f0 + sethi %hi(0x43300000),%o3 + st %g0,[%sp+0x50] + st %g0,[%sp+0x54] + subcc %o2,%o3,%g0 + bl 1f + nop + sethi %hi(0x3ff00000),%o2 + st %o2,[%sp+0x50] + ldd [%sp+0x50],%f2 + fmuld %f0,%f2,%f0 + ba 3f + nop +1: + tst %o0 + st %o3,[%sp+0x48] + st %g0,[%sp+0x4c] + ldd [%sp+0x48],%f2 + bge 2f + nop + fnegs %f2,%f2 +2: + faddd %f0,%f2,%f0 + fcmpd %f0,%f2 + fbne 0f + nop + ldd [%sp+0x50],%f0 + bge 3f + nop + fnegs %f0,%f0 + ba 3f + nop +0: + fsubd %f0,%f2,%f0 +3: + .end + + .inline __rintf,1 + st %o0,[%sp+0x48] + sethi %hi(0x80000000),%o2 + andn %o0,%o2,%o2 + ld [%sp+0x48],%f0 + sethi %hi(0x4b000000),%o3 + st %g0,[%sp+0x50] + subcc %o2,%o3,%g0 + bl 1f + nop + sethi %hi(0x3f800000),%o2 + st %o2,[%sp+0x50] + ld [%sp+0x50],%f2 + fmuls %f0,%f2,%f0 + ba 3f + nop +1: + tst 
%o0 + st %o3,[%sp+0x48] + ld [%sp+0x48],%f2 + bge 2f + nop + fnegs %f2,%f2 +2: + fadds %f0,%f2,%f0 + fcmps %f0,%f2 + fbne 0f + nop + ld [%sp+0x50],%f0 + bge 3f + nop + fnegs %f0,%f0 + ba 3f + nop +0: + fsubs %f0,%f2,%f0 +3: + .end + + .inline __min_subnormal,0 + set 0x0,%o0 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f0 + set 0x1,%o0 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f1 + .end + + .inline __d_min_subnormal_,0 + set 0x0,%o0 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f0 + set 0x1,%o0 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f1 + .end + + .inline __min_subnormalf,0 + set 0x1,%o0 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f0 + .end + + .inline __r_min_subnormal_,0 + set 0x1,%o0 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f0 + .end + + .inline __max_subnormal,0 + set 0x000fffff,%o0 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f0 + set 0xffffffff,%o0 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f1 + .end + + .inline __d_max_subnormal_,0 + set 0x000fffff,%o0 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f0 + set 0xffffffff,%o0 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f1 + .end + + .inline __max_subnormalf,0 + set 0x007fffff,%o0 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f0 + .end + + .inline __r_max_subnormal_,0 + set 0x007fffff,%o0 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f0 + .end + + .inline __min_normal,0 + set 0x00100000,%o0 + set 0x0,%o1 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f0 + .end + + .inline __d_min_normal_,0 + set 0x00100000,%o0 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f0 + set 0x0,%o0 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f1 + .end + + .inline __min_normalf,0 + set 0x00800000,%o0 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f0 + .end + + .inline __r_min_normal_,0 + set 0x00800000,%o0 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f0 + .end + + .inline __max_normal,0 + set 0x7fefffff,%o0 + set 0xffffffff,%o1 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f0 + .end + + .inline __d_max_normal_,0 + set 0x7fefffff,%o0 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f0 + set 0xffffffff,%o0 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f1 + .end + + .inline __max_normalf,0 + set 
0x7f7fffff,%o0 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f0 + .end + + .inline __r_max_normal_,0 + set 0x7f7fffff,%o0 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f0 + .end + +! +Inf constants: double 0x7ff00000 00000000, single 0x7f800000. +! Each template builds the bit pattern in integer registers and moves it +! to the floating-point result register through a stack scratch slot. + .inline __infinity,0 + set 0x7ff00000,%o0 + set 0x0,%o1 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f0 + .end + + .inline __d_infinity_,0 + set 0x7ff00000,%o0 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f0 + set 0x0,%o0 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f1 + .end + + .inline __infinityf,0 + set 0x7f800000,%o0 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f0 + .end + + .inline __r_infinity_,0 + set 0x7f800000,%o0 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f0 + .end + +! Signaling NaN constants: maximum exponent, quiet bit clear, lowest +! significand bit set (double 0x7ff00000 00000001, single 0x7f800001). + .inline __signaling_nan,0 + set 0x7ff00000,%o0 + set 0x1,%o1 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f0 + .end + + .inline __d_signaling_nan_,0 + set 0x7ff00000,%o0 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f0 + set 0x1,%o0 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f1 + .end + + .inline __signaling_nanf,0 + set 0x7f800001,%o0 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f0 + .end + + .inline __r_signaling_nan_,0 + set 0x7f800001,%o0 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f0 + .end + +! Quiet NaN constants: every bit below the sign bit set +! (double 0x7fffffff ffffffff, single 0x7fffffff). + .inline __quiet_nan,0 + set 0x7fffffff,%o0 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f0 + set 0xffffffff,%o0 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f1 + .end + + .inline __d_quiet_nan_,0 + set 0x7fffffff,%o0 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f0 + set 0xffffffff,%o0 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f1 + .end + + .inline __quiet_nanf,0 + set 0x7fffffff,%o0 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f0 + .end + + .inline __r_quiet_nan_,0 + set 0x7fffffff,%o0 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f0 + .end + +! Swap the accrued-exception (aexc) field of %fsr, bits 9:5: install the +! low five bits of %o0 and return the previous field value in %o0. + .inline __swapEX,1 + and %o0,0x1f,%o1 + sll %o1,5,%o1 ! shift input to aexc bit location + .volatile + st %fsr,[%sp+0x44] + ld [%sp+0x44],%o0 ! o0 = fsr + andn %o0,0x3e0,%o2 + or %o1,%o2,%o1 ! 
o1 = new fsr + st %o1,[%sp+0x44] + ld [%sp+0x44],%fsr + srl %o0,5,%o0 + and %o0,0x1f,%o0 + .nonvolatile + .end + + .inline _QgetRD,0 + st %fsr,[%sp+0x44] + ld [%sp+0x44],%o0 ! o0 = fsr + srl %o0,30,%o0 ! return __round control value + .end + + .inline _QgetRP,0 + or %g0,%g0,%o0 + .end + + .inline __swapRD,1 + and %o0,0x3,%o0 + sll %o0,30,%o1 ! shift input to RD bit location + .volatile + st %fsr,[%sp+0x44] + ld [%sp+0x44],%o0 ! o0 = fsr + set 0xc0000000,%o4 ! mask of rounding direction bits + andn %o0,%o4,%o2 + or %o1,%o2,%o1 ! o1 = new fsr + st %o1,[%sp+0x44] + ld [%sp+0x44],%fsr + srl %o0,30,%o0 + and %o0,0x3,%o0 + .nonvolatile + .end +! +! On the SPARC, __swapRP is a no-op; always return 0 for backward compatibility +! + + .inline __swapRP,1 + or %g0,%g0,%o0 + .end + + .inline __swapTE,1 + and %o0,0x1f,%o0 + sll %o0,23,%o1 ! shift input to TEM bit location + .volatile + st %fsr,[%sp+0x44] + ld [%sp+0x44],%o0 ! o0 = fsr + set 0x0f800000,%o4 ! mask of TEM (Trap Enable Mode bits) + andn %o0,%o4,%o2 + or %o1,%o2,%o1 ! o1 = new fsr + st %o1,[%sp+0x48] + ld [%sp+0x48],%fsr + srl %o0,23,%o0 + and %o0,0x1f,%o0 + .nonvolatile + .end + + .inline __fp_class,2 + sethi %hi(0x80000000),%o2 ! o2 gets 80000000 + andn %o0,%o2,%o0 ! o0-o1 gets abs(x) + orcc %o0,%o1,%g0 ! set cc as x is zero/nonzero + bne 1f ! branch if x is nonzero + nop + mov 0,%o0 + ba 2f ! x is 0 + nop +1: + sethi %hi(0x7ff00000),%o2 ! o2 gets 7ff00000 + andcc %o0,%o2,%g0 ! cc set by __exp field of x + bne 1f ! branch if normal or max __exp + nop + mov 1,%o0 + ba 2f ! x is subnormal + nop +1: + cmp %o0,%o2 + bge 1f ! branch if x is max __exp + nop + mov 2,%o0 + ba 2f ! x is normal + nop +1: + andn %o0,%o2,%o0 ! o0 gets msw __significand field + orcc %o0,%o1,%g0 ! set cc by OR __significand + bne 1f ! Branch if __nan + nop + mov 3,%o0 + ba 2f ! x is __infinity + nop +1: + sethi %hi(0x00080000),%o2 + andcc %o0,%o2,%g0 ! set cc by quiet/sig bit + be 1f ! Branch if signaling + nop + mov 4,%o0 ! 
x is quiet NaN + ba 2f + nop +1: + mov 5,%o0 ! x is signaling NaN +2: + .end + + .inline __fp_classf,1 + sethi %hi(0x80000000),%o2 + andncc %o0,%o2,%o0 + bne 1f + nop + mov 0,%o0 + ba 2f ! x is 0 + nop +1: + sethi %hi(0x7f800000),%o2 + andcc %o0,%o2,%g0 + bne 1f + nop + mov 1,%o0 + ba 2f ! x is subnormal + nop +1: + cmp %o0,%o2 + bge 1f + nop + mov 2,%o0 + ba 2f ! x is normal + nop +1: + bg 1f + nop + mov 3,%o0 + ba 2f ! x is __infinity + nop +1: + sethi %hi(0x00400000),%o2 + andcc %o0,%o2,%g0 + mov 4,%o0 ! x is quiet NaN + bne 2f + nop + mov 5,%o0 ! x is signaling NaN +2: + .end + + .inline __ir_fp_class_,1 + ld [%o0],%o0 + sethi %hi(0x80000000),%o2 + andncc %o0,%o2,%o0 + bne 1f + nop + mov 0,%o0 + ba 2f ! x is 0 + nop +1: + sethi %hi(0x7f800000),%o2 + andcc %o0,%o2,%g0 + bne 1f + nop + mov 1,%o0 + ba 2f ! x is subnormal + nop +1: + cmp %o0,%o2 + bge 1f + nop + mov 2,%o0 + ba 2f ! x is normal + nop +1: + bg 1f + nop + mov 3,%o0 + ba 2f ! x is __infinity + nop +1: + sethi %hi(0x00400000),%o2 + andcc %o0,%o2,%g0 + mov 4,%o0 ! x is quiet NaN + bne 2f + nop + mov 5,%o0 ! 
x is signaling NaN +2: + .end + + .inline __copysign,4 + set 0x80000000,%o3 + and %o2,%o3,%o2 + andn %o0,%o3,%o0 + or %o0,%o2,%o0 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f0 + .end + + .inline __copysignf,2 + set 0x80000000,%o2 + andn %o0,%o2,%o0 + and %o1,%o2,%o1 + or %o0,%o1,%o0 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f0 + .end + + .inline __r_copysign_,2 + ld [%o0],%o0 + ld [%o1],%o1 + set 0x80000000,%o2 + andn %o0,%o2,%o0 + and %o1,%o2,%o1 + or %o0,%o1,%o0 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f0 + .end + + .inline _finite,2 + set 0x7ff00000,%o1 + and %o0,%o1,%o0 + cmp %o0,%o1 + mov 1,%o0 + bne 1f + nop + mov 0,%o0 +1: + .end + + .inline __finitef,2 + set 0x7f800000,%o1 + and %o0,%o1,%o0 + cmp %o0,%o1 + mov 1,%o0 + bne 1f + nop + mov 0,%o0 +1: + .end + + .inline __ir_finite_,1 + ld [%o0],%o0 + set 0x7f800000,%o1 + and %o0,%o1,%o0 + cmp %o0,%o1 + mov 1,%o0 + bne 1f + nop + mov 0,%o0 +1: + .end + + .inline __signbit,1 + srl %o0,31,%o0 + .end + + .inline __signbitf,1 + srl %o0,31,%o0 + .end + + .inline __ir_signbit_,1 + ld [%o0],%o0 + srl %o0,31,%o0 + .end + + .inline __isinf,2 + tst %o1 + sethi %hi(0x80000000),%o2 + bne 1f + nop + andn %o0,%o2,%o0 + sethi %hi(0x7ff00000),%o2 + cmp %o0,%o2 + mov 1,%o0 + be 2f + nop +1: + mov 0,%o0 +2: + .end + + .inline __isinff,1 + sethi %hi(0x80000000),%o2 + andn %o0,%o2,%o0 ! o0 gets abs(x) + sethi %hi(0x7f800000),%o2 + cmp %o0,%o2 + mov 0,%o0 + bne 1f ! Branch if not inf. + nop + mov 1,%o0 +1: + .end + + .inline __ir_isinf_,1 + ld [%o0],%o0 + sethi %hi(0x80000000),%o2 + andn %o0,%o2,%o0 ! o0 gets abs(x) + sethi %hi(0x7f800000),%o2 + cmp %o0,%o2 + mov 0,%o0 + bne 1f ! Branch if not inf. 
+ nop + mov 1,%o0 +1: + .end + + .inline __isnan,2 + sethi %hi(0x80000000),%o2 + andn %o0,%o2,%o0 + sub %g0,%o1,%o3 + or %o1,%o3,%o1 + srl %o1,31,%o1 + or %o0,%o1,%o0 + sethi %hi(0x7ff00000),%o4 + sub %o4,%o0,%o0 + srl %o0,31,%o0 + .end + + .inline __isnanf,1 + sethi %hi(0x80000000),%o2 + andn %o0,%o2,%o0 + sethi %hi(0x7f800000),%o1 + sub %o1,%o0,%o0 + srl %o0,31,%o0 + .end + + .inline __ir_isnan_,1 + ld [%o0],%o0 + sethi %hi(0x80000000),%o2 + andn %o0,%o2,%o0 + sethi %hi(0x7f800000),%o1 + sub %o1,%o0,%o0 + srl %o0,31,%o0 + .end + + .inline __isnormal,2 + sethi %hi(0x80000000),%o2 + andn %o0,%o2,%o0 + sethi %hi(0x7ff00000),%o2 + cmp %o0,%o2 + sethi %hi(0x00100000),%o2 + bge 1f + nop + cmp %o0,%o2 + mov 1,%o0 + bge 2f + nop +1: + mov 0,%o0 +2: + .end + + .inline __isnormalf,1 + sethi %hi(0x80000000),%o2 + andn %o0,%o2,%o0 + sethi %hi(0x7f800000),%o2 + cmp %o0,%o2 + sethi %hi(0x00800000),%o2 + bge 1f + nop + cmp %o0,%o2 + mov 1,%o0 + bge 2f + nop +1: + mov 0,%o0 +2: + .end + + .inline __ir_isnormal_,1 + ld [%o0],%o0 + sethi %hi(0x80000000),%o2 + andn %o0,%o2,%o0 + sethi %hi(0x7f800000),%o2 + cmp %o0,%o2 + sethi %hi(0x00800000),%o2 + bge 1f + nop + cmp %o0,%o2 + mov 1,%o0 + bge 2f + nop +1: + mov 0,%o0 +2: + .end + + .inline __issubnormal,2 + sethi %hi(0x80000000),%o2 ! o2 gets 80000000 + andn %o0,%o2,%o0 ! o0/o1 gets abs(x) + sethi %hi(0x00100000),%o2 ! o2 gets 00100000 + cmp %o0,%o2 + bge 1f ! branch if x norm or max __exp + nop + orcc %o0,%o1,%g0 + be 1f ! Branch if x zero + nop + mov 1,%o0 ! x is subnormal + ba 2f + nop +1: + mov 0,%o0 +2: + .end + + .inline __issubnormalf,1 + sethi %hi(0x80000000),%o2 ! o2 gets 80000000 + andn %o0,%o2,%o0 ! o0 gets abs(x) + sethi %hi(0x00800000),%o2 ! o2 gets 00800000 + cmp %o0,%o2 + bge 1f ! branch if x norm or max __exp + nop + orcc %o0,%g0,%g0 + be 1f ! Branch if x zero + nop + mov 1,%o0 ! x is subnormal + ba 2f + nop +1: + mov 0,%o0 +2: + .end + + .inline __ir_issubnormal_,1 + ld [%o0],%o0 + sethi %hi(0x80000000),%o2 ! 
o2 gets 80000000 + andn %o0,%o2,%o0 ! o0 gets abs(x) + sethi %hi(0x00800000),%o2 ! o2 gets 00800000 + cmp %o0,%o2 + bge 1f ! branch if x norm or max __exp + nop + orcc %o0,%g0,%g0 + be 1f ! Branch if x zero + nop + mov 1,%o0 ! x is subnormal + ba 2f + nop +1: + mov 0,%o0 +2: + .end + + .inline __iszero,2 + sethi %hi(0x80000000),%o2 + andn %o0,%o2,%o0 + orcc %o0,%o1,%g0 + mov 1,%o0 + be 1f + nop + mov 0,%o0 +1: + .end + + .inline __iszerof,1 + sethi %hi(0x80000000),%o2 + andncc %o0,%o2,%o0 + mov 1,%o0 + be 1f + nop + mov 0,%o0 +1: + .end + + .inline __ir_iszero_,1 + ld [%o0],%o0 + sethi %hi(0x80000000),%o2 + andncc %o0,%o2,%o0 + mov 1,%o0 + be 1f + nop + mov 0,%o0 +1: + .end + + .inline abs,1 + sra %o0,31,%o1 + xor %o0,%o1,%o0 + sub %o0,%o1,%o0 + .end + + .inline __fabs,2 + st %o0,[%sp+0x48] + st %o1,[%sp+0x4c] + ldd [%sp+0x48],%f0 + fabsd %f0,%f0 + .end + + .inline __fabsf,1 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f0 + fabss %f0,%f0 + .end + + .inline __r_fabs_,1 + ld [%o0],%f0 + fabss %f0,%f0 + .end +! +! __nintf - f77 NINT(REAL*4) +! 
+ + .inline __nintf,1 + srl %o0,30-7,%g1 + sethi %hi(0x7fffff),%o2 + st %o0,[%sp+0x44] + and %g1,0xff,%g1 + or %o2,%lo(0x7fffff),%o2 + sethi %hi(1<<22),%o4 + subcc %g1,127+31,%g0 + and %o0,%o2,%o3 + bl 0f + nop + sethi %hi(0xcf000000),%o2 + sethi %hi(0x80000000),%g1 + subcc %o0,%o2,%g0 + or %g1,%g0,%o0 + be 9f + nop + ld [%sp+0x44],%f0 + fstoi %f0,%f0 + st %f0,[%sp+0x44] + ld [%sp+0x44],%o0 + ba 9f + nop +0: + add %o4,%o4,%o5 + or %o3,%o5,%o3 + sra %o0,31-0,%o2 + subcc %g1,127,%g1 + srl %o4,%g1,%o4 + bge 1f + nop + subcc %g1,-1,%g0 + or %g0,0,%o0 + bne 2f + nop + or %g0,1,%o0 + ba 2f + nop +1: + add %o3,%o4,%o3 + or %g0,23,%o0 + subcc %o0,%g1,%o0 + bl 1f + nop + srl %o3,%o0,%o0 + ba 2f + nop +1: + sub %g0,%o0,%o0 + sll %o3,%o0,%o0 +2: + xor %o0,%o2,%o0 + and %o2,1,%o2 + add %o0,%o2,%o0 +9: + .end + + .inline __il_nint,1 + ld [%o0],%o0 + sra %o0,0,%o0 + srlx %o0,31-8,%g1 + or %g0,1,%o2 + sllx %o2,23-1,%o4 + and %g1,0xff,%g1 + sllx %o2,63-0,%o2 + subcc %g1,127+63,%g0 + bl 0f + nop + st %o0,[%sp+0x48] + ld [%sp+0x48],%f0 + fstox %f0,%f0 + std %f0,[%sp+0x48] + ldx [%sp+0x48],%o1 + ba 9f + nop +0: + add %o4,%o4,%o5 + srax %o2,63-23,%o2 + sub %g1,127+23,%o1 + xnor %o2,%g0,%o2 + and %o0,%o2,%o3 + or %o3,%o5,%o3 + srax %o0,63-0,%o2 + subcc %g1,127,%g1 + bge 1f + nop + subcc %g1,-1,%g0 + or %g0,0,%o0 + bne 2f + nop + or %g0,1,%o0 + ba 2f + nop +1: + brlz,pt %o1,3f + nop + sub %g1,23,%o0 + sllx %o3,%o0,%o0 + ba 2f + nop +3: + srlx %o4,%g1,%o4 + add %o3,%o4,%o3 + or %g0,23,%o0 + sub %o0,%g1,%o0 + srlx %o3,%o0,%o0 +2: + xor %o0,%o2,%o0 + sub %o0,%o2,%o1 +9: + srlx %o1,32,%o0 + .end +! +! __i_dnnt - f77 NINT(REAL*8) +! 
+ + .inline __i_dnnt,1 + ld [%o0],%o1 + sllx %o1,32,%o1 + ld [%o0+4],%o0 + or %o0,%o1,%o0 + srlx %o0,63-11,%g1 + or %g0,1,%o2 + stx %o0,[%sp+0x48] + sllx %o2,52-1,%o4 + and %g1,0x7ff,%g1 + sllx %o2,63-0,%o2 + subcc %g1,1023+32,%g0 + bl 0f + nop + ldd [%sp+0x48],%f0 + ba 8f + nop +0: + add %o4,%o4,%o5 + srax %o2,63-52,%o2 + sub %g1,1023+30,%o1 + xnor %o2,%g0,%o2 + and %o0,%o2,%o3 + or %o3,%o5,%o3 + srax %o0,63-0,%o2 + subcc %g1,1023,%g1 + bge 1f + nop + subcc %g1,-1,%g0 + or %g0,0,%o0 + bne 2f + nop + or %g0,1,%o0 + ba 2f + nop +1: + srlx %o4,%g1,%o4 + add %o3,%o4,%o3 + or %g0,52,%o0 + sub %o0,%g1,%o0 + srlx %o3,%o0,%o0 +2: + xor %o0,%o2,%o0 + sub %o0,%o2,%o0 + brlz,pt %o1,9f + nop + stx %o0,[%sp+0x48] + ldd [%sp+0x48],%f0 + fxtod %f0,%f0 +8: + fdtoi %f0,%f0 + st %f0,[%sp+0x44] + ld [%sp+0x44],%o0 +9: + .end + + .inline __il_dnnt,1 + ld [%o0],%o1 + sllx %o1,32,%o1 + ld [%o0+4],%o0 + or %o0,%o1,%o0 + srlx %o0,63-11,%g1 + or %g0,1,%o2 + sllx %o2,52-1,%o4 + and %g1,0x7ff,%g1 + sllx %o2,63-0,%o2 + subcc %g1,1023+63,%g0 + bl 0f + nop + stx %o0,[%sp+0x48] + ldd [%sp+0x48],%f0 + fdtox %f0,%f0 + std %f0,[%sp+0x48] + ldx [%sp+0x48],%o1 + ba 9f + nop +0: + add %o4,%o4,%o5 + srax %o2,63-52,%o2 + sub %g1,1023+52,%o1 + xnor %o2,%g0,%o2 + and %o0,%o2,%o3 + or %o3,%o5,%o3 + srax %o0,63-0,%o2 + subcc %g1,1023,%g1 + bge 1f + nop + subcc %g1,-1,%g0 + or %g0,0,%o0 + bne 2f + nop + or %g0,1,%o0 + ba 2f + nop +1: + brlz,pt %o1,3f + nop + sub %g1,52,%o0 + sllx %o3,%o0,%o0 + ba 2f + nop +3: + srlx %o4,%g1,%o4 + add %o3,%o4,%o3 + or %g0,52,%o0 + sub %o0,%g1,%o0 + srlx %o3,%o0,%o0 +2: + xor %o0,%o2,%o0 + sub %o0,%o2,%o1 +9: + srlx %o1,32,%o0 + .end + + .inline __anintf,1 + or %g0,1,%o1 + srl %o0,23,%g1 + and %g1,0xff,%g1 + sub %g0,%g1,%g1 + add %g1,0x95,%g1 + subcc %g1,23,%g0 + sll %o1,%g1,%o1 + sub %o1,1,%o2 + bcs 1f + nop + be 2f + nop + bl 3f + nop + sethi %hi(0x80000000),%o1 + and %o0,%o1,%o0 + ba 3f + nop +1: + and %o0,%o1,%o1 +2: + add %o0,%o1,%o0 + andn %o0,%o2,%o0 +3: + st 
%o0,[%sp+0x48] + ld [%sp+0x48],%f0 + .end + + .inline __anint,2 + sllx %o0,32,%o0 + or %o0,%o1,%o0 + or %g0,1,%o1 + srlx %o0,52,%g1 + and %g1,0x7ff,%g1 + sub %g0,%g1,%g1 + add %g1,0x432,%g1 + subcc %g1,52,%g0 + sllx %o1,%g1,%o1 + sub %o1,1,%o2 + bcs,pt %icc,1f + nop + be,pt %icc,2f + nop + bl,pt %icc,3f + nop + srlx %o0,63,%o0 + sllx %o0,63,%o0 + ba 3f + nop +1: + and %o0,%o1,%o1 +2: + add %o0,%o1,%o0 + andn %o0,%o2,%o0 +3: + stx %o0,[%sp+0x48] + ldd [%sp+0x48],%f0 + .end + + .inline __Fz_minus,3 + ld [%o1],%f0 + ld [%o1+0x4],%f1 + ld [%o2],%f4 + ld [%o2+0x4],%f5 + fsubd %f0,%f4,%f0 + ld [%o1+8],%f2 + ld [%o1+0xc],%f3 + ld [%o2+8],%f6 + ld [%o2+0xc],%f7 + fsubd %f2,%f6,%f2 + st %f0,[%o0+0x0] + st %f1,[%o0+0x4] + st %f2,[%o0+0x8] + st %f3,[%o0+0xc] + .end + + .inline __Fz_add,3 + ld [%o1],%f0 + ld [%o1+0x4],%f1 + ld [%o2],%f4 + ld [%o2+0x4],%f5 + faddd %f0,%f4,%f0 + ld [%o1+8],%f2 + ld [%o1+0xc],%f3 + ld [%o2+8],%f6 + ld [%o2+0xc],%f7 + faddd %f2,%f6,%f2 + st %f0,[%o0+0x0] + st %f1,[%o0+0x4] + st %f2,[%o0+0x8] + st %f3,[%o0+0xc] + .end + + .inline __Fz_neg,2 + ld [%o1],%f0 + fnegs %f0,%f0 + ld [%o1+0x4],%f1 + st %f1,[%o0+0x4] + ld [%o1+8],%f2 + fnegs %f2,%f2 + ld [%o1+0xc],%f3 + st %f3,[%o0+0xc] + st %f0,[%o0] + st %f2,[%o0+0x8] + .end + + .inline __Ff_conv_z,2 + st %o1,[%sp+0x44] + ld [%sp+0x44],%f0 + fstod %f0,%f0 + st %g0,[%o0+0x8] + st %g0,[%o0+0xc] + st %f1,[%o0+0x4] + st %f0,[%o0] + .end + + .inline __Fz_conv_f,1 + ld [%o0],%f0 + ld [%o0+4],%f1 + fdtos %f0,%f0 + .end + + .inline __Fz_conv_i,1 + ld [%o0],%f0 + ld [%o0+4],%f1 + fdtoi %f0,%f0 + st %f0,[%sp+0x44] + ld [%sp+0x44],%o0 + .end + + .inline __Fi_conv_z,2 + st %o1,[%sp+0x44] + ld [%sp+0x44],%f0 + fitod %f0,%f0 + st %g0,[%o0+0x8] + st %g0,[%o0+0xc] + st %f1,[%o0+0x4] + st %f0,[%o0] + .end + + .inline __Fz_conv_d,1 + ld [%o0],%f0 + ld [%o0+4],%f1 + .end + + .inline __Fd_conv_z,3 + st %o1,[%o0] + st %o2,[%o0+0x4] + st %g0,[%o0+0x8] + st %g0,[%o0+0xc] + .end + + .inline __Fz_conv_c,2 + ldd [%o1],%f0 + fdtos 
%f0,%f0 + st %f0,[%o0] + ldd [%o1+0x8],%f2 + fdtos %f2,%f1 + st %f1,[%o0+0x4] + .end + + .inline __Fz_eq,2 + ld [%o0],%f0 + ld [%o0+4],%f1 + ld [%o1],%f2 + ld [%o1+4],%f3 + fcmpd %f0,%f2 + mov %o0,%o2 + mov 0,%o0 + fbne 1f + nop + ld [%o2+8],%f0 + ld [%o2+12],%f1 + ld [%o1+8],%f2 + ld [%o1+12],%f3 + fcmpd %f0,%f2 + nop + fbne 1f + nop + mov 1,%o0 +1: + .end + + .inline __Fz_ne,2 + ld [%o0],%f0 + ld [%o0+4],%f1 + ld [%o1],%f2 + ld [%o1+4],%f3 + fcmpd %f0,%f2 + mov %o0,%o2 + mov 1,%o0 + fbne 1f + nop + ld [%o2+8],%f0 + ld [%o2+12],%f1 + ld [%o1+8],%f2 + ld [%o1+12],%f3 + fcmpd %f0,%f2 + nop + fbne 1f + nop + mov 0,%o0 +1: + .end + + .inline __c_cmplx,3 + ld [%o1],%o1 + st %o1,[%o0] + ld [%o2],%o2 + st %o2,[%o0+4] + .end + + .inline __d_cmplx,3 + ld [%o1],%f0 + st %f0,[%o0] + ld [%o1+4],%f1 + st %f1,[%o0+4] + ld [%o2],%f0 + st %f0,[%o0+0x8] + ld [%o2+4],%f1 + st %f1,[%o0+0xc] + .end + + .inline __r_cnjg,2 + ld [%o1+0x4],%f1 + fnegs %f1,%f1 + ld [%o1],%f0 + st %f0,[%o0] + st %f1,[%o0+4] + .end + + .inline __d_cnjg,2 + ld [%o1+0x8],%f0 + fnegs %f0,%f0 + ld [%o1+0xc],%f1 + st %f1,[%o0+0xc] + ld [%o1+0x0],%f1 + st %f1,[%o0+0x0] + ld [%o1+0x4],%f1 + st %f1,[%o0+0x4] + st %f0,[%o0+0x8] + .end + + .inline __r_dim,2 + st %g0,[%sp+0x48] + ld [%sp+0x48],%f4 + ld [%o0],%f0 + ld [%o1],%f2 + fcmps %fcc0,%f0,%f2 + fmovsule %fcc0,%f4,%f2 + fsubs %f0,%f2,%f0 + fmovsule %fcc0,%f4,%f0 + .end + + .inline __d_dim,2 + stx %g0,[%sp+0x48] + ldd [%sp+0x48],%f4 + ld [%o0],%f0 + ld [%o0+4],%f1 + ld [%o1],%f2 + ld [%o1+4],%f3 + fcmpd %fcc0,%f0,%f2 + fmovdule %fcc0,%f4,%f2 + fsubd %f0,%f2,%f0 + fmovdule %fcc0,%f4,%f0 + .end + + .inline __r_imag,1 + ld [%o0+4],%f0 + .end + + .inline __d_imag,1 + ld [%o0+8],%f0 + ld [%o0+0xc],%f1 + .end + + .inline __f95_signf,2 + ld [%o0],%f0 + ld [%o1],%o1 + fabss %f0,%f0 + fnegs %f0,%f1 + sra %o1,0,%o1 + fmovrslz %o1,%f1,%f0 + .end + + .inline __f95_sign,2 + ld [%o0],%f0 + ld [%o0+4],%f1 + ld [%o1],%o1 + fabsd %f0,%f0 + fnegd %f0,%f2 + sra %o1,0,%o1 + fmovrdlz 
%o1,%f2,%f0 + .end + +! Result is |*a| carrying the sign of *b. x & (x - 1) leaves the sign bit +! set for every negative bit pattern except 0x80000000 (-0.0), so a +! negative zero in *b selects the positive result. + .inline __r_sign,2 + ld [%o0],%f0 + ld [%o1],%o1 + fabss %f0,%f0 + fnegs %f0,%f1 + sub %o1,1,%o0 + and %o1,%o0,%o1 + sra %o1,0,%o1 + fmovrslz %o1,%f1,%f0 + .end + +! Double-precision counterpart of __r_sign: the two words of *b are merged +! into one 64-bit value before the same x & (x - 1) sign test. + .inline __d_sign,2 + ld [%o0],%f0 + ld [%o0+4],%f1 + ld [%o1],%o0 + sllx %o0,32,%o0 + ld [%o1+4],%o1 + or %o1,%o0,%o1 + fabsd %f0,%f0 + fnegd %f0,%f2 + sub %o1,1,%o0 + and %o1,%o0,%o1 + fmovrdlz %o1,%f2,%f0 + .end + +! Double-complex product: all eight operand words are loaded before the +! first store to the result. + .inline __Fz_mult,3 + ld [%o1],%f0 + ld [%o1+0x4],%f1 + ld [%o2],%f4 + ld [%o2+0x4],%f5 + fmuld %f0,%f4,%f8 ! f8 = r1*r2 + ld [%o1+0x8],%f2 + ld [%o1+0xc],%f3 + ld [%o2+0x8],%f6 + ld [%o2+0xc],%f7 + fmuld %f2,%f6,%f10 ! f10= i1*i2 + fsubd %f8,%f10,%f12 ! f12= r1*r2-i1*i2 + st %f12,[%o0] + st %f13,[%o0+4] + fmuld %f0,%f6,%f14 ! f14= r1*i2 + fmuld %f2,%f4,%f16 ! f16= r2*i1 + faddd %f14,%f16,%f2 ! f2 = r1*i2+r2*i1 + st %f2,[%o0+8] + st %f3,[%o0+12] + .end +!- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +! void +! __Fc_minus(c, a, b) +! complex *c, *a, *b; +! { + + .inline __Fc_minus,3 +! 30 c->real = a->real - b->real + ld [%o1],%f0 + ld [%o2],%f1 + fsubs %f0,%f1,%f2 +! 31 c->imag = a->imag - b->imag + ld [%o1+4],%f3 + ld [%o2+4],%f4 + fsubs %f3,%f4,%f5 + st %f2,[%o0] + st %f5,[%o0+4] + .end +! } +!- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +! void +! __Fc_add(c, a, b) +! complex *c, *a, *b; +! { + + .inline __Fc_add,3 +! 39 c->real = a->real + b->real + ld [%o1],%f0 + ld [%o2],%f1 + fadds %f0,%f1,%f2 +! 40 c->imag = a->imag + b->imag + ld [%o1+4],%f3 + ld [%o2+4],%f4 + fadds %f3,%f4,%f5 + st %f2,[%o0] + st %f5,[%o0+4] + .end +! } +!- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +! void +! __Fc_neg(c, a) +! complex *c, *a; +! { + + .inline __Fc_neg,2 +! 48 c->real = - a->real + ld [%o1],%f0 + fnegs %f0,%f1 +! 49 c->imag = - a->imag + ld [%o1+4],%f2 + fnegs %f2,%f3 + st %f1,[%o0] + st %f3,[%o0+4] + .end +! } +!- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +! 
void +! __Ff_conv_c(c, x) +! complex *c; +! FLOATPARAMETER x; +! { + + .inline __Ff_conv_c,2 +! 59 c->real = x + st %o1,[%o0] +! 60 c->imag = 0.0 + st %g0,[%o0+4] + .end +! } +!- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +! FLOATFUNCTIONTYPE +! __Fc_conv_f(c) +! complex *c; +! { + + .inline __Fc_conv_f,1 +! 69 RETURNFLOAT(c->real) + ld [%o0],%f0 + .end +! } +!- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +! int +! __Fc_conv_i(c) +! complex *c; +! { + + .inline __Fc_conv_i,1 +! 78 return (int)c->real + ld [%o0],%f0 + fstoi %f0,%f1 + st %f1,[%sp+68] + ld [%sp+68],%o0 + .end +! } +!- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +! void +! __Fi_conv_c(c, i) +! complex *c; +! int i; +! { + + .inline __Fi_conv_c,2 +! 88 c->real = (float)i + st %o1,[%sp+68] + ld [%sp+68],%f0 + fitos %f0,%f1 + st %f1,[%o0] +! 89 c->imag = 0.0 + st %g0,[%o0+4] + .end +! } +!- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +! double +! __Fc_conv_d(c) +! complex *c; +! { + + .inline __Fc_conv_d,1 +! 98 return (double)c->real + ld [%o0],%f2 + fstod %f2,%f0 + .end +! } +!- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +! void +! __Fd_conv_c(c, x) +! complex *c; +! double x; +! { + + .inline __Fd_conv_c,2 + st %o1,[%sp+72] + st %o2,[%sp+76] +! 109 c->real = (float)(x) + ldd [%sp+72],%f0 + fdtos %f0,%f1 + st %f1,[%o0] +! 110 c->imag = 0.0 + st %g0,[%o0+4] + .end +! } +!- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +! void +! __Fc_conv_z(result, c) +! dcomplex *result; +! complex *c; +! { + + .inline __Fc_conv_z,2 +! 120 result->dreal = (double)c->real + ld [%o1],%f0 + fstod %f0,%f2 + st %f2,[%o0] + st %f3,[%o0+4] +! 121 result->dimag = (double)c->imag + ld [%o1+4],%f3 + fstod %f3,%f4 + st %f4,[%o0+8] + st %f5,[%o0+12] + .end +! } +!- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +! int +! __Fc_eq(x, y) +! 
complex *x, *y; +! { + + .inline __Fc_eq,2 +! return (x->real == y->real) && (x->imag == y->imag); + ld [%o0],%f0 + ld [%o1],%f2 + mov %o0,%o2 + fcmps %f0,%f2 + mov 0,%o0 + fbne 1f + nop + ld [%o2+4],%f0 + ld [%o1+4],%f2 + fcmps %f0,%f2 + nop + fbne 1f + nop + mov 1,%o0 +1: + .end +! } +!- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +! int +! __Fc_ne(x, y) +! complex *x, *y; +! { + + .inline __Fc_ne,2 +! return (x->real != y->real) || (x->imag != y->imag); + ld [%o0],%f0 + ld [%o1],%f2 + mov %o0,%o2 + fcmps %f0,%f2 + mov 1,%o0 + fbne 1f + nop + ld [%o2+4],%f0 + ld [%o1+4],%f2 + fcmps %f0,%f2 + nop + fbne 1f + nop + mov 0,%o0 +1: + .end +! } diff --git a/usr/src/lib/libm/sparc/src/nextafter.S b/usr/src/lib/libm/sparc/src/nextafter.S new file mode 100644 index 0000000000..e23b4193ee --- /dev/null +++ b/usr/src/lib/libm/sparc/src/nextafter.S @@ -0,0 +1,124 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + + .file "nextafter.S" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(nextafter,function) + .weak _nextafter + .type _nextafter,#function +_nextafter = __nextafter +#include "libm_synonyms.h" +#include "libm_protos.h" + +#if defined(LIBM_BUILD) && !defined(ELFOBJ) +#define mENTRY(x) ENTRY(__libm/**/x) +#define mNAME(x) NAME(__libm/**/x) +#else +#define mENTRY(x) ENTRY(x) +#define mNAME(x) NAME(x) +#endif + + RO_DATA + .align 8 +.Lconstant: +two54 = 0x00 + .word 0x43500000,0x0 ! 2**54 +twom54 = 0x08 + .word 0x3c900000,0x0 ! 2**-54 +tiny = 0x10 + .word 0x00100000,0x0 ! tiny + +! variable using fp +x = -0x8 +y = -0x10 + + ENTRY(nextafter) + save %sp,-128,%sp + PIC_SETUP(l7) + std %i0,[%fp+x] + or %g0,%i0,%o0 ! save original arguments + or %g0,%i1,%o1 + std %i2,[%fp+y] + or %g0,%i2,%o2 + or %g0,%i3,%o3 + ldd [%fp+x],%f2 ! x + ldd [%fp+y],%f0 ! y + fcmpd %f2,%f0 ! x:y + PIC_SET(l7,.Lconstant,l0) + sethi %hi(0x80000000),%l1 + andn %i0,%l1,%l4 + fbe 9f ! next_return + nop + fbu,a 9f ! next_return + fmuld %f2,%f0,%f0 ! + -> * for Cheetah + orcc %i1,%l4,%g0 ! see if x is zero + bne 1f + tst %i0 + ! x is zero, return sign(y)*min + and %i2,%l1,%i0 + ba 4f ! next_final + mov 1,%i1 +1: bge 2f + nop + ! x is negative + fbl 1f ! next_subulp + nop + fbg 3f ! next_addulp + nop +2: + fbl 3f ! next_addulp + nop +1: ! next_subulp + subcc %i1,1,%i1 + ba 4f ! next_final + subx %i0,0,%i0 +3: ! next_addulp + addcc %i1,1,%i1 + addx %i0,0,%i0 +4: ! next_final + sethi %hi(0x7ff00000),%l3 + std %i0,[%fp+x] + andcc %i0,%l3,%i2 + be,a 1f ! xflow + ldd [%l0+tiny],%f2 + cmp %i2,%l3 + bne,a 9f ! next_return + ldd [%fp+x],%f0 + call mNAME(_SVID_libm_err) ! overflow + or %g0,46,%o4 + ba 9f + nop +1: ! xflow + fmuld %f2,%f2,%f2 + ldd [%fp+x],%f0 +9: ! 
next_return + ret + restore + + SET_SIZE(nextafter) diff --git a/usr/src/lib/libm/sparcv9/Makefile b/usr/src/lib/libm/sparcv9/Makefile new file mode 100644 index 0000000000..91181acd63 --- /dev/null +++ b/usr/src/lib/libm/sparcv9/Makefile @@ -0,0 +1,25 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2011 Nexenta Systems, Inc. All rights reserved. +# + +TARGET_ARCH= sparcv9 + +include ../Makefile.com +include $(SRC)/lib/Makefile.lib.64 + +CHIP = ultra + +install: all $(ROOTLIBS64) $(ROOTLINKS64) + +include ../Makefile.targ diff --git a/usr/src/lib/libm/sparcv9/src/libm_inlines.h b/usr/src/lib/libm/sparcv9/src/libm_inlines.h new file mode 100644 index 0000000000..916cc6ba87 --- /dev/null +++ b/usr/src/lib/libm/sparcv9/src/libm_inlines.h @@ -0,0 +1,298 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * Copyright 2011, Richard Lowe. + */ + +/* Functions in this file are duplicated in locallibm.il. Keep them in sync */ + +#ifndef _LIBM_INLINES_H +#define _LIBM_INLINES_H + +#ifdef __GNUC__ + +#include <sys/types.h> +#include <sys/ieeefp.h> + +#ifdef __cplusplus +extern "C" { +#endif +/* + * Classify f from the bit pattern of |f|. The asm leaves 0 in the result + * for zero, 1 for subnormal, 2 for normal, 3 for infinity, 4 for a quiet NaN + * and 5 for a signalling NaN; the matching fp_class_type names presumably + * come from <sys/ieeefp.h> (not visible here -- confirm). + */ +extern __inline__ enum fp_class_type +fp_classf(float f) +{ + enum fp_class_type ret; + int fint; /* scratch for f as int */ + uint64_t tmp; + + __asm__ __volatile__( + "fabss %3,%3\n\t" + "st %3,%1\n\t" + "ld %1,%0\n\t" + "orcc %%g0,%0,%%g0\n\t" + "be,pn %%icc,2f\n\t" + "nop\n\t" + "1:\n\t" + "sethi %%hi(0x7f800000),%2\n\t" + "andcc %0,%2,%%g0\n\t" + "bne,pt %%icc,1f\n\t" + "nop\n\t" + "or %%g0,1,%0\n\t" + "ba 2f\n\t" /* subnormal */ + "nop\n\t" + "1:\n\t" + "subcc %0,%2,%%g0\n\t" + "bge,pn %%icc,1f\n\t" + "nop\n\t" + "or %%g0,2,%0\n\t" + "ba 2f\n\t" /* normal */ + "nop\n\t" + "1:\n\t" + "bg,pn %%icc,1f\n\t" + "nop\n\t" + "or %%g0,3,%0\n\t" + "ba 2f\n\t" /* infinity */ + "nop\n\t" + "1:\n\t" + "sethi %%hi(0x00400000),%2\n\t" + "andcc %0,%2,%%g0\n\t" + "or %%g0,4,%0\n\t" + "bne,pt %%icc,2f\n\t" /* quiet NaN */ + "nop\n\t" + "or %%g0,5,%0\n\t" /* signalling NaN */ + "2:\n\t" + : "=r" (ret), "=m" (fint), "=r" (tmp), "+f" (f) + : + : "cc"); + + return (ret); +} +/* + * Double-precision form of the classification above, done with 64-bit + * integer tests on the bit pattern of |d|: 0 zero, 1 subnormal, 2 normal, + * 3 infinity, 4 quiet NaN (bit 51 set), 5 signalling NaN. + */ +extern __inline__ enum fp_class_type +fp_class(double d) +{ + enum fp_class_type ret; + uint64_t dint; /* Scratch for d-as-long */ + uint64_t tmp; + + __asm__ __volatile__( + "fabsd %3,%3\n\t" + "std %3,%1\n\t" + "ldx %1,%0\n\t" + "orcc %%g0,%0,%%g0\n\t" + "be,pn %%xcc,2f\n\t" + "nop\n\t" + "sethi %%hi(0x7ff00000),%2\n\t" + "sllx %2,32,%2\n\t" + "andcc %0,%2,%%g0\n\t" + 
"bne,pt %%xcc,1f\n\t" + "nop\n\t" + "or %%g0,1,%0\n\t" + "ba 2f\n\t" + "nop\n\t" + "1:\n\t" + "subcc %0,%2,%%g0\n\t" + "bge,pn %%xcc,1f\n\t" + "nop\n\t" + "or %%g0,2,%0\n\t" + "ba 2f\n\t" + "nop\n\t" + "1:\n\t" + "andncc %0,%2,%0\n\t" + "bne,pn %%xcc,1f\n\t" + "nop\n\t" + "or %%g0,3,%0\n\t" + "ba 2f\n\t" + "nop\n\t" + "1:\n\t" + "sethi %%hi(0x00080000),%2\n\t" + "sllx %2,32,%2\n\t" + "andcc %0,%2,%%g0\n\t" + "or %%g0,4,%0\n\t" + "bne,pt %%xcc,2f\n\t" + "nop\n\t" + "or %%g0,5,%0\n\t" + "2:\n\t" + : "=r" (ret), "=m" (dint), "=r" (tmp), "+e" (d) + : + : "cc"); + + return (ret); +} +/* Single-precision square root using the fsqrts instruction. */ +extern __inline__ float +__inline_sqrtf(float f) +{ + float ret; + + __asm__ __volatile__("fsqrts %1,%0\n\t" : "=f" (ret) : "f" (f)); + return (ret); +} +/* Double-precision square root using the fsqrtd instruction. */ +extern __inline__ double +__inline_sqrt(double d) +{ + double ret; + + __asm__ __volatile__("fsqrtd %1,%0\n\t" : "=f" (ret) : "f" (d)); + return (ret); +} +/* + * Install the low five bits of i into %fsr bits 5-9 (the aexc field named in + * the asm comment) and return the previous contents of that field. + */ +extern __inline__ int +__swapEX(int i) +{ + int ret; + uint32_t fsr; + uint64_t tmp1, tmp2; + + __asm__ __volatile__( + "and %4,0x1f,%2\n\t" + "sll %2,5,%2\n\t" /* shift input to aexc bit location */ + ".volatile\n\t" + "st %%fsr,%1\n\t" + "ld %1,%0\n\t" /* %0 = fsr */ + "andn %0,0x3e0,%3\n\t" + "or %2,%3,%2\n\t" /* %2 = new fsr */ + "st %2,%1\n\t" + "ld %1,%%fsr\n\t" + "srl %0,5,%0\n\t" + "and %0,0x1f,%0\n\t" + ".nonvolatile\n\t" + : "=r" (ret), "=m" (fsr), "=r" (tmp1), "=r" (tmp2) + : "r" (i) + : "cc"); + + return (ret); +} + +/* + * On the SPARC, __swapRP is a no-op; always return 0 for backward + * compatibility + */ +/* ARGSUSED */ +extern __inline__ enum fp_precision_type +__swapRP(enum fp_precision_type i) +{ + return (0); +} +/* + * Install the low two bits of d into %fsr bits 30-31 (the RD field named in + * the asm comments) and return the previous contents of that field. + */ +extern __inline__ enum fp_direction_type +__swapRD(enum fp_direction_type d) +{ + enum fp_direction_type ret; + uint32_t fsr; + uint64_t tmp1, tmp2, tmp3; + + __asm__ __volatile__( + "and %5,0x3,%0\n\t" + "sll %0,30,%2\n\t" /* shift input to RD bit location */ + ".volatile\n\t" + "st %%fsr,%1\n\t" + "ld %1,%0\n\t" /* %0 = fsr */ + /* mask of 
rounding direction bits */ + "sethi %%hi(0xc0000000),%4\n\t" + "andn %0,%4,%3\n\t" + "or %2,%3,%2\n\t" /* %2 = new fsr */ + "st %2,%1\n\t" + "ld %1,%%fsr\n\t" + "srl %0,30,%0\n\t" + "and %0,0x3,%0\n\t" + ".nonvolatile\n\t" + : "=r" (ret), "=m" (fsr), "=r" (tmp1), "=r" (tmp2), "=r" (tmp3) + : "r" (d) + : "cc"); + + return (ret); +} +/* + * Install the low five bits of i into %fsr bits 23-27 (the TEM field named in + * the asm comments) and return the previous contents of that field. + */ +extern __inline__ int +__swapTE(int i) +{ + int ret; + uint32_t fsr; + uint64_t tmp1, tmp2, tmp3; + + __asm__ __volatile__( + "and %5,0x1f,%0\n\t" + "sll %0,23,%2\n\t" /* shift input to TEM bit location */ + ".volatile\n\t" + "st %%fsr,%1\n\t" + "ld %1,%0\n\t" /* %0 = fsr */ + /* mask of TEM (Trap Enable Mode bits) */ + "sethi %%hi(0x0f800000),%4\n\t" + "andn %0,%4,%3\n\t" + "or %2,%3,%2\n\t" /* %2 = new fsr */ + "st %2,%1\n\t" + "ld %1,%%fsr\n\t" + "srl %0,23,%0\n\t" + "and %0,0x1f,%0\n\t" + ".nonvolatile\n\t" + : "=r" (ret), "=m" (fsr), "=r" (tmp1), "=r" (tmp2), "=r" (tmp3) + : "r" (i) + : "cc"); + + return (ret); +} + +/* Inline sqrt() that forwards to __inline_sqrt(). */ +extern __inline__ double +sqrt(double d) +{ + return (__inline_sqrt(d)); +} +/* Inline sqrtf() that forwards to __inline_sqrtf(). */ +extern __inline__ float +sqrtf(float f) +{ + return (__inline_sqrtf(f)); +} +/* Double-precision absolute value using the fabsd instruction. */ +extern __inline__ double +fabs(double d) +{ + double ret; + + __asm__ __volatile__("fabsd %1,%0\n\t" : "=e" (ret) : "e" (d)); + return (ret); +} +/* Single-precision absolute value using the fabss instruction. */ +extern __inline__ float +fabsf(float f) +{ + float ret; + + __asm__ __volatile__("fabss %1,%0\n\t" : "=f" (ret) : "f" (f)); + return (ret); +} + +#ifdef __cplusplus +} +#endif + +#endif /* __GNUC__ */ + +#endif /* _LIBM_INLINES_H */ diff --git a/usr/src/lib/libm/sparcv9/src/locallibm.il b/usr/src/lib/libm/sparcv9/src/locallibm.il new file mode 100644 index 0000000000..dcef23826a --- /dev/null +++ b/usr/src/lib/libm/sparcv9/src/locallibm.il @@ -0,0 +1,1075 @@ +! +! CDDL HEADER START +! +! The contents of this file are subject to the terms of the +! Common Development and Distribution License (the "License"). +! You may not use this file except in compliance with the License. +! +! 
You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +! or http://www.opensolaris.org/os/licensing. +! See the License for the specific language governing permissions +! and limitations under the License. +! +! When distributing Covered Code, include this CDDL HEADER in each +! file and include the License file at usr/src/OPENSOLARIS.LICENSE. +! If applicable, add the following below this CDDL HEADER, with the +! fields enclosed by brackets "[]" replaced with your own identifying +! information: Portions Copyright [yyyy] [name of copyright owner] +! +! CDDL HEADER END +! +! Copyright 2011 Nexenta Systems, Inc. All rights reserved. +! +! Copyright 2006 Sun Microsystems, Inc. All rights reserved. +! Use is subject to license terms. +! + +! Portions of this file are duplicated as GCC inline assembly in +! libm_inlines.h. Keep them in sync. + + .inline __ieee754_sqrt,1 + fsqrtd %f0,%f0 + .end + + .inline __inline_sqrtf,1 + fsqrts %f1,%f0 + .end + + .inline __inline_sqrt,1 + fsqrtd %f0,%f0 + .end + + .inline __sqrtf,1 + fsqrts %f1,%f0 + .end + + .inline __sqrt,1 + fsqrtd %f0,%f0 + .end + + .inline __ceil,1 + sethi %hi(0x43300000),%o0 + sllx %o0,32,%o0 + stx %o0,[%sp+0x87f] + ldd [%sp+0x87f],%f2 + fabsd %f0,%f4 + fsubd %f2,%f2,%f6 + fcmpd %fcc0,%f4,%f2 + fbl,pt %fcc0,1f + nop + sethi %hi(0x3ff00000),%o0 + sllx %o0,32,%o0 + stx %o0,[%sp+0x87f] + ldd [%sp+0x87f],%f6 + fmuld %f0,%f6,%f0 + ba 4f + nop +1: + fcmpd %fcc1,%f0,%f6 + fbg,pt %fcc1,2f + nop + fbe,pn %fcc1,4f + nop + fnegd %f2,%f2 +2: + faddd %f0,%f2,%f4 + fsubd %f4,%f2,%f4 + fcmpd %fcc0,%f4,%f0 + fbge,pt %fcc0,3f + nop + sethi %hi(0x3ff00000),%o0 + st %o0,[%sp+0x87f] + ldd [%sp+0x87f],%f2 + faddd %f4,%f2,%f4 +3: + fabsd %f4,%f0 + fbge,pt %fcc1,4f + nop + fnegd %f0,%f0 +4: + .end + + .inline __floor,1 + sethi %hi(0x43300000),%o0 + sllx %o0,32,%o0 + stx %o0,[%sp+0x87f] + ldd [%sp+0x87f],%f2 + fabsd %f0,%f4 + fsubd %f2,%f2,%f6 + fcmpd %fcc0,%f4,%f2 + fbl,pt %fcc0,1f + nop + sethi %hi(0x3ff00000),%o0 + sllx %o0,32,%o0 + 
stx %o0,[%sp+0x87f] + ldd [%sp+0x87f],%f6 + fmuld %f0,%f6,%f0 + ba 4f + nop +1: + fcmpd %fcc1,%f0,%f6 + fbg,pt %fcc1,2f + nop + fbe,pn %fcc1,4f + nop + fnegd %f2,%f2 +2: + faddd %f0,%f2,%f4 + fsubd %f4,%f2,%f4 + fcmpd %fcc0,%f4,%f0 + fble,pt %fcc0,3f + nop + sethi %hi(0x3ff00000),%o0 + st %o0,[%sp+0x87f] + ldd [%sp+0x87f],%f2 + fsubd %f4,%f2,%f4 +3: + fabsd %f4,%f0 + fbge,pt %fcc1,4f + nop + fnegd %f0,%f0 +4: + .end + + .inline __ilogb,1 + st %f0,[%sp+0x87f] + ld [%sp+0x87f],%o0 + sethi %hi(0x7ff00000),%o1 + andcc %o0,%o1,%o0 + bne,pt %icc,2f + nop + sethi %hi(0x43500000),%o0 + sllx %o0,32,%o0 + stx %o0,[%sp+0x87f] + ldd [%sp+0x87f],%f2 + fmuld %f0,%f2,%f0 + st %f0,[%sp+0x87f] + ld [%sp+0x87f],%o0 + andcc %o0,%o1,%o0 + bne,pt %icc,1f + nop + sethi %hi(0x80000001),%o0 + or %o0,%lo(0x80000001),%o0 + ba 4f + nop +1: + srl %o0,20,%o0 + sub %o0,0x435,%o0 + ba 4f + nop +2: + subcc %o1,%o0,%g0 + bne,pt %icc,3f + nop + sethi %hi(0x7fffffff),%o0 + or %o0,%lo(0x7fffffff),%o0 + ba 4f + nop +3: + srl %o0,20,%o0 + sub %o0,0x3ff,%o0 +4: + .end + + .inline __rint,1 + std %f0,[%sp+0x87f] + ldx [%sp+0x87f],%o0 + sethi %hi(0x80000000),%o2 + sllx %o2,32,%o2 + andn %o0,%o2,%o2 + sethi %hi(0x43300000),%o3 + sllx %o3,32,%o3 + stx %g0,[%sp+0x887] + subcc %o2,%o3,%g0 + bl,pt %xcc,1f + nop + sethi %hi(0x3ff00000),%o2 + sllx %o2,32,%o2 + stx %o2,[%sp+0x887] + ldd [%sp+0x887],%f2 + fmuld %f0,%f2,%f0 + ba 3f + nop +1: + orcc %o0,0,%g0 + stx %o3,[%sp+0x87f] + ldd [%sp+0x87f],%f2 + bge,pt %xcc,2f + nop + fnegd %f2,%f2 +2: + faddd %f0,%f2,%f0 + fcmpd %f0,%f2 + fbne,pt %fcc0,0f + nop + ldd [%sp+0x887],%f0 + bge,pt %xcc,3f + nop + fnegd %f0,%f0 + ba 3f + nop +0: + fsubd %f0,%f2,%f0 +3: + .end + + .inline __rintf,1 + st %f1,[%sp+0x87f] + ld [%sp+0x87f],%o0 + sethi %hi(0x80000000),%o2 + andn %o0,%o2,%o2 + sethi %hi(0x4b000000),%o3 + st %g0,[%sp+0x887] + subcc %o2,%o3,%g0 + bl 1f + nop + sethi %hi(0x3f800000),%o2 + st %o2,[%sp+0x887] + ld [%sp+0x887],%f2 + fmuls %f1,%f2,%f0 + ba 3f + nop +1: + tst 
%o0 + st %o3,[%sp+0x87f] + ld [%sp+0x87f],%f2 + bge 2f + nop + fnegs %f2,%f2 +2: + fadds %f1,%f2,%f0 + fcmps %f0,%f2 + fbne 0f + nop + ld [%sp+0x887],%f0 + bge 3f + nop + fnegs %f0,%f0 + ba 3f + nop +0: + fsubs %f0,%f2,%f0 +3: + .end +! %f0 = double with bit pattern 0x0000000000000001 + .inline __min_subnormal,1 + or %g0,1,%o0 + stx %o0,[%sp+0x87f] + ldd [%sp+0x87f],%f0 + .end +! %f0 = float with bit pattern 0x00000001 + .inline __min_subnormalf,1 + or %g0,1,%o0 + st %o0,[%sp+0x87f] + ld [%sp+0x87f],%f0 + .end +! %f0 = double with bit pattern 0x000fffffffffffff + .inline __max_subnormal,1 + xnor %g0,%g0,%o0 + srlx %o0,12,%o0 + stx %o0,[%sp+0x87f] + ldd [%sp+0x87f],%f0 + .end +! %f0 = float with bit pattern 0x007fffff + .inline __max_subnormalf,1 + xnor %g0,%g0,%o0 + srl %o0,9,%o0 + st %o0,[%sp+0x87f] + ld [%sp+0x87f],%f0 + .end +! %f0 = double with bit pattern 0x0010000000000000 + .inline __min_normal,1 + sethi %hi(0x00100000),%o0 + sllx %o0,32,%o0 + stx %o0,[%sp+0x87f] + ldd [%sp+0x87f],%f0 + .end +! %f0 = float with bit pattern 0x00800000 + .inline __min_normalf,1 + sethi %hi(0x00800000),%o0 + st %o0,[%sp+0x87f] + ld [%sp+0x87f],%f0 + .end +! %f0 = double with bit pattern 0x7fefffffffffffff + .inline __max_normal,1 + sethi %hi(0x80100000),%o1 + sllx %o1,32,%o1 + xnor %g0,%g0,%o0 + andn %o0,%o1,%o0 + stx %o0,[%sp+0x87f] + ldd [%sp+0x87f],%f0 + .end +! %f0 = float with bit pattern 0x7f7fffff + .inline __max_normalf,1 + sethi %hi(0x7f7ffc00),%o0 + or %o0,0x3ff,%o0 + st %o0,[%sp+0x87f] + ld [%sp+0x87f],%f0 + .end +! %f0 = double with bit pattern 0x7ff0000000000000 (defined once; an identical second copy was removed) + .inline __infinity,1 + sethi %hi(0x7ff00000),%o0 + sllx %o0,32,%o0 + stx %o0,[%sp+0x87f] + ldd [%sp+0x87f],%f0 + .end +! %f0 = float with bit pattern 0x7f800000 + .inline __infinityf,1 + sethi %hi(0x7f800000),%o0 + st %o0,[%sp+0x87f] + ld [%sp+0x87f],%f0 + .end +! %f0 = double with bit pattern 0x7ff0000000000001 + .inline __signaling_nan,1 + sethi %hi(0x7ff00000),%o0 + sllx %o0,32,%o0 + or %o0,0x1,%o0 + stx %o0,[%sp+0x87f] + ldd [%sp+0x87f],%f0 + .end +! %f0 = float with bit pattern 0x7f800001 + .inline __signaling_nanf,1 + sethi %hi(0x7f800000),%o0 + or %o0,1,%o0 + st %o0,[%sp+0x87f] + ld [%sp+0x87f],%f0 + .end +! %f0 = double with bit pattern 0x7fffffffffffffff + .inline __quiet_nan,1 + xnor %g0,%g0,%o0 + srlx %o0,1,%o0 + stx %o0,[%sp+0x87f] + ldd [%sp+0x87f],%f0 + .end +! %f0 = float with bit pattern 0x7fffffff + .inline __quiet_nanf,1 + xnor %g0,%g0,%o0 + srl %o0,1,%o0 + st %o0,[%sp+0x87f] + ld 
[%sp+0x87f],%f0 + .end + + .inline __swapEX,1 + and %o0,0x1f,%o1 + sll %o1,5,%o1 + .volatile + st %fsr,[%sp+0x87f] + ld [%sp+0x87f],%o0 + andn %o0,0x3e0,%o2 + or %o1,%o2,%o1 + st %o1,[%sp+0x87f] + ld [%sp+0x87f],%fsr + srl %o0,5,%o0 + and %o0,0x1f,%o0 + .nonvolatile + .end + + .inline _QgetRD,0 + st %fsr,[%sp+0x87f] + ld [%sp+0x87f],%o0 + srl %o0,30,%o0 + .end + + .inline _QgetRP,0 + or %g0,%g0,%o0 + .end + + .inline __swapRD,1 + and %o0,0x3,%o0 + sll %o0,30,%o1 + .volatile + st %fsr,[%sp+0x87f] + ld [%sp+0x87f],%o0 + sethi %hi(0xc0000000),%o4 + andn %o0,%o4,%o2 + or %o1,%o2,%o1 + st %o1,[%sp+0x87f] + ld [%sp+0x87f],%fsr + srl %o0,30,%o0 + and %o0,0x3,%o0 + .nonvolatile + .end +! +! On the SPARC, __swapRP is a no-op; always return 0 for backward compatibility +! + + .inline __swapRP,1 + or %g0,%g0,%o0 + .end + + .inline __swapTE,1 + and %o0,0x1f,%o0 + sll %o0,23,%o1 + .volatile + st %fsr,[%sp+0x87f] + ld [%sp+0x87f],%o0 + sethi %hi(0x0f800000),%o4 + andn %o0,%o4,%o2 + or %o1,%o2,%o1 + st %o1,[%sp+0x87f] + ld [%sp+0x87f],%fsr + srl %o0,23,%o0 + and %o0,0x1f,%o0 + .nonvolatile + .end + + .inline __fp_class,1 + fabsd %f0,%f0 + std %f0,[%sp+0x87f] + ldx [%sp+0x87f],%o0 + orcc %g0,%o0,%g0 + be,pn %xcc,2f + nop + sethi %hi(0x7ff00000),%o1 + sllx %o1,32,%o1 + andcc %o0,%o1,%g0 + bne,pt %xcc,1f + nop + or %g0,1,%o0 + ba 2f + nop +1: + subcc %o0,%o1,%g0 + bge,pn %xcc,1f + nop + or %g0,2,%o0 + ba 2f + nop +1: + andncc %o0,%o1,%o0 + bne,pn %xcc,1f + nop + or %g0,3,%o0 + ba 2f + nop +1: + sethi %hi(0x00080000),%o1 + sllx %o1,32,%o1 + andcc %o0,%o1,%g0 + or %g0,4,%o0 + bne,pt %xcc,2f + nop + or %g0,5,%o0 +2: + .end + + .inline __fp_classf,1 + fabss %f1,%f1 + st %f1,[%sp+0x87f] + ld [%sp+0x87f],%o0 + orcc %g0,%o0,%g0 + be,pn %icc,2f + nop +1: + sethi %hi(0x7f800000),%o1 + andcc %o0,%o1,%g0 + bne,pt %icc,1f + nop + or %g0,1,%o0 + ba 2f + nop +1: + subcc %o0,%o1,%g0 + bge,pn %icc,1f + nop + or %g0,2,%o0 + ba 2f + nop +1: + bg,pn %icc,1f + nop + or %g0,3,%o0 + ba 2f + nop +1: + 
sethi %hi(0x00400000),%o1 + andcc %o0,%o1,%g0 + or %g0,4,%o0 + bne,pt %icc,2f + nop + or %g0,5,%o0 +2: + .end + + .inline __copysign,2 + fabsd %f0,%f0 + st %f0,[%sp+0x87f] + ld [%sp+0x87f],%o0 + st %f2,[%sp+0x887] + ld [%sp+0x887],%o1 + srl %o1,31,%o1 + sll %o1,31,%o1 + or %o0,%o1,%o0 + st %o0,[%sp+0x87f] + ld [%sp+0x87f],%f0 + .end + + .inline __copysignf,2 + fabss %f1,%f1 + st %f1,[%sp+0x87f] + ld [%sp+0x87f],%o0 + st %f3,[%sp+0x887] + ld [%sp+0x887],%o1 + srl %o1,31,%o1 + sll %o1,31,%o1 + or %o0,%o1,%o0 + st %o0,[%sp+0x87f] + ld [%sp+0x87f],%f0 + .end + + .inline _finite,1 + fabsd %f0,%f0 + st %f0,[%sp+0x87f] + ld [%sp+0x87f],%o0 + sethi %hi(0x7ff00000),%o1 + sub %o0,%o1,%o0 + srl %o0,31,%o0 + .end + + .inline __finitef,1 + fabss %f1,%f1 + st %f1,[%sp+0x87f] + ld [%sp+0x87f],%o0 + sethi %hi(0x7f800000),%o1 + sub %o0,%o1,%o0 + srl %o0,31,%o0 + .end + + .inline __signbit,1 + st %f0,[%sp+0x87f] + ld [%sp+0x87f],%o0 + srl %o0,31,%o0 + .end + + .inline __signbitf,1 + st %f1,[%sp+0x87f] + ld [%sp+0x87f],%o0 + srl %o0,31,%o0 + .end + + .inline __isinf,1 + fabsd %f0,%f0 + std %f0,[%sp+0x87f] + ldx [%sp+0x87f],%o0 + sethi %hi(0x7ff00000),%o1 + sllx %o1,32,%o1 + sub %o0,%o1,%o0 + sub %g0,%o0,%o1 + or %o0,%o1,%o0 + xnor %o0,%g0,%o0 + srlx %o0,63,%o0 + .end + + .inline __isinff,1 + fabss %f1,%f1 + st %f1,[%sp+0x87f] + ld [%sp+0x87f],%o0 + sethi %hi(0x7f800000),%o1 + sub %o0,%o1,%o0 + sub %g0,%o0,%o1 + or %o0,%o1,%o0 + xnor %o0,%g0,%o0 + srl %o0,31,%o0 + .end + + .inline __isnan,1 + std %f0,[%sp+0x87f] + ldx [%sp+0x87f],%o0 + sllx %o0,1,%o0 + srlx %o0,1,%o0 + sethi %hi(0x7ff00000),%o1 + sllx %o1,32,%o1 + sub %o1,%o0,%o0 + srlx %o0,63,%o0 + .end + + .inline __isnanf,1 + st %f1,[%sp+0x87f] + ld [%sp+0x87f],%o0 + sethi %hi(0x80000000),%o2 + andn %o0,%o2,%o0 + sethi %hi(0x7f800000),%o1 + sub %o1,%o0,%o0 + srl %o0,31,%o0 + .end + + .inline __isnormal,1 + fabsd %f0,%f0 + st %f0,[%sp+0x87f] + ld [%sp+0x87f],%o0 + sethi %hi(0x7ff00000),%o1 + sub %o0,%o1,%o2 + sethi 
%hi(0x00100000),%o1 + sub %o0,%o1,%o1 + andn %o2,%o1,%o0 + srl %o0,31,%o0 + .end + + .inline __isnormalf,1 + fabss %f1,%f1 + st %f1,[%sp+0x87f] + ld [%sp+0x87f],%o0 + sethi %hi(0x7f800000),%o1 + sub %o0,%o1,%o2 + sethi %hi(0x00800000),%o1 + sub %o0,%o1,%o1 + andn %o2,%o1,%o0 + srl %o0,31,%o0 + .end + + .inline __issubnormal,1 + fabsd %f0,%f0 + std %f0,[%sp+0x87f] + ldx [%sp+0x87f],%o0 + sethi %hi(0x00100000),%o1 + sllx %o1,32,%o1 + sub %o0,%o1,%o1 + sub %g0,%o0,%o2 + or %o0,%o2,%o0 + and %o0,%o1,%o0 + srlx %o0,63,%o0 + .end + + .inline __issubnormalf,1 + fabss %f1,%f1 + st %f1,[%sp+0x87f] + ld [%sp+0x87f],%o0 + sethi %hi(0x00800000),%o1 + sub %o0,%o1,%o1 + sub %g0,%o0,%o2 + or %o0,%o2,%o0 + and %o0,%o1,%o0 + srl %o0,31,%o0 + .end + + .inline __iszero,1 + fabsd %f0,%f0 + std %f0,[%sp+0x87f] + ldx [%sp+0x87f],%o0 + sub %g0,%o0,%o1 + or %o0,%o1,%o0 + xnor %o0,%g0,%o0 + srlx %o0,63,%o0 + .end + + .inline __iszerof,1 + fabss %f1,%f1 + st %f1,[%sp+0x87f] + ld [%sp+0x87f],%o0 + sub %g0,%o0,%o1 + or %o0,%o1,%o0 + xnor %o0,%g0,%o0 + srl %o0,31,%o0 + .end + + .inline abs,1 + sra %o0,31,%o1 + xor %o0,%o1,%o0 + sub %o0,%o1,%o0 + sra %o0,0,%o0 + .end + + .inline __fabs,1 + fabsd %f0,%f0 + .end + + .inline __fabsf,1 + fabss %f1,%f0 + .end +! +! __nintf - f77 NINT(REAL*4) +! 
+ + .inline __nintf,1 + st %f1,[%sp+0x87f] + ld [%sp+0x87f],%o0 + srl %o0,30-7,%g1 + sethi %hi(0x7fffff),%o2 + and %g1,0xff,%g1 + or %o2,%lo(0x7fffff),%o2 + sethi %hi(1<<22),%o4 + subcc %g1,127+31,%g0 + and %o0,%o2,%o3 + bl 1f + nop + sethi %hi(0xcf000000),%o2 + sethi %hi(0x80000000),%g1 + subcc %o0,%o2,%g0 + or %g1,%g0,%o0 + be 0f + nop + fstoi %f1,%f0 + st %f0,[%sp+0x87f] + ld [%sp+0x87f],%o0 +0: + sra %o0,0,%o0 + ba 9f + nop +1: + add %o4,%o4,%o5 + or %o3,%o5,%o3 + sra %o0,31-0,%o2 + subcc %g1,127,%g1 + srl %o4,%g1,%o4 + bge 1f + nop + subcc %g1,-1,%g0 + or %g0,0,%o0 + bne 2f + nop + or %g0,1,%o0 + ba 2f + nop +1: + add %o3,%o4,%o3 + or %g0,23,%o0 + subcc %o0,%g1,%o0 + bl 1f + nop + srl %o3,%o0,%o0 + ba 2f + nop +1: + sub %g0,%o0,%o0 + sll %o3,%o0,%o0 +2: + xor %o0,%o2,%o0 + sra %o0,0,%o0 + and %o2,1,%o2 + add %o0,%o2,%o0 +9: + .end + + .inline __il_nint,1 + ld [%o0],%o0 + sra %o0,0,%o0 + srlx %o0,31-8,%g1 + or %g0,1,%o2 + sllx %o2,23-1,%o4 + and %g1,0xff,%g1 + sllx %o2,63-0,%o2 + subcc %g1,127+63,%g0 + bl 0f + nop + st %o0,[%sp+0x87f] + ld [%sp+0x87f],%f0 + fstox %f0,%f0 + std %f0,[%sp+0x87f] + ldx [%sp+0x87f],%o0 + ba 9f + nop +0: + add %o4,%o4,%o5 + srax %o2,63-23,%o2 + sub %g1,127+23,%o1 + xnor %o2,%g0,%o2 + and %o0,%o2,%o3 + or %o3,%o5,%o3 + srax %o0,63-0,%o2 + subcc %g1,127,%g1 + bge 1f + nop + subcc %g1,-1,%g0 + or %g0,0,%o0 + bne 2f + nop + or %g0,1,%o0 + ba 2f + nop +1: + brlz,pt %o1,3f + nop + sub %g1,23,%o0 + sllx %o3,%o0,%o0 + ba 2f + nop +3: + srlx %o4,%g1,%o4 + add %o3,%o4,%o3 + or %g0,23,%o0 + sub %o0,%g1,%o0 + srlx %o3,%o0,%o0 +2: + xor %o0,%o2,%o0 + sub %o0,%o2,%o0 +9: + .end +! +! __i_dnnt - f77 NINT(REAL*8) +! 
+ + .inline __i_dnnt,1 + ldx [%o0],%o0 + srlx %o0,63-11,%g1 + or %g0,1,%o2 + stx %o0,[%sp+0x87f] + sllx %o2,52-1,%o4 + and %g1,0x7ff,%g1 + sllx %o2,63-0,%o2 + subcc %g1,1023+32,%g0 + bl 0f + nop + ldd [%sp+0x87f],%f0 + ba 8f + nop +0: + add %o4,%o4,%o5 + srax %o2,63-52,%o2 + sub %g1,1023+30,%o1 + xnor %o2,%g0,%o2 + and %o0,%o2,%o3 + or %o3,%o5,%o3 + srax %o0,63-0,%o2 + subcc %g1,1023,%g1 + bge 1f + nop + subcc %g1,-1,%g0 + or %g0,0,%o0 + bne 2f + nop + or %g0,1,%o0 + ba 2f + nop +1: + srlx %o4,%g1,%o4 + add %o3,%o4,%o3 + or %g0,52,%o0 + sub %o0,%g1,%o0 + srlx %o3,%o0,%o0 +2: + xor %o0,%o2,%o0 + sub %o0,%o2,%o0 + brlz,pt %o1,9f + nop + stx %o0,[%sp+0x87f] + ldd [%sp+0x87f],%f0 + fxtod %f0,%f0 +8: + fdtoi %f0,%f0 + st %f0,[%sp+0x87f] + ld [%sp+0x87f],%o0 + sra %o0,0,%o0 +9: + .end + + .inline __il_dnnt,1 + ldx [%o0],%o0 + srlx %o0,63-11,%g1 + or %g0,1,%o2 + sllx %o2,52-1,%o4 + and %g1,0x7ff,%g1 + sllx %o2,63-0,%o2 + subcc %g1,1023+63,%g0 + bl 0f + nop + stx %o0,[%sp+0x87f] + ldd [%sp+0x87f],%f0 + fdtox %f0,%f0 + std %f0,[%sp+0x87f] + ldx [%sp+0x87f],%o0 + ba 9f + nop +0: + add %o4,%o4,%o5 + srax %o2,63-52,%o2 + sub %g1,1023+52,%o1 + xnor %o2,%g0,%o2 + and %o0,%o2,%o3 + or %o3,%o5,%o3 + srax %o0,63-0,%o2 + subcc %g1,1023,%g1 + bge 1f + nop + subcc %g1,-1,%g0 + or %g0,0,%o0 + bne 2f + nop + or %g0,1,%o0 + ba 2f + nop +1: + brlz,pt %o1,3f + nop + sub %g1,52,%o0 + sllx %o3,%o0,%o0 + ba 2f + nop +3: + srlx %o4,%g1,%o4 + add %o3,%o4,%o3 + or %g0,52,%o0 + sub %o0,%g1,%o0 + srlx %o3,%o0,%o0 +2: + xor %o0,%o2,%o0 + sub %o0,%o2,%o0 +9: + .end + + .inline __anintf,1 + st %f1,[%sp+0x87f] + ld [%sp+0x87f],%o0 + or %g0,1,%o1 + srl %o0,23,%g1 + and %g1,0xff,%g1 + sub %g0,%g1,%g1 + add %g1,0x95,%g1 + subcc %g1,23,%g0 + sll %o1,%g1,%o1 + sub %o1,1,%o2 + bcs 1f + nop + be 2f + nop + bl 3f + nop + sethi %hi(0x80000000),%o1 + and %o0,%o1,%o0 + ba 3f + nop +1: + and %o0,%o1,%o1 +2: + add %o0,%o1,%o0 + andn %o0,%o2,%o0 +3: + st %o0,[%sp+0x87f] + ld [%sp+0x87f],%f0 + .end + + .inline 
__anint,1 + std %f0,[%sp+0x87f] + ldx [%sp+0x87f],%o0 + or %g0,1,%o1 + srlx %o0,52,%g1 + and %g1,0x7ff,%g1 + sub %g0,%g1,%g1 + add %g1,0x432,%g1 + subcc %g1,52,%g0 + sllx %o1,%g1,%o1 + sub %o1,1,%o2 + bcs,pt %icc,1f + nop + be,pt %icc,2f + nop + bl,pt %icc,3f + nop + srlx %o0,63,%o0 + sllx %o0,63,%o0 + ba 3f + nop +1: + and %o0,%o1,%o1 +2: + add %o0,%o1,%o0 + andn %o0,%o2,%o0 +3: + stx %o0,[%sp+0x87f] + ldd [%sp+0x87f],%f0 + .end + + .inline __r_dim,2 + st %g0,[%sp+0x87f] + ld [%sp+0x87f],%f4 + ld [%o0],%f0 + ld [%o1],%f2 + fcmps %fcc0,%f0,%f2 + fmovsule %fcc0,%f4,%f2 + fsubs %f0,%f2,%f0 + fmovsule %fcc0,%f4,%f0 + .end + + .inline __d_dim,2 + stx %g0,[%sp+0x87f] + ldd [%sp+0x87f],%f4 + ld [%o0],%f0 + ld [%o0+4],%f1 + ld [%o1],%f2 + ld [%o1+4],%f3 + fcmpd %fcc0,%f0,%f2 + fmovdule %fcc0,%f4,%f2 + fsubd %f0,%f2,%f0 + fmovdule %fcc0,%f4,%f0 + .end + + .inline __f95_signf,2 + ld [%o0],%f0 + ld [%o1],%o1 + fabss %f0,%f0 + fnegs %f0,%f1 + sra %o1,0,%o1 + fmovrslz %o1,%f1,%f0 + .end + + .inline __f95_sign,2 + ld [%o0],%f0 + ld [%o0+4],%f1 + ld [%o1],%o1 + fabsd %f0,%f0 + fnegd %f0,%f2 + sra %o1,0,%o1 + fmovrdlz %o1,%f2,%f0 + .end + + .inline __r_sign,2 + ld [%o0],%f0 + ld [%o1],%o1 + fabss %f0,%f0 + fnegs %f0,%f1 + sub %o1,1,%o0 + and %o1,%o0,%o1 + sra %o1,0,%o1 + fmovrslz %o1,%f1,%f0 + .end + + .inline __d_sign,2 + ldd [%o0],%f0 + ldx [%o1],%o1 + fabsd %f0,%f0 + fnegd %f0,%f2 + sub %o1,1,%o0 + and %o1,%o0,%o1 + fmovrdlz %o1,%f2,%f0 + .end +! +! complex __Fc_div_f(complex a, complex b); +! 
+ + .inline __Fc_div_f,0 + st %g0,[%sp+0x87f] + ld [%sp+0x87f],%f4 + fcmps %fcc0,%f3,%f4 + fbne,pn %fcc0,1f + nop + fdivs %f0,%f2,%f0 + fdivs %f1,%f2,%f1 + ba 2f + nop +1: + sethi %hi(0x3ff00000),%o0 + sllx %o0,32,%o0 + stx %o0,[%sp+0x87f] + ldd [%sp+0x87f],%f16 + fsmuld %f2,%f2,%f4 + fsmuld %f3,%f3,%f6 + fsmuld %f1,%f3,%f8 + fsmuld %f0,%f2,%f10 + faddd %f6,%f4,%f6 + fdivd %f16,%f6,%f6 + faddd %f10,%f8,%f10 + fsmuld %f1,%f2,%f12 + fmuld %f10,%f6,%f10 + fsmuld %f0,%f3,%f14 + fsubd %f12,%f14,%f14 + fmuld %f14,%f6,%f6 + fdtos %f10,%f0 + fdtos %f6,%f1 +2: + .end diff --git a/usr/src/lib/libm1/Makefile b/usr/src/lib/libm1/Makefile new file mode 100644 index 0000000000..cb351e34cb --- /dev/null +++ b/usr/src/lib/libm1/Makefile @@ -0,0 +1,42 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2011 Nexenta Systems, Inc. All rights reserved. 
+# + +LIBRARY= libm.a +VERS= .1 + +# include common library definitions +include $(SRC)/lib/Makefile.lib + +SUBDIRS = $(MACH) +$(BUILD64)SUBDIRS += $(MACH64) + +all := TARGET= all +install := TARGET= install +clean := TARGET= clean +clobber := TARGET= clobber +lint := TARGET= lint + +.KEEP_STATE: + +.PARALLEL: $(SUBDIRS) + +all clean clobber install lint: $(SUBDIRS) + +$(SUBDIRS): FRC + @cd $@; pwd; VERSION='$(VERSION)' $(MAKE) $(TARGET) + +FRC: + +include $(SRC)/lib/Makefile.targ diff --git a/usr/src/lib/libm1/Makefile.com b/usr/src/lib/libm1/Makefile.com new file mode 100644 index 0000000000..c97b92c957 --- /dev/null +++ b/usr/src/lib/libm1/Makefile.com @@ -0,0 +1,41 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2011, Richard Lowe. +# + + +LIBRARY = libm.a +VERS = .1 + +LIBMDIR = $(SRC)/lib/libm + +OBJECTS = libmv1.o + +include $(SRC)/lib/Makefile.lib +include $(SRC)/lib/Makefile.rootfs +include $(LIBMDIR)/Makefile.libm.com + +LIBS = $(DYNLIB) +SRCS = $(OBJECTS:%.o=../common/%.c) +SRCDIR = ../common/ + +CPPFLAGS += -DLIBM_BUILD +MAPFILEDIR = ../common/ +DYNFLAGS += -zignore -Wl,-F'libm.so.2' +LINTFLAGS64 += -errchk=longptr64 + +.KEEP_STATE: + +all: $(LIBS) + +lint: lintcheck diff --git a/usr/src/lib/libm1/amd64/Makefile b/usr/src/lib/libm1/amd64/Makefile new file mode 100644 index 0000000000..21fab0dd1c --- /dev/null +++ b/usr/src/lib/libm1/amd64/Makefile @@ -0,0 +1,23 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. 
+# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2011 Nexenta Systems, Inc. All rights reserved. +# + +TARGET_ARCH= amd64 + +include ../Makefile.com +include $(SRC)/lib/Makefile.lib.64 + +install: all $(ROOTLIBS64) + +include $(SRC)/lib/Makefile.targ diff --git a/usr/src/lib/libm1/common/libmv1.c b/usr/src/lib/libm1/common/libmv1.c new file mode 100644 index 0000000000..572d75528b --- /dev/null +++ b/usr/src/lib/libm1/common/libmv1.c @@ -0,0 +1,662 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +/* + * Placeholder definitions for the libm.so.1 interface. + * + * Every function in this file ignores its arguments and returns a constant + * zero; no mathematics is performed here. libm1/Makefile.com links this + * object with -Wl,-F'libm.so.2', so presumably callers are redirected to + * libm.so.2 at run time and these bodies exist only to give each exported + * symbol a definition (NOTE(review): confirm against ld(1) filter semantics). + * + * The #pragma weak lines below give each double-precision __name a weak + * alias under its public name. + */ + +#pragma weak _lib_version = __libm_lib_version +#pragma weak acos = __acos +#pragma weak acosh = __acosh +#pragma weak asin = __asin +#pragma weak asinh = __asinh +#pragma weak atan = __atan +#pragma weak atan2 = __atan2 +#pragma weak atanh = __atanh +#pragma weak cbrt = __cbrt +#pragma weak ceil = __ceil +#pragma weak copysign = __copysign +#pragma weak cos = __cos +#pragma weak cosh = __cosh +#pragma weak erf = __erf +#pragma weak erfc = __erfc +#pragma weak exp = __exp +#pragma weak expm1 = __expm1 +#pragma weak fabs = __fabs +#pragma weak floor = __floor +#pragma weak fmod = __fmod +#pragma weak gamma = __gamma +#pragma weak gamma_r = __gamma_r +#pragma weak hypot = __hypot +#pragma weak ilogb = __ilogb +#pragma weak isnan = __isnan +#pragma weak j0 = __j0 +#pragma weak j1 = __j1 +#pragma weak jn = __jn +#pragma weak lgamma = __lgamma +#pragma weak lgamma_r = __lgamma_r +#pragma weak log = __log +#pragma weak log10 = __log10 +#pragma weak log1p = __log1p +#pragma weak logb = __logb +#pragma weak nextafter = __nextafter +#pragma weak pow = __pow +#pragma weak remainder = __remainder +#pragma weak rint = __rint +#pragma weak scalb = __scalb +#pragma weak scalbn = __scalbn +#pragma weak signgam = __signgam +#pragma weak significand = __significand +#pragma weak sin = __sin +#pragma weak sinh = __sinh +#pragma weak sqrt = __sqrt +#pragma weak tan = __tan +#pragma weak tanh = __tanh +#pragma weak y0 = __y0 +#pragma weak y1 = __y1 +#pragma weak yn = __yn + +#include <math.h> + +/* Data symbols; aliased as _lib_version and signgam by the #pragma weak lines above. */ +const enum version __libm_lib_version = libm_ieee; +int __signgam = 0; + +/* Compiled only when neither __sparcv9 nor __amd64 is defined; returns a null pointer. */ +#if !defined(__sparcv9) && !defined(__amd64) +/* ARGSUSED */ +int * +__libm_errno(void) { + return (0); +} +#endif + +/* ARGSUSED */ +int +__libm__rem_pio2(double x, double *y) { + return (0); +} + +/* ARGSUSED */ +int +__libm__rem_pio2m(double *x, double *y, int e0, int nx, int p, const int *ip) { + return (0); +} + +/* Double-precision stubs; each has a weak public alias declared above. */ + +/* ARGSUSED */ +double +__acos(double x) { + return (0.0); +} + +/* ARGSUSED */ +double
+__acosh(double x) { + return (0.0); +} + +/* ARGSUSED */ +double +__asin(double x) { + return (0.0); +} + +/* ARGSUSED */ +double +__asinh(double x) { + return (0.0); +} + +/* ARGSUSED */ +double +__atan(double x) { + return (0.0); +} + +/* ARGSUSED */ +double +__atan2(double y, double x) { + return (0.0); +} + +/* ARGSUSED */ +double +__atanh(double x) { + return (0.0); +} + +/* ARGSUSED */ +double +__cbrt(double x) { + return (0.0); +} + +/* ARGSUSED */ +double +__ceil(double x) { + return (0.0); +} + +/* ARGSUSED */ +double +__copysign(double x, double y) { + return (0.0); +} + +/* ARGSUSED */ +double +__cos(double x) { + return (0.0); +} + +/* ARGSUSED */ +double +__cosh(double x) { + return (0.0); +} + +/* ARGSUSED */ +double +__erf(double x) { + return (0.0); +} + +/* ARGSUSED */ +double +__erfc(double x) { + return (0.0); +} + +/* ARGSUSED */ +double +__exp(double x) { + return (0.0); +} + +/* ARGSUSED */ +double +__expm1(double x) { + return (0.0); +} + +/* ARGSUSED */ +double +__fabs(double x) { + return (0.0); +} + +/* ARGSUSED */ +double +__floor(double x) { + return (0.0); +} + +/* ARGSUSED */ +double +__fmod(double x, double y) { + return (0.0); +} + +/* ARGSUSED */ +double +__gamma(double x) { + return (0.0); +} + +/* ARGSUSED */ +double +__gamma_r(double x, int *signgamp) { + return (0.0); +} + +/* ARGSUSED */ +double +__hypot(double x, double y) { + return (0.0); +} + +/* ARGSUSED */ +int +__ilogb(double x) { + return (0); +} + +/* ARGSUSED */ +int +__isnan(double x) { + return (0); +} + +/* ARGSUSED */ +double +__j0(double x) { + return (0.0); +} + +/* ARGSUSED */ +double +__j1(double x) { + return (0.0); +} + +/* ARGSUSED */ +double +__jn(int n, double y) { + return (0.0); +} + +/* ARGSUSED */ +double +__lgamma(double x) { + return (0.0); +} + +/* ARGSUSED */ +double +__lgamma_r(double x, int *signgamp) { + return (0.0); +} + +/* ARGSUSED */ +double +__log(double x) { + return (0.0); +} + +/* ARGSUSED */ +double +__log10(double x) { + return
(0.0); +} + +/* ARGSUSED */ +double +__log1p(double x) { + return (0.0); +} + +/* ARGSUSED */ +double +__logb(double x) { + return (0.0); +} + +/* ARGSUSED */ +double +__nextafter(double x, double y) { + return (0.0); +} + +/* ARGSUSED */ +double +__pow(double x, double y) { + return (0.0); +} + +/* ARGSUSED */ +double +__remainder(double x, double y) { + return (0.0); +} + +/* ARGSUSED */ +double +__rint(double x) { + return (0.0); +} + +/* ARGSUSED */ +double +__scalb(double x, double y) { + return (0.0); +} + +/* ARGSUSED */ +double +__scalbn(double x, int n) { + return (0.0); +} + +/* ARGSUSED */ +double +__significand(double x) { + return (0.0); +} + +/* ARGSUSED */ +double +__sin(double x) { + return (0.0); +} + +/* ARGSUSED */ +double +__sinh(double x) { + return (0.0); +} + +/* ARGSUSED */ +double +__sqrt(double x) { + return (0.0); +} + +/* ARGSUSED */ +double +__tan(double x) { + return (0.0); +} + +/* ARGSUSED */ +double +__tanh(double x) { + return (0.0); +} + +/* ARGSUSED */ +double +__y0(double x) { + return (0.0); +} + +/* ARGSUSED */ +double +__y1(double x) { + return (0.0); +} + +/* ARGSUSED */ +double +__yn(int n, double x) { + return (0.0); +} + +/* matherr is defined directly under its public name; it has no __-prefixed form here. */ + +/* ARGSUSED */ +int +matherr(struct exception *excep) { + return (0); +} + +/* Single-precision stubs (the __*f names listed under SUNW_1.1.1 in mapfile-vers). */ + +/* ARGSUSED */ +float +__acosf(float x) { + return (0.0F); +} + +/* ARGSUSED */ +float +__asinf(float x) { + return (0.0F); +} + +/* ARGSUSED */ +float +__atanf(float x) { + return (0.0F); +} + +/* ARGSUSED */ +float +__atan2f(float y, float x) { + return (0.0F); +} + +/* ARGSUSED */ +float +__ceilf(float x) { + return (0.0F); +} + +/* ARGSUSED */ +float +__cosf(float x) { + return (0.0F); +} + +/* ARGSUSED */ +float +__coshf(float x) { + return (0.0F); +} + +/* ARGSUSED */ +float +__expf(float x) { + return (0.0F); +} + +/* ARGSUSED */ +float +__fabsf(float x) { + return (0.0F); +} + +/* ARGSUSED */ +float +__floorf(float x) { + return (0.0F); +} + +/* ARGSUSED */ +float +__fmodf(float x, float y) { + return (0.0F); +} + +/*
ARGSUSED */ +float +__frexpf(float x, int *e) { + return (0.0F); +} + +/* ARGSUSED */ +float +__ldexpf(float x, int n) { + return (0.0F); +} + +/* ARGSUSED */ +float +__logf(float x) { + return (0.0F); +} + +/* ARGSUSED */ +float +__log10f(float x) { + return (0.0F); +} + +/* ARGSUSED */ +float +__modff(float x, float *iptr) { + return (0.0F); +} + +/* ARGSUSED */ +float +__powf(float x, float y) { + return (0.0F); +} + +/* ARGSUSED */ +float +__sinf(float x) { + return (0.0F); +} + +/* ARGSUSED */ +float +__sinhf(float x) { + return (0.0F); +} + +/* ARGSUSED */ +float +__sqrtf(float x) { + return (0.0F); +} + +/* ARGSUSED */ +float +__tanf(float x) { + return (0.0F); +} + +/* ARGSUSED */ +float +__tanhf(float x) { + return (0.0F); +} + +/* long double stubs (the __*l names listed under SUNW_1.1.1 in mapfile-vers). */ + +/* ARGSUSED */ +long double +__acosl(long double x) { + return (0.0L); +} + +/* ARGSUSED */ +long double +__asinl(long double x) { + return (0.0L); +} + +/* ARGSUSED */ +long double +__atanl(long double x) { + return (0.0L); +} + +/* ARGSUSED */ +long double +__atan2l(long double y, long double x) { + return (0.0L); +} + +/* ARGSUSED */ +long double +__ceill(long double x) { + return (0.0L); +} + +/* ARGSUSED */ +long double +__cosl(long double x) { + return (0.0L); +} + +/* ARGSUSED */ +long double +__coshl(long double x) { + return (0.0L); +} + +/* ARGSUSED */ +long double +__expl(long double x) { + return (0.0L); +} + +/* ARGSUSED */ +long double +__fabsl(long double x) { + return (0.0L); +} + +/* ARGSUSED */ +long double +__floorl(long double x) { + return (0.0L); +} + +/* ARGSUSED */ +long double +__fmodl(long double x, long double y) { + return (0.0L); +} + +/* ARGSUSED */ +long double +__frexpl(long double x, int *e) { + return (0.0L); +} + +/* ARGSUSED */ +long double +__ldexpl(long double x, int n) { + return (0.0L); +} + +/* ARGSUSED */ +long double +__logl(long double x) { + return (0.0L); +} + +/* ARGSUSED */ +long double +__log10l(long double x) { + return (0.0L); +} + +/* ARGSUSED */ +long double +__modfl(long double
x, long double *iptr) { + return (0.0L); +} + +/* ARGSUSED */ +long double +__powl(long double x, long double y) { + return (0.0L); +} + +/* ARGSUSED */ +long double +__sinl(long double x) { + return (0.0L); +} + +/* ARGSUSED */ +long double +__sinhl(long double x) { + return (0.0L); +} + +/* ARGSUSED */ +long double +__sqrtl(long double x) { + return (0.0L); +} + +/* ARGSUSED */ +long double +__tanl(long double x) { + return (0.0L); +} + +/* ARGSUSED */ +long double +__tanhl(long double x) { + return (0.0L); +} diff --git a/usr/src/lib/libm1/common/mapfile-vers b/usr/src/lib/libm1/common/mapfile-vers new file mode 100644 index 0000000000..da5e870241 --- /dev/null +++ b/usr/src/lib/libm1/common/mapfile-vers @@ -0,0 +1,239 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# Copyright 2011 Nexenta Systems, Inc. All rights reserved. +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# Interface definition for libm.so.1 +# +# For information regarding the establishment of versioned definitions see: +# The Linker and Libraries Manual (version 2.5 or greater) +# This is part of the Developers Guide in the Answerbook.
Specifically refer +# to Chapter 2 under section "Defining Additional Symbols" through section +# "Reducing Symbol Scope", and Chapter 5 "Versioning". +# +# For specific rules for the modification (evolution) of these version +# definitions see: +# psarc_1995_14: Integration of Scoped Libraries +# (/shared/sac/PSARC/1995/014) +# Policy for Shared Library Version Names and Interface Definitions +# (/shared/ON/general_docs/scoping-rules.ps) + +# +# MAPFILE HEADER START +# +# WARNING: STOP NOW. DO NOT MODIFY THIS FILE. +# Object versioning must comply with the rules detailed in +# +# usr/src/lib/README.mapfiles +# +# You should not be making modifications here until you've read the most current +# copy of that file. If you need help, contact a gatekeeper for guidance. +# +# MAPFILE HEADER END +# + +$mapfile_version 2 + +$if _ELF32 +$add lf64 +$endif +$if _sparc && _ELF32 +$add sparc32 +$endif +$if _sparc && _ELF64 +$add sparcv9 +$endif +$if _x86 && _ELF32 +$add i386 +$endif +$if _x86 && _ELF64 +$add amd64 +$endif + +SYMBOL_VERSION SUNW_1.1.1 { + global: + __acosf; + __acosl; + __asinf; + __asinl; + __atan2f; + __atan2l; + __atanf; + __atanl; + __ceilf; + __ceill; + __cosf; + __coshf; + __coshl; + __cosl; + __expf; + __expl; + __fabsf; + __fabsl; + __floorf; + __floorl; + __fmodf; + __fmodl; + __frexpf; + __frexpl; + __ldexpf; + __ldexpl; + __log10f; + __log10l; + __logf; + __logl; + __modff; + __modfl; + __powf; + __powl; + __sinf; + __sinhf; + __sinhl; + __sinl; + __sqrtf; + __sqrtl; + __tanf; + __tanhf; + __tanhl; + __tanl; +} SUNW_1.1; + +SYMBOL_VERSION SUNW_1.1 { + global: + __acos; + __acosh; + __asin; + __asinh; + __atan; + __atan2; + __atanh; + __cbrt; + __ceil; + __copysign; + __cos; + __cosh; + __erf; + __erfc; + __exp; + __expm1; + __fabs; + __floor; + __fmod; + __gamma; + __gamma_r; + __hypot; + __ilogb; + __isnan; + __j0; + __j1; + __jn; + __lgamma; + __lgamma_r; + __log; + __log10; + __log1p; + __logb; + __nextafter; + __pow; + __remainder; + __rint; + 
__scalb; + __scalbn; + __signgam; + __significand; + __sin; + __sinh; + __sqrt; + __tan; + __tanh; + __y0; + __y1; + __yn; + acos; + acosh; + asin; + asinh; + atan; + atan2; + atanh; + cbrt; + ceil; + copysign; + cos; + cosh; + erf; + erfc; + exp; + expm1; + fabs; + floor; + fmod; + gamma; + gamma_r; + hypot; + ilogb; + isnan; + j0; + j1; + jn; + lgamma; + lgamma_r; + log; + log10; + log1p; + logb; + matherr; + nextafter; + pow; + remainder; + rint; + scalb; + scalbn; + signgam; + significand; + sin; + sinh; + sqrt; + tan; + tanh; + y0; + y1; + yn; +}; + +$if i386 +SYMBOL_VERSION SUNWprivate_1.2 { + global: + __libm_errno; # SC3.0.1 -lmopt +} SUNWprivate_1.1; +$endif + +SYMBOL_VERSION SUNWprivate_1.1 { + global: + _lib_version; + __libm__rem_pio2; + __libm__rem_pio2m; + # anything else is local + local: + *; # symbols not mentioned in this file are scoped out +}; diff --git a/usr/src/lib/libm1/i386/Makefile b/usr/src/lib/libm1/i386/Makefile new file mode 100644 index 0000000000..3d2c71860a --- /dev/null +++ b/usr/src/lib/libm1/i386/Makefile @@ -0,0 +1,21 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2011 Nexenta Systems, Inc. All rights reserved. 
+# + +TARGET_ARCH= i386 +include ../Makefile.com + +install: all $(ROOTLIBS) + +include $(SRC)/lib/Makefile.targ diff --git a/usr/src/lib/libm1/sparc/Makefile b/usr/src/lib/libm1/sparc/Makefile new file mode 100644 index 0000000000..686bb12ef2 --- /dev/null +++ b/usr/src/lib/libm1/sparc/Makefile @@ -0,0 +1,22 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2011 Nexenta Systems, Inc. All rights reserved. +# + +TARGET_ARCH= sparc + +include ../Makefile.com + +install: all $(ROOTLIBS) + +include $(SRC)/lib/Makefile.targ diff --git a/usr/src/lib/libm1/sparcv9/Makefile b/usr/src/lib/libm1/sparcv9/Makefile new file mode 100644 index 0000000000..4899efe7b7 --- /dev/null +++ b/usr/src/lib/libm1/sparcv9/Makefile @@ -0,0 +1,23 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2011 Nexenta Systems, Inc. All rights reserved. 
+# + +TARGET_ARCH= sparcv9 + +include ../Makefile.com +include $(SRC)/lib/Makefile.lib.64 + +install: all $(ROOTLIBS64) + +include $(SRC)/lib/Makefile.targ diff --git a/usr/src/lib/libmvec/Makefile b/usr/src/lib/libmvec/Makefile new file mode 100644 index 0000000000..2a5ce644c0 --- /dev/null +++ b/usr/src/lib/libmvec/Makefile @@ -0,0 +1,50 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2011 Nexenta Systems, Inc. All rights reserved. +# + +LIBRARY= libmvec.a +VERS= .1 + +# include common library definitions +include $(SRC)/lib/Makefile.lib + +COMPS_i386 = i386_hwcap1 +COMPS_sparc = sparc_sparcv8plus+vis sparc_sparcv9+vis2 +COMPONENTS = $(COMPS_$(MACH)) + +COMPS_amd64 = +COMPS_sparcv9 = sparcv9_sparcv9+vis sparcv9_sparcv9+vis2 +COMPONENTS64 = $(COMPS_$(MACH64)) + +SUBDIRS = $(MACH) $(COMPONENTS) +$(BUILD64)SUBDIRS += $(MACH64) $(COMPONENTS64) + +all := TARGET= all +install := TARGET= install +clean := TARGET= clean +clobber := TARGET= clobber +lint := TARGET= lint + +.KEEP_STATE: + +.PARALLEL: $(SUBDIRS) + +all clean clobber install lint: $(SUBDIRS) + +$(SUBDIRS): FRC + @cd $@; pwd; VERSION='$(VERSION)' $(MAKE) $(TARGET) + +FRC: + +include $(SRC)/lib/Makefile.targ diff --git a/usr/src/lib/libmvec/Makefile.com b/usr/src/lib/libmvec/Makefile.com new file mode 100644 index 0000000000..d7374ccdc5 --- /dev/null +++ b/usr/src/lib/libmvec/Makefile.com @@ -0,0 +1,296 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. 
+# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2011 Nexenta Systems, Inc. All rights reserved. +# + +LIBMDIR = $(SRC)/lib/libm + +mvecOBJS = \ + __vTBL_atan1.o \ + __vTBL_atan2.o \ + __vTBL_rsqrt.o \ + __vTBL_sincos.o \ + __vTBL_sincos2.o \ + __vTBL_sqrtf.o \ + __vatan.o \ + __vatan2.o \ + __vatan2f.o \ + __vatanf.o \ + __vc_abs.o \ + __vc_exp.o \ + __vc_log.o \ + __vc_pow.o \ + __vcos.o \ + __vcosbig.o \ + __vcosbigf.o \ + __vcosf.o \ + __vexp.o \ + __vexpf.o \ + __vhypot.o \ + __vhypotf.o \ + __vlog.o \ + __vlogf.o \ + __vpow.o \ + __vpowf.o \ + __vrem_pio2m.o \ + __vrhypot.o \ + __vrhypotf.o \ + __vrsqrt.o \ + __vrsqrtf.o \ + __vsin.o \ + __vsinbig.o \ + __vsinbigf.o \ + __vsincos.o \ + __vsincosbig.o \ + __vsincosbigf.o \ + __vsincosf.o \ + __vsinf.o \ + __vsqrt.o \ + __vsqrtf.o \ + __vz_abs.o \ + __vz_exp.o \ + __vz_log.o \ + __vz_pow.o \ + vatan2_.o \ + vatan2f_.o \ + vatan_.o \ + vatanf_.o \ + vc_abs_.o \ + vc_exp_.o \ + vc_log_.o \ + vc_pow_.o \ + vcos_.o \ + vcosf_.o \ + vexp_.o \ + vexpf_.o \ + vhypot_.o \ + vhypotf_.o \ + vlog_.o \ + vlogf_.o \ + vpow_.o \ + vpowf_.o \ + vrhypot_.o \ + vrhypotf_.o \ + vrsqrt_.o \ + vrsqrtf_.o \ + vsin_.o \ + vsincos_.o \ + vsincosf_.o \ + vsinf_.o \ + vsqrt_.o \ + vsqrtf_.o \ + vz_abs_.o \ + vz_exp_.o \ + vz_log_.o \ + vz_pow_.o \ + #end + +mvecvisCOBJS = \ + __vTBL_atan1.o \ + __vTBL_atan2.o \ + __vTBL_rsqrt.o \ + __vTBL_sincos.o \ + __vTBL_sincos2.o \ + __vTBL_sqrtf.o \ + __vcosbig.o \ + __vcosbigf.o \ + __vrem_pio2m.o \ + __vsinbig.o \ + __vsinbigf.o \ + __vsincosbig.o \ + __vsincosbigf.o \ + #end + +mvecvisSOBJS = \ + __vatan.o \ + __vatan2.o \ + __vatan2f.o \ + __vatanf.o \ + __vcos.o \ + __vcosf.o \ + __vexp.o \ + __vexpf.o \ + __vhypot.o \ + __vhypotf.o \ + __vlog.o \ + __vlogf.o \ + __vpow.o \ + __vpowf.o \ + __vrhypot.o \ + __vrhypotf.o \ + __vrsqrt.o \ + 
__vrsqrtf.o \ + __vsin.o \ + __vsincos.o \ + __vsincosf.o \ + __vsinf.o \ + __vsqrt.o \ + __vsqrtf.o \ + #end + +mvecvis2COBJS = \ + __vTBL_sincos.o \ + __vTBL_sincos2.o \ + __vTBL_sqrtf.o \ + __vcosbig.o \ + __vcosbig_ultra3.o \ + __vrem_pio2m.o \ + __vsinbig.o \ + __vsinbig_ultra3.o \ + #end + +mvecvis2SOBJS = \ + __vcos_ultra3.o \ + __vlog_ultra3.o \ + __vsin_ultra3.o \ + __vsqrtf_ultra3.o \ + #end + +include $(SRC)/lib/Makefile.lib +include $(SRC)/lib/Makefile.rootfs +include $(LIBMDIR)/Makefile.libm.com + +LIBS = $(DYNLIB) +SRCDIR = ../common/ +DYNFLAGS += -zignore + +# Diagnostic tags suppressed for this library. The same list is appended to +# the lint flags and to both CFLAGS and CFLAGS64 below. +LINTERROFF = -erroff=E_FP_DIVISION_BY_ZERO +LINTERROFF += -erroff=E_FP_INVALID +LINTERROFF += -erroff=E_BAD_PTR_CAST_ALIGN +LINTERROFF += -erroff=E_ASSIGMENT_CAUSE_LOSS_PREC +LINTERROFF += -erroff=E_FUNC_SET_NOT_USED + +LINTFLAGS += $(LINTERROFF) +LINTFLAGS64 += $(LINTERROFF) +LINTFLAGS64 += -errchk=longptr64 + +CFLAGS += $(LINTERROFF) +CFLAGS64 += $(LINTERROFF) + +ASDEF += -DLIBMVEC_SO_BUILD + +FLTRPATH_sparc = $$ORIGIN/cpu/$$ISALIST/libmvec_isa.so.1 +FLTRPATH_sparcv9 = $$ORIGIN/../cpu/$$ISALIST/sparcv9/libmvec_isa.so.1 +FLTRPATH_i386 = $$ORIGIN/libmvec/$$HWCAP +FLTRPATH = $(FLTRPATH_$(TARGET_ARCH)) + +sparc_CFLAGS += -_cc=-W0,-xintrinsic +sparcv9_CFLAGS += -_cc=-W0,-xintrinsic +CPPFLAGS_i386 += -Dfabs=__fabs + +CPPFLAGS += -DLIBMVEC_SO_BUILD + +SRCS_mvec_i386 = \ + ../common/__vsqrtf.c \ + #end + +SRCS_mvec_sparc = \ + $(SRCS_mvec_i386) \ + #end +SRCS_mvec_sparcv9 = \ + $(SRCS_mvec_i386) \ + #end + +SRCS_mvec = \ + $(SRCS_mvec_$(TARGETMACH)) \ + ../common/__vTBL_atan1.c \ + ../common/__vTBL_atan2.c \ + ../common/__vTBL_rsqrt.c \ + ../common/__vTBL_sincos.c \ + ../common/__vTBL_sincos2.c \ + ../common/__vTBL_sqrtf.c \ + ../common/__vatan.c \ + ../common/__vatan2.c \ + ../common/__vatan2f.c \ + ../common/__vatanf.c \ + ../common/__vc_abs.c \ + ../common/__vc_exp.c \ + ../common/__vc_log.c \ + ../common/__vc_pow.c \ + ../common/__vcos.c \ + ../common/__vcosbig.c \ + ../common/__vcosbigf.c \ +
../common/__vcosf.c \ + ../common/__vexp.c \ + ../common/__vexpf.c \ + ../common/__vhypot.c \ + ../common/__vhypotf.c \ + ../common/__vlog.c \ + ../common/__vlogf.c \ + ../common/__vpow.c \ + ../common/__vpowf.c \ + ../common/__vrem_pio2m.c \ + ../common/__vrhypot.c \ + ../common/__vrhypotf.c \ + ../common/__vrsqrt.c \ + ../common/__vrsqrtf.c \ + ../common/__vsin.c \ + ../common/__vsinbig.c \ + ../common/__vsinbigf.c \ + ../common/__vsincos.c \ + ../common/__vsincosbig.c \ + ../common/__vsincosbigf.c \ + ../common/__vsincosf.c \ + ../common/__vsinf.c \ + ../common/__vsqrt.c \ + ../common/__vz_abs.c \ + ../common/__vz_exp.c \ + ../common/__vz_log.c \ + ../common/__vz_pow.c \ + ../common/vatan2_.c \ + ../common/vatan2f_.c \ + ../common/vatan_.c \ + ../common/vatanf_.c \ + ../common/vc_abs_.c \ + ../common/vc_exp_.c \ + ../common/vc_log_.c \ + ../common/vc_pow_.c \ + ../common/vcos_.c \ + ../common/vcosf_.c \ + ../common/vexp_.c \ + ../common/vexpf_.c \ + ../common/vhypot_.c \ + ../common/vhypotf_.c \ + ../common/vlog_.c \ + ../common/vlogf_.c \ + ../common/vpow_.c \ + ../common/vpowf_.c \ + ../common/vrhypot_.c \ + ../common/vrhypotf_.c \ + ../common/vrsqrt_.c \ + ../common/vrsqrtf_.c \ + ../common/vsin_.c \ + ../common/vsincos_.c \ + ../common/vsincosf_.c \ + ../common/vsinf_.c \ + ../common/vsqrt_.c \ + ../common/vsqrtf_.c \ + ../common/vz_abs_.c \ + ../common/vz_exp_.c \ + ../common/vz_log_.c \ + ../common/vz_pow_.c \ + #end + +.KEEP_STATE: + +all: $(LIBS) + +lint: lintcheck + +pics/%.o: ../$(TARGET_ARCH)/src/%.S + $(COMPILE.s) -o $@ $< + $(POST_PROCESS_O) + +pics/%.o: ../common/$$(CHIP)/%.S + $(COMPILE.s) -o $@ $< + $(POST_PROCESS_O) diff --git a/usr/src/lib/libmvec/amd64/Makefile b/usr/src/lib/libmvec/amd64/Makefile new file mode 100644 index 0000000000..c7f904a08c --- /dev/null +++ b/usr/src/lib/libmvec/amd64/Makefile @@ -0,0 +1,30 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), 
version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2011 Nexenta Systems, Inc. All rights reserved. +# + +TARGET_ARCH = amd64 + +LIBRARY = libmvec.a +VERS = .1 + +OBJECTS = $(mvecOBJS) + +include ../Makefile.com +include $(SRC)/lib/Makefile.lib.64 + +SRCS = $(SRCS_mvec) + +install: all $(ROOTLIBS64) $(ROOTLINKS64) + +include $(SRC)/lib/libm/Makefile.targ diff --git a/usr/src/lib/libmvec/amd64/src/__vsqrtf.S b/usr/src/lib/libmvec/amd64/src/__vsqrtf.S new file mode 100644 index 0000000000..a4b90fb643 --- /dev/null +++ b/usr/src/lib/libmvec/amd64/src/__vsqrtf.S @@ -0,0 +1,128 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + + .file "__vsqrtf.S" + +#include "libm.h" + +/* + * __vsqrtf(n, x, stridex, y, stridey) + * + * For i = 0 .. n-1, stores the single-precision square root of + * x[i * stridex] into y[i * stridey]. The strides are counted in + * 4-byte elements; they are sign-extended and scaled to bytes on entry. + * Elements are handled four at a time with sqrtps while at least four + * remain, then one at a time with sqrtss. Nothing is stored when n <= 0. + */ + ENTRY(__vsqrtf) + push %rbp + movq %rsp,%rbp + +/ on entry: +/ %edi = n +/ %rsi = x +/ %edx = stridex +/ %rcx = y +/ %r8d = stridey + + movslq %edx,%rdx / sign extend and scale strides + shlq $2,%rdx + movslq %r8d,%r8 + shlq $2,%r8 + + cmpl $4,%edi + jl .finish + + cmpq $4,%rdx + jne .nonunit + cmpq $4,%r8 + jne .nonunit + +/ unit-stride case +/* %r9 and %r10 are four times the byte strides: the pointer advance per group of four */ + movq %rdx,%r9 + shlq $2,%r9 + movq %r8,%r10 + shlq $2,%r10 + + .align 16 +.loop: + movups (%rsi),%xmm0 + addq %r9,%rsi + sqrtps %xmm0,%xmm0 + movups %xmm0,(%rcx) + addq %r10,%rcx + subl $4,%edi + cmpl $4,%edi + jge .loop + +.finish: +/* fewer than four elements remain: handle them one at a time, if any */ + testl %edi,%edi + jle .done + +.finish_loop: + movss (%rsi),%xmm0 + addq %rdx,%rsi + sqrtss %xmm0,%xmm0 + movss %xmm0,(%rcx) + addq %r8,%rcx + decl %edi + jg .finish_loop + +.done: + leave + ret + + .align 16 +.nonunit: +/* strided case: load four scalars, pack them, take one packed sqrt, store four scalars */ + movss (%rsi),%xmm0 + addq %rdx,%rsi + movss (%rsi),%xmm1 + addq %rdx,%rsi + movss (%rsi),%xmm2 + addq %rdx,%rsi + movss (%rsi),%xmm3 + addq %rdx,%rsi + + movlhps %xmm1,%xmm0 / xmm0: 0 x1 0 x0 + movlhps %xmm3,%xmm2 / xmm2: 0 x3 0 x2 + shufps $0x88,%xmm2,%xmm0 / xmm0: x3 x2 x1 x0 + + sqrtps %xmm0,%xmm0 / xmm0: y3 y2 y1 y0 + + movaps %xmm0,%xmm1 / xmm1: y3 y2 y1 y0 + shufps $0xf5,%xmm0,%xmm1 / xmm1: y3 y3 y1 y1 + movhlps %xmm0,%xmm2 / xmm2: 0 x3 y3 y2 + movhlps %xmm1,%xmm3 / xmm3: 0 0 y3 y3 + + movss %xmm0,(%rcx) + addq %r8,%rcx + movss %xmm1,(%rcx) + addq %r8,%rcx + movss %xmm2,(%rcx) + addq %r8,%rcx + movss %xmm3,(%rcx) + addq %r8,%rcx + + subl $4,%edi + cmpl $4,%edi + jge .nonunit + + jmp .finish + + SET_SIZE(__vsqrtf) diff --git a/usr/src/lib/libmvec/common/__vTBL_atan1.c b/usr/src/lib/libmvec/common/__vTBL_atan1.c new file mode 100644 index 0000000000..bc640eba53 --- /dev/null +++ b/usr/src/lib/libmvec/common/__vTBL_atan1.c @@ -0,0 +1,617 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma align 32 (__vlibm_TBL_atan1) +const double __vlibm_TBL_atan1[] = { + +/* i= -2 conup conlo = 0.0 */ 0.0, 0.0 , +/* i= -1 PI/2 upper, lower */ 1.570796326794896558E+00, 6.123233995736765886e-17, + /* 3ff921fb54442d18, 3c91a62633145c07, */ + + +/* i= 0 atan(3F900000...) */ 1.56237286204768313E-02, -4.91360013656630395E-19, +/* i= 0 atan(3F900000...) 3F8FFF555BBB729B, BC2220C39D4DFF50, */ + +/* i= 1 atan(3F910000...) */ 1.66000375562312640E-02, 1.12189118956867269E-18, +/* i= 1 atan(3F910000...) 3F90FF99A9AA60D7, 3C34B1FB39D277D8, */ + +/* i= 2 atan(3F920000...) */ 1.75763148444955872E-02, 6.59519250301009539E-19, +/* i= 2 atan(3F920000...) 3F91FF8685C3E636, 3C2854FBB35044B1, */ + +/* i= 3 atan(3F930000...) */ 1.85525586258889763E-02, 1.39203477545012197E-19, +/* i= 3 atan(3F930000...) 3F92FF712238A4B8, 3C048AF56CEBE552, */ + +/* i= 4 atan(3F940000...) */ 1.95287670414137082E-02, -9.79999553454266918E-19, +/* i= 4 atan(3F940000...) 3F93FF595F18A700, BC3213EAC36CFB2C, */ + +/* i= 5 atan(3F950000...) */ 2.05049382324763683E-02, -8.40094761552091156E-19, +/* i= 5 atan(3F950000...) 
3F94FF3F1C75BEE7, BC2EFE787F0F4330, */ + +/* i= 6 atan(3F960000...) */ 2.14810703409090559E-02, -4.19450646799657488E-20, +/* i= 6 atan(3F960000...) 3F95FF223A639D5C, BBE8C28F1824574A, */ + +/* i= 7 atan(3F970000...) */ 2.24571615089905717E-02, -1.30959312135654387E-18, +/* i= 7 atan(3F970000...) 3F96FF0298F7EA3F, BC382860F0066622, */ + +/* i= 8 atan(3F980000...) */ 2.34332098794675855E-02, -1.09469246421805015E-18, +/* i= 8 atan(3F980000...) 3F97FEE0184A5C36, BC343189FC0A354B, */ + +/* i= 9 atan(3F990000...) */ 2.44092135955758099E-02, -1.47897509599299710E-18, +/* i= 9 atan(3F990000...) 3F98FEBA9874D084, BC3B48432E1BE204, */ + +/* i= 10 atan(3F9A0000...) */ 2.53851708010611396E-02, -1.34303200040391535E-18, +/* i= 10 atan(3F9A0000...) 3F99FE91F99362D6, BC38C64A0FD5DBE3, */ + +/* i= 11 atan(3F9B0000...) */ 2.63610796402007873E-02, 1.37267443271608158E-18, +/* i= 11 atan(3F9B0000...) 3F9AFE661BC4850F, 3C395245904A67C3, */ + +/* i= 12 atan(3F9C0000...) */ 2.73369382578244127E-02, -8.16108165671393861E-19, +/* i= 12 atan(3F9C0000...) 3F9BFE36DF291712, BC2E1BEC7756100E, */ + +/* i= 13 atan(3F9D0000...) */ 2.83127447993351995E-02, 8.59249306270865423E-19, +/* i= 13 atan(3F9D0000...) 3F9CFE0423E47E7D, 3C2FB36157FAFE79, */ + +/* i= 14 atan(3F9E0000...) */ 2.92884974107309737E-02, -7.76024364493026302E-19, +/* i= 14 atan(3F9E0000...) 3F9DFDCDCA1CBE70, BC2CA157C8222A15, */ + +/* i= 15 atan(3F9F0000...) */ 3.02641942386252458E-02, -1.66574467444210944E-18, +/* i= 15 atan(3F9F0000...) 3F9EFD93B1FA8F3E, BC3EBA41BEEDF844, */ + +/* i= 16 atan(3FA00000...) */ 3.12398334302682774E-02, -1.18844271158774798E-18, +/* i= 16 atan(3FA00000...) 3F9FFD55BBA97625, BC35EC431444912C, */ + +/* i= 17 atan(3FA10000...) */ 3.31909314971115949E-02, -9.42939153905567217E-19, +/* i= 17 atan(3FA10000...) 3FA0FE66DA9B94EE, BC3164E77D4EB175, */ + +/* i= 18 atan(3FA20000...) */ 3.51417768027967800E-02, 2.65885150818196357E-18, +/* i= 18 atan(3FA20000...) 
3FA1FE1A5C2EC497, 3C4886091E8FC4CB, */ + +/* i= 19 atan(3FA30000...) */ 3.70923545503918164E-02, -1.94050652720581784E-18, +/* i= 19 atan(3FA30000...) 3FA2FDC4E3737DDD, BC41E5E438D0BA04, */ + +/* i= 20 atan(3FA40000...) */ 3.90426499551669928E-02, 6.27126337421308897E-19, +/* i= 20 atan(3FA40000...) 3FA3FD65F169C9D9, 3C27230A716461B5, */ + +/* i= 21 atan(3FA50000...) */ 4.09926482452637811E-02, 2.47687641119150859E-18, +/* i= 21 atan(3FA50000...) 3FA4FCFD072DFF79, 3C46D85BEC38D078, */ + +/* i= 22 atan(3FA60000...) */ 4.29423346623621707E-02, 2.03095297887322147E-18, +/* i= 22 atan(3FA60000...) 3FA5FC89A5FA3B2D, 3C42BB73BF4E7F99, */ + +/* i= 23 atan(3FA70000...) */ 4.48916944623464972E-02, 2.31751818996581527E-19, +/* i= 23 atan(3FA70000...) 3FA6FC0B4F27D5BB, 3C1119AB07E9C009, */ + +/* i= 24 atan(3FA80000...) */ 4.68407129159696539E-02, -1.65567744225495210E-19, +/* i= 24 atan(3FA80000...) 3FA7FB818430DA2A, BC086EF8F794F105, */ + +/* i= 25 atan(3FA90000...) */ 4.87893753095156174E-02, 2.91348767453902927E-18, +/* i= 25 atan(3FA90000...) 3FA8FAEBC6B17ABA, 3C4ADF473CC8D797, */ + +/* i= 26 atan(3FAA0000...) */ 5.07376669454602178E-02, 2.07462271032410652E-18, +/* i= 26 atan(3FAA0000...) 3FA9FA49986984DF, 3C4322907AF0ABC2, */ + +/* i= 27 atan(3FAB0000...) */ 5.26855731431300420E-02, 2.86866232988833092E-18, +/* i= 27 atan(3FAB0000...) 3FAAF99A7B3DD42F, 3C4A756FFAAB786E, */ + +/* i= 28 atan(3FAC0000...) */ 5.46330792393594777E-02, -2.66980035901898370E-18, +/* i= 28 atan(3FAC0000...) 3FABF8DDF139C444, BC489FE34B2A7FA8, */ + +/* i= 29 atan(3FAD0000...) */ 5.65801705891457105E-02, 3.25489507698250449E-18, +/* i= 29 atan(3FAD0000...) 3FACF8137C90A177, 3C4E0567596F063F, */ + +/* i= 30 atan(3FAE0000...) */ 5.85268325663017702E-02, -2.48271181407783583E-19, +/* i= 30 atan(3FAE0000...) 3FADF73A9F9F1882, BC1251B5C410BCB4, */ + +/* i= 31 atan(3FAF0000...) */ 6.04730505641073168E-02, -5.66989890333967427E-19, +/* i= 31 atan(3FAF0000...) 
3FAEF652DCECA4DC, BC24EB116F8EA623, */ + +/* i= 32 atan(3FB00000...) */ 6.24188099959573500E-02, -1.54907563082950458E-18, +/* i= 32 atan(3FB00000...) 3FAFF55BB72CFDEA, BC3C934D86D23F1D, */ + +/* i= 33 atan(3FB10000...) */ 6.63088949198234884E-02, -4.88592398930400059E-19, +/* i= 33 atan(3FB10000...) 3FB0F99EA71D52A7, BC22069FEEC3624F, */ + +/* i= 34 atan(3FB20000...) */ 7.01969710718705203E-02, -1.79819216032204589E-18, +/* i= 34 atan(3FB20000...) 3FB1F86DBF082D59, BC4095DC7732EF81, */ + +/* i= 35 atan(3FB30000...) */ 7.40829225490337306E-02, 1.35448289530322996E-19, +/* i= 35 atan(3FB30000...) 3FB2F719318A4A9A, 3C03FD1779B9801F, */ + +/* i= 36 atan(3FB40000...) */ 7.79666338315423008E-02, 5.80455187314335664E-18, +/* i= 36 atan(3FB40000...) 3FB3F59F0E7C559D, 3C5AC4CE285DF847, */ + +/* i= 37 atan(3FB50000...) */ 8.18479898030765457E-02, 1.73846131383378367E-18, +/* i= 37 atan(3FB50000...) 3FB4F3FD677292FB, 3C4008D36264979E, */ + +/* i= 38 atan(3FB60000...) */ 8.57268757707448092E-02, 5.34719414350295085E-18, +/* i= 38 atan(3FB60000...) 3FB5F2324FD2D7B2, 3C58A8DA4401318E, */ + +/* i= 39 atan(3FB70000...) */ 8.96031774848717461E-02, -1.08082588355136405E-18, +/* i= 39 atan(3FB70000...) 3FB6F03BDCEA4B0D, BC33F00E512FA17D, */ + +/* i= 40 atan(3FB80000...) */ 9.34767811585894698E-02, -6.28447259954209545E-18, +/* i= 40 atan(3FB80000...) 3FB7EE182602F10F, BC5CFB654C0C3D98, */ + +/* i= 41 atan(3FB90000...) */ 9.73475734872236709E-02, 2.51506589544357698E-18, +/* i= 41 atan(3FB90000...) 3FB8EBC54478FB28, 3C4732880CAD24CC, */ + +/* i= 42 atan(3FBA0000...) */ 1.01215441667466668E-01, 5.68120255862341373E-18, +/* i= 42 atan(3FBA0000...) 3FB9E94153CFDCF1, 3C5A332E1D69C47E, */ + +/* i= 43 atan(3FBB0000...) */ 1.05080273416329528E-01, 3.03631931857741762E-18, +/* i= 43 atan(3FBB0000...) 3FBAE68A71C722B8, 3C4C014E6910B9DB, */ + +/* i= 44 atan(3FBC0000...) */ 1.08941956989865793E-01, 6.82671220724095851E-18, +/* i= 44 atan(3FBC0000...) 
3FBBE39EBE6F07C3, 3C5F7B8F29A05987, */ + +/* i= 45 atan(3FBD0000...) */ 1.12800381201659389E-01, 1.86724154759436245E-18, +/* i= 45 atan(3FBD0000...) 3FBCE07C5C3CCA32, 3C4138E6425918A7, */ + +/* i= 46 atan(3FBE0000...) */ 1.16655435441069349E-01, 5.48792581210869929E-18, +/* i= 46 atan(3FBE0000...) 3FBDDD21701EBA6E, 3C594EFFCD76FE58, */ + +/* i= 47 atan(3FBF0000...) */ 1.20507009691224562E-01, -5.32529096262256550E-19, +/* i= 47 atan(3FBF0000...) 3FBED98C2190043B, BC23A598592C7B13, */ + +/* i= 48 atan(3FC00000...) */ 1.24354994546761438E-01, -3.12532414245393831E-18, +/* i= 48 atan(3FC00000...) 3FBFD5BA9AAC2F6E, BC4CD37686760C17, */ + +/* i= 49 atan(3FC10000...) */ 1.32039761614638762E-01, -1.27692540070995953E-17, +/* i= 49 atan(3FC10000...) 3FC0E6ADCCF40882, BC6D71A31BB98D0D, */ + +/* i= 50 atan(3FC20000...) */ 1.39708874289163648E-01, -2.95798642473158131E-18, +/* i= 50 atan(3FC20000...) 3FC1E1FAFB043727, BC4B485914DACF8C, */ + +/* i= 51 atan(3FC30000...) */ 1.47361481088651630E-01, 5.40959914766629796E-18, +/* i= 51 atan(3FC30000...) 3FC2DCBDB2FBA1FF, 3C58F28705561534, */ + +/* i= 52 atan(3FC40000...) */ 1.54996741923940973E-01, 9.58541559411432383E-18, +/* i= 52 atan(3FC40000...) 3FC3D6EEE8C6626C, 3C661A3B0CE9281B, */ + +/* i= 53 atan(3FC50000...) */ 1.62613828597948568E-01, 7.78447064310625246E-18, +/* i= 53 atan(3FC50000...) 3FC4D087A9DA4F17, 3C61F323F1ADF158, */ + +/* i= 54 atan(3FC60000...) */ 1.70211925285474408E-01, -3.54116407980212514E-18, +/* i= 54 atan(3FC60000...) 3FC5C9811E3EC26A, BC5054AB2C010F3D, */ + +/* i= 55 atan(3FC70000...) */ 1.77790228992676075E-01, -4.02958210085442233E-18, +/* i= 55 atan(3FC70000...) 3FC6C1D4898933D9, BC52954A7603C427, */ + +/* i= 56 atan(3FC80000...) */ 1.85347949995694761E-01, 4.18069226884307898E-18, +/* i= 56 atan(3FC80000...) 3FC7B97B4BCE5B02, 3C5347B0B4F881CA, */ + +/* i= 57 atan(3FC90000...) */ 1.92884312257974672E-01, -7.41459017624724575E-18, +/* i= 57 atan(3FC90000...) 
3FC8B06EE2879C29, BC6118CD30308C4F, */ + +/* i= 58 atan(3FCA0000...) */ 2.00398553825878512E-01, 3.13995428718444929E-18, +/* i= 58 atan(3FCA0000...) 3FC9A6A8E96C8626, 3C4CF601E7B4348E, */ + +/* i= 59 atan(3FCB0000...) */ 2.07889927202262986E-01, 7.33316066652089850E-18, +/* i= 59 atan(3FCB0000...) 3FCA9C231B403279, 3C60E8BBE89CCA85, */ + +/* i= 60 atan(3FCC0000...) */ 2.15357699697738048E-01, 4.73816013007873192E-19, +/* i= 60 atan(3FCC0000...) 3FCB90D7529260A2, 3C217B10D2E0E5AA, */ + +/* i= 61 atan(3FCD0000...) */ 2.22801153759394521E-01, -5.49882217244684317E-18, +/* i= 61 atan(3FCD0000...) 3FCC84BF8A742E6E, BC595BDD0682EA26, */ + +/* i= 62 atan(3FCE0000...) */ 2.30219587276843718E-01, 1.23134045291427032E-17, +/* i= 62 atan(3FCE0000...) 3FCD77D5DF205736, 3C6C648D1534597E, */ + +/* i= 63 atan(3FCF0000...) */ 2.37612313865471242E-01, 1.05823143137111299E-17, +/* i= 63 atan(3FCF0000...) 3FCE6A148E96EC4D, 3C6866B22029F765, */ + +/* i= 64 atan(3FD00000...) */ 2.44978663126864143E-01, 1.06987556187344514E-17, +/* i= 64 atan(3FD00000...) 3FCF5B75F92C80DD, 3C68AB6E3CF7AFBD, */ + +/* i= 65 atan(3FD10000...) */ 2.59629629408257512E-01, 1.92387549246153041E-17, +/* i= 65 atan(3FD10000...) 3FD09DC597D86362, 3C762E47390CB865, */ + +/* i= 66 atan(3FD20000...) */ 2.74167451119658789E-01, 8.26135357516377194E-18, +/* i= 66 atan(3FD20000...) 3FD18BF5A30BF178, 3C630CA4748B1BF8, */ + +/* i= 67 atan(3FD30000...) */ 2.88587361894077410E-01, -1.42836995737725708E-17, +/* i= 67 atan(3FD30000...) 3FD278372057EF46, BC7077CDD36DFC81, */ + +/* i= 68 atan(3FD40000...) */ 3.02884868374971417E-01, -1.10108279030013690E-17, +/* i= 68 atan(3FD40000...) 3FD362773707EBCC, BC6963A544B672D8, */ + +/* i= 69 atan(3FD50000...) */ 3.17055753209147029E-01, -1.89392892429264215E-17, +/* i= 69 atan(3FD50000...) 3FD44AA436C2AF0A, BC75D5E43C55B3BA, */ + +/* i= 70 atan(3FD60000...) */ 3.31096076704132103E-01, -7.95261037579379870E-18, +/* i= 70 atan(3FD60000...) 
3FD530AD9951CD4A, BC62566480884082, */ + +/* i= 71 atan(3FD70000...) */ 3.45002177207105132E-01, -2.29388047555783039E-17, +/* i= 71 atan(3FD70000...) 3FD614840309CFE2, BC7A725715711F00, */ + +/* i= 72 atan(3FD80000...) */ 3.58770670270572245E-01, -2.46238155826386349E-17, +/* i= 72 atan(3FD80000...) 3FD6F61941E4DEF1, BC7C63AAE6F6E918, */ + +/* i= 73 atan(3FD90000...) */ 3.72398446676754202E-01, 1.96123115048456534E-17, +/* i= 73 atan(3FD90000...) 3FD7D5604B63B3F7, 3C769C885C2B249A, */ + +/* i= 74 atan(3FDA0000...) */ 3.85882669398073752E-01, 2.37882273249194087E-17, +/* i= 74 atan(3FDA0000...) 3FD8B24D394A1B25, 3C7B6D0BA3748FA8, */ + +/* i= 75 atan(3FDB0000...) */ 3.99220769575252543E-01, 2.24659810561704206E-17, +/* i= 75 atan(3FDB0000...) 3FD98CD5454D6B18, 3C79E6C988FD0A77, */ + +/* i= 76 atan(3FDC0000...) */ 4.12410441597387323E-01, -1.58765222777068909E-17, +/* i= 76 atan(3FDC0000...) 3FDA64EEC3CC23FD, BC724DEC1B50B7FF, */ + +/* i= 77 atan(3FDD0000...) */ 4.25449637370042266E-01, 2.33155307418928847E-17, +/* i= 77 atan(3FDD0000...) 3FDB3A911DA65C6C, 3C7AE187B1CA5040, */ + +/* i= 78 atan(3FDE0000...) */ 4.38336559857957830E-01, -2.49427703062654091E-17, +/* i= 78 atan(3FDE0000...) 3FDC0DB4C94EC9F0, BC7CC1CE70934C34, */ + +/* i= 79 atan(3FDF0000...) */ 4.51069655988523499E-01, -2.27037952294204745E-17, +/* i= 79 atan(3FDF0000...) 3FDCDE53432C1351, BC7A2CFA4418F1AD, */ + +/* i= 80 atan(3FE00000...) */ 4.63647609000806094E-01, 2.26987774529616871E-17, +/* i= 80 atan(3FE00000...) 3FDDAC670561BB4F, 3C7A2B7F222F65E2, */ + +/* i= 81 atan(3FE10000...) */ 4.88333951056405535E-01, -1.13732361893295846E-17, +/* i= 81 atan(3FE10000...) 3FDF40DD0B541418, BC6A3992DC382A23, */ + +/* i= 82 atan(3FE20000...) */ 5.12389460310737732E-01, -2.54627814728558035E-17, +/* i= 82 atan(3FE20000...) 3FE0657E94DB30D0, BC7D5B495F6349E6, */ + +/* i= 83 atan(3FE30000...) */ 5.35811237960463704E-01, -4.06379568348255750E-18, +/* i= 83 atan(3FE30000...) 
3FE1255D9BFBD2A9, BC52BDAEE1C0EE35, */ + +/* i= 84 atan(3FE40000...) */ 5.58599315343562441E-01, -5.45563054859162639E-18, +/* i= 84 atan(3FE40000...) 3FE1E00BABDEFEB4, BC5928DF287A668F, */ + +/* i= 85 atan(3FE50000...) */ 5.80756353567670414E-01, -1.44146437819306691E-17, +/* i= 85 atan(3FE50000...) 3FE2958E59308E31, BC709E73B0C6C087, */ + +/* i= 86 atan(3FE60000...) */ 6.02287346134964152E-01, 2.95043073722840231E-17, +/* i= 86 atan(3FE60000...) 3FE345F01CCE37BB, 3C81021137C71102, */ + +/* i= 87 atan(3FE70000...) */ 6.23199329934065904E-01, 2.67240388514009508E-17, +/* i= 87 atan(3FE70000...) 3FE3F13FB89E96F4, 3C7ECF8B492644F0, */ + +/* i= 88 atan(3FE80000...) */ 6.43501108793284371E-01, 1.58347850514442862E-17, +/* i= 88 atan(3FE80000...) 3FE4978FA3269EE1, 3C72419A87F2A458, */ + +/* i= 89 atan(3FE90000...) */ 6.63202992706093286E-01, -3.07605486442964900E-17, +/* i= 89 atan(3FE90000...) 3FE538F57B89061F, BC81BB74ABDA520C, */ + +/* i= 90 atan(3FEA0000...) */ 6.82316554874748071E-01, 6.94322367156000774E-18, +/* i= 90 atan(3FEA0000...) 3FE5D58987169B18, 3C60028E4BC5E7CA, */ + +/* i= 91 atan(3FEB0000...) */ 7.00854407884450192E-01, -1.98762623433581612E-17, +/* i= 91 atan(3FEB0000...) 3FE66D663923E087, BC76EA6FEBE8BBBA, */ + +/* i= 92 atan(3FEC0000...) */ 7.18829999621624527E-01, -2.14783884444569830E-17, +/* i= 92 atan(3FEC0000...) 3FE700A7C5784634, BC78C34D25AADEF6, */ + +/* i= 93 atan(3FED0000...) */ 7.36257428981428097E-01, 3.47393764829945672E-17, +/* i= 93 atan(3FED0000...) 3FE78F6BBD5D315E, 3C8406A089803740, */ + +/* i= 94 atan(3FEE0000...) */ 7.53151280962194414E-01, -2.42569346591820681E-17, +/* i= 94 atan(3FEE0000...) 3FE819D0B7158A4D, BC7BF76229D3B917, */ + +/* i= 95 atan(3FEF0000...) */ 7.69526480405658297E-01, -3.70499190560272129E-17, +/* i= 95 atan(3FEF0000...) 3FE89FF5FF57F1F8, BC855B9A5E177A1B, */ + +/* i= 96 atan(3FF00000...) */ 7.85398163397448279E-01, 3.06161699786838302E-17, +/* i= 96 atan(3FF00000...) 
3FE921FB54442D18, 3C81A62633145C07, */ + +/* i= 97 atan(3FF10000...) */ 8.15691923316223422E-01, -1.07145656277874308E-17, +/* i= 97 atan(3FF10000...) 3FEA1A25F2C82506, BC68B4C3611182FC, */ + +/* i= 98 atan(3FF20000...) */ 8.44153986113171051E-01, -4.84133701193491676E-17, +/* i= 98 atan(3FF20000...) 3FEB034F38649C88, BC8BE88D6936F833, */ + +/* i= 99 atan(3FF30000...) */ 8.70903457075652976E-01, -2.26982359074728705E-17, +/* i= 99 atan(3FF30000...) 3FEBDE70ED439FE7, BC7A2B56372C05EF, */ + +/* i= 100 atan(3FF40000...) */ 8.96055384571343927E-01, 2.92387628577430489E-17, +/* i= 100 atan(3FF40000...) 3FECAC7C57846F9E, 3C80DAE13AD18A6B, */ + +/* i= 101 atan(3FF50000...) */ 9.19719605350416858E-01, -4.05743941285276792E-17, +/* i= 101 atan(3FF50000...) 3FED6E57CF4F0ACA, BC8763B9456AE66E, */ + +/* i= 102 atan(3FF60000...) */ 9.42000040379463610E-01, 5.46083748584668763E-17, +/* i= 102 atan(3FF60000...) 3FEE24DD44C855D1, 3C8F7AC612AB33D8, */ + +/* i= 103 atan(3FF70000...) */ 9.62994330680936206E-01, -3.98666059521075245E-18, +/* i= 103 atan(3FF70000...) 3FEED0D97C9041C9, BC52629E3B5DA490, */ + +/* i= 104 atan(3FF80000...) */ 9.82793723247329054E-01, 1.39033110312309985E-17, +/* i= 104 atan(3FF80000...) 3FEF730BD281F69B, 3C7007887AF0CBBD, */ + +/* i= 105 atan(3FF90000...) */ 1.00148313569423464E+00, 9.43830802354539200E-17, +/* i= 105 atan(3FF90000...) 3FF006132E34D617, 3C9B343DFA868D93, */ + +/* i= 106 atan(3FFA0000...) */ 1.01914134426634972E+00, 1.00040188693667989E-17, +/* i= 106 atan(3FFA0000...) 3FF04E67277A01D7, 3C67115496C13EB6, */ + +/* i= 107 atan(3FFB0000...) */ 1.03584125300880014E+00, 3.19431398178450371E-17, +/* i= 107 atan(3FFB0000...) 3FF092CE471853CC, 3C8269F9B3E200C2, */ + +/* i= 108 atan(3FFC0000...) */ 1.05165021254837376E+00, -9.65056473146751351E-17, +/* i= 108 atan(3FFC0000...) 3FF0D38F2C5BA09F, BC9BD0DC231BFD70, */ + +/* i= 109 atan(3FFD0000...) */ 1.06663036531574362E+00, -5.95658963716037456E-17, +/* i= 109 atan(3FFD0000...) 
3FF110EB007F39F7, BC912B2FF85E5500, */ + +/* i= 110 atan(3FFE0000...) */ 1.08083900054116833E+00, -1.56763225113590725E-17, +/* i= 110 atan(3FFE0000...) 3FF14B1DD5F90CE1, BC7212D570A63FA2, */ + +/* i= 111 atan(3FFF0000...) */ 1.09432890732118993E+00, -5.49067615502236423E-18, +/* i= 111 atan(3FFF0000...) 3FF1825F074030D9, BC59523F0AF0D3B5, */ + +/* i= 112 atan(40000000...) */ 1.10714871779409041E+00, 9.40447137356637941E-17, +/* i= 112 atan(40000000...) 3FF1B6E192EBBE44, 3C9B1B466A88828E, */ + +/* i= 113 atan(40010000...) */ 1.13095374397916038E+00, 7.12383380453844630E-17, +/* i= 113 atan(40010000...) 3FF21862F3FADE36, 3C94887628D68748, */ + +/* i= 114 atan(40020000...) */ 1.15257199721566761E+00, -9.15973850890037882E-17, +/* i= 114 atan(40020000...) 3FF270EF55A53A25, BC9A66B1AF5F84FB, */ + +/* i= 115 atan(40030000...) */ 1.17227388112847630E+00, 8.38518861402867437E-17, +/* i= 115 atan(40030000...) 3FF2C1A241D66DC3, 3C982B2D58B6A8E9, */ + +/* i= 116 atan(40040000...) */ 1.19028994968253166E+00, 7.68333362984206881E-17, +/* i= 116 atan(40040000...) 3FF30B6D796A4DA8, 3C96254CB03BB199, */ + +/* i= 117 atan(40050000...) */ 1.20681737028525249E+00, 4.17246763886143912E-17, +/* i= 117 atan(40050000...) 3FF34F1FBB19EB09, 3C880D79B4CF61D5, */ + +/* i= 118 atan(40060000...) */ 1.22202532321098967E+00, -2.97916286489284927E-17, +/* i= 118 atan(40060000...) 3FF38D6A6CE13353, BC812C77E8A80F5C, */ + +/* i= 119 atan(40070000...) */ 1.23605948947808186E+00, 7.87975273945942128E-17, +/* i= 119 atan(40070000...) 3FF3C6E650B38047, 3C96B63B358E746D, */ + +/* i= 120 atan(40080000...) */ 1.24904577239825443E+00, -2.19620379961231129E-18, +/* i= 120 atan(40080000...) 3FF3FC176B7A8560, BC4441A3BD3F1084, */ + +/* i= 121 atan(40090000...) */ 1.26109338225244039E+00, 3.24213962153496050E-17, +/* i= 121 atan(40090000...) 3FF42D70411F9EC1, 3C82B08DB7F10896, */ + +/* i= 122 atan(400A0000...) */ 1.27229739520871732E+00, 2.24587501503450703E-17, +/* i= 122 atan(400A0000...) 
3FF45B54837351A0, 3C79E4A72EEDACC4, */ + +/* i= 123 atan(400B0000...) */ 1.28274087974427076E+00, -9.28318875426612948E-18, +/* i= 123 atan(400B0000...) 3FF4861B4CFBE710, BC6567D3D25932D1, */ + +/* i= 124 atan(400C0000...) */ 1.29249666778978534E+00, -6.83080476892666033E-17, +/* i= 124 atan(400C0000...) 3FF4AE10FC6589A5, BC93B03E8A27F555, */ + +/* i= 125 atan(400D0000...) */ 1.30162883400919616E+00, -1.23691849982462667E-17, +/* i= 125 atan(400D0000...) 3FF4D378C1999A0D, BC6C857A639541C8, */ + +/* i= 126 atan(400E0000...) */ 1.31019393504755555E+00, 8.74541373478027883E-17, +/* i= 126 atan(400E0000...) 3FF4F68DEA672617, 3C9934F9F2B0020E, */ + +/* i= 127 atan(400F0000...) */ 1.31824205101683711E+00, -6.31939403114467626E-17, +/* i= 127 atan(400F0000...) 3FF51784FA1544BA, BC9236E3C857C019, */ + +/* i= 128 atan(40100000...) */ 1.32581766366803255E+00, -8.82442937395113632E-17, +/* i= 128 atan(40100000...) 3FF5368C951E9CFD, BC996F47948A99F1, */ + +/* i= 129 atan(40110000...) */ 1.33970565959899957E+00, -2.59901186030413438E-17, +/* i= 129 atan(40110000...) 3FF56F6F33A3E6A7, BC7DF6EDD6F1EC3B, */ + +/* i= 130 atan(40120000...) */ 1.35212738092095464E+00, 2.14767425075115096E-17, +/* i= 130 atan(40120000...) 3FF5A25052114E60, 3C78C2D0C89DE218, */ + +/* i= 131 atan(40130000...) */ 1.36330010035969384E+00, 1.09324617152693622E-16, +/* i= 131 atan(40130000...) 3FF5D013C41ADABD, 3C9F82BBA194DD5D, */ + +/* i= 132 atan(40140000...) */ 1.37340076694501589E+00, -3.30771035576951650E-17, +/* i= 132 atan(40140000...) 3FF5F97315254857, BC831151A43B51CA, */ + +/* i= 133 atan(40150000...) */ 1.38257482149012589E+00, -3.56149043864823010E-17, +/* i= 133 atan(40150000...) 3FF61F06C6A92B89, BC8487D50BCEB1A5, */ + +/* i= 134 atan(40160000...) */ 1.39094282700241845E+00, -9.84371213348884259E-17, +/* i= 134 atan(40160000...) 3FF6414D44094C7C, BC9C5F60A65C7397, */ + +/* i= 135 atan(40170000...) */ 1.39860551227195762E+00, -2.32406118259162798E-17, +/* i= 135 atan(40170000...) 
3FF660B02C736A06, BC7ACB6AFB332A0F, */ + +/* i= 136 atan(40180000...) */ 1.40564764938026987E+00, -8.92263013823449239E-17, +/* i= 136 atan(40180000...) 3FF67D8863BC99BD, BC99B7BD2E1E8C9C, */ + +/* i= 137 atan(40190000...) */ 1.41214106460849531E+00, -9.57380711055722328E-17, +/* i= 137 atan(40190000...) 3FF698213A9D5053, BC9B9839085189E3, */ + +/* i= 138 atan(401A0000...) */ 1.41814699839963154E+00, -8.26388378251101363E-17, +/* i= 138 atan(401A0000...) 3FF6B0BAE830C070, BC97D1AB82FFB70B, */ + +/* i= 139 atan(401B0000...) */ 1.42371797140649403E+00, 8.72187092222396751E-17, +/* i= 139 atan(401B0000...) 3FF6C78C7EDEB195, 3C99239AD620FFE2, */ + +/* i= 140 atan(401C0000...) */ 1.42889927219073276E+00, -6.45713474323875439E-17, +/* i= 140 atan(401C0000...) 3FF6DCC57BB565FD, BC929C86447928E7, */ + +/* i= 141 atan(401D0000...) */ 1.43373015248470903E+00, -4.39620446676763619E-17, +/* i= 141 atan(401D0000...) 3FF6F08F07435FEC, BC8957A7170DF016, */ + +/* i= 142 atan(401E0000...) */ 1.43824479449822262E+00, -2.49301991026456555E-17, +/* i= 142 atan(401E0000...) 3FF7030CF9403197, BC7CBE1896221608, */ + +/* i= 143 atan(401F0000...) */ 1.44247309910910193E+00, -1.10511943543031571E-16, +/* i= 143 atan(401F0000...) 3FF7145EAC2088A4, BC9FDA5797B32A0B, */ + +/* i= 144 atan(40200000...) */ 1.44644133224813509E+00, 9.21132397154505156E-17, +/* i= 144 atan(40200000...) 3FF7249FAA996A21, 3C9A8CC1E7480C68, */ + +/* i= 145 atan(40210000...) */ 1.45368758222803240E+00, -6.81876925015134676E-17, +/* i= 145 atan(40210000...) 3FF7424DE90454D4, BC93A75D182E1A5F, */ + +/* i= 146 atan(40220000...) */ 1.46013910562100091E+00, 6.26097470783084416E-17, +/* i= 146 atan(40220000...) 3FF75CBAD2A40BD5, 3C920BC8AF35C4D5, */ + +/* i= 147 atan(40230000...) */ 1.46591938806466282E+00, -9.71125555407483218E-17, +/* i= 147 atan(40230000...) 3FF77467E364F601, BC9BFDA44F3537B8, */ + +/* i= 148 atan(40240000...) */ 1.47112767430373470E+00, -1.08492227620614239E-16, +/* i= 148 atan(40240000...) 
3FF789BD2C160054, BC9F45503CCAD255, */ + +/* i= 149 atan(40250000...) */ 1.47584462045214027E+00, 3.38755967276631476E-17, +/* i= 149 atan(40250000...) 3FF79D0F3FAD1C92, 3C838727DC4FB7D1, */ + +/* i= 150 atan(40260000...) */ 1.48013643959415142E+00, 8.50262547607966975E-17, +/* i= 150 atan(40260000...) 3FF7AEA38C1ACBD1, 3C9881D48AE6DE92, */ + +/* i= 151 atan(40270000...) */ 1.48405798811891154E+00, -3.44545106786359401E-17, +/* i= 151 atan(40270000...) 3FF7BEB396C5699A, BC83DC969C7E2365, */ + +/* i= 152 atan(40280000...) */ 1.48765509490645531E+00, 7.84437173946107664E-17, +/* i= 152 atan(40280000...) 3FF7CD6F6DC59DB4, 3C969C1FED612CFC, */ + +/* i= 153 atan(40290000...) */ 1.49096634108265924E+00, 6.22143476002012210E-17, +/* i= 153 atan(40290000...) 3FF7DAFF85A63058, 3C91EE9BCCA84EB2, */ + +/* i= 154 atan(402A0000...) */ 1.49402443552511865E+00, -7.47641750277645943E-17, +/* i= 154 atan(402A0000...) 3FF7E7862AA0157C, BC958C9F564B028C, */ + +/* i= 155 atan(402B0000...) */ 1.49685728913695626E+00, 1.69600762125511713E-17, +/* i= 155 atan(402B0000...) 3FF7F320A0F9F587, 3C738DBB20936502, */ + +/* i= 156 atan(402C0000...) */ 1.49948886200960629E+00, -8.69233960451104982E-19, +/* i= 156 atan(402C0000...) 3FF7FDE80870C2A0, BC3008D760C989AB, */ + +/* i= 157 atan(402D0000...) */ 1.50193983749385196E+00, 6.06189958407581368E-17, +/* i= 157 atan(402D0000...) 3FF807F2112987C7, 3C9178E474EC8C66, */ + +/* i= 158 atan(402E0000...) */ 1.50422816301907281E+00, 9.13778153422684716E-18, +/* i= 158 atan(402E0000...) 3FF811518CDE39A6, 3C6511FE80FBB230, */ + +/* i= 159 atan(402F0000...) */ 1.50636948736934317E+00, -1.05533910133197090E-16, +/* i= 159 atan(402F0000...) 3FF81A16E43F190B, BC9E6B0733383AD4, */ + +/* i= 160 atan(40300000...) */ 1.50837751679893928E+00, -6.60752345087512057E-18, +/* i= 160 atan(40300000...) 3FF82250768AC529, BC5E78C96D05AFCB, */ + +/* i= 161 atan(40310000...) */ 1.51204050407917401E+00, -8.17827248696306499E-17, +/* i= 161 atan(40310000...) 
3FF831516233F561, BC97927FFEC5F9DC, */ + +/* i= 162 atan(40320000...) */ 1.51529782154917969E+00, 9.27265838320600392E-17, +/* i= 162 atan(40320000...) 3FF83EA8EDB40F72, 3C9ABA03A56FDC09, */ + +/* i= 163 atan(40330000...) */ 1.51821326518395483E+00, 7.14053211560016173E-17, +/* i= 163 atan(40330000...) 3FF84A99FE25186B, 3C9494C8619D0BBC, */ + +/* i= 164 atan(40340000...) */ 1.52083793107295384E+00, 1.64275464789776791E-17, +/* i= 164 atan(40340000...) 3FF8555A2787981F, 3C72F08E51763131, */ + +/* i= 165 atan(40350000...) */ 1.52321322351791322E+00, 6.06514977555146142E-18, +/* i= 165 atan(40350000...) 3FF85F14D43D81BE, 3C5BF8770A76AFAF, */ + +/* i= 166 atan(40360000...) */ 1.52537304737331958E+00, 2.48298338570039438E-17, +/* i= 166 atan(40360000...) 3FF867ED918AB138, 3C7CA07933F18E43, */ + +/* i= 167 atan(40370000...) */ 1.52734543140336587E+00, -9.47004210780093541E-17, +/* i= 167 atan(40370000...) 3FF87001C35928D4, BC9B4BA860ADA728, */ + +/* i= 168 atan(40380000...) */ 1.52915374769630819E+00, 9.96025861033048094E-18, +/* i= 168 atan(40380000...) 3FF87769EB8E956B, 3C66F77FB9BAEBA6, */ + +/* i= 169 atan(40390000...) */ 1.53081763967160667E+00, -8.91334763349872231E-17, +/* i= 169 atan(40390000...) 3FF87E3AA32878AE, BC99B0E3C3BBC6CF, */ + +/* i= 170 atan(403A0000...) */ 1.53235373677370856E+00, 7.35876234111923764E-17, +/* i= 170 atan(403A0000...) 3FF884855A158B25, 3C9535CEE7C891BB, */ + +/* i= 171 atan(403B0000...) */ 1.53377621092096650E+00, 9.37735480657284383E-17, +/* i= 171 atan(403B0000...) 3FF88A58EC949D14, 3C9B07443DD06AD8, */ + +/* i= 172 atan(403C0000...) */ 1.53509721411557254E+00, 1.10616555458501787E-16, +/* i= 172 atan(403C0000...) 3FF88FC218ACE9DB, 3C9FE20FA7E1E941, */ + +/* i= 173 atan(403D0000...) */ 1.53632722579538861E+00, -1.73373217093894906E-18, +/* i= 173 atan(403D0000...) 3FF894CBDB6BEDFC, BC3FFB5195F35C00, */ + +/* i= 174 atan(403E0000...) */ 1.53747533091664934E+00, 8.11685860076124202E-17, +/* i= 174 atan(403E0000...) 
3FF8997FBB8B19C0, 3C97652F3D7700A3, */ + +/* i= 175 atan(403F0000...) */ 1.53854944435964280E+00, -1.04663067143013889E-16, +/* i= 175 atan(403F0000...) 3FF89DE605ACDBB3, BC9E2AC570EAC042, */ + +/* i= 176 atan(40400000...) */ 1.53955649336462841E+00, -6.59487545533283128E-17, +/* i= 176 atan(40400000...) 3FF8A205FD558740, BC930228C09A91B4, */ + +/* i= 177 atan(40410000...) */ 1.54139303859089161E+00, -1.02574621979876286E-16, +/* i= 177 atan(40410000...) 3FF8A98BBF307AA8, BC9D90ABD3CB737A, */ + +/* i= 178 atan(40420000...) */ 1.54302569020147562E+00, -3.65410017872781400E-17, +/* i= 178 atan(40420000...) 3FF8B03BB4C4D9C4, BC851080044823F8, */ + +/* i= 179 atan(40430000...) */ 1.54448660954197448E+00, -4.84886962896552125E-17, +/* i= 179 atan(40430000...) 3FF8B63797517BB5, BC8BF3AB273B6CE0, */ + +/* i= 180 atan(40440000...) */ 1.54580153317597646E+00, -1.28017749694693433E-18, +/* i= 180 atan(40440000...) 3FF8BB9A63718F45, BC379D77A1373742, */ + +/* i= 181 atan(40450000...) */ 1.54699130060982659E+00, 8.40387156476469915E-17, +/* i= 181 atan(40450000...) 3FF8C079F3350D26, 3C9838F674C6574D, */ + +/* i= 182 atan(40460000...) */ 1.54807296595325550E+00, 5.63378094641568198E-17, +/* i= 182 atan(40460000...) 3FF8C4E82889748C, 3C903CFF21ED4F81, */ + +/* i= 183 atan(40470000...) */ 1.54906061995310385E+00, 1.07720671947039880E-16, +/* i= 183 atan(40470000...) 3FF8C8F3C9E38564, 3C9F0C61F67DF753, */ + +/* i= 184 atan(40480000...) */ 1.54996600675867957E+00, -3.65867202631610758E-17, +/* i= 184 atan(40480000...) 3FF8CCA927CF0B3D, BC85173F363FCD3B, */ + +/* i= 185 atan(40490000...) */ 1.55079899282174605E+00, 3.88158322748794045E-17, +/* i= 185 atan(40490000...) 3FF8D0129ACD6D1C, 3C866034AEC68494, */ + +/* i= 186 atan(404A0000...) */ 1.55156792769518947E+00, -6.25939220821526366E-17, +/* i= 186 atan(404A0000...) 3FF8D338E42F92C4, BC920A9DC23967F4, */ + +/* i= 187 atan(404B0000...) */ 1.55227992472688747E+00, 1.03058038268892371E-16, +/* i= 187 atan(404B0000...) 
3FF8D623796F0778, 3C9DB4574D874450, */ + +/* i= 188 atan(404C0000...) */ 1.55294108165534417E+00, -6.37987893547135838E-17, +/* i= 188 atan(404C0000...) 3FF8D8D8BF65316F, BC9263850ED82243, */ + +/* i= 189 atan(404D0000...) */ 1.55355665560036682E+00, 1.03636378617620221E-16, +/* i= 189 atan(404D0000...) 3FF8DB5E3944965E, 3C9DDF03D7D94A94, */ + +/* i= 190 atan(404E0000...) */ 1.55413120308095598E+00, -1.10032784474653953E-16, +/* i= 190 atan(404E0000...) 3FF8DDB8AE2ED03E, BC9FB6FC889F3B9F, */ + +/* i= 191 atan(404F0000...) */ 1.55466869295126031E+00, 7.12642375326129392E-17, +/* i= 191 atan(404F0000...) 3FF8DFEC478573A0, 3C948A5F6312C3FA, */ + +/* i= 192 atan(40500000...) */ 1.55517259817441977E+00, 1.48861661196504977E-17, +/* i= 192 atan(40500000...) 3FF8E1FCA98CB633, 3C71299EE93BE016, */ + +}; diff --git a/usr/src/lib/libmvec/common/__vTBL_atan2.c b/usr/src/lib/libmvec/common/__vTBL_atan2.c new file mode 100644 index 0000000000..caf915a292 --- /dev/null +++ b/usr/src/lib/libmvec/common/__vTBL_atan2.c @@ -0,0 +1,356 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. 
All rights reserved. + * Use is subject to license terms. + */ + +#include "libm_inlines.h" + +const double __vlibm_TBL_atan2[] = { + 7.8539816339744827900e-01, 3.0616169978683830179e-17, + 1.0000000000000000000e+00, 0, + 7.7198905126506112140e-01, 2.6989956960083153652e-16, + 9.7353506088256835938e-01, 0, + 7.6068143954461309164e-01, -3.5178810518941914972e-16, + 9.5174932479858398438e-01, 0, + 7.4953661876353638860e-01, -3.2548100004524337476e-16, + 9.3073129653930664062e-01, 0, + 7.3854614984728339522e-01, -2.0775571023910406668e-16, + 9.1042709350585937500e-01, 0, + 7.2770146962041337702e-01, 3.8883249403168348802e-16, + 8.9078664779663085938e-01, 0, + 7.1699492488093774512e-01, -4.0468841511547224071e-16, + 8.7176513671875000000e-01, 0, + 7.0641813488653149022e-01, 5.6902424353981484031e-17, + 8.5331964492797851562e-01, 0, + 6.9596351101035658360e-01, 2.8245513321075021303e-16, + 8.3541154861450195312e-01, 0, + 6.8562363680534943455e-01, -4.2316970721658854064e-16, + 8.1800508499145507812e-01, 0, + 6.7539055666438230219e-01, 4.3535917281300047233e-16, + 8.0106592178344726562e-01, 0, + 6.6525763346931832132e-01, 1.1830431602404727977e-17, + 7.8456401824951171875e-01, 0, + 6.5521767574310185722e-01, -1.7435923100651044208e-16, + 7.6847028732299804688e-01, 0, + 6.4526390999481897381e-01, -1.4741927403093983947e-16, + 7.5275802612304687500e-01, 0, + 6.3538979894204850041e-01, 1.5734535069995660853e-16, + 7.3740243911743164062e-01, 0, + 6.2558914346942717799e-01, -2.8175588856316910960e-16, + 7.2238063812255859375e-01, 0, + 6.1585586476157949676e-01, -4.3056167357725226449e-16, + 7.0767116546630859375e-01, 0, + 6.0618408027576098362e-01, 1.5018013918429320289e-16, + 6.9325399398803710938e-01, 0, + 5.9656817827486730010e-01, 5.5271942033557644157e-17, + 6.7911052703857421875e-01, 0, + 5.8700289083426504533e-01, -8.2411369282676383293e-17, + 6.6522359848022460938e-01, 0, + 5.7748303053627658699e-01, 4.9400383775709159558e-17, + 6.5157699584960937500e-01, 0, + 
5.6800353968303252117e-01, 2.9924431103311109543e-16, + 6.3815546035766601562e-01, 0, + 5.5855953863493823519e-01, -2.0306003403868777403e-16, + 6.2494468688964843750e-01, 0, + 5.4914706708329674711e-01, 2.8255378613779667461e-17, + 6.1193227767944335938e-01, 0, + 5.3976176660618069292e-01, 1.6370248781078747995e-16, + 5.9910583496093750000e-01, 0, + 5.3039888601412332747e-01, -7.6196097360093680134e-17, + 5.8645296096801757812e-01, 0, + 5.2105543924318808990e-01, -2.2400815668154739561e-16, + 5.7396411895751953125e-01, 0, + 5.1172778873967050828e-01, -3.6888136019899681185e-16, + 5.6162929534912109375e-01, 0, + 5.0241199666452196482e-01, -2.5412891474397011281e-16, + 5.4943847656250000000e-01, 0, + 4.9310493954293743712e-01, 4.4132186128251152229e-16, + 5.3738307952880859375e-01, 0, + 4.8380436844750995817e-01, -2.7844387907776656488e-16, + 5.2545595169067382812e-01, 0, + 4.7450670361463753721e-01, -2.0494355197368286028e-16, + 5.1364850997924804688e-01, 0, + 4.6367660027976320691e-01, 3.1709878607954760668e-16, + 5.0003623962402343750e-01, 0, + 4.5304753104003925301e-01, 3.3593436122420574865e-16, + 4.8681926727294921875e-01, 0, + 4.4423658037407065535e-01, 2.1987183192008082015e-17, + 4.7596645355224609375e-01, 0, + 4.3567016972500294258e-01, 3.0118422805369552650e-16, + 4.6550178527832031250e-01, 0, + 4.2733152672544871820e-01, -3.2667693224866479909e-16, + 4.5539522171020507812e-01, 0, + 4.1920540176693954493e-01, -2.2454273841113897647e-16, + 4.4561982154846191406e-01, 0, + 4.1127722812701872357e-01, -3.1620568973494653391e-16, + 4.3615055084228515625e-01, 0, + 4.0353384063084263289e-01, -3.5932009901481421723e-16, + 4.2696499824523925781e-01, 0, + 3.9596319345246833166e-01, -4.0281533417458698585e-16, + 4.1804289817810058594e-01, 0, + 3.8855405220339722661e-01, 1.6132231486045176674e-16, + 4.0936565399169921875e-01, 0, + 3.8129566313738116889e-01, 1.7684657060650804570e-16, + 4.0091586112976074219e-01, 0, + 3.7417884791401867517e-01, 
2.6897604227426977619e-16, + 3.9267849922180175781e-01, 0, + 3.6719421967585041955e-01, -4.5886151448673745001e-17, + 3.8463878631591796875e-01, 0, + 3.6033388248727771241e-01, 1.5804115573136074946e-16, + 3.7678408622741699219e-01, 0, + 3.5358982224579182940e-01, 1.2624619863035782939e-16, + 3.6910200119018554688e-01, 0, + 3.4695498404186952968e-01, 9.3221684607372865177e-17, + 3.6158156394958496094e-01, 0, + 3.4042268308109679964e-01, 2.7697913559445449137e-16, + 3.5421252250671386719e-01, 0, + 3.3398684598563566084e-01, 3.6085337449716011085e-16, + 3.4698557853698730469e-01, 0, + 3.2764182824591436827e-01, 2.0581506352606456186e-16, + 3.3989214897155761719e-01, 0, + 3.2138200938788497041e-01, -1.9015787485430693661e-16, + 3.3292388916015625000e-01, 0, + 3.1520245348069497737e-01, 2.6961839659264087022e-16, + 3.2607340812683105469e-01, 0, + 3.0909871873117023000e-01, -1.5641891686756272625e-16, + 3.1933403015136718750e-01, 0, + 3.0306644308947827682e-01, 2.8801634211591956223e-16, + 3.1269931793212890625e-01, 0, + 2.9710135482774191473e-01, -4.3148994478973365819e-16, + 3.0616307258605957031e-01, 0, + 2.9120015759141004708e-01, -6.8539854790808585159e-17, + 2.9972028732299804688e-01, 0, + 2.8535879880370362827e-01, -1.2231638445300492682e-16, + 2.9336524009704589844e-01, 0, + 2.7957422506893880865e-01, -4.6707752931043135528e-17, + 2.8709340095520019531e-01, 0, + 2.7384352102802367313e-01, -4.1215636366229625876e-16, + 2.8090047836303710938e-01, 0, + 2.6816369484161040049e-01, -2.3700583122400495333e-16, + 2.7478218078613281250e-01, 0, + 2.6253212627627764419e-01, 2.3123213692190889610e-16, + 2.6873469352722167969e-01, 0, + 2.5694635355759309903e-01, -4.0638513814701264145e-16, + 2.6275444030761718750e-01, 0, + 2.5140385572454615470e-01, -3.4795333793554943723e-16, + 2.5683784484863281250e-01, 0, + 2.4500357070096612233e-01, 6.6542334848010259289e-17, + 2.5002646446228027344e-01, 0, + 2.3877766609573036760e-01, -2.7756633678549343650e-16, + 
2.4342155456542968750e-01, 0, + 2.3365669377188336142e-01, 3.2700803838522067998e-16, + 2.3800384998321533203e-01, 0, + 2.2870810463931334766e-01, -4.4279127662219799521e-16, + 2.3278105258941650391e-01, 0, + 2.2391820542294382790e-01, 3.7558889374284208052e-16, + 2.2773718833923339844e-01, 0, + 2.1927501815429550902e-01, -1.4829838176513811186e-16, + 2.2285830974578857422e-01, 0, + 2.1476740847367459253e-01, -2.0535381496063397578e-17, + 2.1813154220581054688e-01, 0, + 2.1038568111737454558e-01, -4.2826767738736168650e-16, + 2.1354568004608154297e-01, 0, + 2.0612057974373865221e-01, 4.2108051749502232359e-16, + 2.0909011363983154297e-01, 0, + 2.0196410359405447821e-01, 3.5157118083511092869e-16, + 2.0475566387176513672e-01, 0, + 1.9790861144712756925e-01, 3.7894950972257700994e-16, + 2.0053362846374511719e-01, 0, + 1.9394752160084305359e-01, 2.8270367403478935534e-16, + 1.9641649723052978516e-01, 0, + 1.9007440763641536563e-01, -2.0842758095683676397e-16, + 1.9239699840545654297e-01, 0, + 1.8628369629742813629e-01, 3.4710917040399448932e-16, + 1.8846881389617919922e-01, 0, + 1.8256998712939509488e-01, 1.1053834120570125251e-16, + 1.8462586402893066406e-01, 0, + 1.7892875067284830237e-01, 3.0486232913366680305e-16, + 1.8086302280426025391e-01, 0, + 1.7535529778449010507e-01, -2.3810135019970148624e-16, + 1.7717504501342773438e-01, 0, + 1.7184559192514736736e-01, 5.1432582846210893916e-17, + 1.7355740070343017578e-01, 0, + 1.6839590847744290159e-01, 3.1605623296041433586e-18, + 1.7000591754913330078e-01, 0, + 1.6500283902547518977e-01, 1.5405422268770998251e-16, + 1.6651678085327148438e-01, 0, + 1.6166306303174859949e-01, 4.0042241517254928672e-16, + 1.6308629512786865234e-01, 0, + 1.5837358268281231943e-01, -2.2786616251622967291e-16, + 1.5971112251281738281e-01, 0, + 1.5513160990288810126e-01, -3.7547723514797166336e-16, + 1.5638816356658935547e-01, 0, + 1.5193468535499299321e-01, 4.3497510505554267446e-16, + 1.5311467647552490234e-01, 0, + 
1.4878033155427861089e-01, -2.3102860235324261895e-16, + 1.4988791942596435547e-01, 0, + 1.4566628729590647140e-01, 9.9227592950040279415e-17, + 1.4670538902282714844e-01, 0, + 1.4259050967286590605e-01, -3.3869909683813096906e-18, + 1.4356482028961181641e-01, 0, + 1.3955105903633846509e-01, 1.5500435650773331566e-17, + 1.4046406745910644531e-01, 0, + 1.3654610022831903393e-01, 3.3965918616682805753e-16, + 1.3740110397338867188e-01, 0, + 1.3357402082462854764e-01, 2.7572431581527535421e-16, + 1.3437414169311523438e-01, 0, + 1.3063319828908959153e-01, -3.4667213797076707331e-16, + 1.3138139247894287109e-01, 0, + 1.2772200049776749609e-01, 3.1089261947725651968e-16, + 1.2842106819152832031e-01, 0, + 1.2436931430778752627e-01, -4.0654251891464630059e-16, + 1.2501454353332519531e-01, 0, + 1.2111683701666819957e-01, -3.9381654342464836012e-16, + 1.2171256542205810547e-01, 0, + 1.1844801833536511282e-01, -3.6673155595150283444e-16, + 1.1900508403778076172e-01, 0, + 1.1587365536613614125e-01, -1.5026628801318421951e-16, + 1.1639505624771118164e-01, 0, + 1.1338607085741525538e-01, 1.2886806274050538880e-16, + 1.1387449502944946289e-01, 0, + 1.1097844020819369604e-01, 2.3848343623577768044e-16, + 1.1143630743026733398e-01, 0, + 1.0864456107308662069e-01, 4.2065430313285469408e-16, + 1.0907405614852905273e-01, 0, + 1.0637891628473727934e-01, -4.6883543790348472687e-18, + 1.0678201913833618164e-01, 0, + 1.0417650062205296990e-01, 1.4774925414624453292e-16, + 1.0455501079559326172e-01, 0, + 1.0203276464730581807e-01, -1.5677032794816452332e-16, + 1.0238832235336303711e-01, 0, + 9.9943617083734892503e-02, 3.4511310907979792828e-16, + 1.0027772188186645508e-01, 0, + 9.7905249824711049200e-02, 3.4489485563461708496e-16, + 9.8219275474548339844e-02, 0, + 9.5914316649349906641e-02, -1.3214510886789011569e-17, + 9.6209526062011718750e-02, 0, + 9.3967698614664918466e-02, 1.1048427091217964090e-16, + 9.4245254993438720703e-02, 0, + 9.2062564267554769515e-02, 
-3.7297463814697759309e-16, + 9.2323541641235351562e-02, 0, + 9.0196252506350660383e-02, -3.5280143043576718079e-16, + 9.0441644191741943359e-02, 0, + 8.8366391663268650802e-02, -6.1140673227541621183e-17, + 8.8597118854522705078e-02, 0, + 8.6570782100201526532e-02, -2.0998844594957629702e-16, + 8.6787700653076171875e-02, 0, + 8.4807337678923566671e-02, 3.9530981588194673068e-16, + 8.5011243820190429688e-02, 0, + 8.3074323040850828193e-02, -4.3022503210464894539e-17, + 8.3265960216522216797e-02, 0, + 8.1369880712663267275e-02, -6.3063867569127169744e-18, + 8.1549942493438720703e-02, 0, + 7.9692445771216036121e-02, -5.0787623072962671502e-17, + 7.9861581325531005859e-02, 0, + 7.8040568735575632786e-02, -3.8810063021216721741e-16, + 7.8199386596679687500e-02, 0, + 7.6412797391314235540e-02, 4.1246529500495762995e-16, + 7.6561868190765380859e-02, 0, + 7.4807854772808823896e-02, -3.7025599052186724156e-16, + 7.4947714805603027344e-02, 0, + 7.3224639528778112663e-02, 4.2209138483206712401e-17, + 7.3355793952941894531e-02, 0, + 7.1661929761571485642e-02, -3.2074473649855177622e-16, + 7.1784853935241699219e-02, 0, + 7.0118738881148168218e-02, -2.5371257235753296804e-16, + 7.0233881473541259766e-02, 0, + 6.8594137996416115755e-02, 3.3796987842548399135e-16, + 6.8701922893524169922e-02, 0, + 6.7087137393172291411e-02, 5.5061492696328852397e-17, + 6.7187964916229248047e-02, 0, + 6.5596983299946565182e-02, -2.1580863111502565280e-16, + 6.5691232681274414062e-02, 0, + 6.4122802037412718335e-02, -3.1315661827469233434e-16, + 6.4210832118988037109e-02, 0, + 6.2426231582525915087e-02, -2.5758980071296622188e-16, + 6.2507450580596923828e-02, 0, + 6.0781559928021700046e-02, 1.3736899336217710591e-16, + 6.0856521129608154297e-02, 0, + 5.9432882624005145544e-02, 2.2246097394328856474e-16, + 5.9502959251403808594e-02, 0, + 5.8132551274581167888e-02, -6.2525053236379489390e-18, + 5.8198124170303344727e-02, 0, + 5.6876611930681164608e-02, -2.6589930995607417149e-16, + 
5.6938022375106811523e-02, 0, + 5.5661522654748551986e-02, -4.2736362859832186197e-16, + 5.5719077587127685547e-02, 0, + 5.4484124463757943602e-02, -1.6708067365310384253e-16, + 5.4538100957870483398e-02, 0, + 5.3341582449436764080e-02, 3.3271673004611311850e-17, + 5.3392231464385986328e-02, 0, + 5.2231267345892007370e-02, -3.5593396674200571616e-16, + 5.2278816699981689453e-02, 0, + 5.1150874758829623090e-02, 1.4432815841187114832e-16, + 5.1195532083511352539e-02, 0, + 5.0098306612679444072e-02, 9.4680943793589404083e-17, + 5.0140261650085449219e-02, 0, + 4.9071641675614507960e-02, 2.1131168520301896817e-16, + 4.9111068248748779297e-02, 0, + 4.8069135772851545596e-02, 1.6035336741307516296e-16, + 4.8106193542480468750e-02, 0, + 4.7089192241088539959e-02, -2.2491738698796901479e-16, + 4.7124028205871582031e-02, 0, + 4.6130362086062248750e-02, -1.5111423469578965206e-16, + 4.6163111925125122070e-02, 0, + 4.5191314382707403752e-02, 4.1989325207399786612e-16, + 4.5222103595733642578e-02, 0, + 4.4270836390474244126e-02, -4.1432635292331004454e-16, + 4.4299781322479248047e-02, 0, + 4.3367774164955186222e-02, -3.0615383054587355892e-16, + 4.3394982814788818359e-02, 0, + 4.2481121875321825598e-02, -3.6730166956273555173e-16, + 4.2506694793701171875e-02, 0, + 4.1609902899457651415e-02, -4.4226425958068821782e-16, + 4.1633933782577514648e-02, 0, + 4.0753259129372665370e-02, 1.9801161516527046872e-16, + 4.0775835514068603516e-02, 0, + 3.9910361780060910064e-02, 8.2560620036613164573e-18, + 3.9931565523147583008e-02, 0, + 3.9080441183869218946e-02, 3.9908991939242971628e-17, + 3.9100348949432373047e-02, 0, + 3.8262816593271686827e-02, 9.5182237812195590276e-17, + 3.8281500339508056641e-02, 0, + 3.7456806948784837630e-02, 1.5213508760679563439e-16, + 3.7474334239959716797e-02, 0, + 3.6661849947035918262e-02, 7.3335516005184616486e-17, + 3.6678284406661987305e-02, 0, + 3.5877353272533163420e-02, -1.3007348019891714540e-16, + 3.5892754793167114258e-02, 0, + 
3.5102754135096780885e-02, -2.9903662298950558656e-16, + 3.5117179155349731445e-02, 0, + 3.4337638360670830195e-02, 2.9656295131966114331e-16, + 3.4351140260696411133e-02, 0, + 3.3581472523789734907e-02, 3.4810947205572817820e-16, + 3.3594101667404174805e-02, 0, + 3.2833871859357266487e-02, -3.8885440174405159838e-16, + 3.2845675945281982422e-02, 0, + 3.2094421679560447558e-02, 5.8805134853032009978e-17, + 3.2105445861816406250e-02, 0, + 3.1243584858944295490e-02, 2.8737383773884313066e-17, + 3.1253755092620849609e-02, 0, + 0, 0, 0, 0 +}; diff --git a/usr/src/lib/libmvec/common/__vTBL_rsqrt.c b/usr/src/lib/libmvec/common/__vTBL_rsqrt.c new file mode 100644 index 0000000000..0cfccd83c9 --- /dev/null +++ b/usr/src/lib/libmvec/common/__vTBL_rsqrt.c @@ -0,0 +1,169 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma align 32 (__vlibm_TBL_rsqrt) + +/* + i = [0,128] + TBL[2*i ] = (double)(1.0 / sqrtl(*(double*)&(0x3fe0000000000000LL + (i << 46)))); + TBL[2*i+1] = (double)(1.0 / sqrtl(*(double*)&(0x3fe0000000000000LL + (i << 46))) - TBL[2*i]); +*/ + +const double __vlibm_TBL_rsqrt[] = { + 1.4142135623730951455e+00, -9.6672933134529134511e-17, + 1.4032928308912466786e+00, 6.4948026279769118919e-17, + 1.3926212476455828160e+00, -1.1055881989569260189e-16, + 1.3821894809301762397e+00, -6.3734410461405640301e-17, + 1.3719886811400707760e+00, -7.6980807939588139983e-17, + 1.3620104492139977204e+00, 2.8850217265224690802e-17, + 1.3522468075656264297e+00, 9.4322960168092127774e-17, + 1.3426901732747025253e+00, 4.7150841580269266495e-18, + 1.3333333333333332593e+00, 7.4014868308343765253e-17, + 1.3241694217637887121e+00, 7.7131873618846925903e-18, + 1.3151918984428583315e+00, -2.0328800352543524759e-17, + 1.3063945294843617440e+00, -9.1582083631189420602e-17, + 1.2977713690461003537e+00, -4.8412149406758561904e-17, + 1.2893167424406084542e+00, 2.3274915882478143921e-17, + 1.2810252304406970492e+00, 1.8704771066280918649e-17, + 1.2728916546811681609e+00, -8.8457926431820830415e-17, + 1.2649110640673517647e+00, -3.1906346897860143141e-17, + 1.2570787221094177344e+00, 8.6769863266554017163e-17, + 1.2493900951088485751e+00, -5.0929983362732175622e-17, + 1.2418408411301324890e+00, 8.8840637867087758165e-17, + 1.2344267996967352996e+00, -1.7516410189877601154e-17, + 1.2271439821557927896e+00, -9.0396673750943792696e-17, + 1.2199885626608373279e+00, 2.7575041782657058896e-18, + 1.2129568697262453902e+00, 5.0766000649864922701e-17, + 1.2060453783110545167e+00, -2.6141724617295359467e-17, + 1.1992507023933782762e+00, 3.5079005878814235254e-17, + 1.1925695879998878812e+00, -4.3139588510944642176e-17, + 1.1859989066577618644e+00, 2.2700827457352136295e-17, + 1.1795356492391770864e+00, -1.8736930872699025425e-17, + 1.1731769201708264205e+00, -1.0717525135280878089e-16, + 
1.1669199319831564665e+00, -1.9717488453279445066e-17, + 1.1607620001760186046e+00, 7.0604910402531185787e-17, + 1.1547005383792514621e+00, 6.6900561478712689458e-17, + 1.1487330537883810866e+00, -1.1022220198146414245e-16, + 1.1428571428571427937e+00, 6.3441315692866084503e-17, + 1.1370704872299222110e+00, 1.0524397995692614457e-16, + 1.1313708498984760276e+00, 1.1479495462389219323e-17, + 1.1257560715684669095e+00, 6.0574394710210801304e-17, + 1.1202240672224077489e+00, 9.3922898547554319150e-17, + 1.1147728228665882977e+00, -4.5491044078590048284e-17, + 1.1094003924504582947e+00, -5.0709657003823779908e-17, + 1.1041048949477667573e+00, -8.8666430365492392908e-18, + 1.0988845115895122806e+00, -8.8730050685366661178e-17, + 1.0937374832394612945e+00, -1.0139924803906119049e-16, + 1.0886621079036347126e+00, -2.3035347176474180687e-18, + 1.0836567383657542685e+00, -9.7789672372212451307e-17, + 1.0787197799411873955e+00, -5.7527821233647078927e-17, + 1.0738496883424388795e+00, 1.9216919863927710029e-17, + 1.0690449676496975862e+00, -4.7415720102268737205e-17, + 1.0643041683803828867e+00, -3.0438242811018816132e-19, + 1.0596258856520350822e+00, -3.6947737086388254690e-17, + 1.0550087574332591700e+00, 3.7548847295491266968e-17, + 1.0504514628777803509e+00, 1.0231500228552561044e-16, + 1.0459527207369814228e+00, 8.0806674896943551777e-17, + 1.0415112878465908608e+00, 7.8292411070687721348e-17, + 1.0371259576834630511e+00, -2.6664053809928624719e-17, + 1.0327955589886446131e+00, -1.1033761728824692438e-16, + 1.0285189544531601058e+00, -7.0307587734203009158e-17, + 1.0242950394631678002e+00, -1.0770393913594349379e-17, + 1.0201227409013413627e+00, -9.8717216425570547616e-17, + 1.0160010160015240377e+00, -3.5150724174046424206e-17, + 1.0119288512538813229e+00, 6.3292764451724411186e-17, + 1.0079052613579393416e+00, -6.9021193162451496902e-17, + 1.0039292882210537616e+00, -6.9245436618476016139e-17, + 1.0000000000000000000e+00, 0.0000000000000000000e+00, + 
9.9227787671366762812e-01, 2.1405178579048182592e-17, + 9.8473192783466190203e-01, -4.0158639458782051420e-17, + 9.7735555485044178781e-01, -3.4924457286878990179e-19, + 9.7014250014533187638e-01, 1.7693410507027811240e-17, + 9.6308682468615358641e-01, 1.9691102487554127121e-17, + 9.5618288746751489704e-01, 1.4935376108861049295e-17, + 9.4942532655508271588e-01, -5.3278073247766967031e-17, + 9.4280904158206335630e-01, 9.5662462186576827694e-18, + 9.3632917756904454620e-01, -3.4655680606790736102e-17, + 9.2998110995055427441e-01, -2.8820206372616569176e-17, + 9.2376043070340119190e-01, 3.1315988690467019525e-17, + 9.1766293548224708854e-01, -2.4907828666661326139e-17, + 9.1168461167710357351e-01, 1.7178891233165183242e-17, + 9.0582162731567661407e-01, -1.3578665987704751967e-17, + 9.0007032074081916306e-01, -3.9003513621620290514e-17, + 8.9442719099991585541e-01, 2.3156459848049343849e-17, + 8.8888888888888883955e-01, 4.9343245538895843502e-17, + 8.8345220859877238162e-01, -2.7808199947420238654e-17, + 8.7811407991752277180e-01, 1.2001012979479060187e-17, + 8.7287156094396955996e-01, -3.4900338036123033814e-17, + 8.6772183127462465535e-01, 3.2650033503527982608e-17, + 8.6266218562750729415e-01, 3.1665473509444755614e-17, + 8.5769002787023584933e-01, 1.6930198090043138729e-17, + 8.5280286542244176928e-01, -3.2089317494821048697e-17, + 8.4799830400508802164e-01, -3.8599776100732649845e-17, + 8.4327404271156780613e-01, 1.5736536222265119505e-17, + 8.3862786937753464045e-01, -3.8316227580533944669e-18, + 8.3405765622829908246e-01, -3.1744458177500410304e-17, + 8.2956135578434020417e-01, 1.0522097091084975821e-17, + 8.2513699700703468931e-01, 3.6488948923760358306e-17, + 8.2078268166812329287e-01, -1.6507622733959848503e-17, + 8.1649658092772603446e-01, -1.7276510382355637441e-18, + 8.1227693210689522196e-01, 1.2819865235943699943e-17, + 8.0812203564176865456e-01, -5.5241676076873786747e-17, + 8.0403025220736967782e-01, -1.7427816411530239645e-17, + 
8.0000000000000004441e-01, -4.4408920985006264082e-17, + 7.9602975216799132241e-01, -1.3876860654527447191e-17, + 7.9211803438133943089e-01, 1.6428787126265500350e-17, + 7.8826342253143455441e-01, -3.2571002717425679181e-17, + 7.8446454055273617811e-01, -5.0417296289807987128e-17, + 7.8072005835882651859e-01, 2.4898247108034524775e-17, + 7.7702868988581130782e-01, 3.6763699589769887870e-17, + 7.7338919123653082632e-01, 4.9918835031221789176e-17, + 7.6980035891950104876e-01, -2.9414493989201982553e-17, + 7.6626102817692109959e-01, 1.4524522292996552738e-17, + 7.6277007139647390321e-01, -5.0856154603265522966e-17, + 7.5932639660199918730e-01, 8.9842992531287086391e-18, + 7.5592894601845450619e-01, -5.1765894871838619595e-17, + 7.5257669470687782454e-01, 9.6579665081799721467e-18, + 7.4926864926535519107e-01, -1.8380676468162380710e-17, + 7.4600384659225105199e-01, -3.9485726539632463848e-17, + 7.4278135270820744296e-01, 9.6276948503597478238e-18, + 7.3960026163363878915e-01, 4.0208430305794580702e-17, + 7.3645969431865865307e-01, 4.0077997112003520937e-17, + 7.3335879762256905856e-01, -2.2493399096927370000e-17, + 7.3029674334022143256e-01, 5.2048227304015206987e-17, + 7.2727272727272729291e-01, -2.0185873175002846750e-17, + 7.2428596834014824513e-01, 2.3633090263928220565e-18, + 7.2133570773394584119e-01, -9.5131613777431479940e-18, + 7.1842120810709964029e-01, -3.7440154323260191964e-17, + 7.1554175279993270653e-01, -3.6792926140636546510e-18, + 7.1269664509979835376e-01, 5.3969540859927280847e-18, + 7.0988520753289097165e-01, 4.4593566535489654887e-17, + 7.0710678118654757274e-01, -4.8336466567264567255e-17, +}; + diff --git a/usr/src/lib/libmvec/common/__vTBL_sincos.c b/usr/src/lib/libmvec/common/__vTBL_sincos.c new file mode 100644 index 0000000000..05d5a2e016 --- /dev/null +++ b/usr/src/lib/libmvec/common/__vTBL_sincos.c @@ -0,0 +1,334 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and 
Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +const double __vlibm_TBL_sincos_hi[] = { + 1.55614992773556032e-01, 9.87817783816471895e-01, +-1.55614992773556032e-01,-9.87817783816471895e-01, + 1.59472458931843419e-01, 9.87202377854830448e-01, +-1.59472458931843419e-01,-9.87202377854830448e-01, + 1.63327491736612845e-01, 9.86571908399497599e-01, +-1.63327491736612845e-01,-9.86571908399497599e-01, + 1.67180032364806747e-01, 9.85926385070661437e-01, +-1.67180032364806747e-01,-9.85926385070661437e-01, + 1.71030022031395029e-01, 9.85265817718213865e-01, +-1.71030022031395029e-01,-9.85265817718213865e-01, + 1.74877401990272185e-01, 9.84590216421599829e-01, +-1.74877401990272185e-01,-9.84590216421599829e-01, + 1.78722113535153659e-01, 9.83899591489663994e-01, +-1.78722113535153659e-01,-9.83899591489663994e-01, + 1.82564098000471547e-01, 9.83193953460493097e-01, +-1.82564098000471547e-01,-9.83193953460493097e-01, + 1.86403296762269882e-01, 9.82473313101255297e-01, +-1.86403296762269882e-01,-9.82473313101255297e-01, + 1.90239651239099056e-01, 9.81737681408035745e-01, +-1.90239651239099056e-01,-9.81737681408035745e-01, + 
1.94073102892909799e-01, 9.80987069605669171e-01, +-1.94073102892909799e-01,-9.80987069605669171e-01, + 1.97903593229946273e-01, 9.80221489147568126e-01, +-1.97903593229946273e-01,-9.80221489147568126e-01, + 2.01731063801638799e-01, 9.79440951715548347e-01, +-2.01731063801638799e-01,-9.79440951715548347e-01, + 2.05555456205495507e-01, 9.78645469219650899e-01, +-2.05555456205495507e-01,-9.78645469219650899e-01, + 2.09376712085993649e-01, 9.77835053797959763e-01, +-2.09376712085993649e-01,-9.77835053797959763e-01, + 2.13194773135469889e-01, 9.77009717816417433e-01, +-2.13194773135469889e-01,-9.77009717816417433e-01, + 2.17009581095010146e-01, 9.76169473868635285e-01, +-2.17009581095010146e-01,-9.76169473868635285e-01, + 2.20821077755338491e-01, 9.75314334775702285e-01, +-2.20821077755338491e-01,-9.75314334775702285e-01, + 2.24629204957705303e-01, 9.74444313585988930e-01, +-2.24629204957705303e-01,-9.74444313585988930e-01, + 2.28433904594774750e-01, 9.73559423574948180e-01, +-2.28433904594774750e-01,-9.73559423574948180e-01, + 2.32235118611511471e-01, 9.72659678244912729e-01, +-2.32235118611511471e-01,-9.72659678244912729e-01, + 2.36032789006066335e-01, 9.71745091324889509e-01, +-2.36032789006066335e-01,-9.71745091324889509e-01, + 2.39826857830661572e-01, 9.70815676770349412e-01, +-2.39826857830661572e-01,-9.70815676770349412e-01, + 2.43617267192474896e-01, 9.69871448763015342e-01, +-2.43617267192474896e-01,-9.69871448763015342e-01, + 2.47403959254522937e-01, 9.68912421710644733e-01, +-2.47403959254522937e-01,-9.68912421710644733e-01, + 2.54965960415878490e-01, 9.66950029230677854e-01, +-2.54965960415878490e-01,-9.66950029230677854e-01, + 2.62512399769153304e-01, 9.64928619104771013e-01, +-2.62512399769153304e-01,-9.64928619104771013e-01, + 2.70042816718585044e-01, 9.62848314709379705e-01, +-2.70042816718585044e-01,-9.62848314709379705e-01, + 2.77556751646336308e-01, 9.60709243015561931e-01, +-2.77556751646336308e-01,-9.60709243015561931e-01, + 
2.85053745940547443e-01, 9.58511534581228619e-01, +-2.85053745940547443e-01,-9.58511534581228619e-01, + 2.92533342023327536e-01, 9.56255323543175328e-01, +-2.92533342023327536e-01,-9.56255323543175328e-01, + 2.99995083378683025e-01, 9.53940747608894690e-01, +-2.99995083378683025e-01,-9.53940747608894690e-01, + 3.07438514580380851e-01, 9.51567948048172241e-01, +-3.07438514580380851e-01,-9.51567948048172241e-01, + 3.14863181319745222e-01, 9.49137069684462986e-01, +-3.14863181319745222e-01,-9.49137069684462986e-01, + 3.22268630433386605e-01, 9.46648260886053361e-01, +-3.22268630433386605e-01,-9.46648260886053361e-01, + 3.29654409930860148e-01, 9.44101673557004362e-01, +-3.29654409930860148e-01,-9.44101673557004362e-01, + 3.37020069022253066e-01, 9.41497463127881073e-01, +-3.37020069022253066e-01,-9.41497463127881073e-01, + 3.44365158145698402e-01, 9.38835788546265482e-01, +-3.44365158145698402e-01,-9.38835788546265482e-01, + 3.51689228994814085e-01, 9.36116812267055343e-01, +-3.51689228994814085e-01,-9.36116812267055343e-01, + 3.58991834546065036e-01, 9.33340700242548449e-01, +-3.58991834546065036e-01,-9.33340700242548449e-01, + 3.66272529086047571e-01, 9.30507621912314287e-01, +-3.66272529086047571e-01,-9.30507621912314287e-01, + 3.73530868238692970e-01, 9.27617750192851864e-01, +-3.73530868238692970e-01,-9.27617750192851864e-01, + 3.80766408992390171e-01, 9.24671261467036043e-01, +-3.80766408992390171e-01,-9.24671261467036043e-01, + 3.87978709727025028e-01, 9.21668335573351927e-01, +-3.87978709727025028e-01,-9.21668335573351927e-01, + 3.95167330240934256e-01, 9.18609155794918308e-01, +-3.95167330240934256e-01,-9.18609155794918308e-01, + 4.02331831777773097e-01, 9.15493908848301174e-01, +-4.02331831777773097e-01,-9.15493908848301174e-01, + 4.09471777053295072e-01, 9.12322784872117820e-01, +-4.09471777053295072e-01,-9.12322784872117820e-01, + 4.16586730282041129e-01, 9.09095977415431022e-01, +-4.16586730282041129e-01,-9.09095977415431022e-01, + 
4.23676257203938034e-01, 9.05813683425936378e-01, +-4.23676257203938034e-01,-9.05813683425936378e-01, + 4.30739925110803223e-01, 9.02476103237941474e-01, +-4.30739925110803223e-01,-9.02476103237941474e-01, + 4.37777302872755125e-01, 8.99083440560138447e-01, +-4.37777302872755125e-01,-8.99083440560138447e-01, + 4.44787960964527218e-01, 8.95635902463170708e-01, +-4.44787960964527218e-01,-8.95635902463170708e-01, + 4.51771471491683785e-01, 8.92133699366994382e-01, +-4.51771471491683785e-01,-8.92133699366994382e-01, + 4.58727408216736576e-01, 8.88577045028035584e-01, +-4.58727408216736576e-01,-8.88577045028035584e-01, + 4.65655346585160168e-01, 8.84966156526143299e-01, +-4.65655346585160168e-01,-8.84966156526143299e-01, + 4.72554863751304455e-01, 8.81301254251340649e-01, +-4.72554863751304455e-01,-8.81301254251340649e-01, + 4.79425538604203005e-01, 8.77582561890372759e-01, +-4.79425538604203005e-01,-8.77582561890372759e-01, + 4.93078685753923052e-01, 8.69984718058417372e-01, +-4.93078685753923052e-01,-8.69984718058417372e-01, + 5.06611454814257400e-01, 8.62174479934880500e-01, +-5.06611454814257400e-01,-8.62174479934880500e-01, + 5.20020541953727045e-01, 8.54153754277385380e-01, +-5.20020541953727045e-01,-8.54153754277385380e-01, + 5.33302673536020122e-01, 8.45924499231067939e-01, +-5.33302673536020122e-01,-8.45924499231067939e-01, + 5.46454606919203556e-01, 8.37488723850523642e-01, +-5.46454606919203556e-01,-8.37488723850523642e-01, + 5.59473131247366862e-01, 8.28848487609325724e-01, +-5.59473131247366862e-01,-8.28848487609325724e-01, + 5.72355068234507214e-01, 8.20005899897234047e-01, +-5.72355068234507214e-01,-8.20005899897234047e-01, + 5.85097272940462210e-01, 8.10963119505217933e-01, +-5.85097272940462210e-01,-8.10963119505217933e-01, + 5.97696634538701477e-01, 8.01722354098418410e-01, +-5.97696634538701477e-01,-8.01722354098418410e-01, + 6.10150077075791386e-01, 7.92285859677178572e-01, +-6.10150077075791386e-01,-7.92285859677178572e-01, + 
6.22454560222343689e-01, 7.82655940026272812e-01, +-6.22454560222343689e-01,-7.82655940026272812e-01, + 6.34607080015269331e-01, 7.72834946152471503e-01, +-6.34607080015269331e-01,-7.72834946152471503e-01, + 6.46604669591152370e-01, 7.62825275710576234e-01, +-6.46604669591152370e-01,-7.62825275710576234e-01, + 6.58444399910567579e-01, 7.52629372418066489e-01, +-6.58444399910567579e-01,-7.52629372418066489e-01, + 6.70123380473162888e-01, 7.42249725458501319e-01, +-6.70123380473162888e-01,-7.42249725458501319e-01, + 6.81638760023334123e-01, 7.31688868873820897e-01, +-6.81638760023334123e-01,-7.31688868873820897e-01, + 6.92987727246317964e-01, 7.20949380945696383e-01, +-6.92987727246317964e-01,-7.20949380945696383e-01, + 7.04167511454533712e-01, 7.10033883566079660e-01, +-7.04167511454533712e-01,-7.10033883566079660e-01 +}; + +const double __vlibm_TBL_sincos_lo[] = { + 8.88605337234228782e-18, 4.91917302237681002e-17, +-8.88605337234228782e-18,-4.91917302237681002e-17, + 5.81822082653163949e-19, 4.19401745952789211e-17, +-5.81822082653163949e-19,-4.19401745952789211e-17, + 5.48356943034715901e-18,-1.03274445882754459e-17, +-5.48356943034715901e-18, 1.03274445882754459e-17, +-1.21877614400540502e-17,-1.63494100549760754e-18, + 1.21877614400540502e-17, 1.63494100549760754e-18, +-9.95477472645292259e-18,-4.92572126294455489e-17, + 9.95477472645292259e-18, 4.92572126294455489e-17, + 4.43433505081671336e-18,-2.26634179854541132e-17, +-4.43433505081671336e-18, 2.26634179854541132e-17, +-1.62404059010738783e-20,-2.16479885316442748e-17, + 1.62404059010738783e-20, 2.16479885316442748e-17, + 7.94348727702255030e-18,-2.49458400454010874e-17, +-7.94348727702255030e-18, 2.49458400454010874e-17, + 2.34937969012815731e-18,-3.91992037542008779e-17, +-2.34937969012815731e-18, 3.91992037542008779e-17, + 6.04001694249999295e-18, 3.13336233097345808e-17, +-6.04001694249999295e-18,-3.13336233097345808e-17, +-7.83274121019861488e-18, 1.96784118087030288e-17, + 
7.83274121019861488e-18,-1.96784118087030288e-17, + 1.16502095128541978e-17,-2.95181339018270543e-17, +-1.16502095128541978e-17, 2.95181339018270543e-17, + 5.58723281546011280e-18, 1.31087695215267578e-17, +-5.58723281546011280e-18,-1.31087695215267578e-17, + 1.06518785731668444e-17,-3.07669849664887505e-17, +-1.06518785731668444e-17, 3.07669849664887505e-17, +-5.53640369317216307e-18, 2.99100284927694838e-17, + 5.53640369317216307e-18,-2.99100284927694838e-17, + 1.22477058822641605e-18,-4.86093565810892311e-17, +-1.22477058822641605e-18, 4.86093565810892311e-17, + 1.11700710733643761e-17,-7.85069060928502747e-18, +-1.11700710733643761e-17, 7.85069060928502747e-18, +-1.47298004525206156e-19, 4.12921182559656912e-17, + 1.47298004525206156e-19,-4.12921182559656912e-17, +-1.05859041643290307e-17, 4.99012883492139510e-17, + 1.05859041643290307e-17,-4.99012883492139510e-17, +-4.98254439531455880e-18,-8.05559790337166344e-18, + 4.98254439531455880e-18, 8.05559790337166344e-18, +-8.31808085268720599e-18, 2.39202645464901648e-17, + 8.31808085268720599e-18,-2.39202645464901648e-17, +-9.89486060733470012e-19,-4.18461124842153636e-17, + 9.89486060733470012e-19, 4.18461124842153636e-17, +-7.26081066097971201e-18, 5.12857925321536470e-17, + 7.26081066097971201e-18,-5.12857925321536470e-17, +-9.57516421953495973e-18, 2.52768896842457810e-18, + 9.57516421953495973e-18,-2.52768896842457810e-18, +-7.53102495590705992e-18, 5.07143666240393522e-17, + 7.53102495590705992e-18,-5.07143666240393522e-17, +-2.23100354354259536e-17,-3.23777029770769223e-17, + 2.23100354354259536e-17, 3.23777029770769223e-17, +-2.25345975279021249e-17,-3.03455426810186255e-18, + 2.25345975279021249e-17, 3.03455426810186255e-18, +-1.21032650978877771e-17,-4.64600977172424097e-18, + 1.21032650978877771e-17, 4.64600977172424097e-18, + 1.76740702627918219e-17,-2.80782706351672909e-17, +-1.76740702627918219e-17, 2.80782706351672909e-17, +-1.81620831076181184e-17, 8.13462149294625475e-18, + 
1.81620831076181184e-17,-8.13462149294625475e-18, + 7.51694493032735190e-18,-3.14845086884162891e-17, +-7.51694493032735190e-18, 3.14845086884162891e-17, + 2.60639277793073401e-17, 4.37575894717349784e-17, +-2.60639277793073401e-17,-4.37575894717349784e-17, + 1.10043664427652965e-19,-3.86148346756741172e-17, +-1.10043664427652965e-19, 3.86148346756741172e-17, + 2.85898059254855721e-17, 4.14914804609944515e-17, +-2.85898059254855721e-17,-4.14914804609944515e-17, + 2.09377335812660597e-17,-3.91168333493415196e-17, +-2.09377335812660597e-17, 3.91168333493415196e-17, + 2.35998378957031002e-17,-1.60176532845458484e-17, +-2.35998378957031002e-17, 1.60176532845458484e-17, + 1.03122798607872161e-17,-4.85238302367970955e-18, +-1.03122798607872161e-17, 4.85238302367970955e-18, + 5.88166458751798880e-18, 6.91932945992178774e-18, +-5.88166458751798880e-18,-6.91932945992178774e-18, +-2.56162087360699421e-17,-5.23503020396832165e-17, + 2.56162087360699421e-17, 5.23503020396832165e-17, + 1.74954828401588476e-17,-1.32285954777808795e-17, +-1.74954828401588476e-17, 1.32285954777808795e-17, +-9.93881456210652418e-18, 4.48876000332807380e-18, + 9.93881456210652418e-18,-4.48876000332807380e-18, +-2.37566914410618903e-17, 4.53509425735919737e-17, + 2.37566914410618903e-17,-4.53509425735919737e-17, + 2.13725286462113737e-17, 5.54441253880345633e-17, +-2.13725286462113737e-17,-5.54441253880345633e-17, + 1.75979951033595287e-17,-8.55069309786724315e-18, +-1.75979951033595287e-17, 8.55069309786724315e-18, +-1.96134878714142281e-17,-4.05641501045149965e-17, + 1.96134878714142281e-17, 4.05641501045149965e-17, + 1.44138754527020067e-17, 5.41337556683804221e-17, +-1.44138754527020067e-17,-5.41337556683804221e-17, +-5.67940300009126604e-18, 2.63490402114133324e-17, + 5.67940300009126604e-18,-2.63490402114133324e-17, +-9.61085068253371493e-18, 2.92000611384121121e-17, + 9.61085068253371493e-18,-2.92000611384121121e-17, +-2.33180070006887094e-17, 4.28646664908052081e-17, + 
2.33180070006887094e-17,-4.28646664908052081e-17, +-2.62128796074765330e-17, 3.11249067465132618e-17, + 2.62128796074765330e-17,-3.11249067465132618e-17, + 7.64345629962023030e-18, 9.07695177507561595e-18, +-7.64345629962023030e-18,-9.07695177507561595e-18, +-6.65539297734492513e-18,-8.85404388576271590e-18, + 6.65539297734492513e-18, 8.85404388576271590e-18, +-8.23407394209890257e-18, 2.31606552113801660e-17, + 8.23407394209890257e-18,-2.31606552113801660e-17, + 1.60809820962183558e-17,-4.03449199835716708e-17, +-1.60809820962183558e-17, 4.03449199835716708e-17, + 1.45987039105142601e-17,-7.69055777598735693e-18, +-1.45987039105142601e-17, 7.69055777598735693e-18, +-3.60879070379054568e-18,-4.97307318930606626e-17, + 3.60879070379054568e-18, 4.97307318930606626e-17, +-5.10396986055601290e-18,-4.26231498642799968e-17, + 5.10396986055601290e-18, 4.26231498642799968e-17, + 5.60508397387175474e-18, 1.65738511074092287e-17, +-5.60508397387175474e-18,-1.65738511074092287e-17, +-3.26941342361816774e-17, 4.41324275781058045e-18, + 3.26941342361816774e-17,-4.41324275781058045e-18, +-3.98326674569845477e-17, 5.42056510267528622e-18, + 3.98326674569845477e-17,-5.42056510267528622e-18, + 5.12931811503204399e-17, 1.54950664735032887e-17, +-5.12931811503204399e-17,-1.54950664735032887e-17, + 8.39975484092950739e-18, 4.33370260439483957e-17, +-8.39975484092950739e-18,-4.33370260439483957e-17, + 1.57556551448872803e-17, 1.11639354066174440e-17, +-1.57556551448872803e-17,-1.11639354066174440e-17, + 2.65758723572153157e-17,-3.91243174820912803e-17, +-2.65758723572153157e-17, 3.91243174820912803e-17, +-5.48839724611618050e-17,-3.09133348612217870e-17, + 5.48839724611618050e-17, 3.09133348612217870e-17, + 5.45032359305438502e-17, 4.01345333110870077e-17, +-5.45032359305438502e-17,-4.01345333110870077e-17, +-1.47982699075898800e-17,-2.90497793128345697e-17, + 1.47982699075898800e-17, 2.90497793128345697e-17, +-6.04903576570970714e-18,-1.47407164121148702e-17, + 
6.04903576570970714e-18, 1.47407164121148702e-17, +-3.45685823926249648e-17, 4.23101492189102265e-17, + 3.45685823926249648e-17,-4.23101492189102265e-17, + 4.56764771439328899e-19, 1.66729950215466278e-17, +-4.56764771439328899e-19,-1.66729950215466278e-17, +-3.77363867003067107e-17,-1.29709930131505256e-17, + 3.77363867003067107e-17, 1.29709930131505256e-17, + 6.18353672557495936e-18,-1.23393036048695210e-17, +-6.18353672557495936e-18, 1.23393036048695210e-17, + 4.41046731319790287e-17,-1.04758243065127675e-17, +-4.41046731319790287e-17, 1.04758243065127675e-17, +-5.35432907989094549e-17, 3.49498670147881544e-17, + 5.35432907989094549e-17,-3.49498670147881544e-17, +-3.94095700584824985e-17, 1.50527221189129099e-17, + 3.94095700584824985e-17,-1.50527221189129099e-17, +}; diff --git a/usr/src/lib/libmvec/common/__vTBL_sincos2.c b/usr/src/lib/libmvec/common/__vTBL_sincos2.c new file mode 100644 index 0000000000..e48c07807d --- /dev/null +++ b/usr/src/lib/libmvec/common/__vTBL_sincos2.c @@ -0,0 +1,146 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 
+ * Use is subject to license terms. + */ + +/* + * Let arg(x) denote a double precision number near x such that both + * sin(arg(x)) and cos(arg(x)) are approximated by double precision + * numbers to within a relative error less than 2^-61. + * + * Then for i = 5, ..., 101 + * + * __vlibm_TBL_sincos2[4*i] := arg(i/128), + * __vlibm_TBL_sincos2[4*i+1] := sin(arg(i/128)), and + * __vlibm_TBL_sincos2[4*i+2] := cos(arg(i/128)) + * + * (For i = 0, ..., 4, use zero instead of arg(i/128) above.) + */ +const double __vlibm_TBL_sincos2[] = { + 0.0000000000000000000e+00, 0.0000000000000000000e+00, 1.0000000000000000000e+00, 0.0, + 0.0000000000000000000e+00, 0.0000000000000000000e+00, 1.0000000000000000000e+00, 0.0, + 0.0000000000000000000e+00, 0.0000000000000000000e+00, 1.0000000000000000000e+00, 0.0, + 0.0000000000000000000e+00, 0.0000000000000000000e+00, 1.0000000000000000000e+00, 0.0, + 0.0000000000000000000e+00, 0.0000000000000000000e+00, 1.0000000000000000000e+00, 0.0, + 3.9062500000301640657e-02, 3.9052566650723562203e-02, 9.9923715755469721955e-01, 0.0, + 4.6874999999606224710e-02, 4.6857835747740897436e-02, 9.9890156833846133200e-01, 0.0, + 5.4687499999642848192e-02, 5.4660244884709843771e-02, 9.9850501131899360718e-01, 0.0, + 6.2500000000560454461e-02, 6.2459317842939558740e-02, 9.9804751070006414437e-01, 0.0, + 7.0312499999974784060e-02, 7.0254578604834888589e-02, 9.9752909440529957674e-01, 0.0, + 7.8125000000139249723e-02, 7.8045551390106132628e-02, 9.9694979407601780341e-01, 0.0, + 8.5937500000010338952e-02, 8.5831760676889648498e-02, 9.9630964506979713402e-01, 0.0, + 9.3749999999981376009e-02, 9.3612731235494350823e-02, 9.9560868645800348897e-01, 0.0, + 1.0156249999991998068e-01, 1.0138798815545004006e-01, 9.9484696102354874814e-01, 0.0, + 1.0937499999996859457e-01, 1.0915705687529114742e-01, 9.9402451525821255984e-01, 0.0, + 1.1718749999982362719e-01, 1.1691946321080448623e-01, 9.9314139935987832963e-01, 0.0, + 1.2500000000009922618e-01, 
1.2467473338532614191e-01, 9.9219766722931668212e-01, 0.0, + 1.3281249999975877629e-01, 1.3242239405610808922e-01, 9.9119337646720018231e-01, 0.0, + 1.4062500000063443695e-01, 1.4016197234769187108e-01, 9.9012858837001815893e-01, 0.0, + 1.4843749999955710428e-01, 1.4789299587297158323e-01, 9.8900336792738841041e-01, 0.0, + 1.5624999999999389377e-01, 1.5561499277355000936e-01, 9.8781778381647289411e-01, 0.0, + 1.6406250000016783797e-01, 1.6332749173677843513e-01, 9.8657190839947017658e-01, 0.0, + 1.7187500000029506952e-01, 1.7103002203168574114e-01, 9.8526581771816335031e-01, 0.0, + 1.7968750000084471319e-01, 1.7872211353598477235e-01, 9.8389959148951300349e-01, 0.0, + 1.8749999999944111373e-01, 1.8640329676172079365e-01, 9.8247331310135943561e-01, 0.0, + 1.9531249999999666933e-01, 1.9407310289290652383e-01, 9.8098706960566983692e-01, 0.0, + 2.0312500000009747758e-01, 2.0173106380173427832e-01, 9.7944095171552869594e-01, 0.0, + 2.1093750000010619283e-01, 2.0937671208609748286e-01, 9.7783505379793755896e-01, 0.0, + 2.1875000000030794811e-01, 2.1700958109531076623e-01, 9.7616947386856844915e-01, 0.0, + 2.2656249999987468358e-01, 2.2462920495758317840e-01, 9.7444431358601713011e-01, 0.0, + 2.3437500000010527690e-01, 2.3223511861161386105e-01, 9.7265967824488830384e-01, 0.0, + 2.4218749999999975020e-01, 2.3982685783066132190e-01, 9.7081567677034952268e-01, 0.0, + 2.4999999999974262255e-01, 2.4740395925427355328e-01, 9.6891242171070846023e-01, 0.0, + 2.5781250000144378953e-01, 2.5496596041727453974e-01, 9.6695002923030970443e-01, 0.0, + 2.6562500000037131409e-01, 2.6251239976951157296e-01, 9.6492861910467353503e-01, 0.0, + 2.7343750000018046675e-01, 2.7004281671875879356e-01, 9.6284831470933096575e-01, 0.0, + 2.8125000000148109303e-01, 2.7755675164775922559e-01, 9.6070924301515081556e-01, 0.0, + 2.8906250000049193982e-01, 2.8505374594101895447e-01, 9.5851153458108839800e-01, 0.0, + 2.9687499999876038048e-01, 2.9253334202214215098e-01, 9.5625532354353792730e-01, 0.0, + 
3.0468750000020183855e-01, 2.9999508337887559328e-01, 9.5394074760883418307e-01, 0.0, + 3.1249999999968136599e-01, 3.0743851458007764865e-01, 9.5156794804827016243e-01, 0.0, + 3.2031250000105265796e-01, 3.1486318132074436749e-01, 9.4913706968413158460e-01, 0.0, + 3.2812499999976940668e-01, 3.2226863043316833490e-01, 9.4664826088612763488e-01, 0.0, + 3.3593749999946614926e-01, 3.2965440993035616257e-01, 9.4410167355718033200e-01, 0.0, + 3.4375000000042527093e-01, 3.3702006902265346788e-01, 9.4149746312773774370e-01, 0.0, + 3.5156249999849442656e-01, 3.4436515814428492188e-01, 9.3883578854678395587e-01, 0.0, + 3.5937500000102234887e-01, 3.5168922899577109709e-01, 9.3611681226669574141e-01, 0.0, + 3.6718749999811656215e-01, 3.5899183454430716456e-01, 9.3334070024322457471e-01, 0.0, + 3.7500000000009731105e-01, 3.6627252908613811000e-01, 9.3050762191227864850e-01, 0.0, + 3.8281249999980870857e-01, 3.7353086823851550102e-01, 9.2761775019292336264e-01, 0.0, + 3.9062500000029726221e-01, 3.8076640899266506191e-01, 9.2467126146692291133e-01, 0.0, + 3.9843749999969407805e-01, 3.8797870972674308732e-01, 9.2166833557347060957e-01, 0.0, + 4.0625000000035305092e-01, 3.9516733024125855200e-01, 9.1860915579477875337e-01, 0.0, + 4.1406249999977551290e-01, 4.0233183177756759452e-01, 9.1549390884839154658e-01, 0.0, + 4.2187500000064509509e-01, 4.0947177705388360103e-01, 9.1232278487185369809e-01, 0.0, + 4.2968750000090671914e-01, 4.1658673028286541395e-01, 9.0909597741505332458e-01, 0.0, + 4.3749999999977579046e-01, 4.2367625720373491838e-01, 9.0581368342603141297e-01, 0.0, + 4.4531249999998151479e-01, 4.3073992511078651457e-01, 9.0247610323794946741e-01, 0.0, + 4.5312499999986916022e-01, 4.3777730287263749709e-01, 8.9908344056019573465e-01, 0.0, + 4.6093749998776573085e-01, 4.4478796095356976092e-01, 8.9563590246861235489e-01, 0.0, + 4.6874999999894750857e-01, 4.5177147149074481369e-01, 8.9213369936746989008e-01, 0.0, + 4.7656249999993238742e-01, 4.5872740821667651323e-01, 
8.8857704502806655888e-01, 0.0, + 4.8437500000085281782e-01, 4.6565534658591489769e-01, 8.8496615652574617261e-01, 0.0, + 4.9218750000026373348e-01, 4.7255486375153687995e-01, 8.8130125425121597083e-01, 0.0, + 5.0000000000063071770e-01, 4.7942553860475650707e-01, 8.7758256189007033399e-01, 0.0, + 5.0781250000246225262e-01, 4.8626695179542711589e-01, 8.7381030641185719610e-01, 0.0, + 5.1562499999926780792e-01, 4.9307868575328606120e-01, 8.6998471805877841678e-01, 0.0, + 5.2343749999866429068e-01, 4.9986032473185659786e-01, 8.6610603032132438273e-01, 0.0, + 5.3125000000045408122e-01, 5.0661145481464886497e-01, 8.6217447993465046174e-01, 0.0, + 5.3906250000013333779e-01, 5.1333166394358564766e-01, 8.5819030686259190066e-01, 0.0, + 5.4687499999851685306e-01, 5.2002054195246016910e-01, 8.5415375427815665166e-01, 0.0, + 5.5468749999993749444e-01, 5.2667768059033359673e-01, 8.5006506854945318441e-01, 0.0, + 5.6249999999973876452e-01, 5.3330267353579918765e-01, 8.4592449923120727195e-01, 0.0, + 5.7031249999981425969e-01, 5.3989511643504806138e-01, 8.4173229904143864744e-01, 0.0, + 5.7812499995867461244e-01, 5.4645460688459401855e-01, 8.3748872387310613341e-01, 0.0, + 5.8593749999782485105e-01, 5.5298074462871504853e-01, 8.3319403266578417888e-01, 0.0, + 5.9374999999819222385e-01, 5.5947313124586850464e-01, 8.2884848761033713682e-01, 0.0, + 6.0156250000116751053e-01, 5.6593137050886854755e-01, 8.2445235391376847645e-01, 0.0, + 6.0937499999740707413e-01, 5.7235506823238102569e-01, 8.2000589989871808250e-01, 0.0, + 6.1718749999640543091e-01, 5.7874383235483894961e-01, 8.1550939694845581140e-01, 0.0, + 6.2500000000776623210e-01, 5.8509727294676028286e-01, 8.1096311950067390129e-01, 0.0, + 6.3281250000034772185e-01, 5.9141500220159670675e-01, 8.0636734505489826574e-01, 0.0, + 6.4062499999937538853e-01, 5.9769663453820076615e-01, 8.0172235409879177848e-01, 0.0, + 6.4843750000738653583e-01, 6.0394178656004393613e-01, 7.9702843013700730435e-01, 0.0, + 6.5625000000061406435e-01, 
6.1015007707627788580e-01, 7.9228585967680387192e-01, 0.0, + 6.6406249999753186319e-01, 6.1632112717960729764e-01, 7.8749493216912724858e-01, 0.0, + 6.7187500000431277236e-01, 6.2245456022571910015e-01, 7.8265594002358829240e-01, 0.0, + 6.7968749999981381560e-01, 6.2855000184488485360e-01, 7.7776917860043492947e-01, 0.0, + 6.8749999999877509094e-01, 6.3460708001432264425e-01, 7.7283494615324888066e-01, 0.0, + 6.9531250000506295006e-01, 6.4062542504411801314e-01, 7.6785354383960691127e-01, 0.0, + 7.0312499999963207209e-01, 6.4660466959087170569e-01, 7.6282527571081415463e-01, 0.0, + 7.1093749999987698729e-01, 6.5254444872567274327e-01, 7.5775044865529961324e-01, 0.0, + 7.1875000000017263968e-01, 6.5844439991069747542e-01, 7.5262937241795280219e-01, 0.0, + 7.2656250000154842805e-01, 6.6430416304410366823e-01, 7.4746235956218753937e-01, 0.0, + 7.3437500000182720505e-01, 6.7012338047451913692e-01, 7.4224972545727685436e-01, 0.0, + 7.4218750000178623782e-01, 6.7590169702749525182e-01, 7.3699178825503341983e-01, 0.0, + 7.5000000000121047616e-01, 6.8163876002421985856e-01, 7.3168886887299577904e-01, 0.0, + 7.5781249999863331546e-01, 6.8733421930288085555e-01, 7.2634129097504795958e-01, 0.0, + 7.6562500000199784633e-01, 6.9298772724775825615e-01, 7.2094938094431193498e-01, 0.0, + 7.7343750000033728575e-01, 6.9859893878992307403e-01, 7.1551346788274594601e-01, 0.0, + 7.8125000000087474472e-01, 7.0416751145515477095e-01, 7.1003388356546370819e-01, 0.0, + 7.8906249999555477803e-01, 7.0969310536076801732e-01, 7.0451096244372934940e-01, 0.0, +}; diff --git a/usr/src/lib/libmvec/common/__vTBL_sqrtf.c b/usr/src/lib/libmvec/common/__vTBL_sqrtf.c new file mode 100644 index 0000000000..fe90847349 --- /dev/null +++ b/usr/src/lib/libmvec/common/__vTBL_sqrtf.c @@ -0,0 +1,554 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). 
+ * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma align 32 (__vlibm_TBL_sqrtf) + +/* + i = [0,255] + TBL[2*i+0] = 1.0 / (*(double*)&(0x3ff0000000000000LL + (i << 44))); + TBL[2*i+1] = sqrt(*(double*)&(0x3ff0000000000000LL + (i << 44)))/sqrt(2); + TBL[512+2*i+0] = 1.0 / (*(double*)&(0x3ff0000000000000LL + (i << 44))); + TBL[512+2*i+1] = sqrt(*(double*)&(0x3ff0000000000000LL + (i << 44))); +*/ + +const double __vlibm_TBL_sqrtf[] = { + 1.0000000000000000000, 0.7071067811865474617, + 0.9961089494163424263, 0.7084865030471646508, + 0.9922480620155038622, 0.7098635432250340882, + 0.9884169884169884401, 0.7112379172963151364, + 0.9846153846153846700, 0.7126096406869610878, + 0.9808429118773945854, 0.7139787286747413253, + 0.9770992366412213359, 0.7153451963912248468, + 0.9733840304182509451, 0.7167090588237321480, + 0.9696969696969697239, 0.7180703308172535770, + 0.9660377358490566113, 0.7194290270763336048, + 0.9624060150375939315, 0.7207851621669246756, + 0.9588014981273408344, 0.7221387505182088606, + 0.9552238805970149071, 0.7234898064243890925, + 0.9516728624535315539, 0.7248383440464502003, + 0.9481481481481481843, 0.7261843774138906360, + 
0.9446494464944649172, 0.7275279204264260002, + 0.9411764705882352811, 0.7288689868556624818, + 0.9377289377289377281, 0.7302075903467450946, + 0.9343065693430656626, 0.7315437444199764938, + 0.9309090909090909083, 0.7328774624724109232, + 0.9275362318840579823, 0.7342087577794206288, + 0.9241877256317689859, 0.7355376434962387355, + 0.9208633093525180335, 0.7368641326594745911, + 0.9175627240143369168, 0.7381882381886073485, + 0.9142857142857142572, 0.7395099728874520162, + 0.9110320284697508431, 0.7408293494456060779, + 0.9078014184397162900, 0.7421463804398696906, + 0.9045936395759717197, 0.7434610783356448982, + 0.9014084507042253724, 0.7447734554883115310, + 0.8982456140350877360, 0.7460835241445826771, + 0.8951048951048951041, 0.7473912964438372830, + 0.8919860627177700341, 0.7486967844194336585, + 0.8888888888888888395, 0.7499999999999998890, + 0.8858131487889273625, 0.7513009550107067058, + 0.8827586206896551602, 0.7525996611745184861, + 0.8797250859106529042, 0.7538961301134260440, + 0.8767123287671232390, 0.7551903733496606597, + 0.8737201365187713398, 0.7564824023068876802, + 0.8707482993197278587, 0.7577722283113837998, + 0.8677966101694915002, 0.7590598625931948007, + 0.8648648648648649129, 0.7603453162872774174, + 0.8619528619528619151, 0.7616286004346212168, + 0.8590604026845637398, 0.7629097259833563793, + 0.8561872909698996503, 0.7641887037898427160, + 0.8533333333333333881, 0.7654655446197431434, + 0.8504983388704319136, 0.7667402591490810604, + 0.8476821192052980125, 0.7680128579652816256, + 0.8448844884488448947, 0.7692833515681981593, + 0.8421052631578946901, 0.7705517503711221128, + 0.8393442622950819665, 0.7718180647017791607, + 0.8366013071895425091, 0.7730823048033113043, + 0.8338762214983713728, 0.7743444808352416553, + 0.8311688311688312236, 0.7756046028744285614, + 0.8284789644012945375, 0.7768626809160033009, + 0.8258064516129032251, 0.7781187248742956752, + 0.8231511254019292512, 0.7793727445837452805, + 0.8205128205128204844, 
0.7806247497997997886, + 0.8178913738019168989, 0.7818747501998001281, + 0.8152866242038216971, 0.7831227553838541189, + 0.8126984126984126977, 0.7843687748756957845, + 0.8101265822784810000, 0.7856128181235333408, + 0.8075709779179810477, 0.7868548945008857487, + 0.8050314465408805464, 0.7880950133074056119, + 0.8025078369905955800, 0.7893331837696929698, + 0.8000000000000000444, 0.7905694150420947697, + 0.7975077881619937470, 0.7918037162074953450, + 0.7950310559006210642, 0.7930360962780950151, + 0.7925696594427245056, 0.7942665641961771383, + 0.7901234567901234129, 0.7954951288348659499, + 0.7876923076923076916, 0.7967217989988725213, + 0.7852760736196319202, 0.7979465834252315037, + 0.7828746177370030646, 0.7991694907840263262, + 0.7804878048780488076, 0.8003905296791060664, + 0.7781155015197568359, 0.8016097086487912193, + 0.7757575757575757569, 0.8028270361665704735, + 0.7734138972809667667, 0.8040425206417879389, + 0.7710843373493976305, 0.8052561704203202719, + 0.7687687687687687621, 0.8064679937852462510, + 0.7664670658682635196, 0.8076779989575053609, + 0.7641791044776119479, 0.8088861940965489383, + 0.7619047619047618625, 0.8100925873009824363, + 0.7596439169139466152, 0.8112971866091980289, + 0.7573964497041419941, 0.8124999999999998890, + 0.7551622418879055942, 0.8137010353932209172, + 0.7529411764705882248, 0.8149003006503310331, + 0.7507331378299120228, 0.8160978035750371395, + 0.7485380116959063912, 0.8172935519138762039, + 0.7463556851311953233, 0.8184875533567996797, + 0.7441860465116278966, 0.8196798155377500450, + 0.7420289855072463858, 0.8208703460352310133, + 0.7398843930635837784, 0.8220591523728690841, + 0.7377521613832852543, 0.8232462420199680997, + 0.7356321839080459668, 0.8244316223920574727, + 0.7335243553008595763, 0.8256153008514316438, + 0.7314285714285714279, 0.8267972847076845433, + 0.7293447293447293811, 0.8279775812182355033, + 0.7272727272727272929, 0.8291561975888499525, + 0.7252124645892351618, 0.8303331409741513403, + 
0.7231638418079096020, 0.8315084184781292853, + 0.7211267605633803202, 0.8326820371546392874, + 0.7191011235955055980, 0.8338540040078957771, + 0.7170868347338935633, 0.8350243259929617246, + 0.7150837988826815872, 0.8361930100162282553, + 0.7130919220055710328, 0.8373600629358912695, + 0.7111111111111111382, 0.8385254915624210659, + 0.7091412742382271484, 0.8396893026590250830, + 0.7071823204419889208, 0.8408515029421067544, + 0.7052341597796143446, 0.8420120990817173690, + 0.7032967032967033516, 0.8431710977020024922, + 0.7013698630136986356, 0.8443285053816433905, + 0.6994535519125683054, 0.8454843286542926828, + 0.6975476839237056970, 0.8466385740090041079, + 0.6956521739130434590, 0.8477912478906584060, + 0.6937669376693766932, 0.8489423567003827609, + 0.6918918918918919303, 0.8500919067959651354, + 0.6900269541778976112, 0.8512399044922647207, + 0.6881720430107527431, 0.8523863560616159463, + 0.6863270777479892892, 0.8535312677342289378, + 0.6844919786096256287, 0.8546746456985838680, + 0.6826666666666666439, 0.8558164961018219774, + 0.6808510638297872175, 0.8569568250501304885, + 0.6790450928381962514, 0.8580956386091237453, + 0.6772486772486772111, 0.8592329428042199124, + 0.6754617414248020868, 0.8603687436210126771, + 0.6736842105263157743, 0.8615030470056387335, + 0.6719160104986876547, 0.8626358588651412695, + 0.6701570680628272658, 0.8637671850678283469, + 0.6684073107049608442, 0.8648970314436278395, + 0.6666666666666666297, 0.8660254037844384856, + 0.6649350649350649345, 0.8671523078444753896, + 0.6632124352331606465, 0.8682777493406126368, + 0.6614987080103359451, 0.8694017339527221333, + 0.6597938144329896781, 0.8705242673240073392, + 0.6580976863753212891, 0.8716453550613345591, + 0.6564102564102564097, 0.8727650027355586815, + 0.6547314578005115626, 0.8738832158818476969, + 0.6530612244897958663, 0.8749999999999998890, + 0.6513994910941476313, 0.8761153605547615797, + 0.6497461928934009645, 0.8772293029761374372, + 0.6481012658227848222, 
0.8783418326596996728, + 0.6464646464646465196, 0.8794529549668930191, + 0.6448362720403022497, 0.8805626752253356004, + 0.6432160804020100597, 0.8816709987291176942, + 0.6416040100250626210, 0.8827779307390958285, + 0.6400000000000000133, 0.8838834764831843271, + 0.6384039900249376398, 0.8849876411566435230, + 0.6368159203980099381, 0.8860904299223640868, + 0.6352357320099255578, 0.8871918479111493561, + 0.6336633663366336711, 0.8882919002219933358, + 0.6320987654320987525, 0.8893905919223566992, + 0.6305418719211822731, 0.8904879280484380155, + 0.6289926289926289771, 0.8915839136054440894, + 0.6274509803921568540, 0.8926785535678561923, + 0.6259168704156479190, 0.8937718528796931849, + 0.6243902439024390238, 0.8948638164547719764, + 0.6228710462287104788, 0.8959544491769656505, + 0.6213592233009708199, 0.8970437559004575956, + 0.6198547215496368334, 0.8981317414499945251, + 0.6183574879227052845, 0.8992184106211348338, + 0.6168674698795181266, 0.9003037681804957337, + 0.6153846153846154188, 0.9013878188659971702, + 0.6139088729016786150, 0.9024705673871031841, + 0.6124401913875597847, 0.9035520184250599440, + 0.6109785202863962095, 0.9046321766331330005, + 0.6095238095238095788, 0.9057110466368397672, + 0.6080760095011876754, 0.9067886330341817791, + 0.6066350710900474397, 0.9078649403958718445, + 0.6052009456264775267, 0.9089399732655616404, + 0.6037735849056603543, 0.9100137361600647568, + 0.6023529411764705355, 0.9110862335695781855, + 0.6009389671361502483, 0.9121574699579014789, + 0.5995316159250585475, 0.9132274497626535759, + 0.5981308411214952825, 0.9142961773954870752, + 0.5967365967365967361, 0.9153636572423006212, + 0.5953488372093023173, 0.9164298936634486248, + 0.5939675174013920866, 0.9174948909939498742, + 0.5925925925925925597, 0.9185586535436917055, + 0.5912240184757505679, 0.9196211855976350602, + 0.5898617511520737322, 0.9206824914160146589, + 0.5885057471264367734, 0.9217425752345390633, + 0.5871559633027523262, 0.9228014412645875186, + 
0.5858123569794050356, 0.9238590936934051312, + 0.5844748858447488260, 0.9249155366842962689, + 0.5831435079726651205, 0.9259707743768158528, + 0.5818181818181817899, 0.9270248108869577619, + 0.5804988662131519428, 0.9280776503073435713, + 0.5791855203619910020, 0.9291292967074065157, + 0.5778781038374717349, 0.9301797541335758979, + 0.5765765765765765716, 0.9312290266094586100, + 0.5752808988764045450, 0.9322771181360186565, + 0.5739910313901345207, 0.9333240326917547902, + 0.5727069351230424932, 0.9343697742328782585, + 0.5714285714285713969, 0.9354143466934853324, + 0.5701559020044543180, 0.9364577539857310562, + 0.5688888888888888884, 0.9375000000000000000, + 0.5676274944567627490, 0.9385410886050753465, + 0.5663716814159291957, 0.9395810236483067568, + 0.5651214128035320083, 0.9406198089557756825, + 0.5638766519823789070, 0.9416574483324601230, + 0.5626373626373626369, 0.9426939455623971620, + 0.5614035087719297934, 0.9437293044088436167, + 0.5601750547045951656, 0.9447635286144357991, + 0.5589519650655021543, 0.9457966219013471676, + 0.5577342047930283764, 0.9468285879714447573, + 0.5565217391304347894, 0.9478594305064437231, + 0.5553145336225596695, 0.9488891531680609948, + 0.5541125541125541121, 0.9499177595981663780, + 0.5529157667386609409, 0.9509452534189335449, + 0.5517241379310344751, 0.9519716382329884707, + 0.5505376344086021501, 0.9529969176235565387, + 0.5493562231759656633, 0.9540210951546090890, + 0.5481798715203426431, 0.9550441743710077480, + 0.5470085470085470636, 0.9560661587986472032, + 0.5458422174840085184, 0.9570870519445969782, + 0.5446808510638297962, 0.9581068572972432085, + 0.5435244161358810944, 0.9591255783264254209, + 0.5423728813559322015, 0.9601432184835759776, + 0.5412262156448203188, 0.9611597812018561893, + 0.5400843881856539630, 0.9621752698962906525, + 0.5389473684210526194, 0.9631896879639025855, + 0.5378151260504201447, 0.9642030387838443906, + 0.5366876310272536976, 0.9652153257175312140, + 0.5355648535564853097, 
0.9662265521087691766, + 0.5344467640918579843, 0.9672367212838850481, + 0.5333333333333333259, 0.9682458365518541443, + 0.5322245322245322541, 0.9692539012044263380, + 0.5311203319502074693, 0.9702609185162514027, + 0.5300207039337474502, 0.9712668917450032469, + 0.5289256198347107585, 0.9722718241315028154, + 0.5278350515463917647, 0.9732757188998396591, + 0.5267489711934156826, 0.9742785792574933934, + 0.5256673511293634693, 0.9752804083954520475, + 0.5245901639344262568, 0.9762812094883317471, + 0.5235173824130879838, 0.9772809856944930651, + 0.5224489795918367818, 0.9782797401561579287, + 0.5213849287169042279, 0.9792774759995248601, + 0.5203252032520325754, 0.9802741963348825527, + 0.5192697768762677413, 0.9812699042567237795, + 0.5182186234817813819, 0.9822646028438568599, + 0.5171717171717171713, 0.9832582951595170151, + 0.5161290322580645018, 0.9842509842514762797, + 0.5150905432595573874, 0.9852426731521528591, + 0.5140562248995983463, 0.9862333648787187101, + 0.5130260521042083743, 0.9872230624332070104, + 0.5120000000000000107, 0.9882117688026185176, + 0.5109780439121756057, 0.9891994869590258199, + 0.5099601593625497920, 0.9901862198596785847, + 0.5089463220675943811, 0.9911719704471065873, + 0.5079365079365079083, 0.9921567416492214075, + 0.5069306930693069368, 0.9931405363794189034, + 0.5059288537549406772, 0.9941233575366791309, + 0.5049309664694280331, 0.9951052080056659310, + 0.5039370078740157410, 0.9960860906568265172, + 0.5029469548133594925, 0.9970660083464885082, + 0.5019607843137254832, 0.9980449639169568510, + 0.5009784735812132794, 0.9990229601966111872, + 1.0000000000000000000, 1.0000000000000000000, + 0.9961089494163424263, 1.0019512213675874079, + 0.9922480620155038622, 1.0038986502630631303, + 0.9884169884169884401, 1.0058423087144425789, + 0.9846153846153846700, 1.0077822185373186414, + 0.9808429118773945854, 1.0097184013377193956, + 0.9770992366412213359, 1.0116508785149154193, + 0.9733840304182509451, 1.0135796712641784723, + 
0.9696969696969697239, 1.0155048005794951038, + 0.9660377358490566113, 1.0174262872562316318, + 0.9624060150375939315, 1.0193441518937556012, + 0.9588014981273408344, 1.0212584148980119458, + 0.9552238805970149071, 1.0231690964840562952, + 0.9516728624535315539, 1.0250762166785454266, + 0.9481481481481481843, 1.0269797953221864173, + 0.9446494464944649172, 1.0288798520721456065, + 0.9411764705882352811, 1.0307764064044151464, + 0.9377289377289377281, 1.0326694776161440270, + 0.9343065693430656626, 1.0345590848279280216, + 0.9309090909090909083, 1.0364452469860625516, + 0.9275362318840579823, 1.0383279828647593579, + 0.9241877256317689859, 1.0402073110683274226, + 0.9208633093525180335, 1.0420832500333165882, + 0.9175627240143369168, 1.0439558180306292012, + 0.9142857142857142572, 1.0458250331675944533, + 0.9110320284697508431, 1.0476909133900131899, + 0.9078014184397162900, 1.0495534764841665254, + 0.9045936395759717197, 1.0514127400787951494, + 0.9014084507042253724, 1.0532687216470448810, + 0.8982456140350877360, 1.0551214385083833580, + 0.8951048951048951041, 1.0569709078304851957, + 0.8919860627177700341, 1.0588171466310885016, + 0.8888888888888888395, 1.0606601717798211926, + 0.8858131487889273625, 1.0625000000000000000, + 0.8827586206896551602, 1.0643366478704001654, + 0.8797250859106529042, 1.0661701318269987127, + 0.8767123287671232390, 1.0680004681646912967, + 0.8737201365187713398, 1.0698276730389806310, + 0.8707482993197278587, 1.0716517624676404896, + 0.8677966101694915002, 1.0734727523323541742, + 0.8648648648648649129, 1.0752906583803283347, + 0.8619528619528619151, 1.0771054962258803656, + 0.8590604026845637398, 1.0789172813520042649, + 0.8561872909698996503, 1.0807260291119114015, + 0.8533333333333333881, 1.0825317547305484123, + 0.8504983388704319136, 1.0843344733060920060, + 0.8476821192052980125, 1.0861341998114228957, + 0.8448844884488448947, 1.0879309490955757500, + 0.8421052631578946901, 1.0897247358851684940, + 0.8393442622950819665, 
1.0915155747858111823, + 0.8366013071895425091, 1.0933034802834937782, + 0.8338762214983713728, 1.0950884667459519495, + 0.8311688311688312236, 1.0968705484240153236, + 0.8284789644012945375, 1.0986497394529342042, + 0.8258064516129032251, 1.1004260538536880798, + 0.8231511254019292512, 1.1021995055342748149, + 0.8205128205128204844, 1.1039701082909809671, + 0.8178913738019168989, 1.1057378758096332305, + 0.8152866242038216971, 1.1075028216668343362, + 0.8126984126984126977, 1.1092649593311780798, + 0.8101265822784810000, 1.1110243021644485850, + 0.8075709779179810477, 1.1127808634228035789, + 0.8050314465408805464, 1.1145346562579379057, + 0.8025078369905955800, 1.1162856937182343842, + 0.8000000000000000444, 1.1180339887498949025, + 0.7975077881619937470, 1.1197795541980573031, + 0.7950310559006210642, 1.1215224028078976115, + 0.7925696594427245056, 1.1232625472257142807, + 0.7901234567901234129, 1.1250000000000000000, + 0.7876923076923076916, 1.1267347735824966293, + 0.7852760736196319202, 1.1284668803292368100, + 0.7828746177370030646, 1.1301963325015702555, + 0.7804878048780488076, 1.1319231422671771625, + 0.7781155015197568359, 1.1336473217010658576, + 0.7757575757575757569, 1.1353688827865593414, + 0.7734138972809667667, 1.1370878374162658453, + 0.7710843373493976305, 1.1388041973930373985, + 0.7687687687687687621, 1.1405179744309161816, + 0.7664670658682635196, 1.1422291801560666702, + 0.7641791044776119479, 1.1439378261076953436, + 0.7619047619047618625, 1.1456439237389599572, + 0.7596439169139466152, 1.1473474844178637166, + 0.7573964497041419941, 1.1490485194281396808, + 0.7551622418879055942, 1.1507470399701229535, + 0.7529411764705882248, 1.1524430571616108843, + 0.7507331378299120228, 1.1541365820387117225, + 0.7485380116959063912, 1.1558276255566830582, + 0.7463556851311953233, 1.1575161985907584938, + 0.7441860465116278966, 1.1592023119369629924, + 0.7420289855072463858, 1.1608859763129193432, + 0.7398843930635837784, 1.1625672023586421933, + 
0.7377521613832852543, 1.1642460006373223091, + 0.7356321839080459668, 1.1659223816361019566, + 0.7335243553008595763, 1.1675963557668378456, + 0.7314285714285714279, 1.1692679333668567487, + 0.7293447293447293811, 1.1709371246996995719, + 0.7272727272727272929, 1.1726039399558574328, + 0.7252124645892351618, 1.1742683892534959700, + 0.7231638418079096020, 1.1759304826391736576, + 0.7211267605633803202, 1.1775902300885483509, + 0.7191011235955055980, 1.1792476415070753948, + 0.7170868347338935633, 1.1809027267306990705, + 0.7150837988826815872, 1.1825554955265313861, + 0.7130919220055710328, 1.1842059575935259819, + 0.7111111111111111382, 1.1858541225631422655, + 0.7091412742382271484, 1.1875000000000000000, + 0.7071823204419889208, 1.1891435994025278955, + 0.7052341597796143446, 1.1907849302036030981, + 0.7032967032967033516, 1.1924240017711820183, + 0.7013698630136986356, 1.1940608234089249429, + 0.6994535519125683054, 1.1956954043568119861, + 0.6975476839237056970, 1.1973277537917510482, + 0.6956521739130434590, 1.1989578808281797784, + 0.6937669376693766932, 1.2005857945186590996, + 0.6918918918918919303, 1.2022115038544589627, + 0.6900269541778976112, 1.2038350177661389928, + 0.6881720430107527431, 1.2054563451241193661, + 0.6863270777479892892, 1.2070754947392479117, + 0.6844919786096256287, 1.2086924753633572216, + 0.6826666666666666439, 1.2103072956898177637, + 0.6808510638297872175, 1.2119199643540823352, + 0.6790450928381962514, 1.2135304899342249652, + 0.6772486772486772111, 1.2151388809514738210, + 0.6754617414248020868, 1.2167451458707365664, + 0.6736842105263157743, 1.2183492931011203897, + 0.6719160104986876547, 1.2199513309964460372, + 0.6701570680628272658, 1.2215512678557540749, + 0.6684073107049608442, 1.2231491119238078191, + 0.6666666666666666297, 1.2247448713915889407, + 0.6649350649350649345, 1.2263385543967864066, + 0.6632124352331606465, 1.2279301690242812040, + 0.6614987080103359451, 1.2295197233066250675, + 0.6597938144329896781, 
1.2311072252245129910, + 0.6580976863753212891, 1.2326926827072512971, + 0.6564102564102564097, 1.2342761036332186020, + 0.6547314578005115626, 1.2358574958303243374, + 0.6530612244897958663, 1.2374368670764581690, + 0.6513994910941476313, 1.2390142250999380824, + 0.6497461928934009645, 1.2405895775799504754, + 0.6481012658227848222, 1.2421629321469869200, + 0.6464646464646465196, 1.2437342963832749287, + 0.6448362720403022497, 1.2453036778232047244, + 0.6432160804020100597, 1.2468710839537502366, + 0.6416040100250626210, 1.2484365222148861019, + 0.6400000000000000133, 1.2500000000000000000, + 0.6384039900249376398, 1.2515615246562992180, + 0.6368159203980099381, 1.2531211034852138830, + 0.6352357320099255578, 1.2546787437427957546, + 0.6336633663366336711, 1.2562344526401112432, + 0.6320987654320987525, 1.2577882373436317653, + 0.6305418719211822731, 1.2593401049756178800, + 0.6289926289926289771, 1.2608900626145009838, + 0.6274509803921568540, 1.2624381172952596764, + 0.6259168704156479190, 1.2639842760097927954, + 0.6243902439024390238, 1.2655285457072866784, + 0.6228710462287104788, 1.2670709332945808701, + 0.6213592233009708199, 1.2686114456365273906, + 0.6198547215496368334, 1.2701500895563484494, + 0.6183574879227052845, 1.2716868718359877199, + 0.6168674698795181266, 1.2732217992164600595, + 0.6153846153846154188, 1.2747548783981961229, + 0.6139088729016786150, 1.2762861160413836448, + 0.6124401913875597847, 1.2778155187663045034, + 0.6109785202863962095, 1.2793430931536700079, + 0.6095238095238095788, 1.2808688457449497466, + 0.6080760095011876754, 1.2823927830426995467, + 0.6066350710900474397, 1.2839149115108836607, + 0.6052009456264775267, 1.2854352375751958437, + 0.6037735849056603543, 1.2869537676233751000, + 0.6023529411764705355, 1.2884705080055189885, + 0.6009389671361502483, 1.2899854650343933749, + 0.5995316159250585475, 1.2914986449857390749, + 0.5981308411214952825, 1.2930100540985751678, + 0.5967365967365967361, 1.2945196985754987562, + 
0.5953488372093023173, 1.2960275845829825059, + 0.5939675174013920866, 1.2975337182516684109, + 0.5925925925925925597, 1.2990381056766580059, + 0.5912240184757505679, 1.3005407529178008019, + 0.5898617511520737322, 1.3020416659999787257, + 0.5885057471264367734, 1.3035408509133881161, + 0.5871559633027523262, 1.3050383136138188345, + 0.5858123569794050356, 1.3065340600229295998, + 0.5844748858447488260, 1.3080280960285217695, + 0.5831435079726651205, 1.3095204274848102344, + 0.5818181818181817899, 1.3110110602126894275, + 0.5804988662131519428, 1.3125000000000000000, + 0.5791855203619910020, 1.3139872526017899457, + 0.5778781038374717349, 1.3154728237405741709, + 0.5765765765765765716, 1.3169567191065922884, + 0.5752808988764045450, 1.3184389443580617485, + 0.5739910313901345207, 1.3199195051214296370, + 0.5727069351230424932, 1.3213984069916233643, + 0.5714285714285713969, 1.3228756555322953581, + 0.5701559020044543180, 1.3243512562760682005, + 0.5688888888888888884, 1.3258252147247766572, + 0.5676274944567627490, 1.3272975363497063750, + 0.5663716814159291957, 1.3287682265918312474, + 0.5651214128035320083, 1.3302372908620476721, + 0.5638766519823789070, 1.3317047345414072534, + 0.5626373626373626369, 1.3331705629813463965, + 0.5614035087719297934, 1.3346347815039139029, + 0.5601750547045951656, 1.3360973954019967902, + 0.5589519650655021543, 1.3375584099395434468, + 0.5577342047930283764, 1.3390178303517843439, + 0.5565217391304347894, 1.3404756618454509720, + 0.5553145336225596695, 1.3419319095989930002, + 0.5541125541125541121, 1.3433865787627923272, + 0.5529157667386609409, 1.3448396744593758001, + 0.5517241379310344751, 1.3462912017836259349, + 0.5505376344086021501, 1.3477411658029889718, + 0.5493562231759656633, 1.3491895715576813775, + 0.5481798715203426431, 1.3506364240608943472, + 0.5470085470085470636, 1.3520817282989960884, + 0.5458422174840085184, 1.3535254892317321040, + 0.5446808510638297962, 1.3549677117924250336, + 0.5435244161358810944, 
1.3564084008881691634, + 0.5423728813559322015, 1.3578475614000269367, + 0.5412262156448203188, 1.3592851981832216879, + 0.5400843881856539630, 1.3607213160673274910, + 0.5389473684210526194, 1.3621559198564605619, + 0.5378151260504201447, 1.3635890143294642218, + 0.5366876310272536976, 1.3650206042400971906, + 0.5355648535564853097, 1.3664506943172154418, + 0.5344467640918579843, 1.3678792892649556112, + 0.5333333333333333259, 1.3693063937629152971, + 0.5322245322245322541, 1.3707320124663318062, + 0.5311203319502074693, 1.3721561500062593453, + 0.5300207039337474502, 1.3735788109897444365, + 0.5289256198347107585, 1.3750000000000000000, + 0.5278350515463917647, 1.3764197215965774390, + 0.5267489711934156826, 1.3778379803155376138, + 0.5256673511293634693, 1.3792547806696193735, + 0.5245901639344262568, 1.3806701271484076443, + 0.5235173824130879838, 1.3820840242184988522, + 0.5224489795918367818, 1.3834964763236659024, + 0.5213849287169042279, 1.3849074878850211601, + 0.5203252032520325754, 1.3863170633011772104, + 0.5192697768762677413, 1.3877252069484073971, + 0.5182186234817813819, 1.3891319231808043622, + 0.5171717171717171713, 1.3905372163304368094, + 0.5161290322580645018, 1.3919410907075053796, + 0.5150905432595573874, 1.3933435506004971938, + 0.5140562248995983463, 1.3947446002763372874, + 0.5130260521042083743, 1.3961442439805422655, + 0.5120000000000000107, 1.3975424859373686282, + 0.5109780439121756057, 1.3989393303499619847, + 0.5099601593625497920, 1.4003347814005049354, + 0.5089463220675943811, 1.4017288432503627327, + 0.5079365079365079083, 1.4031215200402280541, + 0.5069306930693069368, 1.4045128158902644433, + 0.5059288537549406772, 1.4059027349002490848, + 0.5049309664694280331, 1.4072912811497126917, + 0.5039370078740157410, 1.4086784586980805045, + 0.5029469548133594925, 1.4100642715848097364, + 0.5019607843137254832, 1.4114487238295267968, + 0.5009784735812132794, 1.4128318194321642931, +}; + diff --git a/usr/src/lib/libmvec/common/__vatan.c 
b/usr/src/lib/libmvec/common/__vatan.c new file mode 100644 index 0000000000..f2a7ae1190 --- /dev/null +++ b/usr/src/lib/libmvec/common/__vatan.c @@ -0,0 +1,317 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#include <sys/isa_defs.h> +#include "libm_inlines.h" + +#ifdef _LITTLE_ENDIAN +#define HI(x) *(1+(int*)x) +#define LO(x) *(unsigned*)x +#else +#define HI(x) *(int*)x +#define LO(x) *(1+(unsigned*)x) +#endif + +#ifdef __RESTRICT +#define restrict _Restrict +#else +#define restrict +#endif + +void +__vatan(int n, double * restrict x, int stridex, double * restrict y, int stridey) +{ + double f, z, ans = 0.0L, ansu, ansl, tmp, poly, conup, conlo, dummy; + double f1, ans1, ansu1, ansl1, tmp1, poly1, conup1, conlo1; + double f2, ans2, ansu2, ansl2, tmp2, poly2, conup2, conlo2; + int index, sign, intf, intflo, intz, argcount; + int index1, sign1 = 0; + int index2, sign2 = 0; + double *yaddr,*yaddr1 = 0,*yaddr2 = 0; + extern const double __vlibm_TBL_atan1[]; + extern double fabs(double); + +/* Power series atan(x) = x + p1*x**3 + p2*x**5 + p3*x**7 + * Error = -3.08254E-18 On the interval |x| < 1/64 */ + +/* define dummy names for readability. Use parray to help compiler optimize loads */ +#define p3 parray[0] +#define p2 parray[1] +#define p1 parray[2] + + static const double parray[] = { + -1.428029046844299722E-01, /* p[3] */ + 1.999999917247000615E-01, /* p[2] */ + -3.333333333329292858E-01, /* p[1] */ + 1.0, /* not used for p[0], though */ + -1.0, /* used to flip sign of answer */ + }; + + if (n <= 0) return; /* if no. 
of elements is 0 or neg, do nothing */ + do + { + LOOP0: + + f = fabs(*x); /* fetch argument */ + intf = HI(x); /* upper half of x, as integer */ + intflo = LO(x); /* lower half of x, as integer */ + sign = intf & 0x80000000; /* sign of argument */ + intf = intf & ~0x80000000; /* abs(upper argument) */ + + if ((intf > 0x43600000) || (intf < 0x3e300000)) /* filter out special cases */ + { + if ( (intf > 0x7ff00000) || ((intf == 0x7ff00000) && (intflo !=0))) + { + ans = f - f; /* return NaN if x=NaN*/ + } + else if (intf < 0x3e300000) /* avoid underflow for small arg */ + { + dummy = 1.0e37 + f; + dummy = dummy; + ans = f; + } + else if (intf > 0x43600000) /* avoid underflow for big arg */ + { + index = 2; + ans = __vlibm_TBL_atan1[index] + __vlibm_TBL_atan1[index+1];/* pi/2 up + pi/2 low */ + } + *y = (sign) ? -ans: ans; /* store answer, with sign bit */ + x += stridex; + y += stridey; + argcount = 0; /* initialize argcount */ + if (--n <=0) break; /* we are done */ + goto LOOP0; /* otherwise, examine next arg */ + } + + index = 0; /* points to 0,0 in table */ + if (intf > 0x40500000) /* if (|x| > 64 */ + { f = -1.0/f; + index = 2; /* point to pi/2 upper, lower */ + } + else if (intf >= 0x3f900000) /* if |x| >= (1/64)... 
*/ + { + intz = (intf + 0x00008000) & 0x7fff0000;/* round arg, keep upper */ + HI(&z) = intz; /* store as a double (z) */ + LO(&z) = 0; /* ...lower */ + f = (f - z)/(1.0 + f*z); /* get reduced argument */ + index = (intz - 0x3f900000) >> 15; /* (index >> 16) << 1) */ + index = index + 4; /* skip over 0,0,pi/2,pi/2 */ + } + yaddr = y; /* address to store this answer */ + x += stridex; /* point to next arg */ + y += stridey; /* point to next result */ + argcount = 1; /* we now have 1 good argument */ + if (--n <=0) + { + f1 = 0.0; /* put dummy values in args 1,2 */ + f2 = 0.0; + index1 = 0; + index2 = 0; + goto UNROLL3; /* finish up with 1 good arg */ + } + + /*--------------------------------------------------------------------------*/ + /*--------------------------------------------------------------------------*/ + /*--------------------------------------------------------------------------*/ + + LOOP1: + + f1 = fabs(*x); /* fetch argument */ + intf = HI(x); /* upper half of x, as integer */ + intflo = LO(x); /* lower half of x, as integer */ + sign1 = intf & 0x80000000; /* sign of argument */ + intf = intf & ~0x80000000; /* abs(upper argument) */ + + if ((intf > 0x43600000) || (intf < 0x3e300000)) /* filter out special cases */ + { + if ( (intf > 0x7ff00000) || ((intf == 0x7ff00000) && (intflo !=0))) + { + ans = f1 - f1; /* return NaN if x=NaN*/ + } + else if (intf < 0x3e300000) /* avoid underflow for small arg */ + { + dummy = 1.0e37 + f1; + dummy = dummy; + ans = f1; + } + else if (intf > 0x43600000) /* avoid underflow for big arg */ + { + index1 = 2; + ans = __vlibm_TBL_atan1[index1] + __vlibm_TBL_atan1[index1+1];/* pi/2 up + pi/2 low */ + } + *y = (sign1) ? 
-ans: ans; /* store answer, with sign bit */ + x += stridex; + y += stridey; + argcount = 1; /* we still have 1 good arg */ + if (--n <=0) + { + f1 = 0.0; /* put dummy values in args 1,2 */ + f2 = 0.0; + index1 = 0; + index2 = 0; + goto UNROLL3; /* finish up with 1 good arg */ + } + goto LOOP1; /* otherwise, examine next arg */ + } + + index1 = 0; /* points to 0,0 in table */ + if (intf > 0x40500000) /* if (|x| > 64 */ + { f1 = -1.0/f1; + index1 = 2; /* point to pi/2 upper, lower */ + } + else if (intf >= 0x3f900000) /* if |x| >= (1/64)... */ + { + intz = (intf + 0x00008000) & 0x7fff0000;/* round arg, keep upper */ + HI(&z) = intz; /* store as a double (z) */ + LO(&z) = 0; /* ...lower */ + f1 = (f1 - z)/(1.0 + f1*z); /* get reduced argument */ + index1 = (intz - 0x3f900000) >> 15; /* (index >> 16) << 1) */ + index1 = index1 + 4; /* skip over 0,0,pi/2,pi/2 */ + } + yaddr1 = y; /* address to store this answer */ + x += stridex; /* point to next arg */ + y += stridey; /* point to next result */ + argcount = 2; /* we now have 2 good arguments */ + if (--n <=0) + { + f2 = 0.0; /* put dummy value in arg 2 */ + index2 = 0; + goto UNROLL3; /* finish up with 2 good args */ + } + + /*--------------------------------------------------------------------------*/ + /*--------------------------------------------------------------------------*/ + /*--------------------------------------------------------------------------*/ + + LOOP2: + + f2 = fabs(*x); /* fetch argument */ + intf = HI(x); /* upper half of x, as integer */ + intflo = LO(x); /* lower half of x, as integer */ + sign2 = intf & 0x80000000; /* sign of argument */ + intf = intf & ~0x80000000; /* abs(upper argument) */ + + if ((intf > 0x43600000) || (intf < 0x3e300000)) /* filter out special cases */ + { + if ( (intf > 0x7ff00000) || ((intf == 0x7ff00000) && (intflo !=0))) + { + ans = f2 - f2; /* return NaN if x=NaN*/ + } + else if (intf < 0x3e300000) /* avoid underflow for small arg */ + { + dummy = 1.0e37 + f2; + dummy 
= dummy; + ans = f2; + } + else if (intf > 0x43600000) /* avoid underflow for big arg */ + { + index2 = 2; + ans = __vlibm_TBL_atan1[index2] + __vlibm_TBL_atan1[index2+1];/* pi/2 up + pi/2 low */ + } + *y = (sign2) ? -ans: ans; /* store answer, with sign bit */ + x += stridex; + y += stridey; + argcount = 2; /* we still have 2 good args */ + if (--n <=0) + { + f2 = 0.0; /* put dummy value in arg 2 */ + index2 = 0; + goto UNROLL3; /* finish up with 2 good args */ + } + goto LOOP2; /* otherwise, examine next arg */ + } + + index2 = 0; /* points to 0,0 in table */ + if (intf > 0x40500000) /* if (|x| > 64 */ + { f2 = -1.0/f2; + index2 = 2; /* point to pi/2 upper, lower */ + } + else if (intf >= 0x3f900000) /* if |x| >= (1/64)... */ + { + intz = (intf + 0x00008000) & 0x7fff0000;/* round arg, keep upper */ + HI(&z) = intz; /* store as a double (z) */ + LO(&z) = 0; /* ...lower */ + f2 = (f2 - z)/(1.0 + f2*z); /* get reduced argument */ + index2 = (intz - 0x3f900000) >> 15; /* (index >> 16) << 1) */ + index2 = index2 + 4; /* skip over 0,0,pi/2,pi/2 */ + } + yaddr2 = y; /* address to store this answer */ + x += stridex; /* point to next arg */ + y += stridey; /* point to next result */ + argcount = 3; /* we now have 3 good arguments */ + + +/* here is the 3 way unrolled section, + note, we may actually only have + 1,2, or 3 'real' arguments at this point +*/ + +UNROLL3: + + conup = __vlibm_TBL_atan1[index ]; /* upper table */ + conup1 = __vlibm_TBL_atan1[index1]; /* upper table */ + conup2 = __vlibm_TBL_atan1[index2]; /* upper table */ + + conlo = __vlibm_TBL_atan1[index +1]; /* lower table */ + conlo1 = __vlibm_TBL_atan1[index1+1]; /* lower table */ + conlo2 = __vlibm_TBL_atan1[index2+1]; /* lower table */ + + tmp = f *f ; + tmp1 = f1*f1; + tmp2 = f2*f2; + + poly = f *((p3*tmp + p2)*tmp + p1)*tmp ; + poly1 = f1*((p3*tmp1 + p2)*tmp1 + p1)*tmp1; + poly2 = f2*((p3*tmp2 + p2)*tmp2 + p1)*tmp2; + + ansu = conup + f ; /* compute atan(f) upper */ + ansu1 = conup1 + f1; /* compute 
atan(f) upper */ + ansu2 = conup2 + f2; /* compute atan(f) upper */ + + ansl = (((conup - ansu) + f) + poly) + conlo ; + ansl1 = (((conup1 - ansu1) + f1) + poly1) + conlo1; + ansl2 = (((conup2 - ansu2) + f2) + poly2) + conlo2; + + ans = ansu + ansl ; + ans1 = ansu1 + ansl1; + ans2 = ansu2 + ansl2; + +/* now check to see if these are 'real' or 'dummy' arguments BEFORE storing */ + + *yaddr = sign ? -ans: ans; /* this one is always good */ + if (argcount < 3) break; /* end loop and finish up */ + *yaddr1 = sign1 ? -ans1: ans1; + *yaddr2 = sign2 ? -ans2: ans2; + + } while (--n > 0); + + if (argcount == 2) + { *yaddr1 = sign1 ? -ans1: ans1; + } +} diff --git a/usr/src/lib/libmvec/common/__vatan2.c b/usr/src/lib/libmvec/common/__vatan2.c new file mode 100644 index 0000000000..49500b8f91 --- /dev/null +++ b/usr/src/lib/libmvec/common/__vatan2.c @@ -0,0 +1,453 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#include <sys/isa_defs.h> +#include "libm_inlines.h" + +#ifdef _LITTLE_ENDIAN +#define HI(x) *(1+(int*)x) +#define LO(x) *(unsigned*)x +#else +#define HI(x) *(int*)x +#define LO(x) *(1+(unsigned*)x) +#endif + +#ifdef __RESTRICT +#define restrict _Restrict +#else +#define restrict +#endif + +extern const double __vlibm_TBL_atan2[]; + +static const double +zero = 0.0, +twom3 = 0.125, +one = 1.0, +two110 = 1.2980742146337069071e+33, +pio4 = 7.8539816339744827900e-01, +pio2 = 1.5707963267948965580e+00, +pio2_lo = 6.1232339957367658860e-17, +pi = 3.1415926535897931160e+00, +pi_lo = 1.2246467991473531772e-16, +p1 = -3.33333333333327571893331786354179101074860633009e-0001, +p2 = 1.99999999942671624230086497610394721817438631379e-0001, +p3 = -1.42856965565428636896183013324727205980484158356e-0001, +p4 = 1.10894981496317081405107718475040168084164825641e-0001; + +/* Don't __ the following; acomp will handle it */ +extern double fabs(double); + +void +__vatan2(int n, double * restrict y, int stridey, double * restrict x, + int stridex, double * restrict z, int stridez) +{ + double x0, x1, x2, y0, y1, y2, *pz0, *pz1, *pz2; + double ah0, ah1, ah2, al0, al1, al2, t0, t1, t2; + double z0, z1, z2, sign0, sign1, sign2, xh; + int i, k, hx, hy, sx, sy; + + do + { +loop0: + hy = HI(y); + sy = hy & 0x80000000; + hy &= ~0x80000000; + sign0 = (sy)? 
-one : one; + + hx = HI(x); + sx = hx & 0x80000000; + hx &= ~0x80000000; + + if (hy > hx || (hy == hx && LO(y) > LO(x))) + { + i = hx; + hx = hy; + hy = i; + x0 = fabs(*y); + y0 = fabs(*x); + if (sx) + { + ah0 = pio2; + al0 = pio2_lo; + } + else + { + ah0 = -pio2; + al0 = -pio2_lo; + sign0 = -sign0; + } + } + else + { + x0 = fabs(*x); + y0 = fabs(*y); + if (sx) + { + ah0 = -pi; + al0 = -pi_lo; + sign0 = -sign0; + } + else + ah0 = al0 = zero; + } + + if (hx >= 0x7fe00000 || hx - hy >= 0x03600000) + { + if (hx >= 0x7ff00000) + { + if ((hx ^ 0x7ff00000) | LO(&x0)) /* nan */ + ah0 = x0 + y0; + else if (hy >= 0x7ff00000) + ah0 += pio4; + *z = sign0 * ah0; + x += stridex; + y += stridey; + z += stridez; + i = 0; + if (--n <= 0) + break; + goto loop0; + } + if (hx - hy >= 0x03600000) + { + if ((int) ah0 == 0) + ah0 = y0 / x0; + *z = sign0 * ah0; + x += stridex; + y += stridey; + z += stridez; + i = 0; + if (--n <= 0) + break; + goto loop0; + } + y0 *= twom3; + x0 *= twom3; + hy -= 0x00300000; + hx -= 0x00300000; + } + else if (hy < 0x00100000) + { + if ((hy | LO(&y0)) == 0) + { + *z = sign0 * ah0; + x += stridex; + y += stridey; + z += stridez; + i = 0; + if (--n <= 0) + break; + goto loop0; + } + y0 *= two110; + x0 *= two110; + hy = HI(&y0); + hx = HI(&x0); + } + + k = (((hx - hy) + 0x00004000) >> 13) & ~0x3; + if (k > 644) + k = 644; + ah0 += __vlibm_TBL_atan2[k]; + al0 += __vlibm_TBL_atan2[k+1]; + t0 = __vlibm_TBL_atan2[k+2]; + + xh = x0; + LO(&xh) = 0; + z0 = ((y0 - t0 * xh) - t0 * (x0 - xh)) / (x0 + y0 * t0); + pz0 = z; + x += stridex; + y += stridey; + z += stridez; + i = 1; + if (--n <= 0) + break; + +loop1: + hy = HI(y); + sy = hy & 0x80000000; + hy &= ~0x80000000; + sign1 = (sy)? 
-one : one; + + hx = HI(x); + sx = hx & 0x80000000; + hx &= ~0x80000000; + + if (hy > hx || (hy == hx && LO(y) > LO(x))) + { + i = hx; + hx = hy; + hy = i; + x1 = fabs(*y); + y1 = fabs(*x); + if (sx) + { + ah1 = pio2; + al1 = pio2_lo; + } + else + { + ah1 = -pio2; + al1 = -pio2_lo; + sign1 = -sign1; + } + } + else + { + x1 = fabs(*x); + y1 = fabs(*y); + if (sx) + { + ah1 = -pi; + al1 = -pi_lo; + sign1 = -sign1; + } + else + ah1 = al1 = zero; + } + + if (hx >= 0x7fe00000 || hx - hy >= 0x03600000) + { + if (hx >= 0x7ff00000) + { + if ((hx ^ 0x7ff00000) | LO(&x1)) /* nan */ + ah1 = x1 + y1; + else if (hy >= 0x7ff00000) + ah1 += pio4; + *z = sign1 * ah1; + x += stridex; + y += stridey; + z += stridez; + i = 1; + if (--n <= 0) + break; + goto loop1; + } + if (hx - hy >= 0x03600000) + { + if ((int) ah1 == 0) + ah1 = y1 / x1; + *z = sign1 * ah1; + x += stridex; + y += stridey; + z += stridez; + i = 1; + if (--n <= 0) + break; + goto loop1; + } + y1 *= twom3; + x1 *= twom3; + hy -= 0x00300000; + hx -= 0x00300000; + } + else if (hy < 0x00100000) + { + if ((hy | LO(&y1)) == 0) + { + *z = sign1 * ah1; + x += stridex; + y += stridey; + z += stridez; + i = 1; + if (--n <= 0) + break; + goto loop1; + } + y1 *= two110; + x1 *= two110; + hy = HI(&y1); + hx = HI(&x1); + } + + k = (((hx - hy) + 0x00004000) >> 13) & ~0x3; + if (k > 644) + k = 644; + ah1 += __vlibm_TBL_atan2[k]; + al1 += __vlibm_TBL_atan2[k+1]; + t1 = __vlibm_TBL_atan2[k+2]; + + xh = x1; + LO(&xh) = 0; + z1 = ((y1 - t1 * xh) - t1 * (x1 - xh)) / (x1 + y1 * t1); + pz1 = z; + x += stridex; + y += stridey; + z += stridez; + i = 2; + if (--n <= 0) + break; + +loop2: + hy = HI(y); + sy = hy & 0x80000000; + hy &= ~0x80000000; + sign2 = (sy)? 
-one : one; + + hx = HI(x); + sx = hx & 0x80000000; + hx &= ~0x80000000; + + if (hy > hx || (hy == hx && LO(y) > LO(x))) + { + i = hx; + hx = hy; + hy = i; + x2 = fabs(*y); + y2 = fabs(*x); + if (sx) + { + ah2 = pio2; + al2 = pio2_lo; + } + else + { + ah2 = -pio2; + al2 = -pio2_lo; + sign2 = -sign2; + } + } + else + { + x2 = fabs(*x); + y2 = fabs(*y); + if (sx) + { + ah2 = -pi; + al2 = -pi_lo; + sign2 = -sign2; + } + else + ah2 = al2 = zero; + } + + if (hx >= 0x7fe00000 || hx - hy >= 0x03600000) + { + if (hx >= 0x7ff00000) + { + if ((hx ^ 0x7ff00000) | LO(&x2)) /* nan */ + ah2 = x2 + y2; + else if (hy >= 0x7ff00000) + ah2 += pio4; + *z = sign2 * ah2; + x += stridex; + y += stridey; + z += stridez; + i = 2; + if (--n <= 0) + break; + goto loop2; + } + if (hx - hy >= 0x03600000) + { + if ((int) ah2 == 0) + ah2 = y2 / x2; + *z = sign2 * ah2; + x += stridex; + y += stridey; + z += stridez; + i = 2; + if (--n <= 0) + break; + goto loop2; + } + y2 *= twom3; + x2 *= twom3; + hy -= 0x00300000; + hx -= 0x00300000; + } + else if (hy < 0x00100000) + { + if ((hy | LO(&y2)) == 0) + { + *z = sign2 * ah2; + x += stridex; + y += stridey; + z += stridez; + i = 2; + if (--n <= 0) + break; + goto loop2; + } + y2 *= two110; + x2 *= two110; + hy = HI(&y2); + hx = HI(&x2); + } + + k = (((hx - hy) + 0x00004000) >> 13) & ~0x3; + if (k > 644) + k = 644; + ah2 += __vlibm_TBL_atan2[k]; + al2 += __vlibm_TBL_atan2[k+1]; + t2 = __vlibm_TBL_atan2[k+2]; + + xh = x2; + LO(&xh) = 0; + z2 = ((y2 - t2 * xh) - t2 * (x2 - xh)) / (x2 + y2 * t2); + pz2 = z; + + x0 = z0 * z0; + x1 = z1 * z1; + x2 = z2 * z2; + + t0 = ah0 + (z0 + (al0 + (z0 * x0) * (p1 + x0 * + (p2 + x0 * (p3 + x0 * p4))))); + t1 = ah1 + (z1 + (al1 + (z1 * x1) * (p1 + x1 * + (p2 + x1 * (p3 + x1 * p4))))); + t2 = ah2 + (z2 + (al2 + (z2 * x2) * (p1 + x2 * + (p2 + x2 * (p3 + x2 * p4))))); + + *pz0 = sign0 * t0; + *pz1 = sign1 * t1; + *pz2 = sign2 * t2; + + x += stridex; + y += stridey; + z += stridez; + i = 0; + } while (--n > 0); + + if (i > 
0) + { + if (i > 1) + { + x1 = z1 * z1; + t1 = ah1 + (z1 + (al1 + (z1 * x1) * (p1 + x1 * + (p2 + x1 * (p3 + x1 * p4))))); + *pz1 = sign1 * t1; + } + + x0 = z0 * z0; + t0 = ah0 + (z0 + (al0 + (z0 * x0) * (p1 + x0 * + (p2 + x0 * (p3 + x0 * p4))))); + *pz0 = sign0 * t0; + } +} diff --git a/usr/src/lib/libmvec/common/__vatan2f.c b/usr/src/lib/libmvec/common/__vatan2f.c new file mode 100644 index 0000000000..be6c7b2824 --- /dev/null +++ b/usr/src/lib/libmvec/common/__vatan2f.c @@ -0,0 +1,476 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifdef __RESTRICT +#define restrict _Restrict +#else +#define restrict +#endif + +extern const double __vlibm_TBL_atan1[]; + +static const double +pio4 = 7.8539816339744827900e-01, +pio2 = 1.5707963267948965580e+00, +pi = 3.1415926535897931160e+00; + +static const float +zero = 0.0f, +one = 1.0f, +q1 = -3.3333333333296428046e-01f, +q2 = 1.9999999186853752618e-01f, +twop24 = 16777216.0f; + +void +__vatan2f(int n, float * restrict y, int stridey, float * restrict x, + int stridex, float * restrict z, int stridez) +{ + float x0, x1, x2, y0, y1, y2, *pz0 = 0, *pz1, *pz2; + double ah0, ah1, ah2; + double t0, t1, t2; + double sx0, sx1, sx2; + double sign0, sign1, sign2; + int i, k0 = 0, k1, k2, hx, sx, sy; + int hy0, hy1, hy2; + float base0 = 0.0, base1, base2; + double num0, num1, num2; + double den0, den1, den2; + double dx0, dx1, dx2; + double dy0, dy1, dy2; + double db0, db1, db2; + + do + { +loop0: + hy0 = *(int*)y; + hx = *(int*)x; + sign0 = one; + sy = hy0 & 0x80000000; + hy0 &= ~0x80000000; + + sx = hx & 0x80000000; + hx &= ~0x80000000; + + if (hy0 > hx) + { + x0 = *y; + y0 = *x; + i = hx; + hx = hy0; + hy0 = i; + if (sy) + { + x0 = -x0; + sign0 = -sign0; + } + if (sx) + { + y0 = -y0; + ah0 = pio2; + } + else + { + ah0 = -pio2; + sign0 = -sign0; + } + } + else + { + y0 = *y; + x0 = *x; + if (sy) + { + y0 = -y0; + sign0 = -sign0; + } + if (sx) + { + x0 = -x0; + ah0 = -pi; + sign0 = -sign0; + } + else + ah0 = zero; + } + + if (hx >= 0x7f800000 || hx - hy0 >= 0x0c800000) + { + if (hx >= 0x7f800000) + { + if (hx ^ 0x7f800000) /* nan */ + ah0 = x0 + y0; + else if (hy0 >= 0x7f800000) + ah0 += pio4; + } + else if ((int) ah0 == 0) + ah0 = y0 / x0; + *z = (sign0 == one) ? 
ah0 : -ah0; +/* sign0*ah0 would change nan behavior relative to previous release */ + x += stridex; + y += stridey; + z += stridez; + i = 0; + if (--n <= 0) + break; + goto loop0; + } + if (hy0 < 0x00800000) { + if (hy0 == 0) + { + *z = sign0 * (float) ah0; + x += stridex; + y += stridey; + z += stridez; + i = 0; + if (--n <= 0) + break; + goto loop0; + } + y0 *= twop24; /* scale subnormal y */ + x0 *= twop24; /* scale possibly subnormal x */ + hy0 = *(int*)&y0; + hx = *(int*)&x0; + } + pz0 = z; + + k0 = (hy0 - hx + 0x3f800000) & 0xfff80000; + if (k0 >= 0x3C800000) /* if |x| >= (1/64)... */ + { + *(int*)&base0 = k0; + k0 = (k0 - 0x3C800000) >> 18; /* (index >> 19) << 1) */ + k0 += 4; + /* skip over 0,0,pi/2,pi/2 */ + } + else /* |x| < 1/64 */ + { + k0 = 0; + base0 = zero; + } + + x += stridex; + y += stridey; + z += stridez; + i = 1; + if (--n <= 0) + break; + + +loop1: + hy1 = *(int*)y; + hx = *(int*)x; + sign1 = one; + sy = hy1 & 0x80000000; + hy1 &= ~0x80000000; + + sx = hx & 0x80000000; + hx &= ~0x80000000; + + if (hy1 > hx) + { + x1 = *y; + y1 = *x; + i = hx; + hx = hy1; + hy1 = i; + if (sy) + { + x1 = -x1; + sign1 = -sign1; + } + if (sx) + { + y1 = -y1; + ah1 = pio2; + } + else + { + ah1 = -pio2; + sign1 = -sign1; + } + } + else + { + y1 = *y; + x1 = *x; + if (sy) + { + y1 = -y1; + sign1 = -sign1; + } + if (sx) + { + x1 = -x1; + ah1 = -pi; + sign1 = -sign1; + } + else + ah1 = zero; + } + + if (hx >= 0x7f800000 || hx - hy1 >= 0x0c800000) + { + if (hx >= 0x7f800000) + { + if (hx ^ 0x7f800000) /* nan */ + ah1 = x1 + y1; + else if (hy1 >= 0x7f800000) + ah1 += pio4; + } + else if ((int) ah1 == 0) + ah1 = y1 / x1; + *z = (sign1 == one)? 
ah1 : -ah1; + x += stridex; + y += stridey; + z += stridez; + i = 1; + if (--n <= 0) + break; + goto loop1; + } + if (hy1 < 0x00800000) { + if (hy1 == 0) + { + *z = sign1 * (float) ah1; + x += stridex; + y += stridey; + z += stridez; + i = 1; + if (--n <= 0) + break; + goto loop1; + } + y1 *= twop24; /* scale subnormal y */ + x1 *= twop24; /* scale possibly subnormal x */ + hy1 = *(int*)&y1; + hx = *(int*)&x1; + } + pz1 = z; + + k1 = (hy1 - hx + 0x3f800000) & 0xfff80000; + if (k1 >= 0x3C800000) /* if |x| >= (1/64)... */ + { + *(int*)&base1 = k1; + k1 = (k1 - 0x3C800000) >> 18; /* (index >> 19) << 1) */ + k1 += 4; + /* skip over 0,0,pi/2,pi/2 */ + } + else /* |x| < 1/64 */ + { + k1 = 0; + base1 = zero; + } + + x += stridex; + y += stridey; + z += stridez; + i = 2; + if (--n <= 0) + break; + +loop2: + hy2 = *(int*)y; + hx = *(int*)x; + sign2 = one; + sy = hy2 & 0x80000000; + hy2 &= ~0x80000000; + + sx = hx & 0x80000000; + hx &= ~0x80000000; + + if (hy2 > hx) + { + x2 = *y; + y2 = *x; + i = hx; + hx = hy2; + hy2 = i; + if (sy) + { + x2 = -x2; + sign2 = -sign2; + } + if (sx) + { + y2 = -y2; + ah2 = pio2; + } + else + { + ah2 = -pio2; + sign2 = -sign2; + } + } + else + { + y2 = *y; + x2 = *x; + if (sy) + { + y2 = -y2; + sign2 = -sign2; + } + if (sx) + { + x2 = -x2; + ah2 = -pi; + sign2 = -sign2; + } + else + ah2 = zero; + } + + if (hx >= 0x7f800000 || hx - hy2 >= 0x0c800000) + { + if (hx >= 0x7f800000) + { + if (hx ^ 0x7f800000) /* nan */ + ah2 = x2 + y2; + else if (hy2 >= 0x7f800000) + ah2 += pio4; + } + else if ((int) ah2 == 0) + ah2 = y2 / x2; + *z = (sign2 == one)? 
ah2 : -ah2; + x += stridex; + y += stridey; + z += stridez; + i = 2; + if (--n <= 0) + break; + goto loop2; + } + if (hy2 < 0x00800000) { + if (hy2 == 0) + { + *z = sign2 * (float) ah2; + x += stridex; + y += stridey; + z += stridez; + i = 2; + if (--n <= 0) + break; + goto loop2; + } + y2 *= twop24; /* scale subnormal y */ + x2 *= twop24; /* scale possibly subnormal x */ + hy2 = *(int*)&y2; + hx = *(int*)&x2; + } + + pz2 = z; + + k2 = (hy2 - hx + 0x3f800000) & 0xfff80000; + if (k2 >= 0x3C800000) /* if |x| >= (1/64)... */ + { + *(int*)&base2 = k2; + k2 = (k2 - 0x3C800000) >> 18; /* (index >> 19) << 1) */ + k2 += 4; + /* skip over 0,0,pi/2,pi/2 */ + } + else /* |x| < 1/64 */ + { + k2 = 0; + base2 = zero; + } + + goto endloop; + +endloop: + + ah2 += __vlibm_TBL_atan1[k2]; + ah1 += __vlibm_TBL_atan1[k1]; + ah0 += __vlibm_TBL_atan1[k0]; + + db2 = base2; + db1 = base1; + db0 = base0; + dy2 = y2; + dy1 = y1; + dy0 = y0; + dx2 = x2; + dx1 = x1; + dx0 = x0; + + num2 = dy2 - dx2 * db2; + den2 = dx2 + dy2 * db2; + + num1 = dy1 - dx1 * db1; + den1 = dx1 + dy1 * db1; + + num0 = dy0 - dx0 * db0; + den0 = dx0 + dy0 * db0; + + t2 = num2 / den2; + t1 = num1 / den1; + t0 = num0 / den0; + + sx2 = t2 * t2; + sx1 = t1 * t1; + sx0 = t0 * t0; + + t2 += t2 * sx2 * (q1 + sx2 * q2); + t1 += t1 * sx1 * (q1 + sx1 * q2); + t0 += t0 * sx0 * (q1 + sx0 * q2); + + t2 += ah2; + t1 += ah1; + t0 += ah0; + + *pz2 = sign2 * t2; + *pz1 = sign1 * t1; + *pz0 = sign0 * t0; + + x += stridex; + y += stridey; + z += stridez; + i = 0; + } while (--n > 0); + + if (i > 1) + { + ah1 += __vlibm_TBL_atan1[k1]; + t1 = (y1 - x1 * (double)base1) / + (x1 + y1 * (double)base1); + sx1 = t1 * t1; + t1 += t1 * sx1 * (q1 + sx1 * q2); + t1 += ah1; + *pz1 = sign1 * t1; + } + + if (i > 0) + { + ah0 += __vlibm_TBL_atan1[k0]; + t0 = (y0 - x0 * (double)base0) / + (x0 + y0 * (double)base0); + sx0 = t0 * t0; + t0 += t0 * sx0 * (q1 + sx0 * q2); + t0 += ah0; + *pz0 = sign0 * t0; + } +} diff --git 
a/usr/src/lib/libmvec/common/__vatanf.c b/usr/src/lib/libmvec/common/__vatanf.c new file mode 100644 index 0000000000..bf14dd9ffb --- /dev/null +++ b/usr/src/lib/libmvec/common/__vatanf.c @@ -0,0 +1,411 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifdef __RESTRICT +#define restrict _Restrict +#else +#define restrict +#endif + +void +__vatanf(int n, float * restrict x, int stridex, float * restrict y, int stridey) +{ + extern const double __vlibm_TBL_atan1[]; + double conup0, conup1, conup2; + float dummy, ansf = 0.0; + float f0, f1, f2; + float ans0, ans1, ans2; + float poly0, poly1, poly2; + float sign0, sign1, sign2; + int intf, intz, argcount; + int index0, index1, index2; + float z,*yaddr0,*yaddr1,*yaddr2; + int *pz = (int *) &z; +#ifdef UNROLL4 + double conup3; + int index3; + float f3, ans3, poly3, sign3, *yaddr3; +#endif + +/* Power series atan(x) = x + p1*x**3 + p2*x**5 + p3*x**7 + * Error = -3.08254E-18 On the interval |x| < 1/64 */ + + static const float p1 = -0.33329644f /* -3.333333333329292858E-01f */ ; + static const float pone = 1.0f; + + if (n <= 0) return; /* if no. of elements is 0 or neg, do nothing */ + do + { + LOOP0: + + intf = *(int *) x; /* upper half of x, as integer */ + f0 = *x; + sign0 = pone; + if (intf < 0) { + intf = intf & ~0x80000000; /* abs(upper argument) */ + f0 = -f0; + sign0 = -sign0; + } + + if ((intf > 0x5B000000) || (intf < 0x31800000)) /* filter out special cases */ + { + if (intf > 0x7f800000) + { + ansf = f0- f0; /* return NaN if x=NaN*/ + } + else if (intf < 0x31800000) /* avoid underflow for small arg */ + { + dummy = 1.0e37 + f0; + dummy = dummy; + ansf = f0; + } + else if (intf > 0x5B000000) /* avoid underflow for big arg */ + { + index0= 2; + ansf = __vlibm_TBL_atan1[index0];/* pi/2 up */ + } + *y = sign0*ansf; /* store answer, with sign bit */ + x += stridex; + y += stridey; + argcount = 0; /* initialize argcount */ + if (--n <=0) break; /* we are done */ + goto LOOP0; /* otherwise, examine next arg */ + } + + if (intf > 0x42800000) /* if (|x| > 64 */ + { + f0 = -pone/f0; + index0 = 2; /* point to pi/2 upper, lower */ + } + else if (intf >= 0x3C800000) /* if |x| >= (1/64)... 
*/ + { + intz = (intf + 0x00040000) & 0x7ff80000;/* round arg, keep upper */ + pz[0] = intz; /* store as a float (z) */ + f0 = (f0 - z)/(pone + f0*z); + index0 = (intz - 0x3C800000) >> 18; /* (index >> 19) << 1) */ + index0 = index0+ 4; /* skip over 0,0,pi/2,pi/2 */ + } + else /* |x| < 1/64 */ + { + index0 = 0; /* points to 0,0 in table */ + } + yaddr0 = y; /* address to store this answer */ + x += stridex; /* point to next arg */ + y += stridey; /* point to next result */ + argcount = 1; /* we now have 1 good argument */ + if (--n <=0) + { + goto UNROLL; /* finish up with 1 good arg */ + } + + /*--------------------------------------------------------------------------*/ + /*--------------------------------------------------------------------------*/ + /*--------------------------------------------------------------------------*/ + + LOOP1: + + intf = *(int *) x; /* upper half of x, as integer */ + f1 = *x; + sign1 = pone; + if (intf < 0) { + intf = intf & ~0x80000000; /* abs(upper argument) */ + f1 = -f1; + sign1 = -sign1; + } + + if ((intf > 0x5B000000) || (intf < 0x31800000)) /* filter out special cases */ + { + if (intf > 0x7f800000) + { + ansf = f1 - f1; /* return NaN if x=NaN*/ + } + else if (intf < 0x31800000) /* avoid underflow for small arg */ + { + dummy = 1.0e37 + f1; + dummy = dummy; + ansf = f1; + } + else if (intf > 0x5B000000) /* avoid underflow for big arg */ + { + index1 = 2; + ansf = __vlibm_TBL_atan1[index1] ;/* pi/2 up */ + } + *y = sign1 * ansf; /* store answer, with sign bit */ + x += stridex; + y += stridey; + argcount = 1; /* we still have 1 good arg */ + if (--n <=0) + { + goto UNROLL; /* finish up with 1 good arg */ + } + goto LOOP1; /* otherwise, examine next arg */ + } + + if (intf > 0x42800000) /* if (|x| > 64 */ + { + f1 = -pone/f1; + index1 = 2; /* point to pi/2 upper, lower */ + } + else if (intf >= 0x3C800000) /* if |x| >= (1/64)... 
*/ + { + intz = (intf + 0x00040000) & 0x7ff80000;/* round arg, keep upper */ + pz[0] = intz; /* store as a float (z) */ + f1 = (f1 - z)/(pone + f1*z); + index1 = (intz - 0x3C800000) >> 18; /* (index >> 19) << 1) */ + index1 = index1 + 4; /* skip over 0,0,pi/2,pi/2 */ + } + else + { + index1 = 0; /* points to 0,0 in table */ + } + + yaddr1 = y; /* address to store this answer */ + x += stridex; /* point to next arg */ + y += stridey; /* point to next result */ + argcount = 2; /* we now have 2 good arguments */ + if (--n <=0) + { + goto UNROLL; /* finish up with 2 good args */ + } + + /*--------------------------------------------------------------------------*/ + /*--------------------------------------------------------------------------*/ + /*--------------------------------------------------------------------------*/ + + LOOP2: + + intf = *(int *) x; /* upper half of x, as integer */ + f2 = *x; + sign2 = pone; + if (intf < 0) { + intf = intf & ~0x80000000; /* abs(upper argument) */ + f2 = -f2; + sign2 = -sign2; + } + + if ((intf > 0x5B000000) || (intf < 0x31800000)) /* filter out special cases */ + { + if (intf > 0x7f800000) + { + ansf = f2 - f2; /* return NaN if x=NaN*/ + } + else if (intf < 0x31800000) /* avoid underflow for small arg */ + { + dummy = 1.0e37 + f2; + dummy = dummy; + ansf = f2; + } + else if (intf > 0x5B000000) /* avoid underflow for big arg */ + { + index2 = 2; + ansf = __vlibm_TBL_atan1[index2] ;/* pi/2 up */ + } + *y = sign2 * ansf; /* store answer, with sign bit */ + x += stridex; + y += stridey; + argcount = 2; /* we still have 2 good args */ + if (--n <=0) + { + goto UNROLL; /* finish up with 2 good args */ + } + goto LOOP2; /* otherwise, examine next arg */ + } + + if (intf > 0x42800000) /* if (|x| > 64 */ + { + f2 = -pone/f2; + index2 = 2; /* point to pi/2 upper, lower */ + } + else if (intf >= 0x3C800000) /* if |x| >= (1/64)... 
*/ + { + intz = (intf + 0x00040000) & 0x7ff80000;/* round arg, keep upper */ + pz[0] = intz; /* store as a float (z) */ + f2 = (f2 - z)/(pone + f2*z); + index2 = (intz - 0x3C800000) >> 18; /* (index >> 19) << 1) */ + index2 = index2 + 4; /* skip over 0,0,pi/2,pi/2 */ + } + else + { + index2 = 0; /* points to 0,0 in table */ + } + yaddr2 = y; /* address to store this answer */ + x += stridex; /* point to next arg */ + y += stridey; /* point to next result */ + argcount = 3; /* we now have 3 good arguments */ + if (--n <=0) + { + goto UNROLL; /* finish up with 2 good args */ + } + + + /*--------------------------------------------------------------------------*/ + /*--------------------------------------------------------------------------*/ + /*--------------------------------------------------------------------------*/ + +#ifdef UNROLL4 + LOOP3: + + intf = *(int *) x; /* upper half of x, as integer */ + f3 = *x; + sign3 = pone; + if (intf < 0) { + intf = intf & ~0x80000000; /* abs(upper argument) */ + f3 = -f3; + sign3 = -sign3; + } + + if ((intf > 0x5B000000) || (intf < 0x31800000)) /* filter out special cases */ + { + if (intf > 0x7f800000) + { + ansf = f3 - f3; /* return NaN if x=NaN*/ + } + else if (intf < 0x31800000) /* avoid underflow for small arg */ + { + dummy = 1.0e37 + f3; + dummy = dummy; + ansf = f3; + } + else if (intf > 0x5B000000) /* avoid underflow for big arg */ + { + index3 = 2; + ansf = __vlibm_TBL_atan1[index3] ;/* pi/2 up */ + } + *y = sign3 * ansf; /* store answer, with sign bit */ + x += stridex; + y += stridey; + argcount = 3; /* we still have 3 good args */ + if (--n <=0) + { + goto UNROLL; /* finish up with 3 good args */ + } + goto LOOP3; /* otherwise, examine next arg */ + } + + if (intf > 0x42800000) /* if (|x| > 64 */ + { + n3 = -pone; + d3 = f3; + f3 = n3/d3; + index3 = 2; /* point to pi/2 upper, lower */ + } + else if (intf >= 0x3C800000) /* if |x| >= (1/64)... 
*/ + { + intz = (intf + 0x00040000) & 0x7ff80000;/* round arg, keep upper */ + pz[0] = intz; /* store as a float (z) */ + n3 = (f3 - z); + d3 = (pone + f3*z); /* get reduced argument */ + f3 = n3/d3; + index3 = (intz - 0x3C800000) >> 18; /* (index >> 19) << 1) */ + index3 = index3 + 4; /* skip over 0,0,pi/2,pi/2 */ + } + else + { + n3 = f3; + d3 = pone; + index3 = 0; /* points to 0,0 in table */ + } + yaddr3 = y; /* address to store this answer */ + x += stridex; /* point to next arg */ + y += stridey; /* point to next result */ + argcount = 4; /* we now have 4 good arguments */ + if (--n <=0) + { + goto UNROLL; /* finish up with 3 good args */ + } +#endif /* UNROLL4 */ + +/* here is the n-way unrolled section, + but we may actually have less than n + arguments at this point +*/ + +UNROLL: + +#ifdef UNROLL4 + if (argcount == 4) + { + conup0 = __vlibm_TBL_atan1[index0]; + conup1 = __vlibm_TBL_atan1[index1]; + conup2 = __vlibm_TBL_atan1[index2]; + conup3 = __vlibm_TBL_atan1[index3]; + poly0 = p1*f0*f0*f0 + f0; + ans0 = sign0 * (float)(conup0 + poly0); + poly1 = p1*f1*f1*f1 + f1; + ans1 = sign1 * (float)(conup1 + poly1); + poly2 = p1*f2*f2*f2 + f2; + ans2 = sign2 * (float)(conup2 + poly2); + poly3 = p1*f3*f3*f3 + f3; + ans3 = sign3 * (float)(conup3 + poly3); + *yaddr0 = ans0; + *yaddr1 = ans1; + *yaddr2 = ans2; + *yaddr3 = ans3; + } + else +#endif + if (argcount == 3) + { + conup0 = __vlibm_TBL_atan1[index0]; + conup1 = __vlibm_TBL_atan1[index1]; + conup2 = __vlibm_TBL_atan1[index2]; + poly0 = p1*f0*f0*f0 + f0; + poly1 = p1*f1*f1*f1 + f1; + poly2 = p1*f2*f2*f2 + f2; + ans0 = sign0 * (float)(conup0 + poly0); + ans1 = sign1 * (float)(conup1 + poly1); + ans2 = sign2 * (float)(conup2 + poly2); + *yaddr0 = ans0; + *yaddr1 = ans1; + *yaddr2 = ans2; + } + else + if (argcount == 2) + { + conup0 = __vlibm_TBL_atan1[index0]; + conup1 = __vlibm_TBL_atan1[index1]; + poly0 = p1*f0*f0*f0 + f0; + poly1 = p1*f1*f1*f1 + f1; + ans0 = sign0 * (float)(conup0 + poly0); + ans1 = sign1 * 
(float)(conup1 + poly1); + *yaddr0 = ans0; + *yaddr1 = ans1; + } + else + if (argcount == 1) + { + conup0 = __vlibm_TBL_atan1[index0]; + poly0 = p1*f0*f0*f0 + f0; + ans0 = sign0 * (float)(conup0 + poly0); + *yaddr0 = ans0; + } + + } while (n > 0); + +} diff --git a/usr/src/lib/libmvec/common/__vc_abs.c b/usr/src/lib/libmvec/common/__vc_abs.c new file mode 100644 index 0000000000..4808fda37a --- /dev/null +++ b/usr/src/lib/libmvec/common/__vc_abs.c @@ -0,0 +1,44 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifdef __RESTRICT +#define restrict _Restrict +#else +#define restrict +#endif + +extern void __vhypotf(int, float *, int, float *, int, float *, int); + +void +__vc_abs(int n, float * restrict x, int stridex, float * restrict y, + int stridey) +{ + stridex <<= 1; + __vhypotf(n, x, stridex, x + 1, stridex, y, stridey); +} diff --git a/usr/src/lib/libmvec/common/__vc_exp.c b/usr/src/lib/libmvec/common/__vc_exp.c new file mode 100644 index 0000000000..8fc8a77e4e --- /dev/null +++ b/usr/src/lib/libmvec/common/__vc_exp.c @@ -0,0 +1,54 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifdef __RESTRICT +#define restrict _Restrict +#else +#define restrict +#endif + +extern void __vexpf(int, float *, int, float *, int); +extern void __vsincosf(int, float *, int, float *, int, float *, int); + +void +__vc_exp(int n, float * restrict x, int stridex, float * restrict y, + int stridey, float * restrict tmp) +{ + int i, j; + + stridex <<= 1; + stridey <<= 1; + __vexpf(n, x, stridex, tmp, 1); + __vsincosf(n, x + 1, stridex, y + 1, stridey, y, stridey); + for (i = j = 0; i < n; i++, j += stridey) + { + y[j] *= tmp[i]; + y[j+1] *= tmp[i]; + } +} diff --git a/usr/src/lib/libmvec/common/__vc_log.c b/usr/src/lib/libmvec/common/__vc_log.c new file mode 100644 index 0000000000..0f99c5a256 --- /dev/null +++ b/usr/src/lib/libmvec/common/__vc_log.c @@ -0,0 +1,49 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifdef __RESTRICT +#define restrict _Restrict +#else +#define restrict +#endif + +extern void __vatan2f(int, float *, int, float *, int, float *, int); +extern void __vhypotf(int, float *, int, float *, int, float *, int); +extern void __vlogf(int, float *, int, float *, int); + +void +__vc_log(int n, float * restrict x, int stridex, float * restrict y, + int stridey) +{ + stridex <<= 1; + stridey <<= 1; + __vhypotf(n, x, stridex, x + 1, stridex, y + 1, stridey); + __vlogf(n, y + 1, stridey, y, stridey); + __vatan2f(n, x + 1, stridex, x, stridex, y + 1, stridey); +} diff --git a/usr/src/lib/libmvec/common/__vc_pow.c b/usr/src/lib/libmvec/common/__vc_pow.c new file mode 100644 index 0000000000..b483ffe896 --- /dev/null +++ b/usr/src/lib/libmvec/common/__vc_pow.c @@ -0,0 +1,56 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifdef __RESTRICT +#define restrict _Restrict +#else +#define restrict +#endif + +extern void __vc_exp(int, float *, int, float *, int, float *); +extern void __vc_log(int, float *, int, float *, int); + +void +__vc_pow(int n, float * restrict x, int stridex, float * restrict y, + int stridey, float * restrict z, int stridez, float * restrict tmp) +{ + float r; + int i, j, k; + + __vc_log(n, x, stridex, tmp, 1); + stridey <<= 1; + for (i = j = 0; i < n; i++, j += stridey) + { + k = i << 1; + r = y[j] * tmp[k] - y[j+1] * tmp[k+1]; + tmp[k+1] = y[j+1] * tmp[k] + y[j] * tmp[k+1]; + tmp[k] = r; + } + __vc_exp(n, tmp, 1, z, stridez, tmp + n + n); +} diff --git a/usr/src/lib/libmvec/common/__vcos.c b/usr/src/lib/libmvec/common/__vcos.c new file mode 100644 index 0000000000..28f40c50d5 --- /dev/null +++ b/usr/src/lib/libmvec/common/__vcos.c @@ -0,0 +1,1100 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#include <sys/isa_defs.h> +#include <sys/ccompile.h> + +#ifdef _LITTLE_ENDIAN +#define HI(x) *(1+(int*)x) +#define LO(x) *(unsigned*)x +#else +#define HI(x) *(int*)x +#define LO(x) *(1+(unsigned*)x) +#endif + +#ifdef __RESTRICT +#define restrict _Restrict +#else +#define restrict +#endif + +/* + * vcos.1.c + * + * Vector cosine function. Just slight modifications to vsin.8.c, mainly + * in the primary range part. + * + * Modification to primary range processing. If an argument that does not + * fall in the primary range is encountered, then processing is continued + * in the medium range. + * + */ + +extern const double __vlibm_TBL_sincos_hi[], __vlibm_TBL_sincos_lo[]; + +static const double + half[2] = { 0.5, -0.5 }, + one = 1.0, + invpio2 = 0.636619772367581343075535, /* 53 bits of pi/2 */ + pio2_1 = 1.570796326734125614166, /* first 33 bits of pi/2 */ + pio2_2 = 6.077100506303965976596e-11, /* second 33 bits of pi/2 */ + pio2_3 = 2.022266248711166455796e-21, /* third 33 bits of pi/2 */ + pio2_3t = 8.478427660368899643959e-32, /* pi/2 - pio2_3 */ + pp1 = -1.666666666605760465276263943134982554676e-0001, + pp2 = 8.333261209690963126718376566146180944442e-0003, + qq1 = -4.999999999977710986407023955908711557870e-0001, + qq2 = 4.166654863857219350645055881018842089580e-0002, + poly1[2]= { -1.666666666666629669805215138920301589656e-0001, + -4.999999999999931701464060878888294524481e-0001 }, + poly2[2]= { 8.333333332390951295683993455280336376663e-0003, + 4.166666666394861917535640593963708222319e-0002 }, + poly3[2]= { -1.984126237997976692791551778230098403960e-0004, + -1.388888552656142867832756687736851681462e-0003 }, + poly4[2]= { 2.753403624854277237649987622848330351110e-0006, + 2.478519423681460796618128289454530524759e-0005 }; + +static const unsigned thresh[2] = { 0x3fc90000, 0x3fc40000 }; + +/* Don't __ the following; acomp will handle it */ +extern double fabs(double); +extern void __vlibm_vcos_big(int, double *, int, double *, int, int); + +/* + * 
y[i*stridey] := cos( x[i*stridex] ), for i = 0..n. + * + * Calls __vlibm_vcos_big to handle all elts which have abs >~ 1.647e+06. + * Argument reduction is done here for elts pi/4 < arg < 1.647e+06. + * + * elts < 2^-27 use the approximation 1.0 ~ cos(x). + */ +void +__vcos(int n, double * restrict x, int stridex, double * restrict y, + int stridey) +{ + double x0_or_one[4], x1_or_one[4], x2_or_one[4]; + double y0_or_zero[4], y1_or_zero[4], y2_or_zero[4]; + double x0, x1, x2, *py0 = 0, *py1 = 0, *py2, *xsave, *ysave; + unsigned hx0, hx1, hx2, xsb0, xsb1 = 0, xsb2; + int i, biguns, nsave, sxsave, sysave; + volatile int v __GNU_UNUSED; + nsave = n; + xsave = x; + sxsave = stridex; + ysave = y; + sysave = stridey; + biguns = 0; + + do /* MAIN LOOP */ + { + /* Gotos here so _break_ exits MAIN LOOP. */ +LOOP0: /* Find first arg in right range. */ + xsb0 = HI(x); /* get most significant word */ + hx0 = xsb0 & ~0x80000000; /* mask off sign bit */ + if (hx0 > 0x3fe921fb) { + /* Too big: arg reduction needed, so leave for second part */ + biguns = 1; + goto MEDIUM; + } + if (hx0 < 0x3e400000) { + /* Too small. cos x ~ 1. */ + v = *x; + *y = 1.0; + x += stridex; + y += stridey; + i = 0; + if (--n <= 0) + break; + goto LOOP0; + } + x0 = *x; + py0 = y; + x += stridex; + y += stridey; + i = 1; + if (--n <= 0) + break; + +LOOP1: /* Get second arg, same as above. */ + xsb1 = HI(x); + hx1 = xsb1 & ~0x80000000; + if (hx1 > 0x3fe921fb) + { + biguns = 2; + goto MEDIUM; + } + if (hx1 < 0x3e400000) + { + v = *x; + *y = 1.0; + x += stridex; + y += stridey; + i = 1; + if (--n <= 0) + break; + goto LOOP1; + } + x1 = *x; + py1 = y; + x += stridex; + y += stridey; + i = 2; + if (--n <= 0) + break; + +LOOP2: /* Get third arg, same as above. 
*/ + xsb2 = HI(x); + hx2 = xsb2 & ~0x80000000; + if (hx2 > 0x3fe921fb) + { + biguns = 3; + goto MEDIUM; + } + if (hx2 < 0x3e400000) + { + v = *x; + *y = 1.0; + x += stridex; + y += stridey; + i = 2; + if (--n <= 0) + break; + goto LOOP2; + } + x2 = *x; + py2 = y; + + /* + * 0x3fc40000 = 5/32 ~ 0.15625 + * Get msb after subtraction. Will be 1 only if + * hx0 - 5/32 is negative. + */ + i = (hx0 - 0x3fc40000) >> 31; + i |= ((hx1 - 0x3fc40000) >> 30) & 2; + i |= ((hx2 - 0x3fc40000) >> 29) & 4; + switch (i) + { + double a0, a1, a2, w0, w1, w2; + double t0, t1, t2, z0, z1, z2; + unsigned j0, j1, j2; + + case 0: /* All are > 5/32 */ + j0 = (xsb0 + 0x4000) & 0xffff8000; + j1 = (xsb1 + 0x4000) & 0xffff8000; + j2 = (xsb2 + 0x4000) & 0xffff8000; + HI(&t0) = j0; + HI(&t1) = j1; + HI(&t2) = j2; + LO(&t0) = 0; + LO(&t1) = 0; + LO(&t2) = 0; + x0 -= t0; + x1 -= t1; + x2 -= t2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * (qq1 + z0 * qq2); + t1 = z1 * (qq1 + z1 * qq2); + t2 = z2 * (qq1 + z2 * qq2); + w0 = x0 * (one + z0 * (pp1 + z0 * pp2)); + w1 = x1 * (one + z1 * (pp1 + z1 * pp2)); + w2 = x2 * (one + z2 * (pp1 + z2 * pp2)); + j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + xsb0 = (xsb0 >> 30) & 2; + xsb1 = (xsb1 >> 30) & 2; + xsb2 = (xsb2 >> 30) & 2; + a0 = __vlibm_TBL_sincos_hi[j0+1]; /* cos_hi(t) */ + a1 = __vlibm_TBL_sincos_hi[j1+1]; + a2 = __vlibm_TBL_sincos_hi[j2+1]; + /* cos_lo(t) sin_hi(t) */ + t0 = __vlibm_TBL_sincos_lo[j0+1] - (__vlibm_TBL_sincos_hi[j0+xsb0]*w0 - a0*t0); + t1 = __vlibm_TBL_sincos_lo[j1+1] - (__vlibm_TBL_sincos_hi[j1+xsb1]*w1 - a1*t1); + t2 = __vlibm_TBL_sincos_lo[j2+1] - (__vlibm_TBL_sincos_hi[j2+xsb2]*w2 - a2*t2); + + *py0 = a0 + t0; + *py1 = a1 + t1; + *py2 = a2 + t2; + break; + + case 1: + j1 = (xsb1 + 0x4000) & 0xffff8000; + j2 = (xsb2 + 0x4000) & 0xffff8000; + HI(&t1) = j1; + HI(&t2) = j2; + LO(&t1) = 0; + LO(&t2) 
= 0; + x1 -= t1; + x2 -= t2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * (poly3[1] + z0 * poly4[1]); + t1 = z1 * (qq1 + z1 * qq2); + t2 = z2 * (qq1 + z2 * qq2); + t0 = z0 * (poly1[1] + z0 * (poly2[1] + t0)); + w1 = x1 * (one + z1 * (pp1 + z1 * pp2)); + w2 = x2 * (one + z2 * (pp1 + z2 * pp2)); + j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + xsb1 = (xsb1 >> 30) & 2; + xsb2 = (xsb2 >> 30) & 2; + a1 = __vlibm_TBL_sincos_hi[j1+1]; + a2 = __vlibm_TBL_sincos_hi[j2+1]; + t1 = __vlibm_TBL_sincos_lo[j1+1] - (__vlibm_TBL_sincos_hi[j1+xsb1]*w1 - a1*t1); + t2 = __vlibm_TBL_sincos_lo[j2+1] - (__vlibm_TBL_sincos_hi[j2+xsb2]*w2 - a2*t2); + *py0 = one + t0; + *py1 = a1 + t1; + *py2 = a2 + t2; + break; + + case 2: + j0 = (xsb0 + 0x4000) & 0xffff8000; + j2 = (xsb2 + 0x4000) & 0xffff8000; + HI(&t0) = j0; + HI(&t2) = j2; + LO(&t0) = 0; + LO(&t2) = 0; + x0 -= t0; + x2 -= t2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * (qq1 + z0 * qq2); + t1 = z1 * (poly3[1] + z1 * poly4[1]); + t2 = z2 * (qq1 + z2 * qq2); + w0 = x0 * (one + z0 * (pp1 + z0 * pp2)); + t1 = z1 * (poly1[1] + z1 * (poly2[1] + t1)); + w2 = x2 * (one + z2 * (pp1 + z2 * pp2)); + j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + xsb0 = (xsb0 >> 30) & 2; + xsb2 = (xsb2 >> 30) & 2; + a0 = __vlibm_TBL_sincos_hi[j0+1]; + a2 = __vlibm_TBL_sincos_hi[j2+1]; + t0 = __vlibm_TBL_sincos_lo[j0+1] - (__vlibm_TBL_sincos_hi[j0+xsb0]*w0 - a0*t0); + t2 = __vlibm_TBL_sincos_lo[j2+1] - (__vlibm_TBL_sincos_hi[j2+xsb2]*w2 - a2*t2); + *py0 = a0 + t0; + *py1 = one + t1; + *py2 = a2 + t2; + break; + + case 3: + j2 = (xsb2 + 0x4000) & 0xffff8000; + HI(&t2) = j2; + LO(&t2) = 0; + x2 -= t2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * (poly3[1] + z0 * poly4[1]); + t1 = z1 * (poly3[1] + z1 * poly4[1]); + t2 = z2 * (qq1 + z2 * qq2); + t0 = z0 * (poly1[1] + z0 * (poly2[1] + t0)); + 
t1 = z1 * (poly1[1] + z1 * (poly2[1] + t1)); + w2 = x2 * (one + z2 * (pp1 + z2 * pp2)); + j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + xsb2 = (xsb2 >> 30) & 2; + a2 = __vlibm_TBL_sincos_hi[j2+1]; + t2 = __vlibm_TBL_sincos_lo[j2+1] - (__vlibm_TBL_sincos_hi[j2+xsb2]*w2 - a2*t2); + *py0 = one + t0; + *py1 = one + t1; + *py2 = a2 + t2; + break; + + case 4: + j0 = (xsb0 + 0x4000) & 0xffff8000; + j1 = (xsb1 + 0x4000) & 0xffff8000; + HI(&t0) = j0; + HI(&t1) = j1; + LO(&t0) = 0; + LO(&t1) = 0; + x0 -= t0; + x1 -= t1; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * (qq1 + z0 * qq2); + t1 = z1 * (qq1 + z1 * qq2); + t2 = z2 * (poly3[1] + z2 * poly4[1]); + w0 = x0 * (one + z0 * (pp1 + z0 * pp2)); + w1 = x1 * (one + z1 * (pp1 + z1 * pp2)); + t2 = z2 * (poly1[1] + z2 * (poly2[1] + t2)); + j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + xsb0 = (xsb0 >> 30) & 2; + xsb1 = (xsb1 >> 30) & 2; + a0 = __vlibm_TBL_sincos_hi[j0+1]; + a1 = __vlibm_TBL_sincos_hi[j1+1]; + t0 = __vlibm_TBL_sincos_lo[j0+1] - (__vlibm_TBL_sincos_hi[j0+xsb0]*w0 - a0*t0); + t1 = __vlibm_TBL_sincos_lo[j1+1] - (__vlibm_TBL_sincos_hi[j1+xsb1]*w1 - a1*t1); + *py0 = a0 + t0; + *py1 = a1 + t1; + *py2 = one + t2; + break; + + case 5: + j1 = (xsb1 + 0x4000) & 0xffff8000; + HI(&t1) = j1; + LO(&t1) = 0; + x1 -= t1; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * (poly3[1] + z0 * poly4[1]); + t1 = z1 * (qq1 + z1 * qq2); + t2 = z2 * (poly3[1] + z2 * poly4[1]); + t0 = z0 * (poly1[1] + z0 * (poly2[1] + t0)); + w1 = x1 * (one + z1 * (pp1 + z1 * pp2)); + t2 = z2 * (poly1[1] + z2 * (poly2[1] + t2)); + j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + xsb1 = (xsb1 >> 30) & 2; + a1 = __vlibm_TBL_sincos_hi[j1+1]; + t1 = __vlibm_TBL_sincos_lo[j1+1] - (__vlibm_TBL_sincos_hi[j1+xsb1]*w1 - a1*t1); + *py0 = one + t0; + *py1 = a1 + t1; + *py2 = one + t2; + break; + + case 6: + j0 = (xsb0 + 0x4000) & 0xffff8000; + HI(&t0) = j0; + 
LO(&t0) = 0; + x0 -= t0; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * (qq1 + z0 * qq2); + t1 = z1 * (poly3[1] + z1 * poly4[1]); + t2 = z2 * (poly3[1] + z2 * poly4[1]); + w0 = x0 * (one + z0 * (pp1 + z0 * pp2)); + t1 = z1 * (poly1[1] + z1 * (poly2[1] + t1)); + t2 = z2 * (poly1[1] + z2 * (poly2[1] + t2)); + j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + xsb0 = (xsb0 >> 30) & 2; + a0 = __vlibm_TBL_sincos_hi[j0+1]; + t0 = __vlibm_TBL_sincos_lo[j0+1] - (__vlibm_TBL_sincos_hi[j0+xsb0]*w0 - a0*t0); + *py0 = a0 + t0; + *py1 = one + t1; + *py2 = one + t2; + break; + + case 7: /* All are < 5/32 */ + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * (poly3[1] + z0 * poly4[1]); + t1 = z1 * (poly3[1] + z1 * poly4[1]); + t2 = z2 * (poly3[1] + z2 * poly4[1]); + t0 = z0 * (poly1[1] + z0 * (poly2[1] + t0)); + t1 = z1 * (poly1[1] + z1 * (poly2[1] + t1)); + t2 = z2 * (poly1[1] + z2 * (poly2[1] + t2)); + *py0 = one + t0; + *py1 = one + t1; + *py2 = one + t2; + break; + } + + x += stridex; + y += stridey; + i = 0; + } while (--n > 0); /* END MAIN LOOP */ + + /* + * CLEAN UP last 0, 1, or 2 elts. + */ + if (i > 0) /* Clean up elts at tail. i < 3. 
*/ + { + double a0, a1, w0, w1; + double t0, t1, z0, z1; + unsigned j0, j1; + + if (i > 1) + { + if (hx1 < 0x3fc40000) + { + z1 = x1 * x1; + t1 = z1 * (poly3[1] + z1 * poly4[1]); + t1 = z1 * (poly1[1] + z1 * (poly2[1] + t1)); + t1 = one + t1; + *py1 = t1; + } + else + { + j1 = (xsb1 + 0x4000) & 0xffff8000; + HI(&t1) = j1; + LO(&t1) = 0; + x1 -= t1; + z1 = x1 * x1; + t1 = z1 * (qq1 + z1 * qq2); + w1 = x1 * (one + z1 * (pp1 + z1 * pp2)); + j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + xsb1 = (xsb1 >> 30) & 2; + a1 = __vlibm_TBL_sincos_hi[j1+1]; + t1 = __vlibm_TBL_sincos_lo[j1+1] + - (__vlibm_TBL_sincos_hi[j1+xsb1]*w1 - a1*t1); + *py1 = a1 + t1; + } + } + if (hx0 < 0x3fc40000) + { + z0 = x0 * x0; + t0 = z0 * (poly3[1] + z0 * poly4[1]); + t0 = z0 * (poly1[1] + z0 * (poly2[1] + t0)); + t0 = one + t0; + *py0 = t0; + } + else + { + j0 = (xsb0 + 0x4000) & 0xffff8000; + HI(&t0) = j0; + LO(&t0) = 0; + x0 -= t0; + z0 = x0 * x0; + t0 = z0 * (qq1 + z0 * qq2); + w0 = x0 * (one + z0 * (pp1 + z0 * pp2)); + j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + xsb0 = (xsb0 >> 30) & 2; + a0 = __vlibm_TBL_sincos_hi[j0+1]; + t0 = __vlibm_TBL_sincos_lo[j0+1] - (__vlibm_TBL_sincos_hi[j0+xsb0]*w0 - a0*t0); + *py0 = a0 + t0; + } + } /* END CLEAN UP */ + + return; + + /* + * Take care of BIGUNS. + * + * We have jumped here in the middle of processing after having + * encountered a medium range argument. Therefore things are in a + * bit of a tizzy. 
+ */ + +MEDIUM: + + x0_or_one[1] = 1.0; + x1_or_one[1] = 1.0; + x2_or_one[1] = 1.0; + x0_or_one[3] = -1.0; + x1_or_one[3] = -1.0; + x2_or_one[3] = -1.0; + y0_or_zero[1] = 0.0; + y1_or_zero[1] = 0.0; + y2_or_zero[1] = 0.0; + y0_or_zero[3] = 0.0; + y1_or_zero[3] = 0.0; + y2_or_zero[3] = 0.0; + + if (biguns == 3) + { + biguns = 0; + xsb0 = xsb0 >> 31; + xsb1 = xsb1 >> 31; + goto loop2; + } + else if (biguns == 2) + { + xsb0 = xsb0 >> 31; + biguns = 0; + goto loop1; + } + biguns = 0; + + do + { + double fn0, fn1, fn2, a0, a1, a2, w0, w1, w2, y0, y1, y2; + unsigned hx; + int n0, n1, n2; + + /* + * Find 3 more to work on: Not already done, not too big. + */ + +loop0: + hx = HI(x); + xsb0 = hx >> 31; + hx &= ~0x80000000; + if (hx > 0x413921fb) /* (1.6471e+06) Too big: leave it. */ + { + if (hx >= 0x7ff00000) /* Inf or NaN */ + { + x0 = *x; + *y = x0 - x0; + } + else + biguns = 1; + x += stridex; + y += stridey; + i = 0; + if (--n <= 0) + break; + goto loop0; + } + x0 = *x; + py0 = y; + x += stridex; + y += stridey; + i = 1; + if (--n <= 0) + break; + +loop1: + hx = HI(x); + xsb1 = hx >> 31; + hx &= ~0x80000000; + if (hx > 0x413921fb) + { + if (hx >= 0x7ff00000) + { + x1 = *x; + *y = x1 - x1; + } + else + biguns = 1; + x += stridex; + y += stridey; + i = 1; + if (--n <= 0) + break; + goto loop1; + } + x1 = *x; + py1 = y; + x += stridex; + y += stridey; + i = 2; + if (--n <= 0) + break; + +loop2: + hx = HI(x); + xsb2 = hx >> 31; + hx &= ~0x80000000; + if (hx > 0x413921fb) + { + if (hx >= 0x7ff00000) + { + x2 = *x; + *y = x2 - x2; + } + else + biguns = 1; + x += stridex; + y += stridey; + i = 2; + if (--n <= 0) + break; + goto loop2; + } + x2 = *x; + py2 = y; + + n0 = (int) (x0 * invpio2 + half[xsb0]); + n1 = (int) (x1 * invpio2 + half[xsb1]); + n2 = (int) (x2 * invpio2 + half[xsb2]); + fn0 = (double) n0; + fn1 = (double) n1; + fn2 = (double) n2; + n0 = (n0 + 1) & 3; /* Add 1 (before the mod) to make sin into cos */ + n1 = (n1 + 1) & 3; + n2 = (n2 + 1) & 3; + a0 = x0 - fn0 * 
pio2_1; + a1 = x1 - fn1 * pio2_1; + a2 = x2 - fn2 * pio2_1; + w0 = fn0 * pio2_2; + w1 = fn1 * pio2_2; + w2 = fn2 * pio2_2; + x0 = a0 - w0; + x1 = a1 - w1; + x2 = a2 - w2; + y0 = (a0 - x0) - w0; + y1 = (a1 - x1) - w1; + y2 = (a2 - x2) - w2; + a0 = x0; + a1 = x1; + a2 = x2; + w0 = fn0 * pio2_3 - y0; + w1 = fn1 * pio2_3 - y1; + w2 = fn2 * pio2_3 - y2; + x0 = a0 - w0; + x1 = a1 - w1; + x2 = a2 - w2; + y0 = (a0 - x0) - w0; + y1 = (a1 - x1) - w1; + y2 = (a2 - x2) - w2; + a0 = x0; + a1 = x1; + a2 = x2; + w0 = fn0 * pio2_3t - y0; + w1 = fn1 * pio2_3t - y1; + w2 = fn2 * pio2_3t - y2; + x0 = a0 - w0; + x1 = a1 - w1; + x2 = a2 - w2; + y0 = (a0 - x0) - w0; + y1 = (a1 - x1) - w1; + y2 = (a2 - x2) - w2; + xsb0 = HI(&x0); + i = ((xsb0 & ~0x80000000) - thresh[n0&1]) >> 31; + xsb1 = HI(&x1); + i |= (((xsb1 & ~0x80000000) - thresh[n1&1]) >> 30) & 2; + xsb2 = HI(&x2); + i |= (((xsb2 & ~0x80000000) - thresh[n2&1]) >> 29) & 4; + switch (i) + { + double t0, t1, t2, z0, z1, z2; + unsigned j0, j1, j2; + + case 0: + j0 = (xsb0 + 0x4000) & 0xffff8000; + j1 = (xsb1 + 0x4000) & 0xffff8000; + j2 = (xsb2 + 0x4000) & 0xffff8000; + HI(&t0) = j0; + HI(&t1) = j1; + HI(&t2) = j2; + LO(&t0) = 0; + LO(&t1) = 0; + LO(&t2) = 0; + x0 = (x0 - t0) + y0; + x1 = (x1 - t1) + y1; + x2 = (x2 - t2) + y2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * (qq1 + z0 * qq2); + t1 = z1 * (qq1 + z1 * qq2); + t2 = z2 * (qq1 + z2 * qq2); + w0 = x0 * (one + z0 * (pp1 + z0 * pp2)); + w1 = x1 * (one + z1 * (pp1 + z1 * pp2)); + w2 = x2 * (one + z2 * (pp1 + z2 * pp2)); + j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + xsb0 = (xsb0 >> 30) & 2; + xsb1 = (xsb1 >> 30) & 2; + xsb2 = (xsb2 >> 30) & 2; + n0 ^= (xsb0 & ~(n0 << 1)); + n1 ^= (xsb1 & ~(n1 << 1)); + n2 ^= (xsb2 & ~(n2 << 1)); + xsb0 |= 1; + xsb1 |= 1; + xsb2 |= 1; + a0 = __vlibm_TBL_sincos_hi[j0+n0]; + a1 = __vlibm_TBL_sincos_hi[j1+n1]; + 
a2 = __vlibm_TBL_sincos_hi[j2+n2]; + t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0]; + t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1]; + t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2]; + *py0 = ( a0 + t0 ); + *py1 = ( a1 + t1 ); + *py2 = ( a2 + t2 ); + break; + + case 1: + j0 = n0 & 1; + j1 = (xsb1 + 0x4000) & 0xffff8000; + j2 = (xsb2 + 0x4000) & 0xffff8000; + HI(&t1) = j1; + HI(&t2) = j2; + LO(&t1) = 0; + LO(&t2) = 0; + x0_or_one[0] = x0; + x0_or_one[2] = -x0; + y0_or_zero[0] = y0; + y0_or_zero[2] = -y0; + x1 = (x1 - t1) + y1; + x2 = (x2 - t2) + y2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * (poly3[j0] + z0 * poly4[j0]); + t1 = z1 * (qq1 + z1 * qq2); + t2 = z2 * (qq1 + z2 * qq2); + t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0)); + w1 = x1 * (one + z1 * (pp1 + z1 * pp2)); + w2 = x2 * (one + z2 * (pp1 + z2 * pp2)); + j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + xsb1 = (xsb1 >> 30) & 2; + xsb2 = (xsb2 >> 30) & 2; + n1 ^= (xsb1 & ~(n1 << 1)); + n2 ^= (xsb2 & ~(n2 << 1)); + xsb1 |= 1; + xsb2 |= 1; + a1 = __vlibm_TBL_sincos_hi[j1+n1]; + a2 = __vlibm_TBL_sincos_hi[j2+n2]; + t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0); + t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1]; + t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2]; + *py0 = t0; + *py1 = ( a1 + t1 ); + *py2 = ( a2 + t2 ); + break; + + case 2: + j0 = (xsb0 + 0x4000) & 0xffff8000; + j1 = n1 & 1; + j2 = (xsb2 + 0x4000) & 0xffff8000; + HI(&t0) = j0; + HI(&t2) = j2; + LO(&t0) = 0; + LO(&t2) = 0; + x1_or_one[0] = x1; + x1_or_one[2] = -x1; + x0 = (x0 - t0) + y0; + y1_or_zero[0] = y1; + y1_or_zero[2] = -y1; + x2 = (x2 - t2) + y2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * (qq1 + z0 * 
qq2); + t1 = z1 * (poly3[j1] + z1 * poly4[j1]); + t2 = z2 * (qq1 + z2 * qq2); + w0 = x0 * (one + z0 * (pp1 + z0 * pp2)); + t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1)); + w2 = x2 * (one + z2 * (pp1 + z2 * pp2)); + j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + xsb0 = (xsb0 >> 30) & 2; + xsb2 = (xsb2 >> 30) & 2; + n0 ^= (xsb0 & ~(n0 << 1)); + n2 ^= (xsb2 & ~(n2 << 1)); + xsb0 |= 1; + xsb2 |= 1; + a0 = __vlibm_TBL_sincos_hi[j0+n0]; + a2 = __vlibm_TBL_sincos_hi[j2+n2]; + t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0]; + t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1); + t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2]; + *py0 = ( a0 + t0 ); + *py1 = t1; + *py2 = ( a2 + t2 ); + break; + + case 3: + j0 = n0 & 1; + j1 = n1 & 1; + j2 = (xsb2 + 0x4000) & 0xffff8000; + HI(&t2) = j2; + LO(&t2) = 0; + x0_or_one[0] = x0; + x0_or_one[2] = -x0; + x1_or_one[0] = x1; + x1_or_one[2] = -x1; + y0_or_zero[0] = y0; + y0_or_zero[2] = -y0; + y1_or_zero[0] = y1; + y1_or_zero[2] = -y1; + x2 = (x2 - t2) + y2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * (poly3[j0] + z0 * poly4[j0]); + t1 = z1 * (poly3[j1] + z1 * poly4[j1]); + t2 = z2 * (qq1 + z2 * qq2); + t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0)); + t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1)); + w2 = x2 * (one + z2 * (pp1 + z2 * pp2)); + j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + xsb2 = (xsb2 >> 30) & 2; + n2 ^= (xsb2 & ~(n2 << 1)); + xsb2 |= 1; + a2 = __vlibm_TBL_sincos_hi[j2+n2]; + t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0); + t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1); + t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2]; + *py0 = t0; + *py1 = t1; + *py2 = ( a2 + t2 ); + break; + + case 4: + j0 = (xsb0 + 0x4000) & 0xffff8000; + j1 = (xsb1 + 0x4000) & 0xffff8000; + 
j2 = n2 & 1; + HI(&t0) = j0; + HI(&t1) = j1; + LO(&t0) = 0; + LO(&t1) = 0; + x2_or_one[0] = x2; + x2_or_one[2] = -x2; + x0 = (x0 - t0) + y0; + x1 = (x1 - t1) + y1; + y2_or_zero[0] = y2; + y2_or_zero[2] = -y2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * (qq1 + z0 * qq2); + t1 = z1 * (qq1 + z1 * qq2); + t2 = z2 * (poly3[j2] + z2 * poly4[j2]); + w0 = x0 * (one + z0 * (pp1 + z0 * pp2)); + w1 = x1 * (one + z1 * (pp1 + z1 * pp2)); + t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2)); + j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + xsb0 = (xsb0 >> 30) & 2; + xsb1 = (xsb1 >> 30) & 2; + n0 ^= (xsb0 & ~(n0 << 1)); + n1 ^= (xsb1 & ~(n1 << 1)); + xsb0 |= 1; + xsb1 |= 1; + a0 = __vlibm_TBL_sincos_hi[j0+n0]; + a1 = __vlibm_TBL_sincos_hi[j1+n1]; + t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0]; + t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1]; + t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2); + *py0 = ( a0 + t0 ); + *py1 = ( a1 + t1 ); + *py2 = t2; + break; + + case 5: + j0 = n0 & 1; + j1 = (xsb1 + 0x4000) & 0xffff8000; + j2 = n2 & 1; + HI(&t1) = j1; + LO(&t1) = 0; + x0_or_one[0] = x0; + x0_or_one[2] = -x0; + x2_or_one[0] = x2; + x2_or_one[2] = -x2; + y0_or_zero[0] = y0; + y0_or_zero[2] = -y0; + x1 = (x1 - t1) + y1; + y2_or_zero[0] = y2; + y2_or_zero[2] = -y2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * (poly3[j0] + z0 * poly4[j0]); + t1 = z1 * (qq1 + z1 * qq2); + t2 = z2 * (poly3[j2] + z2 * poly4[j2]); + t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0)); + w1 = x1 * (one + z1 * (pp1 + z1 * pp2)); + t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2)); + j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + xsb1 = (xsb1 >> 30) & 2; + n1 ^= (xsb1 & ~(n1 << 1)); + xsb1 |= 1; + a1 = __vlibm_TBL_sincos_hi[j1+n1]; + t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0); + t1 = 
(__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1]; + t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2); + *py0 = t0; + *py1 = ( a1 + t1 ); + *py2 = t2; + break; + + case 6: + j0 = (xsb0 + 0x4000) & 0xffff8000; + j1 = n1 & 1; + j2 = n2 & 1; + HI(&t0) = j0; + LO(&t0) = 0; + x1_or_one[0] = x1; + x1_or_one[2] = -x1; + x2_or_one[0] = x2; + x2_or_one[2] = -x2; + x0 = (x0 - t0) + y0; + y1_or_zero[0] = y1; + y1_or_zero[2] = -y1; + y2_or_zero[0] = y2; + y2_or_zero[2] = -y2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * (qq1 + z0 * qq2); + t1 = z1 * (poly3[j1] + z1 * poly4[j1]); + t2 = z2 * (poly3[j2] + z2 * poly4[j2]); + w0 = x0 * (one + z0 * (pp1 + z0 * pp2)); + t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1)); + t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2)); + j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + xsb0 = (xsb0 >> 30) & 2; + n0 ^= (xsb0 & ~(n0 << 1)); + xsb0 |= 1; + a0 = __vlibm_TBL_sincos_hi[j0+n0]; + t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0]; + t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1); + t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2); + *py0 = ( a0 + t0 ); + *py1 = t1; + *py2 = t2; + break; + + case 7: + j0 = n0 & 1; + j1 = n1 & 1; + j2 = n2 & 1; + x0_or_one[0] = x0; + x0_or_one[2] = -x0; + x1_or_one[0] = x1; + x1_or_one[2] = -x1; + x2_or_one[0] = x2; + x2_or_one[2] = -x2; + y0_or_zero[0] = y0; + y0_or_zero[2] = -y0; + y1_or_zero[0] = y1; + y1_or_zero[2] = -y1; + y2_or_zero[0] = y2; + y2_or_zero[2] = -y2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * (poly3[j0] + z0 * poly4[j0]); + t1 = z1 * (poly3[j1] + z1 * poly4[j1]); + t2 = z2 * (poly3[j2] + z2 * poly4[j2]); + t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0)); + t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1)); + t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2)); + t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0); + t1 = x1_or_one[n1] + 
(y1_or_zero[n1] + x1_or_one[n1] * t1); + t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2); + *py0 = t0; + *py1 = t1; + *py2 = t2; + break; + } + + x += stridex; + y += stridey; + i = 0; + } while (--n > 0); + + if (i > 0) + { + double fn0, fn1, a0, a1, w0, w1, y0, y1; + double t0, t1, z0, z1; + unsigned j0, j1; + int n0, n1; + + if (i > 1) + { + n1 = (int) (x1 * invpio2 + half[xsb1]); + fn1 = (double) n1; + n1 = (n1 + 1) & 3; /* Add 1 (before the mod) to make sin into cos */ + a1 = x1 - fn1 * pio2_1; + w1 = fn1 * pio2_2; + x1 = a1 - w1; + y1 = (a1 - x1) - w1; + a1 = x1; + w1 = fn1 * pio2_3 - y1; + x1 = a1 - w1; + y1 = (a1 - x1) - w1; + a1 = x1; + w1 = fn1 * pio2_3t - y1; + x1 = a1 - w1; + y1 = (a1 - x1) - w1; + xsb1 = HI(&x1); + if ((xsb1 & ~0x80000000) < thresh[n1&1]) + { + j1 = n1 & 1; + x1_or_one[0] = x1; + x1_or_one[2] = -x1; + y1_or_zero[0] = y1; + y1_or_zero[2] = -y1; + z1 = x1 * x1; + t1 = z1 * (poly3[j1] + z1 * poly4[j1]); + t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1)); + t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1); + *py1 = t1; + } + else + { + j1 = (xsb1 + 0x4000) & 0xffff8000; + HI(&t1) = j1; + LO(&t1) = 0; + x1 = (x1 - t1) + y1; + z1 = x1 * x1; + t1 = z1 * (qq1 + z1 * qq2); + w1 = x1 * (one + z1 * (pp1 + z1 * pp2)); + j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + xsb1 = (xsb1 >> 30) & 2; + n1 ^= (xsb1 & ~(n1 << 1)); + xsb1 |= 1; + a1 = __vlibm_TBL_sincos_hi[j1+n1]; + t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1]; + *py1 = ( a1 + t1 ); + } + } + n0 = (int) (x0 * invpio2 + half[xsb0]); + fn0 = (double) n0; + n0 = (n0 + 1) & 3; /* Add 1 (before the mod) to make sin into cos */ + a0 = x0 - fn0 * pio2_1; + w0 = fn0 * pio2_2; + x0 = a0 - w0; + y0 = (a0 - x0) - w0; + a0 = x0; + w0 = fn0 * pio2_3 - y0; + x0 = a0 - w0; + y0 = (a0 - x0) - w0; + a0 = x0; + w0 = fn0 * pio2_3t - y0; + x0 = a0 - w0; + y0 = (a0 - x0) - w0; + xsb0 = HI(&x0); + if ((xsb0 & ~0x80000000) < thresh[n0&1]) + 
{ + j0 = n0 & 1; + x0_or_one[0] = x0; + x0_or_one[2] = -x0; + y0_or_zero[0] = y0; + y0_or_zero[2] = -y0; + z0 = x0 * x0; + t0 = z0 * (poly3[j0] + z0 * poly4[j0]); + t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0)); + t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0); + *py0 = t0; + } + else + { + j0 = (xsb0 + 0x4000) & 0xffff8000; + HI(&t0) = j0; + LO(&t0) = 0; + x0 = (x0 - t0) + y0; + z0 = x0 * x0; + t0 = z0 * (qq1 + z0 * qq2); + w0 = x0 * (one + z0 * (pp1 + z0 * pp2)); + j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + xsb0 = (xsb0 >> 30) & 2; + n0 ^= (xsb0 & ~(n0 << 1)); + xsb0 |= 1; + a0 = __vlibm_TBL_sincos_hi[j0+n0]; + t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0]; + *py0 = ( a0 + t0 ); + } + } + + if (biguns) + __vlibm_vcos_big(nsave, xsave, sxsave, ysave, sysave, 0x413921fb); +} diff --git a/usr/src/lib/libmvec/common/__vcosbig.c b/usr/src/lib/libmvec/common/__vcosbig.c new file mode 100644 index 0000000000..bd4a241215 --- /dev/null +++ b/usr/src/lib/libmvec/common/__vcosbig.c @@ -0,0 +1,173 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 
+ */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include <sys/isa_defs.h> + +#ifdef _LITTLE_ENDIAN +#define HI(x) *(1+(int*)x) +#define LO(x) *(unsigned*)x +#else +#define HI(x) *(int*)x +#define LO(x) *(1+(unsigned*)x) +#endif + +#ifdef __RESTRICT +#define restrict _Restrict +#else +#define restrict +#endif + +extern const double __vlibm_TBL_sincos_hi[], __vlibm_TBL_sincos_lo[]; +extern int __vlibm_rem_pio2m(double *, double *, int, int, int); + +static const double + zero = 0.0, + one = 1.0, + two24 = 16777216.0, + pp1 = -1.666666666605760465276263943134982554676e-0001, + pp2 = 8.333261209690963126718376566146180944442e-0003, + p1 = -1.666666666666629669805215138920301589656e-0001, + p2 = 8.333333332390951295683993455280336376663e-0003, + p3 = -1.984126237997976692791551778230098403960e-0004, + p4 = 2.753403624854277237649987622848330351110e-0006, + qq1 = -4.999999999977710986407023955908711557870e-0001, + qq2 = 4.166654863857219350645055881018842089580e-0002, + q1 = -4.999999999999931701464060878888294524481e-0001, + q2 = 4.166666666394861917535640593963708222319e-0002, + q3 = -1.388888552656142867832756687736851681462e-0003, + q4 = 2.478519423681460796618128289454530524759e-0005; +/* + * __vlibm_vcos_big: for each of the n elements of x (step stridex) whose + * magnitude has a high word strictly above thresh and below 0x7ff00000 + * (finite), store cos(x[i]) into the matching element of y (step stridey). + * Elements at or below thresh, and infinities and NaNs, are skipped and + * their y slots are left unwritten. + * + * Each accepted argument is split into up to three 24-bit pieces and handed + * to __vlibm_rem_pio2m; the quadrant that comes back is bumped by one so that + * the sine-style evaluation that follows yields a cosine. + * + * NOTE(review): thresh is an int compared against an unsigned high word, so + * a negative thresh converts to a huge unsigned value and skips everything; + * callers are assumed to pass a non-negative bit pattern - confirm. + * NOTE(review): the loop test is n--, so a negative n is not treated as an + * empty vector - callers are assumed to pass n >= 0. + */ +void +__vlibm_vcos_big(int n, double * restrict x, int stridex, double * restrict y, + int stridey, int thresh) +{ + for (; n--; x += stridex, y += stridey) + { + double tx, tt[3], ty[2], t, w, z, a; + unsigned hx, xsb; + int e0, nx, j; + + hx = HI(x); + xsb = hx & 0x80000000; + hx &= ~0x80000000; /* hx: high word of |x|; xsb: sign bit of x */ + if (hx <= thresh || hx >= 0x7ff00000) + continue; + e0 = (hx >> 20) - 1046; /* exponent relative to the 0x416 (= 1046) field forced into tx */ + HI(&tx) = 0x41600000 | (hx & 0xfffff); + LO(&tx) = LO(x); /* tx: mantissa of |x| with exponent field 0x416 */ + tt[0] = (double)((int) tx); /* first piece: integer part of tx */ + tx = (tx - tt[0]) * two24; + if (tx != zero) + { + nx = 2; + tt[1] = (double)((int) tx); + tt[2] = (tx - tt[1]) * two24; + if (tt[2] != zero) + nx = 3; + } + else + { + nx = 1; + tt[1] = tt[2] = zero; + } + nx = __vlibm_rem_pio2m(tt, ty, e0, nx, 2); /* helper not visible here; presumably reduces the nx pieces modulo pi/2 */ + if
(xsb) + { + nx = -nx; /* negative x: mirror the quadrant and the reduced argument */ + ty[0] = -ty[0]; + ty[1] = -ty[1]; + } + nx = (nx + 1) & 3; /* Add 1 to turn sin into cos */ + + /* now nx and ty[*] are the quadrant and reduced arg */ + xsb = (nx & 2) << 30; /* quadrants 2 and 3 negate the result; NOTE(review): shifts an int into its sign bit */ + hx = HI(&ty[0]); + if (nx & 1) + { /* odd quadrant: even (1 + t) evaluation, sign of the reduced arg is irrelevant */ + if (hx & 0x80000000) + { + ty[0] = -ty[0]; + ty[1] = -ty[1]; + hx &= ~0x80000000; + } + if (hx < 0x3fc40000) + { /* small reduced argument: q1..q4 polynomial alone */ + z = ty[0] * ty[0]; + t = z * (q1 + z * (q2 + z * (q3 + z * q4))); + a = one + t; + } + else + { + j = (hx + 0x4000) & 0x7fff8000; /* round the high word to a multiple of 0x8000 */ + HI(&t) = j; + LO(&t) = 0; /* t: breakpoint nearest the reduced argument */ + ty[0] = (ty[0] - t) + ty[1]; /* residual from the breakpoint, tail folded in */ + z = ty[0] * ty[0]; + t = z * (qq1 + z * qq2); + w = ty[0] * (one + z * (pp1 + z * pp2)); + j = ((j - 0x3fc40000) >> 13) & ~3; /* table index, a multiple of 4 */ + a = __vlibm_TBL_sincos_hi[j+1]; + t = __vlibm_TBL_sincos_lo[j+1] - (__vlibm_TBL_sincos_hi[j] * w - a * t); /* looks like cos(b)cos(r) - sin(b)sin(r); table layout not visible here - confirm */ + a += t; + } + } + else + { /* even quadrant: odd (x + x*t) evaluation, so the sign of the reduced arg matters */ + if (hx & 0x80000000) + { + ty[0] = -ty[0]; + ty[1] = -ty[1]; + hx &= ~0x80000000; + xsb ^= 0x80000000; /* fold the sign of the reduced arg into the result sign */ + } + if (hx < 0x3fc90000) + { /* small reduced argument: p1..p4 polynomial alone */ + z = ty[0] * ty[0]; + t = z * (p1 + z * (p2 + z * (p3 + z * p4))); + a = ty[0] + (ty[1] + ty[0] * t); + } + else + { + j = (hx + 0x4000) & 0x7fff8000; + HI(&t) = j; + LO(&t) = 0; + ty[0] = (ty[0] - t) + ty[1]; + z = ty[0] * ty[0]; + t = z * (qq1 + z * qq2); + w = ty[0] * (one + z * (pp1 + z * pp2)); + j = ((j - 0x3fc40000) >> 13) & ~3; + a = __vlibm_TBL_sincos_hi[j]; + t = (__vlibm_TBL_sincos_hi[j+1] * w + a * t) + __vlibm_TBL_sincos_lo[j]; /* looks like sin(b)cos(r) + cos(b)sin(r); table layout not visible here - confirm */ + a += t; + } + } + if (xsb) a = -a; + *y = a; + } +} diff --git a/usr/src/lib/libmvec/common/__vcosbig_ultra3.c b/usr/src/lib/libmvec/common/__vcosbig_ultra3.c new file mode 100644 index 0000000000..04b1c9ec82 --- /dev/null +++ b/usr/src/lib/libmvec/common/__vcosbig_ultra3.c @@ -0,0 +1,653 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include <sys/isa_defs.h> + +#ifdef _LITTLE_ENDIAN +#define HI(x) *(1+(int*)x) +#define LO(x) *(unsigned*)x +#else +#define HI(x) *(int*)x +#define LO(x) *(1+(unsigned*)x) +#endif + +#ifdef __RESTRICT +#define restrict _Restrict +#else +#define restrict +#endif + +extern const double __vlibm_TBL_sincos_hi[], __vlibm_TBL_sincos_lo[]; + +static const double + half[2] = { 0.5, -0.5 }, + one = 1.0, + invpio2 = 0.636619772367581343075535, + pio2_1 = 1.570796326734125614166, + pio2_2 = 6.077100506303965976596e-11, + pio2_3 = 2.022266248711166455796e-21, + pio2_3t = 8.478427660368899643959e-32, + pp1 = -1.666666666605760465276263943134982554676e-0001, + pp2 = 8.333261209690963126718376566146180944442e-0003, + qq1 = -4.999999999977710986407023955908711557870e-0001, + qq2 = 4.166654863857219350645055881018842089580e-0002, + poly1[2]= { -1.666666666666629669805215138920301589656e-0001, + -4.999999999999931701464060878888294524481e-0001 }, + poly2[2]= { 8.333333332390951295683993455280336376663e-0003, + 4.166666666394861917535640593963708222319e-0002 }, + poly3[2]= { -1.984126237997976692791551778230098403960e-0004, + -1.388888552656142867832756687736851681462e-0003 }, + poly4[2]= { 
2.753403624854277237649987622848330351110e-0006, + 2.478519423681460796618128289454530524759e-0005 }; + +static const unsigned thresh[2] = { 0x3fc90000, 0x3fc40000 }; + +extern void __vlibm_vcos_big(int, double *, int, double *, int, int); +/* + * __vlibm_vcos_big_ultra3: cosine over a strided vector for mid-range + * arguments, processed three elements at a time (suffixes 0, 1, 2). + * + * An element is handled here only when the high word of |x| is above pthresh + * and at most 0x413921fb. Elements at or below pthresh are skipped with y + * left unwritten. Finite elements above 0x413921fb only set biguns, and after + * the main work the whole original vector is passed once to __vlibm_vcos_big + * with threshold 0x413921fb to fill those in. + * + * Reduction: n = (int)(x * invpio2 + half[sign]), then n times pio2_1, + * pio2_2, pio2_3 and pio2_3t are subtracted in turn, with the rounding error + * carried in y0..y2. n is bumped by one so the sine-style kernels produce a + * cosine. + * + * The xN_or_one / yN_or_zero arrays are indexed by quadrant: + * [0] = x, y [1] = 1.0, 0.0 [2] = -x, -y [3] = -1.0, 0.0 + * so X[n] + (Y[n] + X[n] * t) covers all four quadrant cases without + * branching. + * + * NOTE(review): the do/while body reads *x before testing n, so n is + * assumed to be at least 1 on entry - confirm against callers. + */ +void +__vlibm_vcos_big_ultra3(int n, double * restrict x, int stridex, double * restrict y, + int stridey, int pthresh) +{ + double x0_or_one[4], x1_or_one[4], x2_or_one[4]; + double y0_or_zero[4], y1_or_zero[4], y2_or_zero[4]; + double x0, x1, x2, *py0, *py1, *py2, *xsave, *ysave; + unsigned xsb0, xsb1, xsb2; + int i, biguns, nsave, sxsave, sysave; + + nsave = n; + xsave = x; + sxsave = stridex; + ysave = y; + sysave = stridey; + biguns = 0; + /* constant slots of the quadrant-indexed tables; slots 0 and 2 are filled per element */ + x0_or_one[1] = 1.0; + x1_or_one[1] = 1.0; + x2_or_one[1] = 1.0; + x0_or_one[3] = -1.0; + x1_or_one[3] = -1.0; + x2_or_one[3] = -1.0; + y0_or_zero[1] = 0.0; + y1_or_zero[1] = 0.0; + y2_or_zero[1] = 0.0; + y0_or_zero[3] = 0.0; + y1_or_zero[3] = 0.0; + y2_or_zero[3] = 0.0; + + do + { + double fn0, fn1, fn2, a0, a1, a2, w0, w1, w2, y0, y1, y2; + unsigned hx; + int n0, n1, n2; + +loop0: + hx = HI(x); + xsb0 = hx >> 31; /* sign of x as 0 or 1; indexes half[] */ + hx &= ~0x80000000; + if (hx <= pthresh || hx > 0x413921fb) + { + if (hx > 0x413921fb && hx < 0x7ff00000) + biguns = 1; /* finite but too large for this routine */ + x += stridex; + y += stridey; + i = 0; + if (--n <= 0) + break; + goto loop0; + } + x0 = *x; + py0 = y; + x += stridex; + y += stridey; + i = 1; /* i = number of lanes loaded so far; read by the tail code after the loop */ + if (--n <= 0) + break; + +loop1: + hx = HI(x); + xsb1 = hx >> 31; + hx &= ~0x80000000; + if (hx <= pthresh || hx > 0x413921fb) + { + if (hx > 0x413921fb && hx < 0x7ff00000) + biguns = 1; + x += stridex; + y += stridey; + i = 1; + if (--n <= 0) + break; + goto loop1; + } + x1 = *x; + py1 = y; + x += stridex; + y += stridey; + i = 2; + if (--n <= 0) + break; + +loop2: + hx = HI(x); + xsb2 = hx >> 31; + hx &= ~0x80000000; + if (hx <= pthresh || hx > 0x413921fb) + { + if (hx > 0x413921fb && hx < 0x7ff00000) + biguns = 1; + x += stridex; + y += stridey; + i = 2; + if (--n <= 0) + break; + goto loop2; + } + x2 = *x; 
+ py2 = y; + + n0 = (int) (x0 * invpio2 + half[xsb0]); + n1 = (int) (x1 * invpio2 + half[xsb1]); + n2 = (int) (x2 * invpio2 + half[xsb2]); + fn0 = (double) n0; + fn1 = (double) n1; + fn2 = (double) n2; + n0 = (n0 + 1) & 3; /* Add 1 (before the mod) to make sin into cos */ + n1 = (n1 + 1) & 3; + n2 = (n2 + 1) & 3; + a0 = x0 - fn0 * pio2_1; + a1 = x1 - fn1 * pio2_1; + a2 = x2 - fn2 * pio2_1; + w0 = fn0 * pio2_2; + w1 = fn1 * pio2_2; + w2 = fn2 * pio2_2; + x0 = a0 - w0; + x1 = a1 - w1; + x2 = a2 - w2; + y0 = (a0 - x0) - w0; + y1 = (a1 - x1) - w1; + y2 = (a2 - x2) - w2; + a0 = x0; + a1 = x1; + a2 = x2; + w0 = fn0 * pio2_3 - y0; + w1 = fn1 * pio2_3 - y1; + w2 = fn2 * pio2_3 - y2; + x0 = a0 - w0; + x1 = a1 - w1; + x2 = a2 - w2; + y0 = (a0 - x0) - w0; + y1 = (a1 - x1) - w1; + y2 = (a2 - x2) - w2; + a0 = x0; + a1 = x1; + a2 = x2; + w0 = fn0 * pio2_3t - y0; + w1 = fn1 * pio2_3t - y1; + w2 = fn2 * pio2_3t - y2; + x0 = a0 - w0; + x1 = a1 - w1; + x2 = a2 - w2; + y0 = (a0 - x0) - w0; + y1 = (a1 - x1) - w1; + y2 = (a2 - x2) - w2; + xsb0 = HI(&x0); + i = ((xsb0 & ~0x80000000) - thresh[n0&1]) >> 31; + xsb1 = HI(&x1); + i |= (((xsb1 & ~0x80000000) - thresh[n1&1]) >> 30) & 2; + xsb2 = HI(&x2); + i |= (((xsb2 & ~0x80000000) - thresh[n2&1]) >> 29) & 4; /* bit k of i set: lane k is below its threshold and takes the polynomial path */ + switch (i) + { + double t0, t1, t2, z0, z1, z2; + unsigned j0, j1, j2; + + case 0: /* all three lanes use the table path */ + j0 = (xsb0 + 0x4000) & 0xffff8000; + j1 = (xsb1 + 0x4000) & 0xffff8000; + j2 = (xsb2 + 0x4000) & 0xffff8000; + HI(&t0) = j0; + HI(&t1) = j1; + HI(&t2) = j2; + LO(&t0) = 0; + LO(&t1) = 0; + LO(&t2) = 0; + x0 = (x0 - t0) + y0; + x1 = (x1 - t1) + y1; + x2 = (x2 - t2) + y2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * (qq1 + z0 * qq2); + t1 = z1 * (qq1 + z1 * qq2); + t2 = z2 * (qq1 + z2 * qq2); + w0 = x0 * (one + z0 * (pp1 + z0 * pp2)); + w1 = x1 * (one + z1 * (pp1 + z1 * pp2)); + w2 = x2 * (one + z2 * (pp1 + z2 * pp2)); + j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + j2 = 
(((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + xsb0 = (xsb0 >> 30) & 2; + xsb1 = (xsb1 >> 30) & 2; + xsb2 = (xsb2 >> 30) & 2; + n0 ^= (xsb0 & ~(n0 << 1)); + n1 ^= (xsb1 & ~(n1 << 1)); + n2 ^= (xsb2 & ~(n2 << 1)); + xsb0 |= 1; + xsb1 |= 1; + xsb2 |= 1; + a0 = __vlibm_TBL_sincos_hi[j0+n0]; + a1 = __vlibm_TBL_sincos_hi[j1+n1]; + a2 = __vlibm_TBL_sincos_hi[j2+n2]; + t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0]; + t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1]; + t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2]; + *py0 = ( a0 + t0 ); + *py1 = ( a1 + t1 ); + *py2 = ( a2 + t2 ); + break; + + case 1: /* lane 0 polynomial; lanes 1 and 2 table */ + j0 = n0 & 1; + j1 = (xsb1 + 0x4000) & 0xffff8000; + j2 = (xsb2 + 0x4000) & 0xffff8000; + HI(&t1) = j1; + HI(&t2) = j2; + LO(&t1) = 0; + LO(&t2) = 0; + x0_or_one[0] = x0; + x0_or_one[2] = -x0; + y0_or_zero[0] = y0; + y0_or_zero[2] = -y0; + x1 = (x1 - t1) + y1; + x2 = (x2 - t2) + y2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * (poly3[j0] + z0 * poly4[j0]); + t1 = z1 * (qq1 + z1 * qq2); + t2 = z2 * (qq1 + z2 * qq2); + t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0)); + w1 = x1 * (one + z1 * (pp1 + z1 * pp2)); + w2 = x2 * (one + z2 * (pp1 + z2 * pp2)); + j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + xsb1 = (xsb1 >> 30) & 2; + xsb2 = (xsb2 >> 30) & 2; + n1 ^= (xsb1 & ~(n1 << 1)); + n2 ^= (xsb2 & ~(n2 << 1)); + xsb1 |= 1; + xsb2 |= 1; + a1 = __vlibm_TBL_sincos_hi[j1+n1]; + a2 = __vlibm_TBL_sincos_hi[j2+n2]; + t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0); + t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1]; + t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2]; + *py0 = t0; + *py1 = ( a1 + t1 ); + *py2 = ( a2 + t2 ); + break; + + case 2: /* lane 1 polynomial; lanes 0 and 2 table */ + j0 = (xsb0 + 0x4000) & 
0xffff8000; + j1 = n1 & 1; + j2 = (xsb2 + 0x4000) & 0xffff8000; + HI(&t0) = j0; + HI(&t2) = j2; + LO(&t0) = 0; + LO(&t2) = 0; + x1_or_one[0] = x1; + x1_or_one[2] = -x1; + x0 = (x0 - t0) + y0; + y1_or_zero[0] = y1; + y1_or_zero[2] = -y1; + x2 = (x2 - t2) + y2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * (qq1 + z0 * qq2); + t1 = z1 * (poly3[j1] + z1 * poly4[j1]); + t2 = z2 * (qq1 + z2 * qq2); + w0 = x0 * (one + z0 * (pp1 + z0 * pp2)); + t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1)); + w2 = x2 * (one + z2 * (pp1 + z2 * pp2)); + j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + xsb0 = (xsb0 >> 30) & 2; + xsb2 = (xsb2 >> 30) & 2; + n0 ^= (xsb0 & ~(n0 << 1)); + n2 ^= (xsb2 & ~(n2 << 1)); + xsb0 |= 1; + xsb2 |= 1; + a0 = __vlibm_TBL_sincos_hi[j0+n0]; + a2 = __vlibm_TBL_sincos_hi[j2+n2]; + t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0]; + t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1); + t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2]; + *py0 = ( a0 + t0 ); + *py1 = t1; + *py2 = ( a2 + t2 ); + break; + + case 3: /* lanes 0 and 1 polynomial; lane 2 table */ + j0 = n0 & 1; + j1 = n1 & 1; + j2 = (xsb2 + 0x4000) & 0xffff8000; + HI(&t2) = j2; + LO(&t2) = 0; + x0_or_one[0] = x0; + x0_or_one[2] = -x0; + x1_or_one[0] = x1; + x1_or_one[2] = -x1; + y0_or_zero[0] = y0; + y0_or_zero[2] = -y0; + y1_or_zero[0] = y1; + y1_or_zero[2] = -y1; + x2 = (x2 - t2) + y2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * (poly3[j0] + z0 * poly4[j0]); + t1 = z1 * (poly3[j1] + z1 * poly4[j1]); + t2 = z2 * (qq1 + z2 * qq2); + t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0)); + t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1)); + w2 = x2 * (one + z2 * (pp1 + z2 * pp2)); + j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + xsb2 = (xsb2 >> 30) & 2; + n2 ^= (xsb2 & ~(n2 << 1)); + xsb2 |= 1; + a2 = __vlibm_TBL_sincos_hi[j2+n2]; + t0 = x0_or_one[n0] + 
(y0_or_zero[n0] + x0_or_one[n0] * t0); + t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1); + t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2]; + *py0 = t0; + *py1 = t1; + *py2 = ( a2 + t2 ); + break; + + case 4: /* lane 2 polynomial; lanes 0 and 1 table */ + j0 = (xsb0 + 0x4000) & 0xffff8000; + j1 = (xsb1 + 0x4000) & 0xffff8000; + j2 = n2 & 1; + HI(&t0) = j0; + HI(&t1) = j1; + LO(&t0) = 0; + LO(&t1) = 0; + x2_or_one[0] = x2; + x2_or_one[2] = -x2; + x0 = (x0 - t0) + y0; + x1 = (x1 - t1) + y1; + y2_or_zero[0] = y2; + y2_or_zero[2] = -y2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * (qq1 + z0 * qq2); + t1 = z1 * (qq1 + z1 * qq2); + t2 = z2 * (poly3[j2] + z2 * poly4[j2]); + w0 = x0 * (one + z0 * (pp1 + z0 * pp2)); + w1 = x1 * (one + z1 * (pp1 + z1 * pp2)); + t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2)); + j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + xsb0 = (xsb0 >> 30) & 2; + xsb1 = (xsb1 >> 30) & 2; + n0 ^= (xsb0 & ~(n0 << 1)); + n1 ^= (xsb1 & ~(n1 << 1)); + xsb0 |= 1; + xsb1 |= 1; + a0 = __vlibm_TBL_sincos_hi[j0+n0]; + a1 = __vlibm_TBL_sincos_hi[j1+n1]; + t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0]; + t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1]; + t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2); + *py0 = ( a0 + t0 ); + *py1 = ( a1 + t1 ); + *py2 = t2; + break; + + case 5: /* lanes 0 and 2 polynomial; lane 1 table */ + j0 = n0 & 1; + j1 = (xsb1 + 0x4000) & 0xffff8000; + j2 = n2 & 1; + HI(&t1) = j1; + LO(&t1) = 0; + x0_or_one[0] = x0; + x0_or_one[2] = -x0; + x2_or_one[0] = x2; + x2_or_one[2] = -x2; + y0_or_zero[0] = y0; + y0_or_zero[2] = -y0; + x1 = (x1 - t1) + y1; + y2_or_zero[0] = y2; + y2_or_zero[2] = -y2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * (poly3[j0] + z0 * poly4[j0]); + t1 = z1 * (qq1 + z1 * qq2); + t2 = z2 * (poly3[j2] + z2 * poly4[j2]); + t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + 
t0)); + w1 = x1 * (one + z1 * (pp1 + z1 * pp2)); + t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2)); + j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + xsb1 = (xsb1 >> 30) & 2; + n1 ^= (xsb1 & ~(n1 << 1)); + xsb1 |= 1; + a1 = __vlibm_TBL_sincos_hi[j1+n1]; + t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0); + t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1]; + t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2); + *py0 = t0; + *py1 = ( a1 + t1 ); + *py2 = t2; + break; + + case 6: /* lanes 1 and 2 polynomial; lane 0 table */ + j0 = (xsb0 + 0x4000) & 0xffff8000; + j1 = n1 & 1; + j2 = n2 & 1; + HI(&t0) = j0; + LO(&t0) = 0; + x1_or_one[0] = x1; + x1_or_one[2] = -x1; + x2_or_one[0] = x2; + x2_or_one[2] = -x2; + x0 = (x0 - t0) + y0; + y1_or_zero[0] = y1; + y1_or_zero[2] = -y1; + y2_or_zero[0] = y2; + y2_or_zero[2] = -y2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * (qq1 + z0 * qq2); + t1 = z1 * (poly3[j1] + z1 * poly4[j1]); + t2 = z2 * (poly3[j2] + z2 * poly4[j2]); + w0 = x0 * (one + z0 * (pp1 + z0 * pp2)); + t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1)); + t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2)); + j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + xsb0 = (xsb0 >> 30) & 2; + n0 ^= (xsb0 & ~(n0 << 1)); + xsb0 |= 1; + a0 = __vlibm_TBL_sincos_hi[j0+n0]; + t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0]; + t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1); + t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2); + *py0 = ( a0 + t0 ); + *py1 = t1; + *py2 = t2; + break; + + case 7: /* all three lanes use the polynomial path */ + j0 = n0 & 1; + j1 = n1 & 1; + j2 = n2 & 1; + x0_or_one[0] = x0; + x0_or_one[2] = -x0; + x1_or_one[0] = x1; + x1_or_one[2] = -x1; + x2_or_one[0] = x2; + x2_or_one[2] = -x2; + y0_or_zero[0] = y0; + y0_or_zero[2] = -y0; + y1_or_zero[0] = y1; + y1_or_zero[2] = -y1; + y2_or_zero[0] = y2; + y2_or_zero[2] = -y2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * (poly3[j0] + z0 * 
poly4[j0]); + t1 = z1 * (poly3[j1] + z1 * poly4[j1]); + t2 = z2 * (poly3[j2] + z2 * poly4[j2]); + t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0)); + t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1)); + t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2)); + t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0); + t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1); + t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2); + *py0 = t0; + *py1 = t1; + *py2 = t2; + break; + } + + x += stridex; + y += stridey; + i = 0; + } while (--n > 0); + + if (i > 0) + { /* the input ran out with one or two lanes loaded: finish them one at a time */ + double fn0, fn1, a0, a1, w0, w1, y0, y1; + double t0, t1, z0, z1; + unsigned j0, j1; + int n0, n1; + + if (i > 1) + { + n1 = (int) (x1 * invpio2 + half[xsb1]); + fn1 = (double) n1; + n1 = (n1 + 1) & 3; /* Add 1 (before the mod) to make sin into cos */ + a1 = x1 - fn1 * pio2_1; + w1 = fn1 * pio2_2; + x1 = a1 - w1; + y1 = (a1 - x1) - w1; + a1 = x1; + w1 = fn1 * pio2_3 - y1; + x1 = a1 - w1; + y1 = (a1 - x1) - w1; + a1 = x1; + w1 = fn1 * pio2_3t - y1; + x1 = a1 - w1; + y1 = (a1 - x1) - w1; + xsb1 = HI(&x1); + if ((xsb1 & ~0x80000000) < thresh[n1&1]) + { + j1 = n1 & 1; + x1_or_one[0] = x1; + x1_or_one[2] = -x1; + y1_or_zero[0] = y1; + y1_or_zero[2] = -y1; + z1 = x1 * x1; + t1 = z1 * (poly3[j1] + z1 * poly4[j1]); + t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1)); + t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1); + *py1 = t1; + } + else + { + j1 = (xsb1 + 0x4000) & 0xffff8000; + HI(&t1) = j1; + LO(&t1) = 0; + x1 = (x1 - t1) + y1; + z1 = x1 * x1; + t1 = z1 * (qq1 + z1 * qq2); + w1 = x1 * (one + z1 * (pp1 + z1 * pp2)); + j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + xsb1 = (xsb1 >> 30) & 2; + n1 ^= (xsb1 & ~(n1 << 1)); + xsb1 |= 1; + a1 = __vlibm_TBL_sincos_hi[j1+n1]; + t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1]; + *py1 = ( a1 + t1 ); + } + } + n0 = (int) (x0 * invpio2 + half[xsb0]); + fn0 = (double) n0; + n0 = (n0 + 1) & 3; /* Add 1 (before the 
mod) to make sin into cos */ + a0 = x0 - fn0 * pio2_1; + w0 = fn0 * pio2_2; + x0 = a0 - w0; + y0 = (a0 - x0) - w0; + a0 = x0; + w0 = fn0 * pio2_3 - y0; + x0 = a0 - w0; + y0 = (a0 - x0) - w0; + a0 = x0; + w0 = fn0 * pio2_3t - y0; + x0 = a0 - w0; + y0 = (a0 - x0) - w0; + xsb0 = HI(&x0); + if ((xsb0 & ~0x80000000) < thresh[n0&1]) + { + j0 = n0 & 1; + x0_or_one[0] = x0; + x0_or_one[2] = -x0; + y0_or_zero[0] = y0; + y0_or_zero[2] = -y0; + z0 = x0 * x0; + t0 = z0 * (poly3[j0] + z0 * poly4[j0]); + t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0)); + t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0); + *py0 = t0; + } + else + { + j0 = (xsb0 + 0x4000) & 0xffff8000; + HI(&t0) = j0; + LO(&t0) = 0; + x0 = (x0 - t0) + y0; + z0 = x0 * x0; + t0 = z0 * (qq1 + z0 * qq2); + w0 = x0 * (one + z0 * (pp1 + z0 * pp2)); + j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + xsb0 = (xsb0 >> 30) & 2; + n0 ^= (xsb0 & ~(n0 << 1)); + xsb0 |= 1; + a0 = __vlibm_TBL_sincos_hi[j0+n0]; + t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0]; + *py0 = ( a0 + t0 ); + } + } + + if (biguns) + __vlibm_vcos_big(nsave, xsave, sxsave, ysave, sysave, 0x413921fb); /* second pass over the saved vector for the elements above 0x413921fb */ +} diff --git a/usr/src/lib/libmvec/common/__vcosbigf.c b/usr/src/lib/libmvec/common/__vcosbigf.c new file mode 100644 index 0000000000..41ecaabf04 --- /dev/null +++ b/usr/src/lib/libmvec/common/__vcosbigf.c @@ -0,0 +1,174 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include <sys/isa_defs.h> + /* + * HI(p) and LO(p) take a POINTER to a double and yield lvalues for its + * high and low int-sized words; which word comes first in memory is + * selected by _LITTLE_ENDIAN. + */ +#ifdef _LITTLE_ENDIAN +#define HI(x) *(1+(int*)x) +#define LO(x) *(unsigned*)x +#else +#define HI(x) *(int*)x +#define LO(x) *(1+(unsigned*)x) +#endif + +#ifdef __RESTRICT +#define restrict _Restrict +#else +#define restrict +#endif + +extern const double __vlibm_TBL_sincos_hi[], __vlibm_TBL_sincos_lo[]; +extern int __vlibm_rem_pio2m(double *, double *, int, int, int); + /* + * two24 (2^24) is used to peel integer chunks off the rescaled argument. + * p1..p4 and q1..q4 are the polynomial coefficients applied directly to + * small reduced arguments (p in the ty + ty*z*p(z) form, q in the + * one + z*q(z) form). pp1, pp2, qq1, qq2 are the shorter polynomials + * used together with the __vlibm_TBL_sincos tables. + */ +static const double + zero = 0.0, + one = 1.0, + two24 = 16777216.0, + pp1 = -1.666666666605760465276263943134982554676e-0001, + pp2 = 8.333261209690963126718376566146180944442e-0003, + p1 = -1.666666666666629669805215138920301589656e-0001, + p2 = 8.333333332390951295683993455280336376663e-0003, + p3 = -1.984126237997976692791551778230098403960e-0004, + p4 = 2.753403624854277237649987622848330351110e-0006, + qq1 = -4.999999999977710986407023955908711557870e-0001, + qq2 = 4.166654863857219350645055881018842089580e-0002, + q1 = -4.999999999999931701464060878888294524481e-0001, + q2 = 4.166666666394861917535640593963708222319e-0002, + q3 = -1.388888552656142867832756687736851681462e-0003, + q4 = 2.478519423681460796618128289454530524759e-0005; + /* + * __vlibm_vcos_bigf: single precision cosine for large arguments. + * + * Walks n elements, reading *x (advanced by stridex) and writing *y + * (advanced by stridey). An element is processed only when the high + * word of |x|, taken from its double encoding, is above 0x413921fb and + * below 0x7ff00000; every other element is skipped and its *y is left + * untouched (presumably already produced by the caller -- confirm). + * Processed elements are reduced with __vlibm_rem_pio2m and the result + * is stored back to *y as a float. + * + * NOTE(review): n is tested with n--, so a negative n is not treated as + * an empty request -- confirm that callers never pass one. + */ +void +__vlibm_vcos_bigf(int n, float * restrict x, int stridex, float * restrict y, + int stridey) +{ + for (; n--; x += stridex, y += stridey) + { + double tx, tt[3], ty[2], t, w, z, a; + unsigned hx, xsb; + int e0, nx, j; + + tx = *x; /* widen to double; the bit manipulation below works on the double encoding */ + hx = HI(&tx); + xsb = hx & 
0x80000000; + hx &= ~0x80000000; /* xsb = sign bit, hx = high word of |x| */ + if (hx <= 0x413921fb || hx >= 0x7ff00000) + continue; /* at or below the threshold, or inf/nan: nothing is stored */ + e0 = (hx >> 20) - 1046; + HI(&tx) = 0x41600000 | (hx & 0xfffff); /* force the exponent field to 0x416 (1046) so tx lies in [2^23, 2^24); e0 is the exponent removed */ + /* split tx into integer chunks tt[0..2], each scaled by 2^24 from the previous; nx = number of chunks handed to the reduction */ + tt[0] = (double)((int) tx); + tx = (tx - tt[0]) * two24; + if (tx != zero) + { + nx = 2; + tt[1] = (double)((int) tx); + tt[2] = (tx - tt[1]) * two24; + if (tt[2] != zero) + nx = 3; + } + else + { + nx = 1; + tt[1] = tt[2] = zero; + } + nx = __vlibm_rem_pio2m(tt, ty, e0, nx, 2); + if (xsb) + { /* negative input: mirror the quadrant and the reduced argument */ + nx = -nx; + ty[0] = -ty[0]; + ty[1] = -ty[1]; + } + nx = (nx + 1) & 3; /* Add 1 to turn sin into cos */ + + /* now nx and ty[*] are the quadrant and reduced arg */ + xsb = (nx & 2) << 30; /* xsb is reused as the result sign: 0x80000000 when bit 1 of nx is set */ + hx = HI(&ty[0]); + if (nx & 1) + { /* odd nx: evaluate the one + z*q(z) form on |ty| */ + if (hx & 0x80000000) + { + ty[0] = -ty[0]; + ty[1] = -ty[1]; + hx &= ~0x80000000; + } + if (hx < 0x3fc40000) + { /* small |ty|: polynomial alone */ + z = ty[0] * ty[0]; + t = z * (q1 + z * (q2 + z * (q3 + z * q4))); + a = one + t; + } + else + { /* t = |ty| rounded to a table breakpoint, ty[0] becomes the offset from it; presumably hi[j] and hi[j+1] hold sin and cos of the breakpoint -- confirm against the table source */ + j = (hx + 0x4000) & 0x7fff8000; + HI(&t) = j; + LO(&t) = 0; + ty[0] = (ty[0] - t) + ty[1]; + z = ty[0] * ty[0]; + t = z * (qq1 + z * qq2); + w = ty[0] * (one + z * (pp1 + z * pp2)); + j = ((j - 0x3fc40000) >> 13) & ~3; + a = __vlibm_TBL_sincos_hi[j+1]; + t = __vlibm_TBL_sincos_lo[j+1] - (__vlibm_TBL_sincos_hi[j] * w - a * t); + a += t; + } + } + else + { /* even nx: evaluate the ty + ty*z*p(z) form; a negative ty is negated and the result sign flipped instead */ + if (hx & 0x80000000) + { + ty[0] = -ty[0]; + ty[1] = -ty[1]; + hx &= ~0x80000000; + xsb ^= 0x80000000; + } + if (hx < 0x3fc90000) + { /* small |ty|: polynomial alone, with the low part ty[1] folded in */ + z = ty[0] * ty[0]; + t = z * (p1 + z * (p2 + z * (p3 + z * p4))); + a = ty[0] + (ty[1] + ty[0] * t); + } + else + { /* same breakpoint scheme as in the odd-nx branch, with the two table entries swapping roles */ + j = (hx + 0x4000) & 0x7fff8000; + HI(&t) = j; + LO(&t) = 0; + ty[0] = (ty[0] - t) + ty[1]; + z = ty[0] * ty[0]; + t = z * (qq1 + z * qq2); + w = ty[0] * (one + z * (pp1 + z * pp2)); + j = ((j - 0x3fc40000) >> 13) & ~3; + a = __vlibm_TBL_sincos_hi[j]; + t = (__vlibm_TBL_sincos_hi[j+1] * w + a * t) + __vlibm_TBL_sincos_lo[j]; + a += t; + } + } + if (xsb) a = -a; + *y = a; /* double result narrowed to float on store */ + } +} diff --git a/usr/src/lib/libmvec/common/__vcosf.c b/usr/src/lib/libmvec/common/__vcosf.c new file mode 100644 
index 0000000000..2a73a16f60 --- /dev/null +++ b/usr/src/lib/libmvec/common/__vcosf.c @@ -0,0 +1,377 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * __vcosf: single precision vector cos + * + * Algorithm: + * + * For |x| < pi/4, approximate sin(x) by a polynomial x+x*z*(S0+ + * z*(S1+z*S2)) and cos(x) by a polynomial 1+z*(-1/2+z*(C0+z*(C1+ + * z*C2))), where z = x*x, all evaluated in double precision. + * + * Accuracy: + * + * The largest error is less than 0.6 ulps. 
+ */ + +#include <sys/isa_defs.h> + /* + * HI(x) and LO(x) take a double LVALUE (not a pointer) and yield lvalues + * for its high and low int-sized words; which word comes first in memory + * is selected by _LITTLE_ENDIAN. + */ +#ifdef _LITTLE_ENDIAN +#define HI(x) *(1+(int *)&x) +#define LO(x) *(unsigned *)&x +#else +#define HI(x) *(int *)&x +#define LO(x) *(1+(unsigned *)&x) +#endif + +#ifdef __RESTRICT +#define restrict _Restrict +#else +#define restrict +#endif + +extern int __vlibm_rem_pio2m(double *, double *, int, int, int); + /* + * Constant pool. Entries are referenced only through the named macros + * defined right after the table, so the order here must match them. + */ +static const double C[] = { + -1.66666552424430847168e-01, /* 2^ -3 * -1.5555460000000 */ + 8.33219196647405624390e-03, /* 2^ -7 * 1.11077E0000000 */ + -1.95187909412197768688e-04, /* 2^-13 * -1.9956B60000000 */ + 1.0, + -0.5, + 4.16666455566883087158e-02, /* 2^ -5 * 1.55554A0000000 */ + -1.38873036485165357590e-03, /* 2^-10 * -1.6C0C1E0000000 */ + 2.44309903791872784495e-05, /* 2^-16 * 1.99E24E0000000 */ + 0.636619772367581343075535, /* 2^ -1 * 1.45F306DC9C883 */ + 6755399441055744.0, /* 2^ 52 * 1.8000000000000 */ + 1.570796326734125614166, /* 2^ 0 * 1.921FB54400000 */ + 6.077100506506192601475e-11, /* 2^-34 * 1.0B4611A626331 */ +}; + +#define S0 C[0] +#define S1 C[1] +#define S2 C[2] +#define one C[3] +#define mhalf C[4] +#define C0 C[5] +#define C1 C[6] +#define C2 C[7] +#define invpio2 C[8] +#define c3two51 C[9] +#define pio2_1 C[10] +#define pio2_t C[11] + /* + * PREPROCESS(N, index, label): load the next input for lane N and + * advance x by stridex. Depending on ix (the float bits of |x|): + * - ix == 0: store one in y[index] and jump to label; + * - ix <= 0x3f490fdb: y##N = x and n##N = 1 (odd n selects the + * cosine polynomial later on); + * - ix <= 0x49c90fdb: y##N = x and the shared medium flag is set, + * the reduction itself is left to the expanding code; + * - inf or nan: store t / t in y[index] and jump to label; + * - otherwise: reduce with __vlibm_rem_pio2m, evaluate at once, + * store the result in y[index] and jump to label. + * Relies on the locals hx, ix, t, medium, x, y and stridex of the + * expanding function, plus the per-lane y##N, z##N, n##N, f##N. + */ +#define PREPROCESS(N, index, label) \ + hx = *(int *)x; \ + ix = hx & 0x7fffffff; \ + t = *x; \ + x += stridex; \ + if (ix <= 0x3f490fdb) { /* |x| < pi/4 */ \ + if (ix == 0) { \ + y[index] = one; \ + goto label; \ + } \ + y##N = (double)t; \ + n##N = 1; \ + } else if (ix <= 0x49c90fdb) { /* |x| < 2^19*pi */ \ + y##N = (double)t; \ + medium = 1; \ + } else { \ + if (ix >= 0x7f800000) { /* inf or nan */ \ + y[index] = t / t; \ + goto label; \ + } \ + z##N = y##N = (double)t; \ + hx = HI(y##N); \ + n##N = ((hx >> 20) & 0x7ff) - 1046; \ + HI(z##N) = (hx & 0xfffff) | 0x41600000; \ + n##N = __vlibm_rem_pio2m(&z##N, &y##N, n##N, 1, 0) + 1; \ + z##N = y##N * y##N; \ + if (n##N & 1) { /* compute cos y */ \ + f##N = (float)(one + z##N * (mhalf + z##N * \ + (C0 + z##N * (C1 + 
z##N * C2)))); \ + } else { /* compute sin y */ \ + f##N = (float)(y##N + y##N * z##N * (S0 + \ + z##N * (S1 + z##N * S2))); \ + } \ + y[index] = (n##N & 2)? -f##N : f##N; \ + goto label; \ + } + /* + * PROCESS(N): finish lane N, store it through *y and advance y by + * stridey. When medium is set, y##N * invpio2 is rounded by adding + * c3two51, the low word of that sum plus 1 becomes n##N (the +1 + * presumably shifts the quadrant from sine to cosine -- confirm), and + * y##N is reduced by the pio2_1 and pio2_t multiples. Bit 0 of n##N + * then picks the polynomial and bit 1 the sign. The expansion ends + * without a semicolon; the caller supplies it. + */ +#define PROCESS(N) \ + if (medium) { \ + z##N = y##N * invpio2 + c3two51; \ + n##N = LO(z##N) + 1; \ + z##N -= c3two51; \ + y##N = (y##N - z##N * pio2_1) - z##N * pio2_t; \ + } \ + z##N = y##N * y##N; \ + if (n##N & 1) { /* compute cos y */ \ + f##N = (float)(one + z##N * (mhalf + z##N * (C0 + \ + z##N * (C1 + z##N * C2)))); \ + } else { /* compute sin y */ \ + f##N = (float)(y##N + y##N * z##N * (S0 + z##N * (S1 + \ + z##N * S2))); \ + } \ + *y = (n##N & 2)? -f##N : f##N; \ + y += stridey + /* + * __vcosf: for each of the n inputs read through x (step stridex), + * store the single precision cosine through y (step stridey); see the + * algorithm note at the top of this file. + * + * Inputs are taken four per loop iteration. When the count runs out, + * or PREPROCESS has already stored the result for the lane it was + * loading, control jumps to begin or process1..process3, which finish + * only the lanes loaded before that point. + */ +void +__vcosf(int n, float *restrict x, int stridex, float *restrict y, + int stridey) +{ + double y0, y1, y2, y3; + double z0, z1, z2, z3; + float f0, f1, f2, f3, t; + int n0 = 0, n1 = 0, n2 = 0, n3, hx, ix, medium; + + y -= stridey; /* cancels the unconditional y += stridey at begin on the first pass */ + + for (;;) { +begin: + y += stridey; + + if (--n < 0) + break; + + medium = 0; /* PREPROCESS sets this when any lane of the group needs the medium-range reduction */ + PREPROCESS(0, 0, begin); + + if (--n < 0) + goto process1; + + PREPROCESS(1, stridey, process1); + + if (--n < 0) + goto process2; + + PREPROCESS(2, (stridey << 1), process2); + + if (--n < 0) + goto process3; + + PREPROCESS(3, (stridey << 1) + stridey, process3); + /* all four lanes loaded: same reduction as PROCESS, written out lane by lane */ + if (medium) { + z0 = y0 * invpio2 + c3two51; + z1 = y1 * invpio2 + c3two51; + z2 = y2 * invpio2 + c3two51; + z3 = y3 * invpio2 + c3two51; + + n0 = LO(z0) + 1; + n1 = LO(z1) + 1; + n2 = LO(z2) + 1; + n3 = LO(z3) + 1; + + z0 -= c3two51; + z1 -= c3two51; + z2 -= c3two51; + z3 -= c3two51; + + y0 = (y0 - z0 * pio2_1) - z0 * pio2_t; + y1 = (y1 - z1 * pio2_1) - z1 * pio2_t; + y2 = (y2 - z2 * pio2_1) - z2 * pio2_t; + y3 = (y3 - z3 * pio2_1) - z3 * pio2_t; + } + + z0 = y0 * y0; + z1 = y1 * y1; + z2 = y2 * y2; + z3 = y3 * y3; + /* hx is reused as a 4-bit mask: bit k set means lane k takes the cosine polynomial, clear means the sine polynomial */ + hx = (n0 & 1) | ((n1 & 1) << 1) | ((n2 & 1) << 2) | + ((n3 & 1) << 3); + switch (hx) { + case 0: + f0 = (float)(y0 + y0 * z0 * (S0 + z0 * (S1 + z0 * S2))); + f1 = (float)(y1 + y1 * z1 * (S0 + 
z1 * (S1 + z1 * S2))); + f2 = (float)(y2 + y2 * z2 * (S0 + z2 * (S1 + z2 * S2))); + f3 = (float)(y3 + y3 * z3 * (S0 + z3 * (S1 + z3 * S2))); + break; + + case 1: + f0 = (float)(one + z0 * (mhalf + z0 * (C0 + + z0 * (C1 + z0 * C2)))); + f1 = (float)(y1 + y1 * z1 * (S0 + z1 * (S1 + z1 * S2))); + f2 = (float)(y2 + y2 * z2 * (S0 + z2 * (S1 + z2 * S2))); + f3 = (float)(y3 + y3 * z3 * (S0 + z3 * (S1 + z3 * S2))); + break; + + case 2: + f0 = (float)(y0 + y0 * z0 * (S0 + z0 * (S1 + z0 * S2))); + f1 = (float)(one + z1 * (mhalf + z1 * (C0 + + z1 * (C1 + z1 * C2)))); + f2 = (float)(y2 + y2 * z2 * (S0 + z2 * (S1 + z2 * S2))); + f3 = (float)(y3 + y3 * z3 * (S0 + z3 * (S1 + z3 * S2))); + break; + + case 3: + f0 = (float)(one + z0 * (mhalf + z0 * (C0 + + z0 * (C1 + z0 * C2)))); + f1 = (float)(one + z1 * (mhalf + z1 * (C0 + + z1 * (C1 + z1 * C2)))); + f2 = (float)(y2 + y2 * z2 * (S0 + z2 * (S1 + z2 * S2))); + f3 = (float)(y3 + y3 * z3 * (S0 + z3 * (S1 + z3 * S2))); + break; + + case 4: + f0 = (float)(y0 + y0 * z0 * (S0 + z0 * (S1 + z0 * S2))); + f1 = (float)(y1 + y1 * z1 * (S0 + z1 * (S1 + z1 * S2))); + f2 = (float)(one + z2 * (mhalf + z2 * (C0 + + z2 * (C1 + z2 * C2)))); + f3 = (float)(y3 + y3 * z3 * (S0 + z3 * (S1 + z3 * S2))); + break; + + case 5: + f0 = (float)(one + z0 * (mhalf + z0 * (C0 + + z0 * (C1 + z0 * C2)))); + f1 = (float)(y1 + y1 * z1 * (S0 + z1 * (S1 + z1 * S2))); + f2 = (float)(one + z2 * (mhalf + z2 * (C0 + + z2 * (C1 + z2 * C2)))); + f3 = (float)(y3 + y3 * z3 * (S0 + z3 * (S1 + z3 * S2))); + break; + + case 6: + f0 = (float)(y0 + y0 * z0 * (S0 + z0 * (S1 + z0 * S2))); + f1 = (float)(one + z1 * (mhalf + z1 * (C0 + + z1 * (C1 + z1 * C2)))); + f2 = (float)(one + z2 * (mhalf + z2 * (C0 + + z2 * (C1 + z2 * C2)))); + f3 = (float)(y3 + y3 * z3 * (S0 + z3 * (S1 + z3 * S2))); + break; + + case 7: + f0 = (float)(one + z0 * (mhalf + z0 * (C0 + + z0 * (C1 + z0 * C2)))); + f1 = (float)(one + z1 * (mhalf + z1 * (C0 + + z1 * (C1 + z1 * C2)))); + f2 = (float)(one + z2 * (mhalf + 
z2 * (C0 + + z2 * (C1 + z2 * C2)))); + f3 = (float)(y3 + y3 * z3 * (S0 + z3 * (S1 + z3 * S2))); + break; + + case 8: + f0 = (float)(y0 + y0 * z0 * (S0 + z0 * (S1 + z0 * S2))); + f1 = (float)(y1 + y1 * z1 * (S0 + z1 * (S1 + z1 * S2))); + f2 = (float)(y2 + y2 * z2 * (S0 + z2 * (S1 + z2 * S2))); + f3 = (float)(one + z3 * (mhalf + z3 * (C0 + + z3 * (C1 + z3 * C2)))); + break; + + case 9: + f0 = (float)(one + z0 * (mhalf + z0 * (C0 + + z0 * (C1 + z0 * C2)))); + f1 = (float)(y1 + y1 * z1 * (S0 + z1 * (S1 + z1 * S2))); + f2 = (float)(y2 + y2 * z2 * (S0 + z2 * (S1 + z2 * S2))); + f3 = (float)(one + z3 * (mhalf + z3 * (C0 + + z3 * (C1 + z3 * C2)))); + break; + + case 10: + f0 = (float)(y0 + y0 * z0 * (S0 + z0 * (S1 + z0 * S2))); + f1 = (float)(one + z1 * (mhalf + z1 * (C0 + + z1 * (C1 + z1 * C2)))); + f2 = (float)(y2 + y2 * z2 * (S0 + z2 * (S1 + z2 * S2))); + f3 = (float)(one + z3 * (mhalf + z3 * (C0 + + z3 * (C1 + z3 * C2)))); + break; + + case 11: + f0 = (float)(one + z0 * (mhalf + z0 * (C0 + + z0 * (C1 + z0 * C2)))); + f1 = (float)(one + z1 * (mhalf + z1 * (C0 + + z1 * (C1 + z1 * C2)))); + f2 = (float)(y2 + y2 * z2 * (S0 + z2 * (S1 + z2 * S2))); + f3 = (float)(one + z3 * (mhalf + z3 * (C0 + + z3 * (C1 + z3 * C2)))); + break; + + case 12: + f0 = (float)(y0 + y0 * z0 * (S0 + z0 * (S1 + z0 * S2))); + f1 = (float)(y1 + y1 * z1 * (S0 + z1 * (S1 + z1 * S2))); + f2 = (float)(one + z2 * (mhalf + z2 * (C0 + + z2 * (C1 + z2 * C2)))); + f3 = (float)(one + z3 * (mhalf + z3 * (C0 + + z3 * (C1 + z3 * C2)))); + break; + + case 13: + f0 = (float)(one + z0 * (mhalf + z0 * (C0 + + z0 * (C1 + z0 * C2)))); + f1 = (float)(y1 + y1 * z1 * (S0 + z1 * (S1 + z1 * S2))); + f2 = (float)(one + z2 * (mhalf + z2 * (C0 + + z2 * (C1 + z2 * C2)))); + f3 = (float)(one + z3 * (mhalf + z3 * (C0 + + z3 * (C1 + z3 * C2)))); + break; + + case 14: + f0 = (float)(y0 + y0 * z0 * (S0 + z0 * (S1 + z0 * S2))); + f1 = (float)(one + z1 * (mhalf + z1 * (C0 + + z1 * (C1 + z1 * C2)))); + f2 = (float)(one + z2 * (mhalf + 
z2 * (C0 + + z2 * (C1 + z2 * C2)))); + f3 = (float)(one + z3 * (mhalf + z3 * (C0 + + z3 * (C1 + z3 * C2)))); + break; + + default: + f0 = (float)(one + z0 * (mhalf + z0 * (C0 + + z0 * (C1 + z0 * C2)))); + f1 = (float)(one + z1 * (mhalf + z1 * (C0 + + z1 * (C1 + z1 * C2)))); + f2 = (float)(one + z2 * (mhalf + z2 * (C0 + + z2 * (C1 + z2 * C2)))); + f3 = (float)(one + z3 * (mhalf + z3 * (C0 + + z3 * (C1 + z3 * C2)))); + } + /* bit 1 of each lane count selects the sign; y ends on the fourth output and begin advances it again */ + *y = (n0 & 2)? -f0 : f0; + y += stridey; + *y = (n1 & 2)? -f1 : f1; + y += stridey; + *y = (n2 & 2)? -f2 : f2; + y += stridey; + *y = (n3 & 2)? -f3 : f3; + continue; + /* partial groups: finish only the lanes that were loaded before the jump */ +process1: + PROCESS(0); + continue; + +process2: + PROCESS(0); + PROCESS(1); + continue; + +process3: + PROCESS(0); + PROCESS(1); + PROCESS(2); + } +} diff --git a/usr/src/lib/libmvec/common/__vexp.c b/usr/src/lib/libmvec/common/__vexp.c new file mode 100644 index 0000000000..9ab50556a1 --- /dev/null +++ b/usr/src/lib/libmvec/common/__vexp.c @@ -0,0 +1,590 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +/* + * __vexp: double precision vector exp + * + * Algorithm: + * + * Write x = (k + j/256)ln2 + r, where k and j are integers, j >= 0, + * and |r| <= ln2/512. Then exp(x) = 2^k * 2^(j/256) * exp(r). + * Compute exp(r) by a polynomial approximation exp(r) ~ 1 + p(r) + * where p(r) := r*(1+r*(B1+r*(B2+r*B3))). From a table, obtain + * h and l such that h ~ 2^(j/256) to double precision and h+l + * ~ 2^(j/256) to well more than double precision. Then exp(x) + * ~ 2^k * (h + (l + h * p(r))) to about double precision. Note + * that the multiplication by 2^k requires some finagling when + * the result might be subnormal. + * + * Accuracy: + * + * For normal results, the largest error observed is less than + * 0.6 ulps. For subnormal results, the largest error observed + * is 0.737 ulps. + */ + +#include <sys/isa_defs.h> + +#ifdef _LITTLE_ENDIAN +#define HI(x) *(1+(int *)&x) +#define LO(x) *(unsigned *)&x +#define DBLWORD(x, y) y, x +#else +#define HI(x) *(int *)&x +#define LO(x) *(1+(unsigned *)&x) +#define DBLWORD(x, y) x, y +#endif + +#ifdef __RESTRICT +#define restrict _Restrict +#else +#define restrict +#endif + +static const double TBL[] = { + 1.00000000000000000000e+00, 0.00000000000000000000e+00, + 1.00271127505020252180e+00, -3.63661592869226394432e-17, + 1.00542990111280272636e+00, 9.49918653545503175702e-17, + 1.00815589811841754830e+00, -3.25205875608430806089e-17, + 1.01088928605170047526e+00, -1.52347786033685771763e-17, + 1.01363008495148942956e+00, 9.28359976818356758749e-18, + 1.01637831491095309566e+00, -5.77217007319966002766e-17, + 1.01913399607773791367e+00, 3.60190498225966110587e-17, + 1.02189714865411662714e+00, 5.10922502897344389359e-17, + 1.02466779289713572076e+00, -7.56160786848777820704e-17, + 1.02744594911876374610e+00, -4.95607417464536982418e-17, + 1.03023163768604097967e+00, 3.31983004108081294377e-17, + 1.03302487902122841490e+00, 7.60083887402708848935e-18, + 1.03582569360195719810e+00, -7.80678239133763616702e-17, + 
1.03863410196137873065e+00, 5.99627378885251061843e-17, + 1.04145012468831610342e+00, 3.78483048028757620966e-17, + 1.04427378242741375480e+00, 8.55188970553796365958e-17, + 1.04710509587928979336e+00, 7.27707724310431474861e-17, + 1.04994408580068721015e+00, 5.59293784812700258637e-17, + 1.05279077300462642341e+00, -9.62948289902693573942e-17, + 1.05564517836055715705e+00, 1.75932573877209198414e-18, + 1.05850732279451276163e+00, -7.15265185663778073796e-17, + 1.06137722728926209292e+00, -1.19735370853656575649e-17, + 1.06425491288446449900e+00, 5.07875419861123039357e-17, + 1.06714040067682369717e+00, -7.89985396684158212226e-17, + 1.07003371182024187291e+00, -9.93716271128891938112e-17, + 1.07293486752597555522e+00, -3.83966884335882380671e-18, + 1.07584388906279104781e+00, -1.00027161511441361125e-17, + 1.07876079775711986031e+00, -6.65666043605659260344e-17, + 1.08168561499321524977e+00, -4.78262390299708626556e-17, + 1.08461836221330920615e+00, 3.16615284581634611576e-17, + 1.08755906091776965994e+00, 5.40934930782029075923e-18, + 1.09050773266525768967e+00, -3.04678207981247114697e-17, + 1.09346439907288583981e+00, 1.44139581472692093420e-17, + 1.09642908181637688259e+00, -5.91993348444931582405e-17, + 1.09940180263022191376e+00, 7.17045959970192322483e-17, + 1.10238258330784089090e+00, 5.26603687157069438656e-17, + 1.10537144570174117320e+00, 8.23928876050021358995e-17, + 1.10836841172367872588e+00, -8.78681384518052661558e-17, + 1.11137350334481754821e+00, 5.56394502666969764311e-17, + 1.11438674259589243221e+00, 1.04102784568455709549e-16, + 1.11740815156736927882e+00, -7.97680590262822045601e-17, + 1.12043775240960674644e+00, -6.20108590655417874998e-17, + 1.12347556733301989773e+00, -9.69973758898704299544e-17, + 1.12652161860824184814e+00, 5.16585675879545612073e-17, + 1.12957592856628807887e+00, 6.71280585872625658758e-17, + 1.13263851959871919561e+00, 3.23735616673800026374e-17, + 1.13570941415780546357e+00, 5.06659992612615524241e-17, + 
1.13878863475669156458e+00, 8.91281267602540777782e-17, + 1.14187620396956157620e+00, 4.65109117753141238741e-17, + 1.14497214443180417298e+00, 4.64128989217001065651e-17, + 1.14807647884017893780e+00, 6.89774023662719177044e-17, + 1.15118922995298267331e+00, 3.25071021886382721198e-17, + 1.15431042059021593538e+00, 1.04171289462732661865e-16, + 1.15744007363375112085e+00, -9.12387123113440028710e-17, + 1.16057821202749877898e+00, -3.26104020541739310553e-17, + 1.16372485877757747552e+00, 3.82920483692409349872e-17, + 1.16688003695248165847e+00, -8.79187957999916974198e-17, + 1.17004376968325018993e+00, -1.84774420179000469438e-18, + 1.17321608016363732041e+00, -7.28756258658499447915e-17, + 1.17639699165028122074e+00, 5.55420325421807896277e-17, + 1.17958652746287584456e+00, 1.00923127751003904354e-16, + 1.18278471098434101449e+00, 1.54297543007907605845e-17, + 1.18599156566099384058e+00, -9.20950683529310590495e-18, + 1.18920711500272102690e+00, 3.98201523146564611098e-17, + 1.19243138258315117817e+00, 4.39755141560972082715e-17, + 1.19566439203982732842e+00, 4.61660367048148139743e-17, + 1.19890616707438057986e+00, -9.80919335600842311848e-17, + 1.20215673145270307565e+00, 6.64498149925230124489e-17, + 1.20541610900512385918e+00, -3.35727219326752963448e-17, + 1.20868432362658162482e+00, -4.74672594522898409739e-17, + 1.21196139927680124337e+00, -4.89061107752111835732e-17, + 1.21524735998046895524e+00, -7.71263069268148813091e-17, + 1.21854222982740845183e+00, -9.00672695836383767487e-17, + 1.22184603297275762301e+00, -1.06110212114026911612e-16, + 1.22515879363714552674e+00, -8.90353381426998342947e-17, + 1.22848053610687002468e+00, -1.89878163130252995312e-17, + 1.23181128473407586199e+00, 7.38938247161005024655e-17, + 1.23515106393693341325e+00, -1.07552443443078413783e-16, + 1.23849989819981654016e+00, 2.76770205557396742995e-17, + 1.24185781207348400201e+00, 4.65802759183693679123e-17, + 1.24522483017525797955e+00, -4.67724044984672750044e-17, + 
1.24860097718920481924e+00, -8.26181099902196355046e-17, + 1.25198627786631622172e+00, 4.83416715246989759959e-17, + 1.25538075702469109629e+00, -6.71138982129687841853e-18, + 1.25878443954971652730e+00, -8.42178258773059935677e-17, + 1.26219735039425073886e+00, -3.08446488747384584900e-17, + 1.26561951457880628169e+00, 4.25057700345086802072e-17, + 1.26905095719173321989e+00, 2.66793213134218609523e-18, + 1.27249170338940276181e+00, -1.05779162672124210291e-17, + 1.27594177839639200123e+00, 9.91543024421429032951e-17, + 1.27940120750566932450e+00, -9.75909500835606221035e-17, + 1.28287001607877826359e+00, 1.71359491824356096814e-17, + 1.28634822954602556777e+00, -3.41695570693618197638e-17, + 1.28983587340666572274e+00, 8.94925753089759172195e-17, + 1.29333297322908946647e+00, -2.97459044313275164581e-17, + 1.29683955465100964055e+00, 2.53825027948883149593e-17, + 1.30035564337965059423e+00, 5.67872810280221742200e-17, + 1.30388126519193581210e+00, 8.64767559826787117946e-17, + 1.30741644593467731816e+00, -7.33664565287886889230e-17, + 1.31096121152476441374e+00, -7.18153613551945385697e-17, + 1.31451558794935463581e+00, 2.26754331510458564505e-17, + 1.31807960126606404927e+00, -5.45795582714915288619e-17, + 1.32165327760315753913e+00, -2.48063824591302174150e-17, + 1.32523664315974132322e+00, -2.85873121003886075697e-17, + 1.32882972420595435459e+00, 4.08908622391016005195e-17, + 1.33243254708316150037e+00, -5.10158663091674334319e-17, + 1.33604513820414583236e+00, -5.89186635638880135250e-17, + 1.33966752405330291609e+00, 8.92728259483173198426e-17, + 1.34329973118683532185e+00, -5.80258089020143775130e-17, + 1.34694178623294580355e+00, 3.22406510125467916913e-17, + 1.35059371589203447428e+00, -8.28711038146241653260e-17, + 1.35425554693689265129e+00, 7.70094837980298946162e-17, + 1.35792730621290114179e+00, -9.52963574482518886709e-17, + 1.36160902063822475405e+00, 1.53378766127066804593e-18, + 1.36530071720401191548e+00, -1.00053631259747639350e-16, + 
1.36900242297459051599e+00, 9.59379791911884877256e-17, + 1.37271416508766841424e+00, -4.49596059523484126201e-17, + 1.37643597075453016920e+00, -6.89858893587180104162e-17, + 1.38016786726023799048e+00, 1.05103145799699839462e-16, + 1.38390988196383202258e+00, -6.77051165879478628716e-17, + 1.38766204229852907481e+00, 8.42298427487541531762e-17, + 1.39142437577192623621e+00, -4.90617486528898870821e-17, + 1.39519690996620027157e+00, -9.32933622422549531960e-17, + 1.39897967253831123635e+00, -9.61421320905132307233e-17, + 1.40277269122020475933e+00, -5.29578324940798922316e-17, + 1.40657599381901543545e+00, 7.03491481213642218800e-18, + 1.41038960821727066275e+00, 4.16654872843506164270e-17, + 1.41421356237309514547e+00, -9.66729331345291345105e-17, + 1.41804788432041517510e+00, 2.27443854218552945230e-17, + 1.42189260216916557589e+00, -1.60778289158902441338e-17, + 1.42574774410549420800e+00, 9.88069075850060728430e-17, + 1.42961333839197002327e+00, -1.20316424890536551792e-17, + 1.43348941336778890054e+00, -5.80245424392682610310e-17, + 1.43737599744898236764e+00, -4.20403401646755661225e-17, + 1.44127311912862565713e+00, 5.60250365087898567501e-18, + 1.44518080697704665027e+00, -3.02375813499398731940e-17, + 1.44909908964203504311e+00, -6.25940500081930925441e-17, + 1.45302799584905262265e+00, -5.77994860939610610226e-17, + 1.45696755440144376514e+00, 5.64867945387699814049e-17, + 1.46091779418064704466e+00, -5.60037718607521580013e-17, + 1.46487874414640573129e+00, 9.53076754358715731900e-17, + 1.46885043333698184220e+00, 8.46588275653362637570e-17, + 1.47283289086936752810e+00, 6.69177408194058937165e-17, + 1.47682614593949934623e+00, -3.48399455689279579579e-17, + 1.48083022782247186733e+00, -9.68695210263061857841e-17, + 1.48484516587275239274e+00, 1.07800867644074807559e-16, + 1.48887098952439700383e+00, 6.15536715774287133031e-17, + 1.49290772829126483501e+00, 1.41929201542840357707e-17, + 1.49695541176723545540e+00, -2.86166325389915821109e-17, + 
1.50101406962642558440e+00, -6.41376727579023503859e-17, + 1.50508373162340647333e+00, 7.07471061358284636429e-17, + 1.50916442759342284141e+00, -1.01645532775429503911e-16, + 1.51325618745260981335e+00, 8.88449785133871209093e-17, + 1.51735904119821474190e+00, -4.30869947204334080070e-17, + 1.52147301890881458952e+00, -5.99638767594568341985e-18, + 1.52559815074453819506e+00, 1.11795187801605698722e-16, + 1.52973446694728698603e+00, 3.78579211515721903683e-17, + 1.53388199784095591305e+00, 8.87522684443844614135e-17, + 1.53804077383165682669e+00, 1.01746723511613580618e-16, + 1.54221082540794074411e+00, 7.94983480969762085616e-17, + 1.54639218314102144802e+00, 1.06839600056572198028e-16, + 1.55058487768499997372e+00, -1.46007065906893851791e-17, + 1.55478893977708865215e+00, -8.00316135011603564104e-17, + 1.55900440023783692922e+00, 3.78120705335752750188e-17, + 1.56323128997135762930e+00, 7.48477764559073438896e-17, + 1.56746963996555299659e+00, -1.03520617688497219883e-16, + 1.57171948129234140268e+00, -3.34298400468720006928e-17, + 1.57598084510788649659e+00, -1.01369164712783039808e-17, + 1.58025376265282457844e+00, -5.16340292955446806159e-17, + 1.58453826525249374946e+00, -1.93377170345857029304e-17, + 1.58883438431716395023e+00, -5.99495011882447940052e-18, + 1.59314215134226699888e+00, -1.00944065423119624890e-16, + 1.59746159790862707339e+00, 2.48683927962209992069e-17, + 1.60179275568269341434e+00, -6.05491745352778434252e-17, + 1.60613565641677102924e+00, -1.03545452880599952591e-16, + 1.61049033194925428347e+00, 2.47071925697978878522e-17, + 1.61485681420486071325e+00, -7.31666339912512326264e-17, + 1.61923513519486372836e+00, 2.09413341542290924068e-17, + 1.62362532701732886764e+00, -3.58451285141447470996e-17, + 1.62802742185734783398e+00, -6.71295508470708408630e-17, + 1.63244145198727497181e+00, 9.85281923042999296414e-17, + 1.63686744976696441078e+00, 7.69832507131987557450e-17, + 1.64130544764400632118e+00, -9.24756873764070550805e-17, + 
1.64575547815396494578e+00, -1.01256799136747726038e-16, + 1.65021757392061774183e+00, 9.13327958872990419009e-18, + 1.65469176765619430114e+00, 9.64329430319602742879e-17, + 1.65917809216161615815e+00, -7.27554555082304942180e-17, + 1.66367658032673637614e+00, 5.89099269671309967045e-17, + 1.66818726513058246397e+00, 4.26917801957061447430e-17, + 1.67271017964159662839e+00, -5.47671596459956307616e-17, + 1.67724535701787846875e+00, 8.30394950995073155275e-17, + 1.68179283050742900407e+00, 8.19901002058149652013e-17, + 1.68635263344839336774e+00, -7.18146327835800944212e-17, + 1.69092479926930527867e+00, -9.66967147439488016590e-17, + 1.69550936148933262260e+00, 7.23841687284516664081e-17, + 1.70010635371852347753e+00, -8.02371937039770024589e-18, + 1.70471580965805125096e+00, -2.72888328479728156257e-17, + 1.70933776310046292579e+00, -9.86877945663293107628e-17, + 1.71397224792992597386e+00, 6.47397510775336706412e-17, + 1.71861929812247793414e+00, -1.85138041826311098821e-17, + 1.72327894774627399244e+00, -9.52212380039379996275e-17, + 1.72795123096183766975e+00, -1.07509818612046424459e-16, + 1.73263618202231106658e+00, -1.69805107431541549407e-18, + 1.73733383527370621735e+00, 3.16438929929295694659e-17, + 1.74204422515515644498e+00, -1.52595911895078879236e-18, + 1.74676738619916904760e+00, -1.07522904835075145042e-16, + 1.75150335303187820735e+00, -5.12445042059672465939e-17, + 1.75625216037329945351e+00, 2.96014069544887330703e-17, + 1.76101384303758390359e+00, -7.94325312503922771057e-17, + 1.76578843593327272643e+00, 9.46131501808326786660e-17, + 1.77057597406355471392e+00, 5.96179451004055584767e-17, + 1.77537649252652118825e+00, 6.42973179655657203396e-17, + 1.78019002651542446181e+00, -5.28462728909161736517e-17, + 1.78501661131893496481e+00, 1.53304001210313138184e-17, + 1.78985628232140103755e+00, -4.15435466068334977098e-17, + 1.79470907500310716820e+00, 1.82274584279120867698e-17, + 1.79957502494053511732e+00, -2.52688923335889795224e-17, + 
1.80445416780662393208e+00, -5.17722240879331788328e-17, + 1.80934653937103195886e+00, -9.03264140245002968190e-17, + 1.81425217550039885595e+00, -9.96953153892034881983e-17, + 1.81917111215860849427e+00, 7.40267690114583888997e-17, + 1.82410338540705341259e+00, -1.01596278622770830650e-16, + 1.82904903140489727420e+00, 6.88919290883569563697e-17, + 1.83400808640934243066e+00, 3.28310722424562658722e-17, + 1.83898058677589371079e+00, 6.91896974027251194233e-18, + 1.84396656895862598446e+00, -5.93974202694996455028e-17, + 1.84896606951045083811e+00, 9.02758044626108928816e-17, + 1.85397912508338547077e+00, 9.76188749072759353840e-17, + 1.85900577242882047990e+00, -9.52870546198994068663e-17, + 1.86404604839778897940e+00, 6.54091268062057047791e-17, + 1.86909998994123860427e+00, -9.93850521425506708290e-17, + 1.87416763411029996256e+00, -6.12276341300414256164e-17, + 1.87924901805656019427e+00, -1.62263155578358447799e-17, + 1.88434417903233453195e+00, -8.22659312553371090551e-17, + 1.88945315439093919352e+00, -9.00516828505912548531e-17, + 1.89457598158696560731e+00, 3.40340353521652967060e-17, + 1.89971269817655530332e+00, -3.85973976937851370678e-17, + 1.90486334181767413831e+00, 6.53385751471827862895e-17, + 1.91002795027038985154e+00, -5.90968800674406023686e-17, + 1.91520656139714740007e+00, -1.06199460561959626376e-16, + 1.92039921316304740273e+00, 7.11668154063031418621e-17, + 1.92560594363612502811e+00, -9.91496376969374092749e-17, + 1.93082679098762710623e+00, 6.16714970616910955284e-17, + 1.93606179349229434727e+00, 1.03323859606763257448e-16, + 1.94131098952864045160e+00, -6.63802989162148798984e-17, + 1.94657441757923321823e+00, 6.81102234953387718436e-17, + 1.95185211623097831790e+00, -2.19901696997935108603e-17, + 1.95714412417540017941e+00, 8.96076779103666776760e-17, + 1.96245048020892731699e+00, 1.09768440009135469493e-16, + 1.96777122323317588126e+00, -1.03149280115311315109e-16, + 1.97310639225523432039e+00, -7.45161786395603748608e-18, + 
1.97845602638795092787e+00, 4.03887531092781665750e-17, + 1.98382016485021939189e+00, -2.20345441239106265716e-17, + 1.98919884696726634310e+00, 8.20513263836919941553e-18, + 1.99459211217094023461e+00, 1.79097103520026450854e-17 +}; + /* Double constants written as pairs of 32-bit words and read back through member d; the #defines below give each slot its name. NOTE(review): DBLWORD is defined outside this chunk - presumably it orders the (high, low) words for the host endianness; confirm. */+static const union { + unsigned i[2]; + double d; +} C[] = { + { DBLWORD(0x43380000, 0x00000000) }, + { DBLWORD(0x40771547, 0x652b82fe) }, + { DBLWORD(0x3f662e42, 0xfee00000) }, + { DBLWORD(0x3d6a39ef, 0x35793c76) }, + { DBLWORD(0x3ff00000, 0x00000000) }, + { DBLWORD(0x3fdfffff, 0xfffffff6) }, + { DBLWORD(0x3fc55555, 0x721a1d14) }, + { DBLWORD(0x3fa55555, 0x6e0896af) }, + { DBLWORD(0x01000000, 0x00000000) }, + { DBLWORD(0x7f000000, 0x00000000) }, + { DBLWORD(0x40862e42, 0xfefa39ef) }, + { DBLWORD(0xc0874910, 0xd52d3051) }, + { DBLWORD(0xfff00000, 0x00000000) }, + { DBLWORD(0x00000000, 0x00000000) } +}; + +#define round C[0].d +#define invln2_256 C[1].d +#define ln2_256h C[2].d +#define ln2_256l C[3].d +#define one C[4].d +#define B1 C[5].d +#define B2 C[6].d +#define B3 C[7].d +#define tiny C[8].d +#define huge C[9].d +#define othresh C[10].d +#define uthresh C[11].d +#define neginf C[12].d +#define zero C[13].d + /* PROCESS(N): evaluate one already-screened argument x##N, store the result through y and advance y by stridey. Steps: y = x * invln2_256 + round, whose low word j supplies the table index (j & 0xff, doubled because TBL holds pairs) and the scale k = j >> 8; x is reduced by y * (ln2_256h + ln2_256l); a cubic in the reduced x is combined with TBL[j] and TBL[j + 1]; finally k is added to the exponent field of the high word. When k < -1021 the exponent is offset by 0x3ef and the value multiplied by tiny instead. Used for the leftover elements of a partial group. */+#define PROCESS(N) \ + y##N = (x##N * invln2_256) + round; \ + j##N = LO(y##N); \ + y##N -= round; \ + k##N = j##N >> 8; \ + j##N = (j##N & 0xff) << 1; \ + x##N = (x##N - y##N * ln2_256h) - y##N * ln2_256l; \ + y##N = x##N * (one + x##N * (B1 + x##N * (B2 + x##N * B3))); \ + t##N = TBL[j##N]; \ + y##N = t##N + (TBL[j##N + 1] + t##N * y##N); \ + if (k##N < -1021) { \ + HI(y##N) += (k##N + 0x3ef) << 20; \ + y##N *= tiny; \ + } else { \ + HI(y##N) += k##N << 20; \ + } \ + *y = y##N; \ + y += stridey + /* PREPROCESS(N, index, label): load *x into x##N and advance x by stridex. Arguments that are inf or NaN, above othresh, below uthresh, or with |x| < 2^-28 are answered at once in y[index] and control jumps to label; any other argument falls through with x##N ready for evaluation. */+#define PREPROCESS(N, index, label) \ + hx = HI(x[0]); \ + ix = hx & ~0x80000000; \ + x##N = *x; \ + x += stridex; \ + if (ix >= 0x40862e42) { \ + if (ix >= 0x7ff00000) { /* x is inf or nan */ \ + y[index] = (x##N == neginf)? 
zero : \ + x##N * x##N; \ + goto label; \ + } \ + if (x##N > othresh) { \ + y[index] = huge * huge; \ + goto label; \ + } \ + if (x##N < uthresh) { \ + y[index] = tiny * tiny; \ + goto label; \ + } \ + } else if (ix < 0x3e300000) { /* |x| < 2^-28 */ \ + y[index] = one + x##N; \ + goto label; \ + } + /* __vexp: vector exponential. Reads n doubles from x (stepping by stridex) and stores one result per element through y (stepping by stridey); nothing is done when n <= 0. Each pass screens up to six arguments with PREPROCESS and then evaluates all six with the interleaved statements below. If the input runs out, or a screened argument was answered directly, control goes to processK, which finishes the K arguments already loaded. y is biased by -stridey on entry because every pass through begin: adds stridey before use. */+void +__vexp(int n, double *restrict x, int stridex, double *restrict y, + int stridey) +{ + double x0, x1, x2, x3, x4, x5; + double y0, y1, y2, y3, y4, y5; + double t0, t1, t2, t3, t4, t5; + int k0, k1, k2, k3, k4, k5; + int j0, j1, j2, j3, j4, j5; + int hx, ix; + + y -= stridey; + + for (;;) { +begin: + if (--n < 0) + break; + y += stridey; + + PREPROCESS(0, 0, begin); + + if (--n < 0) + goto process1; + + PREPROCESS(1, stridey, process1); + + if (--n < 0) + goto process2; + + PREPROCESS(2, stridey << 1, process2); + + if (--n < 0) + goto process3; + + PREPROCESS(3, (stridey << 1) + stridey, process3); + + if (--n < 0) + goto process4; + + PREPROCESS(4, stridey << 2, process4); + + if (--n < 0) + goto process5; + + PREPROCESS(5, (stridey << 2) + stridey, process5); + + y0 = (x0 * invln2_256) + round; + y1 = (x1 * invln2_256) + round; + y2 = (x2 * invln2_256) + round; + y3 = (x3 * invln2_256) + round; + y4 = (x4 * invln2_256) + round; + y5 = (x5 * invln2_256) + round; + + j0 = LO(y0); + j1 = LO(y1); + j2 = LO(y2); + j3 = LO(y3); + j4 = LO(y4); + j5 = LO(y5); + + y0 -= round; + y1 -= round; + y2 -= round; + y3 -= round; + y4 -= round; + y5 -= round; + + k0 = j0 >> 8; + k1 = j1 >> 8; + k2 = j2 >> 8; + k3 = j3 >> 8; + k4 = j4 >> 8; + k5 = j5 >> 8; + + j0 = (j0 & 0xff) << 1; + j1 = (j1 & 0xff) << 1; + j2 = (j2 & 0xff) << 1; + j3 = (j3 & 0xff) << 1; + j4 = (j4 & 0xff) << 1; + j5 = (j5 & 0xff) << 1; + + x0 = (x0 - y0 * ln2_256h) - y0 * ln2_256l; + x1 = (x1 - y1 * ln2_256h) - y1 * ln2_256l; + x2 = (x2 - y2 * ln2_256h) - y2 * ln2_256l; + x3 = (x3 - y3 * ln2_256h) - y3 * ln2_256l; + x4 = (x4 - y4 * ln2_256h) - y4 * ln2_256l; + x5 = (x5 - y5 * ln2_256h) - y5 * ln2_256l; + + y0 
= x0 * (one + x0 * (B1 + x0 * (B2 + x0 * B3))); + y1 = x1 * (one + x1 * (B1 + x1 * (B2 + x1 * B3))); + y2 = x2 * (one + x2 * (B1 + x2 * (B2 + x2 * B3))); + y3 = x3 * (one + x3 * (B1 + x3 * (B2 + x3 * B3))); + y4 = x4 * (one + x4 * (B1 + x4 * (B2 + x4 * B3))); + y5 = x5 * (one + x5 * (B1 + x5 * (B2 + x5 * B3))); + + t0 = TBL[j0]; + t1 = TBL[j1]; + t2 = TBL[j2]; + t3 = TBL[j3]; + t4 = TBL[j4]; + t5 = TBL[j5]; + + y0 = t0 + (TBL[j0 + 1] + t0 * y0); + y1 = t1 + (TBL[j1 + 1] + t1 * y1); + y2 = t2 + (TBL[j2 + 1] + t2 * y2); + y3 = t3 + (TBL[j3 + 1] + t3 * y3); + y4 = t4 + (TBL[j4 + 1] + t4 * y4); + y5 = t5 + (TBL[j5 + 1] + t5 * y5); + + if (k0 < -1021) { + HI(y0) += (k0 + 0x3ef) << 20; + y0 *= tiny; + } else { + HI(y0) += k0 << 20; + } + if (k1 < -1021) { + HI(y1) += (k1 + 0x3ef) << 20; + y1 *= tiny; + } else { + HI(y1) += k1 << 20; + } + if (k2 < -1021) { + HI(y2) += (k2 + 0x3ef) << 20; + y2 *= tiny; + } else { + HI(y2) += k2 << 20; + } + if (k3 < -1021) { + HI(y3) += (k3 + 0x3ef) << 20; + y3 *= tiny; + } else { + HI(y3) += k3 << 20; + } + if (k4 < -1021) { + HI(y4) += (k4 + 0x3ef) << 20; + y4 *= tiny; + } else { + HI(y4) += k4 << 20; + } + if (k5 < -1021) { + HI(y5) += (k5 + 0x3ef) << 20; + y5 *= tiny; + } else { + HI(y5) += k5 << 20; + } + + y[0] = y0; + y[stridey] = y1; + y[stridey << 1] = y2; + y[(stridey << 1) + stridey] = y3; + y[stridey << 2] = y4; + y[(stridey << 2) + stridey] = y5; + y += (stridey << 2) + stridey; + continue; + /* processK: finish the K arguments that were loaded before the group was cut short. Each PROCESS stores one result and advances y, so the stride added at begin: then steps over the slot PREPROCESS already filled (or the loop simply ends). */+process1: + PROCESS(0); + continue; + +process2: + PROCESS(0); + PROCESS(1); + continue; + +process3: + PROCESS(0); + PROCESS(1); + PROCESS(2); + continue; + +process4: + PROCESS(0); + PROCESS(1); + PROCESS(2); + PROCESS(3); + continue; + +process5: + PROCESS(0); + PROCESS(1); + PROCESS(2); + PROCESS(3); + PROCESS(4); + } +} diff --git a/usr/src/lib/libmvec/common/__vexpf.c b/usr/src/lib/libmvec/common/__vexpf.c new file mode 100644 index 0000000000..9e340bba68 --- /dev/null +++ b/usr/src/lib/libmvec/common/__vexpf.c @@ -0,0 +1,351 @@ 
+/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifdef __RESTRICT +#define restrict _Restrict +#else +#define restrict +#endif + +/* float expf(float x) + * + * Method : + * 1. Special cases: + * for x > 88.722839355...(0x42B17218) => Inf + overflow; + * for x < -103.97207642..(0xc2CFF1B4) => 0 + underflow; + * for x = Inf => Inf; + * for x = -Inf => 0; + * for x = +-NaN => QNaN. + * 2. Computes exponential from: + * exp(x) = 2**a * 2**(k/256) * 2**(y/256) + * Where: + * a = int ( 256 * log2(e) * x ) >> 8; + * k = int ( 256 * log2(e) * x ) & 0xFF; + * y = frac ( 256 * x * log2(e)). + * Note that: + * k = 0, 1, ..., 255; + * y = (-1, 1). + * Then: + * 2**(k/256) is looked up in a table of 2**0, 2**1/256, ... + * 2**(y/256) is computed using approximation: + * 2**(y/256) = a0 + a1 * y + a2 * y**2 + * Multiplication by 2**a is done by adding "a" to + * the biased exponent. + * Accuracy: + * The maximum relative error for the approximating + * polynomial is 2**(-29.18). 
All calculations are of + * double precision. + * Maximum error observed: less than 0.528 ulp for the whole + * float type range. + * + * NOTE: This implementation has been modified for SPARC to deliver + * zero instead of a subnormal result whenever the argument is less + * than log(2^-126). Therefore the worst case relative error is 1. + * + * Vector form: __vexpf(n, x, stridex, y, stridey) applies the method + * above to n floats read from x (stepping by stridex) and stores the + * results through y (stepping by stridey). Nothing is done when + * n <= 0. + */ + +static const double __TBL_exp2f[] = { + /* 2^(i/256) - ((i & 0xff) << 44), i = [0, 255]; the subtraction is on the 64-bit pattern of the double, and PROCESS adds (long long)k << 44 back to the pattern it loads */ +1.000000000000000000e+00, 9.994025125251012609e-01, 9.988087005564013632e-01, +9.982185740592087742e-01, 9.976321430258502376e-01, 9.970494174757447148e-01, +9.964704074554765478e-01, 9.958951230388689568e-01, 9.953235743270583136e-01, +9.947557714485678604e-01, 9.941917245593818730e-01, 9.936314438430204898e-01, +9.930749395106142074e-01, 9.925222218009785990e-01, 9.919733009806893653e-01, +9.914281873441580517e-01, 9.908868912137068774e-01, 9.903494229396448967e-01, +9.898157929003436051e-01, 9.892860115023132117e-01, 9.887600891802785785e-01, +9.882380363972563808e-01, 9.877198636446310465e-01, 9.872055814422322495e-01, +9.866952003384118486e-01, 9.861887309101209365e-01, 9.856861837629877776e-01, +9.851875695313955239e-01, 9.846928988785599302e-01, 9.842021824966076249e-01, +9.837154311066546031e-01, 9.832326554588848300e-01, 9.827538663326288448e-01, +9.822790745364429199e-01, 9.818082909081884413e-01, 9.813415263151109569e-01, +9.808787916539204454e-01, 9.804200978508705866e-01, 9.799654558618393629e-01, +9.795148766724087741e-01, 9.790683712979462161e-01, 9.786259507836846394e-01, +9.781876262048033732e-01, 9.777534086665099489e-01, 9.773233093041209241e-01, +9.768973392831440394e-01, 9.764755097993595978e-01, 9.760578320789027318e-01, +9.756443173783457823e-01, 9.752349769847807881e-01, 9.748298222159020865e-01, +9.744288644200894689e-01, 9.740321149764913367e-01, 9.736395852951079677e-01, +9.732512868168755604e-01, 9.728672310137493895e-01, 9.724874293887887378e-01, +9.721118934762408292e-01, 9.717406348416250950e-01, 
9.713736650818186602e-01, +9.710109958251406104e-01, 9.706526387314379223e-01, 9.702986054921705072e-01, +9.699489078304969203e-01, 9.696035575013605134e-01, 9.692625662915755891e-01, +9.689259460199136642e-01, 9.685937085371902899e-01, 9.682658657263515378e-01, +9.679424295025619296e-01, 9.676234118132908124e-01, 9.673088246384006217e-01, +9.669986799902344776e-01, 9.666929899137042259e-01, 9.663917664863788115e-01, +9.660950218185727634e-01, 9.658027680534350123e-01, 9.655150173670379310e-01, +9.652317819684667066e-01, 9.649530740999082701e-01, 9.646789060367420010e-01, +9.644092900876289898e-01, 9.641442385946024096e-01, 9.638837639331581109e-01, +9.636278785123455481e-01, 9.633765947748582636e-01, 9.631299251971253694e-01, +9.628878822894031408e-01, 9.626504785958666099e-01, 9.624177266947013809e-01, +9.621896391981960006e-01, 9.619662287528346623e-01, 9.617475080393891318e-01, +9.615334897730127839e-01, 9.613241867033328614e-01, 9.611196116145447332e-01, +9.609197773255048203e-01, 9.607246966898252971e-01, 9.605343825959679060e-01, +9.603488479673386591e-01, 9.601681057623822069e-01, 9.599921689746773179e-01, +9.598210506330320246e-01, 9.596547638015787696e-01, 9.594933215798706616e-01, +9.593367371029771773e-01, 9.591850235415807502e-01, 9.590381941020729162e-01, +9.588962620266514580e-01, 9.587592405934176609e-01, 9.586271431164729018e-01, +9.584999829460172371e-01, 9.583777734684463256e-01, 9.582605281064505709e-01, +9.581482603191123770e-01, 9.580409836020059577e-01, 9.579387114872952580e-01, +9.578414575438342071e-01, 9.577492353772650846e-01, 9.576620586301189952e-01, +9.575799409819160113e-01, 9.575028961492645374e-01, 9.574309378859631181e-01, +9.573640799831001358e-01, 9.573023362691556182e-01, 9.572457206101023797e-01, +9.571942469095077177e-01, 9.571479291086353314e-01, 9.571067811865475727e-01, +9.570708171602075875e-01, 9.570400510845827879e-01, 9.570144970527471040e-01, +9.569941691959850116e-01, 9.569790816838944503e-01, 
9.569692487244911838e-01, +9.569646845643128286e-01, 9.569654034885233251e-01, 9.569714198210175216e-01, +9.569827479245263113e-01, 9.569994022007218826e-01, 9.570213970903235223e-01, +9.570487470732028656e-01, 9.570814666684909211e-01, 9.571195704346837640e-01, +9.571630729697496731e-01, 9.572119889112359337e-01, 9.572663329363761964e-01, +9.573261197621985019e-01, 9.573913641456324175e-01, 9.574620808836177277e-01, +9.575382848132127922e-01, 9.576199908117032367e-01, 9.577072137967114207e-01, +9.577999687263049067e-01, 9.578982705991073709e-01, 9.580021344544072948e-01, +9.581115753722692086e-01, 9.582266084736434930e-01, 9.583472489204779565e-01, +9.584735119158284133e-01, 9.586054127039703721e-01, 9.587429665705107240e-01, +9.588861888424999869e-01, 9.590350948885443261e-01, 9.591897001189184646e-01, +9.593500199856788146e-01, 9.595160699827764983e-01, 9.596878656461707013e-01, +9.598654225539432483e-01, 9.600487563264122892e-01, 9.602378826262468747e-01, +9.604328171585819751e-01, 9.606335756711334994e-01, 9.608401739543135367e-01, +9.610526278413467072e-01, 9.612709532083855146e-01, 9.614951659746271417e-01, +9.617252821024303566e-01, 9.619613175974318642e-01, 9.622032885086644338e-01, +9.624512109286739170e-01, 9.627051009936374859e-01, 9.629649748834822054e-01, +9.632308488220031606e-01, 9.635027390769824729e-01, 9.637806619603088709e-01, +9.640646338280971506e-01, 9.643546710808080791e-01, 9.646507901633681881e-01, +9.649530075652912320e-01, 9.652613398207983142e-01, 9.655758035089392344e-01, +9.658964152537145020e-01, 9.662231917241966839e-01, 9.665561496346526393e-01, +9.668953057446663113e-01, 9.672406768592617388e-01, 9.675922798290256255e-01, +9.679501315502314629e-01, 9.683142489649629869e-01, 9.686846490612389671e-01, +9.690613488731369962e-01, 9.694443654809188349e-01, 9.698337160111555333e-01, +9.702294176368531087e-01, 9.706314875775782225e-01, 9.710399430995845238e-01, +9.714548015159391037e-01, 9.718760801866497268e-01, 
9.723037965187919518e-01, +9.727379679666363632e-01, 9.731786120317773570e-01, 9.736257462632605941e-01, +9.740793882577122309e-01, 9.745395556594674824e-01, 9.750062661607005188e-01, +9.754795375015535841e-01, 9.759593874702675587e-01, 9.764458339033119660e-01, +9.769388946855159794e-01, 9.774385877501994280e-01, 9.779449310793042471e-01, +9.784579427035267063e-01, 9.789776407024486371e-01, 9.795040432046712153e-01, +9.800371683879468554e-01, 9.805770344793129922e-01, 9.811236597552254191e-01, +9.816770625416927354e-01, 9.822372612144102400e-01, 9.828042741988944897e-01, +9.833781199706193021e-01, 9.839588170551499813e-01, 9.845463840282800971e-01, +9.851408395161672660e-01, 9.857422021954695968e-01, 9.863504907934828037e-01, +9.869657240882776517e-01, 9.875879209088370692e-01, 9.882171001351949258e-01, +9.888532806985737000e-01, 9.894964815815237014e-01, 9.901467218180625141e-01, +9.908040204938135531e-01, 9.914683967461471736e-01, 9.921398697643202258e-01, +9.928184587896166091e-01, 9.935041831154891590e-01, 9.941970620877000897e-01, +9.948971151044636585e-01, 9.956043616165879406e-01, 9.963188211276171602e-01, +9.970405131939754639e-01, 9.977694574251096959e-01, 9.985056734836331715e-01, +9.992491810854701173e-01 +}; + /* K256ONLN2 scales the argument by 256 * log2(e); KA0, KA1 and KA2 are the coefficients a0, a1 and a2 of the quadratic approximation described in the header comment. */+static const double + K256ONLN2 = 369.3299304675746271, + KA2 = 3.66556671660783833261e-06, + KA1 = 2.70760782821392980564e-03, + KA0 = 1.0; + /* Squared by PREPROCESS to produce the out-of-range results: entry 0 for positive arguments (overflow), entry 1 for negative ones (underflow). */+static const float extreme[2] = { 1.0e30f, 1.0e-30f }; + /* PROCESS(N): evaluate one already-screened argument x##N. The argument is scaled by K256ONLN2 and split into the integer k##N and a fractional remainder; the quadratic is evaluated on the remainder; the table entry for k & 0xff is loaded as a 64-bit pattern and k << 44 is added to it; the product is stored as a float through y, which then advances by stridey. */+#define PROCESS(N) \ + x##N *= K256ONLN2; \ + k##N = (int) x##N; \ + x##N -= (double) k##N; \ + x##N = (KA2 * x##N + KA1) * x##N + KA0; \ + lres##N = ((long long *)__TBL_exp2f)[k##N & 0xff]; \ + lres##N += (long long)k##N << 44; \ + *y = (float) (x##N * *(double *)&lres##N); \ + y += stridey + +#ifdef __sparc + +#define PREPROCESS(N, index, label) \ + xi = *(int *)x; \ + ax = xi & ~0x80000000; \ + fx = *x; \ + x += stridex; \ + if (ax >= 0x42aeac50) /* log(2^126) = 87.3365... 
*/ \ + { \ + sign = (unsigned)xi >> 31; \ + if (ax >= 0x7f800000) /* |x| = inf or nan */ \ + { \ + if (ax > 0x7f800000) /* nan */ \ + { \ + y[index] = fx * fx; \ + goto label; \ + } \ + y[index] = (sign) ? 0.0f : fx; \ + goto label; \ + } \ + if (sign || ax > 0x42b17218) { \ + fx = extreme[sign]; \ + y[index] = fx * fx; \ + goto label; \ + } \ + } \ + x##N = fx + +#else + +#define PREPROCESS(N, index, label) \ + xi = *(int *)x; \ + ax = xi & ~0x80000000; \ + fx = *x; \ + x += stridex; \ + if (ax > 0x42cff1b4) /* 103.972076f */ \ + { \ + sign = (unsigned)xi >> 31; \ + if (ax >= 0x7f800000) /* |x| = inf or nan */ \ + { \ + if (ax > 0x7f800000) /* nan */ \ + { \ + y[index] = fx * fx; \ + goto label; \ + } \ + y[index] = (sign) ? 0.0f : fx; \ + goto label; \ + } \ + fx = extreme[sign]; \ + y[index] = fx * fx; \ + goto label; \ + } \ + x##N = fx + +#endif + /* __vexpf: vector expf. Each pass screens up to five arguments with PREPROCESS (which answers NaN, infinite and out-of-range inputs directly in y[index]) and then evaluates all five with the interleaved statements below. If the input runs out or an argument was answered directly, control goes to processK, which finishes the K arguments already loaded. y is biased by -stridey on entry because every pass through begin: adds stridey before use. */+void +__vexpf(int n, float * restrict x, int stridex, float * restrict y, + int stridey) +{ + double x0, x1, x2, x3, x4; + double res0, res1, res2, res3, res4; + float fx; + long long lres0, lres1, lres2, lres3, lres4; + int k0, k1, k2, k3, k4; + int xi, ax, sign; + + y -= stridey; + + for (; ;) + { +begin: + if (--n < 0) + break; + y += stridey; + + PREPROCESS(0, 0, begin); + + if (--n < 0) + goto process1; + + PREPROCESS(1, stridey, process1); + + if (--n < 0) + goto process2; + + PREPROCESS(2, stridey << 1, process2); + + if (--n < 0) + goto process3; + + PREPROCESS(3, (stridey << 1) + stridey, process3); + + if (--n < 0) + goto process4; + + PREPROCESS(4, (stridey << 2), process4); + + x0 *= K256ONLN2; + x1 *= K256ONLN2; + x2 *= K256ONLN2; + x3 *= K256ONLN2; + x4 *= K256ONLN2; + + k0 = (int)x0; + k1 = (int)x1; + k2 = (int)x2; + k3 = (int)x3; + k4 = (int)x4; + + x0 -= (double)k0; + x1 -= (double)k1; + x2 -= (double)k2; + x3 -= (double)k3; + x4 -= (double)k4; + + x0 = (KA2 * x0 + KA1) * x0 + KA0; + x1 = (KA2 * x1 + KA1) * x1 + KA0; + x2 = (KA2 * x2 + KA1) * x2 + KA0; + x3 = (KA2 * x3 + KA1) * x3 + KA0; + x4 = (KA2 
* x4 + KA1) * x4 + KA0; + + lres0 = ((long long *)__TBL_exp2f)[k0 & 255]; + lres1 = ((long long *)__TBL_exp2f)[k1 & 255]; + lres2 = ((long long *)__TBL_exp2f)[k2 & 255]; + lres3 = ((long long *)__TBL_exp2f)[k3 & 255]; + lres4 = ((long long *)__TBL_exp2f)[k4 & 255]; + + lres0 += (long long)k0 << 44; + res0 = *(double *)&lres0; + lres1 += (long long)k1 << 44; + res1 = *(double *)&lres1; + lres2 += (long long)k2 << 44; + res2 = *(double *)&lres2; + lres3 += (long long)k3 << 44; + res3 = *(double *)&lres3; + lres4 += (long long)k4 << 44; + res4 = *(double *)&lres4; + + *y = (float)(res0 * x0); + y += stridey; + *y = (float)(res1 * x1); + y += stridey; + *y = (float)(res2 * x2); + y += stridey; + *y = (float)(res3 * x3); + y += stridey; + *y = (float)(res4 * x4); + continue; + +process1: + PROCESS(0); + continue; + +process2: + PROCESS(0); + PROCESS(1); + continue; + +process3: + PROCESS(0); + PROCESS(1); + PROCESS(2); + continue; + +process4: + PROCESS(0); + PROCESS(1); + PROCESS(2); + PROCESS(3); + } +} diff --git a/usr/src/lib/libmvec/common/__vhypot.c b/usr/src/lib/libmvec/common/__vhypot.c new file mode 100644 index 0000000000..6a31134eaf --- /dev/null +++ b/usr/src/lib/libmvec/common/__vhypot.c @@ -0,0 +1,397 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include <sys/isa_defs.h> +#include "libm_synonyms.h" +#include "libm_inlines.h" + +#ifdef _LITTLE_ENDIAN +#define HI(x) *(1+(int*)x) +#define LO(x) *(unsigned*)x +#else +#define HI(x) *(int*)x +#define LO(x) *(1+(unsigned*)x) +#endif + +#ifdef __RESTRICT +#define restrict _Restrict +#else +#define restrict +#endif + +/* double hypot(double x, double y) + * + * Method : + * 1. Special cases: + * x or y is +Inf or -Inf => +Inf + * x or y is NaN => QNaN + * 2. Computes hypot(x,y): + * hypot(x,y) = m * sqrt(xnm * xnm + ynm * ynm) + * Where: + * m = max(|x|,|y|) + * xnm = x * (1/m) + * ynm = y * (1/m) + * + * Compute xnm * xnm + ynm * ynm by simulating + * multi-precision arithmetic. + * + * Accuracy: + * Maximum error observed: less than 0.872 ulp after 16.777.216.000 + * results. + * + * Vector form: __vhypot(n, px, stridex, py, stridey, pz, stridez) + * stores hypot(px[i * stridex], py[i * stridey]) in pz[i * stridez] + * for 0 <= i < n. HI() and LO() above take a pointer to a double and + * yield its high (signed) and low (unsigned) 32-bit words. 
+ */ + +#define sqrt __sqrt + +extern double sqrt(double); +extern double fabs(double); + +static const unsigned long long LCONST[] = { +0x41b0000000000000ULL, /* D2ON28 = 2 ** 28 */ +0x0010000000000000ULL, /* D2ONM1022 = 2 ** -1022 */ +0x7fd0000000000000ULL /* D2ONP1022 = 2 ** 1022 */ +}; + +static void +__vhypot_n(int n, double * restrict px, int stridex, double * restrict py, + int stridey, double * restrict pz, int stridez); + +#pragma no_inline(__vhypot_n) + /* RETURN(ret): used inside the scan loop of __vhypot for an element that was handled specially. Stores ret through pz and steps py and pz (px was already stepped at the top of the iteration). If no ordinary elements are pending (n_n == 0) the batch start spx/spy/spz is moved past this element, the high words of the next pair are reloaded, and scanning continues. Otherwise this element is counted with n-- and the scan loop is left so the pending batch can be handed to __vhypot_n. */+#define RETURN(ret) \ +{ \ + *pz = (ret); \ + py += stridey; \ + pz += stridez; \ + if (n_n == 0) \ + { \ + hx0 = HI(px); \ + hy0 = HI(py); \ + spx = px; spy = py; spz = pz; \ + continue; \ + } \ + n--; \ + break; \ +} + +void +__vhypot(int n, double * restrict px, int stridex, double * restrict py, + int stridey, double * restrict pz, int stridez) +{ + int hx0, hx1, hy0, j0, diff; + double x_hi, x_lo, y_hi, y_lo; + double scl = 0; + double x, y, res; + double *spx, *spy, *spz; + int n_n; + double D2ON28 = ((double*)LCONST)[0]; /* 2 ** 28 */ + double D2ONM1022 = ((double*)LCONST)[1]; /* 2 **-1022 */ + double D2ONP1022 = ((double*)LCONST)[2]; /* 2 ** 1022 */ + + while (n > 1) + { + n_n = 0; + spx = px; + spy = py; + spz = pz; + hx0 = HI(px); + hy0 = HI(py); + for (; n > 1 ; n--) + { + px += stridex; + hx0 &= 0x7fffffff; + hy0 &= 0x7fffffff; + + if (hx0 >= 0x7fe00000) /* |X| >= 2**1023 or Inf or NaN */ + { + diff = hy0 - hx0; + j0 = diff >> 31; + j0 = hy0 - (diff & j0); + j0 &= 0x7ff00000; + x = *(px - stridex); + y = *py; + x = fabs(x); + y = fabs(y); + if (j0 >= 0x7ff00000) /* |X| or |Y| = Inf or NaN */ + { + int lx = LO((px - stridex)); + int ly = LO(py); + if (hx0 == 0x7ff00000 && lx == 0) res = x == y ? y : x; + else if (hy0 == 0x7ff00000 && ly == 0) res = x == y ? 
x : y; + else res = x + y; + RETURN (res) + } + else + { + j0 = diff >> 31; + if (((diff ^ j0) - j0) < 0x03600000) /* max(|X|,|Y|)/min(|X|,|Y|) < 2**54 */ + { + x *= D2ONM1022; + y *= D2ONM1022; + + x_hi = (x + D2ON28) - D2ON28; + x_lo = x - x_hi; + y_hi = (y + D2ON28) - D2ON28; + y_lo = y - y_hi; + res = (x_hi * x_hi + y_hi * y_hi); + res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo); + + res = sqrt (res); + + res = D2ONP1022 * res; + RETURN (res) + } + else RETURN (x + y) + } + } + if (hy0 >= 0x7fe00000) /* |Y| >= 2**1023 or Inf or NaN */ + { + diff = hy0 - hx0; + j0 = diff >> 31; + j0 = hy0 - (diff & j0); + j0 &= 0x7ff00000; + x = *(px - stridex); + y = *py; + x = fabs(x); + y = fabs(y); + if (j0 >= 0x7ff00000) /* |X| or |Y| = Inf or NaN */ + { + int lx = LO((px - stridex)); + int ly = LO(py); + if (hx0 == 0x7ff00000 && lx == 0) res = x == y ? y : x; + else if (hy0 == 0x7ff00000 && ly == 0) res = x == y ? x : y; + else res = x + y; + RETURN (res) + } + else + { + j0 = diff >> 31; + if (((diff ^ j0) - j0) < 0x03600000) /* max(|X|,|Y|)/min(|X|,|Y|) < 2**54 */ + { + x *= D2ONM1022; + y *= D2ONM1022; + + x_hi = (x + D2ON28) - D2ON28; + x_lo = x - x_hi; + y_hi = (y + D2ON28) - D2ON28; + y_lo = y - y_hi; + res = (x_hi * x_hi + y_hi * y_hi); + res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo); + + res = sqrt (res); + + res = D2ONP1022 * res; + RETURN (res) + } + else RETURN (x + y) + } + } + + hx1 = HI(px); + + if (hx0 < 0x00100000 && hy0 < 0x00100000) /* X and Y are subnormal */ + { + x = *(px - stridex); + y = *py; + + x *= D2ONP1022; + y *= D2ONP1022; + + x_hi = (x + D2ON28) - D2ON28; + x_lo = x - x_hi; + y_hi = (y + D2ON28) - D2ON28; + y_lo = y - y_hi; + res = (x_hi * x_hi + y_hi * y_hi); + res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo); + + res = sqrt(res); + + res = D2ONM1022 * res; + RETURN (res) + } + + hx0 = hx1; + py += stridey; + pz += stridez; + n_n++; + hy0 = HI(py); + } + if (n_n > 0) + __vhypot_n (n_n, spx, stridex, spy, stridey, spz, stridez); + } + + if (n > 
0) + { + x = *px; + y = *py; + hx0 = HI(px); + hy0 = HI(py); + + hx0 &= 0x7fffffff; + hy0 &= 0x7fffffff; + + diff = hy0 - hx0; + j0 = diff >> 31; + j0 = hy0 - (diff & j0); + j0 &= 0x7ff00000; + + if (j0 >= 0x7fe00000) /* max(|X|,|Y|) >= 2**1023 or X or Y = Inf or NaN */ + { + x = fabs(x); + y = fabs(y); + if (j0 >= 0x7ff00000) /* |X| or |Y| = Inf or NaN */ + { + int lx = LO(px); + int ly = LO(py); + if (hx0 == 0x7ff00000 && lx == 0) res = x == y ? y : x; + else if (hy0 == 0x7ff00000 && ly == 0) res = x == y ? x : y; + else res = x + y; + *pz = res; + return; + } + else + { + j0 = diff >> 31; + if (((diff ^ j0) - j0) < 0x03600000) /* max(|X|,|Y|)/min(|X|,|Y|) < 2**54 */ + { + x *= D2ONM1022; + y *= D2ONM1022; + + x_hi = (x + D2ON28) - D2ON28; + x_lo = x - x_hi; + y_hi = (y + D2ON28) - D2ON28; + y_lo = y - y_hi; + res = (x_hi * x_hi + y_hi * y_hi); + res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo); + + res = sqrt (res); + + res = D2ONP1022 * res; + *pz = res; + return; + } + else + { + *pz = x + y; + return; + } + } + } + + if (j0 < 0x00100000) /* X and Y are subnormal */ + { + x *= D2ONP1022; + y *= D2ONP1022; + + x_hi = (x + D2ON28) - D2ON28; + x_lo = x - x_hi; + y_hi = (y + D2ON28) - D2ON28; + y_lo = y - y_hi; + res = (x_hi * x_hi + y_hi * y_hi); + res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo); + + res = sqrt(res); + + res = D2ONM1022 * res; + *pz = res; + return; + } + + HI(&scl) = (0x7fe00000 - j0); + + x *= scl; + y *= scl; + + x_hi = (x + D2ON28) - D2ON28; + y_hi = (y + D2ON28) - D2ON28; + x_lo = x - x_hi; + y_lo = y - y_hi; + + res = (x_hi * x_hi + y_hi * y_hi); + res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo); + + res = sqrt(res); + + HI(&scl) = j0; + + res = scl * res; + *pz = res; + } +} + /* __vhypot_n: hypot for a run of n ordinary pairs. __vhypot only passes pairs for which neither high word reaches 0x7fe00000 and which are not both subnormal. For each pair, j0 is the exponent field of the larger magnitude; both operands are multiplied by the power of two whose high word is 0x7fe00000 - j0; x*x + y*y is formed from hi/lo parts split with D2ON28; the square root is taken; and the result is multiplied by the power of two whose high word is j0. */+static void +__vhypot_n(int n, double * restrict px, int stridex, double * restrict py, + int stridey, double * restrict pz, int stridez) +{ + int hx0, hy0, j0, diff0; + double x_hi0, x_lo0, y_hi0, y_lo0, scl0 = 0; + double x0, y0, res0; + double D2ON28 = 
((double*)LCONST)[0]; /* 2 ** 28 */ + + for(; n > 0 ; n--) + { + x0 = *px; + y0 = *py; + hx0 = HI(px); + hy0 = HI(py); + + hx0 &= 0x7fffffff; + hy0 &= 0x7fffffff; + + diff0 = hy0 - hx0; + j0 = diff0 >> 31; + j0 = hy0 - (diff0 & j0); + j0 &= 0x7ff00000; + + px += stridex; + py += stridey; + + HI(&scl0) = (0x7fe00000 - j0); + + x0 *= scl0; + y0 *= scl0; + + x_hi0 = (x0 + D2ON28) - D2ON28; + y_hi0 = (y0 + D2ON28) - D2ON28; + x_lo0 = x0 - x_hi0; + y_lo0 = y0 - y_hi0; + + res0 = (x_hi0 * x_hi0 + y_hi0 * y_hi0); + res0 += ((x0 + x_hi0) * x_lo0 + (y0 + y_hi0) * y_lo0); + + res0 = sqrt(res0); + + HI(&scl0) = j0; + + res0 = scl0 * res0; + *pz = res0; + + pz += stridez; + } +} + diff --git a/usr/src/lib/libmvec/common/__vhypotf.c b/usr/src/lib/libmvec/common/__vhypotf.c new file mode 100644 index 0000000000..5072272c18 --- /dev/null +++ b/usr/src/lib/libmvec/common/__vhypotf.c @@ -0,0 +1,211 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#include "libm_synonyms.h" +#include "libm_inlines.h" + +#ifdef __RESTRICT +#define restrict _Restrict +#else +#define restrict +#endif + +#define sqrt __sqrt + +extern double sqrt(double); + +void +__vhypotf(int n, float * restrict x, int stridex, float * restrict y, + int stridey, float * restrict z, int stridez) +{ + float x0, x1, x2, y0, y1, y2, z0, z1, z2, *pz0, *pz1, *pz2; + unsigned hx0, hx1, hx2, hy0, hy1, hy2; + int i, j0, j1, j2; + + do + { +LOOP0: + hx0 = *(unsigned*)x & ~0x80000000; + hy0 = *(unsigned*)y & ~0x80000000; + *(unsigned*)&x0 = hx0; + *(unsigned*)&y0 = hy0; + if (hy0 > hx0) + { + i = hy0 - hx0; + j0 = hy0 & 0x7f800000; + if (hx0 == 0) + i = 0x7f800000; + } + else + { + i = hx0 - hy0; + j0 = hx0 & 0x7f800000; + if (hy0 == 0) + i = 0x7f800000; + else if (hx0 == 0) + i = 0x7f800000; + } + if (i >= 0x0c800000 || j0 >= 0x7f800000) + { + z0 = x0 + y0; + if (hx0 == 0x7f800000) + z0 = x0; + else if (hy0 == 0x7f800000) + z0 = y0; + else if (hx0 > 0x7f800000 || hy0 > 0x7f800000) + z0 = *x + *y; + *z = z0; + x += stridex; + y += stridey; + z += stridez; + i = 0; + if (--n <= 0) + break; + goto LOOP0; + } + pz0 = z; + x += stridex; + y += stridey; + z += stridez; + i = 1; + if (--n <= 0) + break; + +LOOP1: + hx1 = *(unsigned*)x & ~0x80000000; + hy1 = *(unsigned*)y & ~0x80000000; + *(unsigned*)&x1 = hx1; + *(unsigned*)&y1 = hy1; + if (hy1 > hx1) + { + i = hy1 - hx1; + j1 = hy1 & 0x7f800000; + if (hx1 == 0) + i = 0x7f800000; + } + else + { + i = hx1 - hy1; + j1 = hx1 & 0x7f800000; + if (hy1 == 0) + i = 0x7f800000; + else if (hx1 == 0) + i = 0x7f800000; + } + if (i >= 0x0c800000 || j1 >= 0x7f800000) + { + z1 = x1 + y1; + if (hx1 == 0x7f800000) + z1 = x1; + else if (hy1 == 0x7f800000) + z1 = y1; + else if (hx1 > 0x7f800000 || hy1 > 0x7f800000) + z1 = *x + *y; + *z = z1; + x += stridex; + y += stridey; + z += stridez; + i = 1; + if (--n <= 0) + break; + goto LOOP1; + } + pz1 = z; + x += stridex; + y += stridey; + z += stridez; + i = 2; + if (--n <= 0) 
+ break; + +LOOP2: + hx2 = *(unsigned*)x & ~0x80000000; + hy2 = *(unsigned*)y & ~0x80000000; + *(unsigned*)&x2 = hx2; + *(unsigned*)&y2 = hy2; + if (hy2 > hx2) + { + i = hy2 - hx2; + j2 = hy2 & 0x7f800000; + if (hx2 == 0) + i = 0x7f800000; + } + else + { + i = hx2 - hy2; + j2 = hx2 & 0x7f800000; + if (hy2 == 0) + i = 0x7f800000; + else if (hx2 == 0) + i = 0x7f800000; + } + if (i >= 0x0c800000 || j2 >= 0x7f800000) + { + z2 = x2 + y2; + if (hx2 == 0x7f800000) + z2 = x2; + else if (hy2 == 0x7f800000) + z2 = y2; + else if (hx2 > 0x7f800000 || hy2 > 0x7f800000) + z2 = *x + *y; + *z = z2; + x += stridex; + y += stridey; + z += stridez; + i = 2; + if (--n <= 0) + break; + goto LOOP2; + } + pz2 = z; + + z0 = sqrt(x0 * (double)x0 + y0 * (double)y0); + z1 = sqrt(x1 * (double)x1 + y1 * (double)y1); + z2 = sqrt(x2 * (double)x2 + y2 * (double)y2); + *pz0 = z0; + *pz1 = z1; + *pz2 = z2; + + x += stridex; + y += stridey; + z += stridez; + i = 0; + } while (--n > 0); + + if (i > 0) + { + if (i > 1) + { + z1 = sqrt(x1 * (double)x1 + y1 * (double)y1); + *pz1 = z1; + } + z0 = sqrt(x0 * (double)x0 + y0 * (double)y0); + *pz0 = z0; + } +} diff --git a/usr/src/lib/libmvec/common/__vlog.c b/usr/src/lib/libmvec/common/__vlog.c new file mode 100644 index 0000000000..8106de2ef1 --- /dev/null +++ b/usr/src/lib/libmvec/common/__vlog.c @@ -0,0 +1,787 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * __vlog: double precision vector log + * + * Algorithm: + * + * Write x = 2^n z where 1 - 2^-10 <= z < 2 - 2^-9. Let m = z + * rounded to nine significant bits, so m = 1 + 2^-8 k, where + * 0 <= k <= 255. Let d = z - m. Then + * + * log(x) = n log(2) + log(m) + log(1+(d/m)) + * + * Let ln2hi = log(2) rounded to a multiple of 2^-42 and ln2lo + * ~ log(2) - ln2hi. From a table, obtain mh and ml such that + * mh = log(m) rounded to a multiple of 2^-42 and ml ~ log(m) - + * mh. From the same table, obtain rh and rl such that rh = 1/m + * rounded to a multiple of 2^-10 and rl ~ 1/m - rh. For |y| <= + * 2^-9, approximate log(1+y) by a polynomial y+p(y) where p(y) + * := y*y*(-1/2+y*(P3+y*(P4+y*(P5+y*P6)))). Now letting s = + * d*rh + d*rl in double precision, we can compute the sum above + * accurately as + * + * (n*ln2hi + mh) + (d*rh + (d*rl + (n*ln2lo + ml) + p(s))) + * + * When x is subnormal, we first scale it to the normal range, + * adjusting n accordingly. + * + * Accuracy: + * + * The largest error observed is less than 0.8 ulps. 
+ */ + +#include <sys/isa_defs.h> + +#ifdef _LITTLE_ENDIAN +#define HI(x) *(1+(int *)&x) +#define LO(x) *(unsigned *)&x +#define HIWORD 1 +#define LOWORD 0 +#else +#define HI(x) *(int *)&x +#define LO(x) *(1+(unsigned *)&x) +#define HIWORD 0 +#define LOWORD 1 +#endif + +#ifdef __RESTRICT +#define restrict _Restrict +#else +#define restrict +#endif + +static const double TBL[] = { + 1.00000000000000000000e+00, 0.00000000000000000000e+00, + 0.00000000000000000000e+00, 0.00000000000000000000e+00, + 9.96093750000000000000e-01, 1.51994163424124515728e-05, + 3.89864041562759666704e-03, 2.97263469009289512726e-14, + 9.92187500000000000000e-01, 6.05620155038759681518e-05, + 7.78214044203195953742e-03, 2.29894100462035112076e-14, + 9.88281250000000000000e-01, 1.35738416988416988208e-04, + 1.16506172200843138853e-02, -1.09039749717359319029e-13, + 9.84375000000000000000e-01, 2.40384615384615397959e-04, + 1.55041865359635266941e-02, 1.72745674997061065553e-15, + 9.80468750000000000000e-01, 3.74161877394636028203e-04, + 1.93429628432113531744e-02, -8.04185385052258635682e-14, + 9.77539062500000000000e-01, -4.39825858778625927714e-04, + 2.31670592816044518258e-02, -7.00735970431003565857e-14, + 9.73632812500000000000e-01, -2.48782081749049442231e-04, + 2.69765876983001362532e-02, -9.80605051684317662887e-14, + 9.69726562500000000000e-01, -2.95928030303030311244e-05, + 3.07716586667083902285e-02, 4.52981425779092882775e-14, + 9.65820312500000000000e-01, 2.17423349056603779517e-04, + 3.45523815067281248048e-02, -6.83913974232877736961e-14, + 9.62890625000000000000e-01, -4.84609962406015010693e-04, + 3.83188643020275776507e-02, 1.09021543022033016421e-13, + 9.58984375000000000000e-01, -1.82876872659176042957e-04, + 4.20712139207353175152e-02, -4.82631400055112824008e-14, + 9.55078125000000000000e-01, 1.45755597014925360189e-04, + 4.58095360313564015087e-02, -6.21983419947579227529e-14, + 9.52148437500000000000e-01, -4.75575046468401500289e-04, + 4.95339351223265111912e-02, 
-4.98803091079814255646e-14, + 9.48242187500000000000e-01, -9.40393518518518520526e-05, + 5.32445145188376045553e-02, -2.53216894311744497863e-14, + 9.44335937500000000000e-01, 3.13508994464944631443e-04, + 5.69413764001183153596e-02, 2.01093994355649575698e-14, + 9.41406250000000000000e-01, -2.29779411764705879164e-04, + 6.06246218164869787870e-02, -5.21362063913650408235e-14, + 9.37500000000000000000e-01, 2.28937728937728937530e-04, + 6.42943507054951624013e-02, -9.79051851199021608925e-14, + 9.34570312500000000000e-01, -2.63743156934306572509e-04, + 6.79506619085259444546e-02, -1.81950600301688149235e-14, + 9.30664062500000000000e-01, 2.45028409090909096626e-04, + 7.15936531869374448434e-02, 7.13730822534317801406e-14, + 9.27734375000000000000e-01, -1.98143115942028998078e-04, + 7.52234212375242350390e-02, 6.32906595872454402199e-14, + 9.23828125000000000000e-01, 3.59600631768953083074e-04, + 7.88400617077513743425e-02, 2.46501890617661192316e-14, + 9.20898437500000000000e-01, -3.51281474820143869292e-05, + 8.24436692109884461388e-02, 8.61451293608781447223e-14, + 9.17968750000000000000e-01, -4.06025985663082419983e-04, + 8.60343373417435941519e-02, 5.95592298762564263463e-14, + 9.14062500000000000000e-01, 2.23214285714285707316e-04, + 8.96121586897606903221e-02, -7.35577021943502867846e-14, + 9.11132812500000000000e-01, -1.00784030249110321056e-04, + 9.31772248541165026836e-02, 6.67870851716289831942e-14, + 9.08203125000000000000e-01, -4.01706560283687926730e-04, + 9.67296264584547316190e-02, 9.63806765855227740728e-14, + 9.04296875000000000000e-01, 2.96764575971731443208e-04, + 1.00269453163718935684e-01, -4.37863761707839790971e-14, + 9.01367187500000000000e-01, 4.12632042253521119125e-05, + 1.03796793681567578460e-01, 7.59863659719414144342e-14, + 8.98437500000000000000e-01, -1.91885964912280701945e-04, + 1.07311735789153317455e-01, -6.52667880273107116669e-14, + 8.95507812500000000000e-01, -4.02917395104895122333e-04, + 1.10814366340264314204e-01, 
2.57999912830699022513e-14, + 8.91601562500000000000e-01, 3.84500217770034828473e-04, + 1.14304771280103523168e-01, -4.48895335223869926230e-14, + 8.88671875000000000000e-01, 2.17013888888888876842e-04, + 1.17783035656430001836e-01, -4.65472974759844472568e-14, + 8.85742187500000000000e-01, 7.09612889273356431397e-05, + 1.21249243632973957574e-01, -1.04272412782730081647e-13, + 8.82812500000000000000e-01, -5.38793103448275854592e-05, + 1.24703478501032805070e-01, -7.55692068745133691756e-14, + 8.79882812500000000000e-01, -1.57726589347079046649e-04, + 1.28145822691976718488e-01, -4.66803140394579609437e-14, + 8.76953125000000000000e-01, -2.40796232876712315400e-04, + 1.31576357788617315236e-01, 1.01957352237084734958e-13, + 8.74023437500000000000e-01, -3.03300981228668954746e-04, + 1.34995164537485834444e-01, 1.89961580415787680134e-14, + 8.71093750000000000000e-01, -3.45450680272108847594e-04, + 1.38402322859064952354e-01, 5.41833313790089940464e-14, + 8.68164062500000000000e-01, -3.67452330508474583805e-04, + 1.41797911860294334474e-01, -3.69845950669709681858e-14, + 8.65234375000000000000e-01, -3.69510135135135155647e-04, + 1.45182009844575077295e-01, -7.71800133682809851086e-14, + 8.62304687500000000000e-01, -3.51825547138047162871e-04, + 1.48554694323138392065e-01, -1.24915489807515996540e-15, + 8.59375000000000000000e-01, -3.14597315436241590364e-04, + 1.51916042025732167531e-01, 1.09807540998552379211e-13, + 8.56445312500000000000e-01, -2.58021530100334438914e-04, + 1.55266128911080159014e-01, 4.37925082924060541938e-14, + 8.53515625000000000000e-01, -1.82291666666666674979e-04, + 1.58605030176659056451e-01, -2.04723578004619553937e-14, + 8.50585937500000000000e-01, -8.75986295681063168849e-05, + 1.61932820269385047141e-01, -7.17939001929567730476e-14, + 8.47656250000000000000e-01, 2.58692052980132450107e-05, + 1.65249572895390883787e-01, -8.37209109923591205585e-14, + 8.44726562500000000000e-01, 1.57925948844884475120e-04, + 1.68555361029802952544e-01, 
3.71439775417047191367e-15, + 8.41796875000000000000e-01, 3.08388157894736824986e-04, + 1.71850256926745714736e-01, -8.64923960721207091374e-14, + 8.38867187500000000000e-01, 4.77074795081967189831e-04, + 1.75134332127754532848e-01, 9.46151658066508147714e-14, + 8.36914062500000000000e-01, -3.12755310457516312941e-04, + 1.78407657472916980623e-01, -9.86835038673494943912e-14, + 8.33984375000000000000e-01, -1.08153501628664488934e-04, + 1.81670303107694053324e-01, -5.93750633338470149673e-14, + 8.31054687500000000000e-01, 1.14143668831168828529e-04, + 1.84922338494061477832e-01, -4.94851676612509959777e-14, + 8.28125000000000000000e-01, 3.53964401294498405386e-04, + 1.88163832418240417610e-01, -5.74307839320075599347e-14, + 8.26171875000000000000e-01, -3.65423387096774205090e-04, + 1.91394852999565046048e-01, 6.44085615069689207389e-14, + 8.23242187500000000000e-01, -9.10620980707395479654e-05, + 1.94615467699577493477e-01, 9.41653814571825038763e-14, + 8.20312500000000000000e-01, 2.00320512820512813563e-04, + 1.97825743329985925811e-01, -6.60454487708238395939e-14, + 8.18359375000000000000e-01, -4.68001198083067100272e-04, + 2.01025746060622623190e-01, -3.18818493754377370219e-14, + 8.15429687500000000000e-01, -1.43063296178343944383e-04, + 2.04215541428766300669e-01, -7.54091651195618882501e-14, + 8.12500000000000000000e-01, 1.98412698412698412526e-04, + 2.07395194345963318483e-01, 1.07268675772897325437e-13, + 8.10546875000000000000e-01, -4.20292721518987358927e-04, + 2.10564769107350002741e-01, -3.65071888317905767114e-16, + 8.07617187500000000000e-01, -4.62095820189274421015e-05, + 2.13724329397791734664e-01, -7.35958018644051430164e-14, + 8.04687500000000000000e-01, 3.43946540880503122493e-04, + 2.16873938300523150247e-01, 9.12093724991498410553e-14, + 8.02734375000000000000e-01, -2.26538009404388704197e-04, + 2.20013658305333592580e-01, -5.14966723414140783686e-14, + 7.99804687500000000000e-01, 1.95312500000000010842e-04, + 2.23143551314251453732e-01, 
-4.16979658452719528642e-14, + 7.97851562500000000000e-01, -3.43774338006230513552e-04, + 2.26263678650411748094e-01, 4.16412673028722634501e-14, + 7.94921875000000000000e-01, 1.09180900621118015200e-04, + 2.29374101064877322642e-01, -3.14926506519148377243e-14, + 7.92968750000000000000e-01, -3.99090557275541795833e-04, + 2.32474878743005319848e-01, 8.87450729797463158287e-14, + 7.90039062500000000000e-01, 8.43942901234567854386e-05, + 2.35566071312860003673e-01, -9.30945949519688945136e-14, + 7.88085937500000000000e-01, -3.93629807692307670790e-04, + 2.38647737850214980426e-01, -3.99705090953013414198e-14, + 7.85156250000000000000e-01, 1.19823619631901839909e-04, + 2.41719936887193398434e-01, -4.82302894299408858477e-14, + 7.83203125000000000000e-01, -3.28507262996941896190e-04, + 2.44782726417724916246e-01, -3.39998110836183310018e-14, + 7.80273437500000000000e-01, 2.14367378048780488466e-04, + 2.47836163904594286578e-01, -1.30297971733086634357e-14, + 7.78320312500000000000e-01, -2.04810980243161095543e-04, + 2.50880306285807819222e-01, 1.59736634636249040926e-15, + 7.75390625000000000000e-01, 3.66950757575757553416e-04, + 2.53915209980959843961e-01, 3.60017673263733462441e-15, + 7.73437500000000000000e-01, -2.36027190332326283783e-05, + 2.56940930897599173477e-01, -9.87480301596639169955e-14, + 7.71484375000000000000e-01, -4.00037650602409625492e-04, + 2.59957524436913445243e-01, 1.26217293988853160748e-14, + 7.68554687500000000000e-01, 2.14081268768768768606e-04, + 2.62965045500777705456e-01, 1.03646364598966627113e-13, + 7.66601562500000000000e-01, -1.34496631736526949192e-04, + 2.65963548497211377253e-01, -7.34359136986779711761e-14, + 7.64648437500000000000e-01, -4.69333022388059722691e-04, + 2.68953087345607855241e-01, -1.03896307840029875617e-13, + 7.61718750000000000000e-01, 1.86011904761904751579e-04, + 2.71933715483555715764e-01, 8.60430677280873279668e-14, + 7.59765625000000000000e-01, -1.21708086053412463954e-04, + 2.74905485872750432463e-01, 
4.88167036467699861016e-14, + 7.57812500000000000000e-01, -4.16050295857988176266e-04, + 2.77868451003541849786e-01, -8.55436000656632193091e-14, + 7.54882812500000000000e-01, 2.79429387905604702334e-04, + 2.80822662900845898548e-01, 4.18860913786370112029e-14, + 7.52929687500000000000e-01, 1.14889705882352939582e-05, + 2.83768173130738432519e-01, -9.38341722366369999987e-14, + 7.50976562500000000000e-01, -2.43424670087976540225e-04, + 2.86705032803865833557e-01, 8.84810960400682115458e-14, + 7.49023437500000000000e-01, -4.85425804093567224515e-04, + 2.89633292582948342897e-01, 9.43339818951269030846e-14, + 7.46093750000000000000e-01, 2.61935131195335281235e-04, + 2.92553002686418039957e-01, -4.05999788601512838979e-14, + 7.44140625000000000000e-01, 4.54215116279069761138e-05, + 2.95464212893875810551e-01, -3.99341638438784391272e-14, + 7.42187500000000000000e-01, -1.58514492753623176778e-04, + 2.98366972551775688771e-01, 2.15926937419734905112e-14, + 7.40234375000000000000e-01, -3.49981936416184958877e-04, + 3.01261330578199704178e-01, -3.79231648020931467980e-14, + 7.37304687500000000000e-01, 4.47473883285302582568e-04, + 3.04147335467405355303e-01, -1.08638286797079129552e-13, + 7.35351562500000000000e-01, 2.80621408045976994047e-04, + 3.07025035294827830512e-01, 8.40315630479242455758e-14, + 7.33398437500000000000e-01, 1.25917800859598846179e-04, + 3.09894477722764349892e-01, 1.00337969820392140548e-13, + 7.31445312500000000000e-01, -1.67410714285714294039e-05, + 3.12755710003784770379e-01, 1.12118007403609819830e-13, + 7.29492187500000000000e-01, -1.47458155270655270810e-04, + 3.15608778986415927648e-01, -1.12592746246808286851e-13, + 7.27539062500000000000e-01, -2.66335227272727253015e-04, + 3.18453731118552241242e-01, -1.76254313121726620573e-14, + 7.25585937500000000000e-01, -3.73472910764872500361e-04, + 3.21290612453822177486e-01, -8.78854276997154463823e-14, + 7.23632812500000000000e-01, -4.68970692090395495540e-04, + 3.24119468654316733591e-01, 
-1.04757500587765412913e-13, + 7.20703125000000000000e-01, 4.23635563380281667846e-04, + 3.26940344995819032192e-01, 3.42884001266694615699e-14, + 7.18750000000000000000e-01, 3.51123595505617967782e-04, + 3.29753286372579168528e-01, -1.11186713895593226425e-13, + 7.16796875000000000000e-01, 2.89959733893557422817e-04, + 3.32558337300042694551e-01, 3.39068613367222871432e-14, + 7.14843750000000000000e-01, 2.40048882681564236573e-04, + 3.35355541921217081835e-01, -7.92515783138655870267e-14, + 7.12890625000000000000e-01, 2.01297005571030637044e-04, + 3.38144944008718084660e-01, -1.68695012281303904492e-15, + 7.10937500000000000000e-01, 1.73611111111111117737e-04, + 3.40926586970681455568e-01, -8.82452633212564001210e-14, + 7.08984375000000000000e-01, 1.56899238227146807121e-04, + 3.43700513853264055797e-01, 5.43888832989906475149e-14, + 7.07031250000000000000e-01, 1.51070441988950269954e-04, + 3.46466767346100823488e-01, 1.07757430375726404546e-13, + 7.05078125000000000000e-01, 1.56034779614325073201e-04, + 3.49225389785260631470e-01, 2.76727112657366262202e-14, + 7.03125000000000000000e-01, 1.71703296703296716700e-04, + 3.51976423157111639739e-01, 6.65449164332479482515e-14, + 7.01171875000000000000e-01, 1.97988013698630136838e-04, + 3.54719909102868768969e-01, 6.02593863918127820941e-14, + 6.99218750000000000000e-01, 2.34801912568306000561e-04, + 3.57455888921776931966e-01, 2.68422602858563731995e-14, + 6.97265625000000000000e-01, 2.82058923705722061539e-04, + 3.60184403574976386153e-01, 3.14101284357935074430e-14, + 6.95312500000000000000e-01, 3.39673913043478251442e-04, + 3.62905493689368086052e-01, 3.67085697163493829481e-16, + 6.93359375000000000000e-01, 4.07562669376693761502e-04, + 3.65619199561024288414e-01, -5.95770946492931122703e-14, + 6.91406250000000000000e-01, 4.85641891891891918850e-04, + 3.68325561158599157352e-01, 1.08495696229679121506e-13, + 6.90429687500000000000e-01, -4.02733322102425902751e-04, + 3.71024618127876237850e-01, 
-3.57393774001043846673e-15, + 6.88476562500000000000e-01, -3.04519489247311828540e-04, + 3.73716409793587445165e-01, -3.36434401382552911606e-15, + 6.86523437500000000000e-01, -1.96359752010723855866e-04, + 3.76400975164187912014e-01, 6.51539835645912724894e-14, + 6.84570312500000000000e-01, -7.83338903743315521791e-05, + 3.79078352935039220029e-01, -6.97616377035377091917e-14, + 6.82617187500000000000e-01, 4.94791666666666654379e-05, + 3.81748581490910510183e-01, -6.21703236457339082579e-14, + 6.80664062500000000000e-01, 1.87001329787234041400e-04, + 3.84411698910298582632e-01, 3.34571026954408237380e-14, + 6.78710937500000000000e-01, 3.34155338196286447704e-04, + 3.87067742968383754487e-01, 6.45334117530848658606e-14, + 6.77734375000000000000e-01, -4.85697751322751295790e-04, + 3.89716751139985717600e-01, 3.94957702521028807100e-14, + 6.75781250000000000000e-01, -3.19508575197889187636e-04, + 3.92358760602974143694e-01, -1.10271214775306207128e-13, + 6.73828125000000000000e-01, -1.43914473684210512906e-04, + 3.94993808240769794793e-01, 9.91833135258393974771e-14, + 6.71875000000000000000e-01, 4.10104986876640414256e-05, + 3.97621930647119370406e-01, 1.91186992668509687992e-14, + 6.69921875000000000000e-01, 2.35193062827225135005e-04, + 4.00243164127005002229e-01, 7.70470078193964863175e-15, + 6.67968750000000000000e-01, 4.38560704960835531785e-04, + 4.02857544701191727654e-01, -1.08212998879547184399e-13, + 6.66992187500000000000e-01, -3.25520833333333315263e-04, + 4.05465108108273852849e-01, -1.09470871366066397592e-13, + 6.65039062500000000000e-01, -1.03997564935064929046e-04, + 4.08065889808312931564e-01, -9.11831335065229488419e-14, + 6.63085937500000000000e-01, 1.26497733160621750282e-04, + 4.10659924985338875558e-01, -7.04896239210974659112e-14, + 6.61132812500000000000e-01, 3.65895510335917330171e-04, + 4.13247248550305812387e-01, -8.64814613198628863840e-14, + 6.60156250000000000000e-01, -3.62435567010309291763e-04, + 4.15827895143820569501e-01, 
-1.09603887929539904968e-13, + 6.58203125000000000000e-01, -1.05438624678663237367e-04, + 4.18401899138871158357e-01, 1.26591539849383157019e-14, + 6.56250000000000000000e-01, 1.60256410256410256271e-04, + 4.20969294644237379543e-01, -1.07743414616095792458e-13, + 6.54296875000000000000e-01, 4.34582800511508948911e-04, + 4.23530115505855064839e-01, -5.17691206942015446275e-14, + 6.53320312500000000000e-01, -2.59088010204081649248e-04, + 4.26084395310908803367e-01, -8.74024251107295313295e-15, + 6.51367187500000000000e-01, 3.23035941475826945284e-05, + 4.28632167389650931000e-01, 4.78292070340653116123e-14, + 6.49414062500000000000e-01, 3.32130393401015248239e-04, + 4.31173464818357388140e-01, 1.39527194700992522593e-14, + 6.48437500000000000000e-01, -3.36234177215189876300e-04, + 4.33708320421601456474e-01, -4.20630377335898599132e-14, + 6.46484375000000000000e-01, -1.97285353535353552123e-05, + 4.36236766774982243078e-01, -6.41727287881571093141e-14, + 6.44531250000000000000e-01, 3.05022040302267011258e-04, + 4.38758836207625790848e-01, 2.14689717834000941735e-15, + 6.43554687500000000000e-01, -3.38607097989949751195e-04, + 4.41274560804913562606e-01, -3.83331165923754571982e-14, + 6.41601562500000000000e-01, 2.44752506265664146815e-06, + 4.43783972410301430500e-01, -4.49328344033376536063e-16, + 6.39648437500000000000e-01, 3.51562499999999986990e-04, + 4.46287102628502907464e-01, -8.33959316905439057284e-14, + 6.38671875000000000000e-01, -2.67884975062344151547e-04, + 4.48783982827080762945e-01, -7.40524322934505657145e-14, + 6.36718750000000000000e-01, 9.71703980099502536783e-05, + 4.51274644139402880683e-01, 5.57044620824077391343e-14, + 6.34765625000000000000e-01, 4.70107009925558303777e-04, + 4.53759117467143369140e-01, -2.28624953086649163255e-14, + 6.33789062500000000000e-01, -1.25696163366336636884e-04, + 4.56237433481646803557e-01, -5.92091761359114736879e-14, + 6.31835937500000000000e-01, 2.62827932098765450035e-04, + 4.58709622626884083729e-01, 
9.25811146459912121009e-14, + 6.30859375000000000000e-01, -3.17503078817733981869e-04, + 4.61175715122180918115e-01, -1.07517471912360339462e-14, + 6.28906250000000000000e-01, 8.63789926289926251633e-05, + 4.63635740963127318537e-01, -9.48054446804536471658e-14, + 6.27929687500000000000e-01, -4.78707107843137234706e-04, + 4.66089729924533457961e-01, 6.57665976858006147528e-14, + 6.25976562500000000000e-01, -5.96920843520782368088e-05, + 4.68537711563158154604e-01, 8.11157716400523519546e-14, + 6.24023437500000000000e-01, 3.66806402439024390773e-04, + 4.70979715218845740310e-01, -5.47277630185806178777e-14, + 6.23046875000000000000e-01, -1.75828771289537715006e-04, + 4.73415770016572423629e-01, 9.97077440469968501191e-14, + 6.21093750000000000000e-01, 2.65473300970873776934e-04, + 4.75845904869856894948e-01, 1.07019317621142549209e-13, + 6.20117187500000000000e-01, -2.62465950363196100312e-04, + 4.78270148481442447519e-01, 2.78328646163063623105e-14, + 6.18164062500000000000e-01, 1.93425422705314001282e-04, + 4.80688529345798087888e-01, -4.61802117788209510607e-14, + 6.17187500000000000000e-01, -3.20030120481927722077e-04, + 4.83101075751164898975e-01, -2.90762364463866399448e-14, + 6.15234375000000000000e-01, 1.50240384615384623725e-04, + 4.85507815781602403149e-01, 9.84046527823262695501e-14, + 6.14257812500000000000e-01, -3.48939598321342924619e-04, + 4.87908777319262298988e-01, -2.33257420051882497138e-14, + 6.12304687500000000000e-01, 1.35503887559808614775e-04, + 4.90303988045297955978e-01, -1.04117827384293371195e-13, + 6.11328125000000000000e-01, -3.49604713603818609800e-04, + 4.92693475442592898617e-01, -1.76429214903040463891e-14, + 6.09375000000000000000e-01, 1.48809523809523822947e-04, + 4.95077266797807169496e-01, 4.43451018828153751026e-14, + 6.08398437500000000000e-01, -3.22427998812351533642e-04, + 4.97455389202741571353e-01, 7.73708980421385689768e-14, + 6.06445312500000000000e-01, 1.89758590047393372637e-04, + 4.99827869556384030147e-01, 
6.52996738757825591006e-14, + 6.05468750000000000000e-01, -2.67804373522458635890e-04, + 5.02194734566728584468e-01, -1.30901947805436250965e-14, + 6.03515625000000000000e-01, 2.57959905660377355422e-04, + 5.04556010752367001260e-01, 2.82857986090678938760e-14, + 6.02539062500000000000e-01, -1.86121323529411759412e-04, + 5.06911724444762512576e-01, 9.18415373613231066159e-14, + 6.00585937500000000000e-01, 3.53029636150234741275e-04, + 5.09261901789841431309e-01, -3.34845053941249831574e-14, + 5.99609375000000000000e-01, -7.77590749414519956471e-05, + 5.11606568749130019569e-01, -6.79410499533039142111e-14, + 5.97656250000000000000e-01, 4.74591121495327101284e-04, + 5.13945751102255599108e-01, -2.12823065872096837292e-14, + 5.96679687500000000000e-01, 5.69092365967365941461e-05, + 5.16279474448538167053e-01, -8.36708800829965016511e-14, + 5.95703125000000000000e-01, -3.54287790697674440793e-04, + 5.18607764208127264283e-01, -8.16321296891503919914e-14, + 5.93750000000000000000e-01, 2.17517401392111359854e-04, + 5.20930645624275712180e-01, -9.03997701415351032573e-14, + 5.92773437500000000000e-01, -1.80844907407407397368e-04, + 5.23248143764476481010e-01, 7.13555066011812146304e-14, + 5.90820312500000000000e-01, 4.03705975750577367080e-04, + 5.25560283522963800351e-01, -3.64289687078304118459e-14, + 5.89843750000000000000e-01, 1.80011520737327188784e-05, + 5.27867089620940532768e-01, -9.81476542529858082436e-14, + 5.88867187500000000000e-01, -3.61440373563218372236e-04, + 5.30168586609079284244e-01, 4.23335972026522927116e-14, + 5.86914062500000000000e-01, 2.41900802752293591410e-04, + 5.32464798869568767259e-01, -9.69233849737002813365e-14, + 5.85937500000000000000e-01, -1.25143020594965678717e-04, + 5.34755750616113800788e-01, -8.61253103749572066304e-14, + 5.84960937500000000000e-01, -4.86051655251141525530e-04, + 5.37041465896891168086e-01, -7.51351912898166894415e-15, + 5.83007812500000000000e-01, 1.35695472665148063720e-04, + 5.39321968595686485060e-01, 
-7.76104042041871663206e-14, + 5.82031250000000000000e-01, -2.13068181818181807833e-04, + 5.41597282432803694974e-01, -5.93233971574446149215e-14, + 5.80078125000000000000e-01, 4.20741213151927453007e-04, + 5.43867430967338805203e-01, -5.52875399870574035452e-14, + 5.79101562500000000000e-01, 8.39578619909502261217e-05, + 5.46132437598089381936e-01, 4.62684463909612350375e-14, + 5.78125000000000000000e-01, -2.46896162528216717505e-04, + 5.48392325565600913251e-01, -2.77505026685624314655e-14, + 5.76171875000000000000e-01, 4.04701576576576562902e-04, + 5.50647117952621556469e-01, 4.07227907088846767786e-14, + 5.75195312500000000000e-01, 8.55863764044943823575e-05, + 5.52896837686603248585e-01, 7.44889957023668801898e-14, + 5.74218750000000000000e-01, -2.27718609865470858825e-04, + 5.55141507540611200966e-01, -1.09608250460592783688e-13, + 5.72265625000000000000e-01, 4.41310123042505588354e-04, + 5.57381150134006020380e-01, 3.36669632485986549666e-16, + 5.71289062500000000000e-01, 1.39508928571428563684e-04, + 5.59615787935399566777e-01, 2.31194938380053776320e-14, + 5.70312500000000000000e-01, -1.56597995545657025672e-04, + 5.61845443262654953287e-01, 3.68646286817464054051e-14, + 5.69335937500000000000e-01, -4.47048611111111116653e-04, + 5.64070138284705535625e-01, 9.74304462767037064935e-14, + 5.67382812500000000000e-01, 2.44681956762749441229e-04, + 5.66289895023146527819e-01, -3.06552284854813270707e-14, + 5.66406250000000000000e-01, -3.45685840707964596973e-05, + 5.68504735352689749561e-01, -2.10374825114449422873e-14, + 5.65429687500000000000e-01, -3.08274696467991172252e-04, + 5.70714681003437362961e-01, 3.41818930848065350178e-14, + 5.63476562500000000000e-01, 4.00089482378854644894e-04, + 5.72919753561791367247e-01, -5.85815401264202219115e-15, + 5.62500000000000000000e-01, 1.37362637362637362518e-04, + 5.75119974471363093471e-01, 2.48469505879759890764e-14, + 5.61523437500000000000e-01, -1.19928728070175431939e-04, + 5.77315365034792193910e-01, 
3.14104080050449590607e-14, + 5.60546875000000000000e-01, -3.71820295404814028101e-04, + 5.79505946414656136767e-01, -1.39129117330010386790e-14, + 5.58593750000000000000e-01, 3.58215065502183428129e-04, + 5.81691739634607074549e-01, 1.54079711890856738893e-14, + 5.57617187500000000000e-01, 1.17017293028322439969e-04, + 5.83872765580963459797e-01, 1.92193002098161738068e-14, + 5.56640625000000000000e-01, -1.18885869565217396136e-04, + 5.86049045003619539784e-01, -4.13308801481084566682e-14, + 5.55664062500000000000e-01, -3.49528877440347096866e-04, + 5.88220598517182224896e-01, -9.61818609368988642797e-14, + 5.53710937500000000000e-01, 4.01616612554112561388e-04, + 5.90387446602107957006e-01, 6.84176364159146659095e-14, + 5.52734375000000000000e-01, 1.81391738660907137675e-04, + 5.92549609606749072555e-01, -7.74738125310530505286e-14, + 5.51757812500000000000e-01, -3.36745689655172409120e-05, + 5.94707107746671681525e-01, 2.11079891578422983965e-14, + 5.50781250000000000000e-01, -2.43615591397849451990e-04, + 5.96859961107838898897e-01, -4.50623098590974831636e-14, + 5.49804687500000000000e-01, -4.48464324034334772557e-04, + 5.99008189646156097297e-01, -7.26979150253512871478e-14, + 5.47851562500000000000e-01, 3.28309020342612404610e-04, + 6.01151813189289896400e-01, 4.49397919602643900279e-14, + 5.46875000000000000000e-01, 1.33547008547008560445e-04, + 6.03290851438032404985e-01, 5.18573553063418286042e-14, + 5.45898437500000000000e-01, -5.62200159914712159731e-05, + 6.05425323966755968286e-01, -3.90788481567525388100e-14, + 5.44921875000000000000e-01, -2.41023936170212761459e-04, + 6.07555250224550036364e-01, -8.24086314983113070392e-15, + 5.43945312500000000000e-01, -4.20896364118895980992e-04, + 6.09680649536812779843e-01, 4.24936389576037736371e-14, + 5.41992187500000000000e-01, 3.80693855932203405450e-04, + 6.11801541105933210929e-01, 5.96926009653846962309e-14, + 5.41015625000000000000e-01, 2.10590644820295982628e-04, + 6.13917944012428051792e-01, 
-5.75595951560511011845e-14, + 5.40039062500000000000e-01, 4.53256856540084409344e-05, + 6.16029877215623855591e-01, -1.09835943254384298330e-13, + 5.39062500000000000000e-01, -1.15131578947368418456e-04, + 6.18137359555021248525e-01, 5.74853476805674446129e-14, + 5.38085937500000000000e-01, -2.70811449579831946440e-04, + 6.20240409751886545564e-01, -2.90167125533596631915e-14, + 5.37109375000000000000e-01, -4.21743972746331215531e-04, + 6.22339046408797003096e-01, -1.82614988669165533809e-14, + 5.35156250000000000000e-01, 4.08603556485355630390e-04, + 6.24433288011914555682e-01, -2.10546393306435734475e-14, + 5.34179687500000000000e-01, 2.67076591858037557577e-04, + 6.26523152931440563407e-01, -8.78036279744035513715e-14, + 5.33203125000000000000e-01, 1.30208333333333331526e-04, + 6.28608659422297932906e-01, 7.62048382318937090230e-14, + 5.32226562500000000000e-01, -2.03027546777546788817e-06, + 6.30689825626177480444e-01, 2.12246394140452907525e-14, + 5.31250000000000000000e-01, -1.29668049792531120444e-04, + 6.32766669571083184564e-01, -4.53550186996774688761e-14, + 5.30273437500000000000e-01, -2.52733566252587998902e-04, + 6.34839209172923801816e-01, 8.64101534252508178520e-14, + 5.29296875000000000000e-01, -3.71255165289256208652e-04, + 6.36907462237104482483e-01, -3.52508626243453241145e-14, + 5.28320312500000000000e-01, -4.85260953608247433411e-04, + 6.38971446457844649558e-01, 7.60718216684202016469e-14, + 5.26367187500000000000e-01, 3.81783693415637856959e-04, + 6.41031179420906482846e-01, 2.48082091251967673736e-14, + 5.25390625000000000000e-01, 2.76726129363449673514e-04, + 6.43086678603140171617e-01, -1.12856225215656411367e-13, + 5.24414062500000000000e-01, 1.76101434426229513973e-04, + 6.45137961373620782979e-01, -3.60813136042255739798e-14, + 5.23437500000000000000e-01, 7.98824130879345644567e-05, + 6.47185044995239877608e-01, 6.96725146472247760395e-14, + 5.22460937500000000000e-01, -1.19579081632653055071e-05, + 6.49227946625160257099e-01, 
-5.04382083563449091526e-14, + 5.21484375000000000000e-01, -9.94462830957230209915e-05, + 6.51266683315043337643e-01, -8.52342468131615437746e-14, + 5.20507812500000000000e-01, -1.82609247967479665321e-04, + 6.53301272012640765752e-01, 1.04873006903856996874e-13, + 5.19531250000000000000e-01, -2.61473123732251517670e-04, + 6.55331729563158660312e-01, -3.10282172335227455825e-14, + 5.18554687500000000000e-01, -3.36064018218623454786e-04, + 6.57358072708348117885e-01, 1.19122567102055698791e-14, + 5.17578125000000000000e-01, -4.06407828282828297722e-04, + 6.59380318089233696810e-01, -1.05870694633429062178e-13, + 5.16601562500000000000e-01, -4.72530241935483884957e-04, + 6.61398482245431296178e-01, -6.62879179039074743232e-14, + 5.14648437500000000000e-01, 4.42105759557344087183e-04, + 6.63412581616967145237e-01, 9.91058598099467920662e-14, + 5.13671875000000000000e-01, 3.84349899598393583006e-04, + 6.65422632545187298092e-01, -9.68491419671810783613e-14, + 5.12695312500000000000e-01, 3.30739604208416838882e-04, + 6.67428651271848139004e-01, 1.08050943383646665619e-13, + 5.11718750000000000000e-01, 2.81249999999999978750e-04, + 6.69430653942526987521e-01, 1.02279777907416200886e-13, + 5.10742187500000000000e-01, 2.35856412175648700539e-04, + 6.71428656605257856427e-01, 4.44668903784876907111e-14, + 5.09765625000000000000e-01, 1.94534362549800786662e-04, + 6.73422675212123067467e-01, 4.36528304869414810551e-14, + 5.08789062500000000000e-01, 1.57259567594433401650e-04, + 6.75412725620162746054e-01, 1.39850267837821649808e-14, + 5.07812500000000000000e-01, 1.24007936507936501053e-04, + 6.77398823591829568613e-01, -2.34278036379790696248e-14, + 5.06835937500000000000e-01, 9.47555693069306959140e-05, + 6.79380984795898257289e-01, -1.00907141981183426552e-13, + 5.05859375000000000000e-01, 6.94787549407114679145e-05, + 6.81359224807920327294e-01, -1.72583456150091690167e-14, + 5.04882812500000000000e-01, 4.81539694280078915244e-05, + 6.83333559111588328960e-01, 
3.23592040115024425781e-14, + 5.03906250000000000000e-01, 3.07578740157480310692e-05, + 6.85304003098963221419e-01, -4.38048746232309815355e-14, + 5.02929687500000000000e-01, 1.72673133595284864178e-05, + 6.87270572070929119946e-01, 3.11475515031130920163e-14, + 5.01953125000000000000e-01, 7.65931372549019597214e-06, + 6.89233281238784911693e-01, 2.40686318405286681994e-14, + 5.00976562500000000000e-01, 1.91108121330724059841e-06, + 6.91192145724244255689e-01, -1.02296829368141946888e-13, +}; + +static const double C[] = { + 6.93147180559890330187e-01, + 5.49792301870837115524e-14, + -0.5, + 3.33333333332438282293284931714682042701467889609e-0001, + -2.49999999998669026809069285994497705748522309858e-0001, + 2.00000758613044543658508591796951886624273250472e-0001, + -1.66667492411916229281646821123333564982955309481e-0001, + 4503599627370496.0, + 0.0 +}; + +#define ln2hi C[0] +#define ln2lo C[1] +#define mhalf C[2] +#define P3 C[3] +#define P4 C[4] +#define P5 C[5] +#define P6 C[6] +#define two52 C[7] +#define zero C[8] + +#define PROCESS(N) \ + i##N = (i##N + 0x800) & ~0xfff; \ + e = (i##N & 0x7ff00000) - 0x3ff00000; \ + z##N.i[HIWORD] -= e; \ + w##N.i[HIWORD] = i##N - e; \ + w##N.i[LOWORD] = 0; \ + n##N += (e >> 20); \ + i##N = (i##N >> 10) & 0x3fc; \ + d##N = z##N.d - w##N.d; \ + h##N = d##N * TBL[i##N]; \ + l##N = d##N * TBL[i##N+1]; \ + s##N = h##N + l##N; \ + b##N = (s##N * s##N) * (mhalf + s##N * (P3 + s##N * (P4 + \ + s##N * (P5 + s##N * P6)))); \ + *y = (n##N * ln2hi + TBL[i##N+2]) + (h##N + (l##N + \ + (n##N * ln2lo + TBL[i##N+3]) + b##N)); \ + y += stridey + +#define PREPROCESS(N, index, label) \ + i##N = HI(*x); \ + z##N.d = *x; \ + x += stridex; \ + n##N = 0; \ + if ((i##N & 0x7ff00000) == 0x7ff00000) { /* inf or NaN */ \ + y[index] = z##N.d * ((i##N < 0)? 
zero : z##N.d); \ + goto label; \ + } else if (i##N < 0x00100000) { /* subnormal, negative, zero */ \ + if (((i##N << 1) | z##N.i[LOWORD]) == 0) { \ + y[index] = mhalf / zero; \ + goto label; \ + } else if (i##N < 0) { \ + y[index] = zero / zero; \ + goto label; \ + } \ + z##N.d *= two52; \ + n##N = -52; \ + i##N = z##N.i[HIWORD]; \ + } + +void +__vlog(int n, double *restrict x, int stridex, double *restrict y, + int stridey) +{ + union { + unsigned i[2]; + double d; + } z0, z1, z2, z3, w0, w1, w2, w3; + double b0, b1, b2, b3; + double d0, d1, d2, d3; + double h0, h1, h2, h3; + double l0, l1, l2, l3; + double s0, s1, s2, s3; + int i0, i1, i2, i3, e; + int n0, n1, n2, n3; + + w0.i[LOWORD] = 0; + w1.i[LOWORD] = 0; + w2.i[LOWORD] = 0; + w3.i[LOWORD] = 0; + + y -= stridey; + + for (;;) { +begin: + y += stridey; + + if (--n < 0) + break; + + PREPROCESS(0, 0, begin); + + if (--n < 0) + goto process1; + + PREPROCESS(1, stridey, process1); + + if (--n < 0) + goto process2; + + PREPROCESS(2, (stridey << 1), process2); + + if (--n < 0) + goto process3; + + PREPROCESS(3, (stridey << 1) + stridey, process3); + + i0 = (i0 + 0x800) & ~0xfff; + e = (i0 & 0x7ff00000) - 0x3ff00000; + z0.i[HIWORD] -= e; + w0.i[HIWORD] = i0 - e; + n0 += (e >> 20); + i0 = (i0 >> 10) & 0x3fc; + + i1 = (i1 + 0x800) & ~0xfff; + e = (i1 & 0x7ff00000) - 0x3ff00000; + z1.i[HIWORD] -= e; + w1.i[HIWORD] = i1 - e; + n1 += (e >> 20); + i1 = (i1 >> 10) & 0x3fc; + + i2 = (i2 + 0x800) & ~0xfff; + e = (i2 & 0x7ff00000) - 0x3ff00000; + z2.i[HIWORD] -= e; + w2.i[HIWORD] = i2 - e; + n2 += (e >> 20); + i2 = (i2 >> 10) & 0x3fc; + + i3 = (i3 + 0x800) & ~0xfff; + e = (i3 & 0x7ff00000) - 0x3ff00000; + z3.i[HIWORD] -= e; + w3.i[HIWORD] = i3 - e; + n3 += (e >> 20); + i3 = (i3 >> 10) & 0x3fc; + + d0 = z0.d - w0.d; + d1 = z1.d - w1.d; + d2 = z2.d - w2.d; + d3 = z3.d - w3.d; + + h0 = d0 * TBL[i0]; + h1 = d1 * TBL[i1]; + h2 = d2 * TBL[i2]; + h3 = d3 * TBL[i3]; + + l0 = d0 * TBL[i0+1]; + l1 = d1 * TBL[i1+1]; + l2 = d2 * 
TBL[i2+1]; + l3 = d3 * TBL[i3+1]; + + s0 = h0 + l0; + s1 = h1 + l1; + s2 = h2 + l2; + s3 = h3 + l3; + + b0 = (s0 * s0) * (mhalf + s0 * (P3 + s0 * (P4 + + s0 * (P5 + s0 * P6)))); + b1 = (s1 * s1) * (mhalf + s1 * (P3 + s1 * (P4 + + s1 * (P5 + s1 * P6)))); + b2 = (s2 * s2) * (mhalf + s2 * (P3 + s2 * (P4 + + s2 * (P5 + s2 * P6)))); + b3 = (s3 * s3) * (mhalf + s3 * (P3 + s3 * (P4 + + s3 * (P5 + s3 * P6)))); + + *y = (n0 * ln2hi + TBL[i0+2]) + (h0 + (l0 + + (n0 * ln2lo + TBL[i0+3]) + b0)); + y += stridey; + *y = (n1 * ln2hi + TBL[i1+2]) + (h1 + (l1 + + (n1 * ln2lo + TBL[i1+3]) + b1)); + y += stridey; + *y = (n2 * ln2hi + TBL[i2+2]) + (h2 + (l2 + + (n2 * ln2lo + TBL[i2+3]) + b2)); + y += stridey; + *y = (n3 * ln2hi + TBL[i3+2]) + (h3 + (l3 + + (n3 * ln2lo + TBL[i3+3]) + b3)); + continue; + +process1: + PROCESS(0); + continue; + +process2: + PROCESS(0); + PROCESS(1); + continue; + +process3: + PROCESS(0); + PROCESS(1); + PROCESS(2); + } +} diff --git a/usr/src/lib/libmvec/common/__vlogf.c b/usr/src/lib/libmvec/common/__vlogf.c new file mode 100644 index 0000000000..e8eedd363a --- /dev/null +++ b/usr/src/lib/libmvec/common/__vlogf.c @@ -0,0 +1,262 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifdef __RESTRICT +#define restrict _Restrict +#else +#define restrict +#endif + +/* float logf(float x) + * + * Method : + * 1. Special cases: + * for x is negative, -Inf => QNaN + invalid; + * for x = 0 => -Inf + divide-by-zero; + * for x = +Inf => Inf; + * for x = NaN => QNaN. + * 2. Computes logarithm from: + * x = m * 2**n => log(x) = n * log(2) + log(m), + * m = [1, 2). + * Let m = m0 + dm, where m0 = 1 + k / 32, + * k = [0, 32], + * dm = [-1/64, 1/64]. + * Then log(m) = log(m0 + dm) = log(m0) + log(1+y), + * where y = dm*(1/m0), y = [-1/66, 1/64]. + * Then + * 1/m0 is looked up in a table of 1, 1/(1+1/32), ..., 1/(1+32/32); + * log(m0) is looked up in a table of log(1), log(1+1/32), + * ..., log(1+32/32). + * log(1+y) is computed using approximation: + * log(1+y) = ((a3*y + a2)*y + a1)*y*y + y. + * Accuracy: + * The maximum relative error for the approximating + * polynomial is 2**(-28.41). All calculations are of + * double precision. + * Maximum error observed: less than 0.545 ulp for the + * whole float type range. 
+ */ + +static const double __TBL_logf[] = { + /* __TBL_logf[2*i] = log(1+i/32), i = [0, 32] */ + /* __TBL_logf[2*i+1] = 2**(-23)/(1+i/32), i = [0, 32] */ +0.000000000000000000e+00, 1.192092895507812500e-07, 3.077165866675368733e-02, +1.155968868371212153e-07, 6.062462181643483994e-02, 1.121969784007352926e-07, +8.961215868968713805e-02, 1.089913504464285680e-07, 1.177830356563834557e-01, +1.059638129340277719e-07, 1.451820098444978890e-01, 1.030999260979729787e-07, +1.718502569266592284e-01, 1.003867701480263102e-07, 1.978257433299198675e-01, +9.781275040064102225e-08, 2.231435513142097649e-01, 9.536743164062500529e-08, +2.478361639045812692e-01, 9.304139672256097884e-08, 2.719337154836417580e-01, +9.082612537202380448e-08, 2.954642128938358980e-01, 8.871388989825581272e-08, +3.184537311185345887e-01, 8.669766512784091150e-08, 3.409265869705931928e-01, +8.477105034722222546e-08, 3.629054936893684746e-01, 8.292820142663043248e-08, +3.844116989103320559e-01, 8.116377160904255122e-08, 4.054651081081643849e-01, +7.947285970052082892e-08, 4.260843953109000881e-01, 7.785096460459183052e-08, +4.462871026284195297e-01, 7.629394531250000159e-08, 4.660897299245992387e-01, +7.479798560049019504e-08, 4.855078157817008244e-01, 7.335956280048077330e-08, +5.045560107523953119e-01, 7.197542010613207272e-08, 5.232481437645478684e-01, +7.064254195601851460e-08, 5.415972824327444091e-01, 6.935813210227272390e-08, +5.596157879354226594e-01, 6.811959402901785336e-08, 5.773153650348236132e-01, +6.692451343201754014e-08, 5.947071077466927758e-01, 6.577064251077586116e-08, +6.118015411059929409e-01, 6.465588585805084723e-08, 6.286086594223740942e-01, +6.357828776041666578e-08, 6.451379613735847007e-01, 6.253602074795082293e-08, +6.613984822453650159e-01, 6.152737525201612732e-08, 6.773988235918061429e-01, +6.055075024801586965e-08, 6.931471805599452862e-01, 5.960464477539062500e-08 +}; + +static const double + K3 = -2.49887584306188944706e-01, + K2 = 3.33368809981254554946e-01, + K1 = 
-5.00000008402474976565e-01; +/* 0x7f800000 is the IEEE single-precision bit pattern of +Inf; inf.f reads it back as a float. */ +static const union { + int i; + float f; +} inf = { 0x7f800000 }; + +#define INF inf.f +/* + * PROCESS(N): finish lane N from the ival##N and exp##N left by PREPROCESS. + * The 23 mantissa bits are split into a table point (rounded to a multiple + * of 2**18, giving the even index i##N = 0, 2, ..., 64) and a signed + * remainder iy##N. The result is exp * LN2 + __TBL_logf[i] plus a + * polynomial in yy = iy * __TBL_logf[i + 1]; it is stored at y[0] and y is + * then advanced by stridey. Uses the caller's locals LN2, y and stridey. + */ +#define PROCESS(N) \ + iy##N = ival##N & 0x007fffff; /* mantissa bits */ \ + ival##N = (iy##N + 0x20000) & 0xfffc0000; /* round to the nearest multiple of 2**18 */ \ + i##N = ival##N >> 17; /* even table index */ \ + iy##N = iy##N - ival##N; /* signed remainder */ \ + ty##N = LN2 * (double) exp##N + __TBL_logf[i##N]; \ + yy##N = (double) iy##N * __TBL_logf[i##N + 1]; \ + yy##N = ((K3 * yy##N + K2) * yy##N + K1) * yy##N * yy##N + yy##N; \ + y[0] = (float)(yy##N + ty##N); \ + y += stridey; +/* + * PREPROCESS(N, index, label): load the next input as raw bits (ival##N) + * and as a float (value), advance x by stridex and set exp##N to the + * exponent field minus 127. NaN, Inf, zero and negative inputs are + * completed here: the result goes to y[index] and control jumps to label + * (zero gives -1/0, negative gives 0/0). A positive denormal is + * renormalized by converting its bit pattern to float, which scales it by + * 2**149; exp##N is corrected by that amount. + */ +#define PREPROCESS(N, index, label) \ + ival##N = *(int*)x; /* NOTE(review): reads a float through int* - relies on the build tolerating this aliasing; confirm */ \ + value = x[0]; \ + x += stridex; \ + exp##N = (ival##N >> 23) - 127; \ + if ((ival##N & 0x7fffffff) >= 0x7f800000) /* X = NaN or Inf */ \ + { \ + y[index] = value + INF; /* NaN and +Inf pass through; -Inf + Inf gives NaN */ \ + goto label; \ + } \ + if (ival##N < 0x00800000) /* signed compare: denormal, zero, or sign bit set */ \ + { \ + if (ival##N > 0) /* X = denormal */ \ + { \ + value = (float) ival##N; /* integer mantissa as a float, i.e. x * 2**149 */ \ + ival##N = *(int*) &value; \ + exp##N = (ival##N >> 23) - (127 + 149); \ + } \ + else \ + { \ + value = 0.0f; /* zero divisor for the -1/0 and 0/0 results below */ \ + y[index] = ((ival##N & 0x7fffffff) == 0) ? 
\ + -1.0f / value : value / value; \ + goto label; \ + } \ + } +/* + * __vlogf: y[i * stridey] = logf(x[i * stridex]) for i = 0 .. n - 1; nothing + * is stored when n <= 0. + * + * Five elements are taken per pass. PREPROCESS loads and classifies each + * one; if it completes an element itself (special operand) or n runs out, + * control goes to processK, which runs PROCESS on the K lanes already + * prepared. Otherwise the straight-line code below performs the PROCESS + * steps for lanes 0-4, one step at a time across all five lanes. + */ +void +__vlogf(int n, float * restrict x, int stridex, float * restrict y, + int stridey) +{ + double LN2 = __TBL_logf[64]; /* log(2) = 0.6931471805599453094 */ + double yy0, yy1, yy2, yy3, yy4; + double ty0, ty1, ty2, ty3, ty4; + float value; + int i0, i1, i2, i3, i4; + int ival0, ival1, ival2, ival3, ival4; + int exp0, exp1, exp2, exp3, exp4; + int iy0, iy1, iy2, iy3, iy4; + + y -= stridey; /* cancelled by the y += stridey at begin: */ + + for (; ;) + { +begin: + y += stridey; + + if (--n < 0) + break; + + PREPROCESS(0, 0, begin) + + if (--n < 0) + goto process1; /* input exhausted: only lane 0 is pending */ + + PREPROCESS(1, stridey, process1) + + if (--n < 0) + goto process2; + + PREPROCESS(2, (stridey << 1), process2) + + if (--n < 0) + goto process3; + + PREPROCESS(3, (stridey << 1) + stridey, process3) + + if (--n < 0) + goto process4; + + PREPROCESS(4, (stridey << 2), process4) + + iy0 = ival0 & 0x007fffff; /* step 1: mantissa bits of each lane */ + iy1 = ival1 & 0x007fffff; + iy2 = ival2 & 0x007fffff; + iy3 = ival3 & 0x007fffff; + iy4 = ival4 & 0x007fffff; + + ival0 = (iy0 + 0x20000) & 0xfffc0000; /* step 2: round to the nearest multiple of 2**18 */ + ival1 = (iy1 + 0x20000) & 0xfffc0000; + ival2 = (iy2 + 0x20000) & 0xfffc0000; + ival3 = (iy3 + 0x20000) & 0xfffc0000; + ival4 = (iy4 + 0x20000) & 0xfffc0000; + + i0 = ival0 >> 17; /* step 3: even index into __TBL_logf, 0 .. 64 */ + i1 = ival1 >> 17; + i2 = ival2 >> 17; + i3 = ival3 >> 17; + i4 = ival4 >> 17; + + iy0 = iy0 - ival0; /* step 4: signed remainder from the table point */ + iy1 = iy1 - ival1; + iy2 = iy2 - ival2; + iy3 = iy3 - ival3; + iy4 = iy4 - ival4; + + ty0 = LN2 * (double) exp0 + __TBL_logf[i0]; /* step 5: exponent term plus table value */ + ty1 = LN2 * (double) exp1 + __TBL_logf[i1]; + ty2 = LN2 * (double) exp2 + __TBL_logf[i2]; + ty3 = LN2 * (double) exp3 + __TBL_logf[i3]; + ty4 = LN2 * (double) exp4 + __TBL_logf[i4]; + + yy0 = (double) iy0 * __TBL_logf[i0 + 1]; /* step 6: remainder times the odd table entry */ + yy1 = (double) iy1 * __TBL_logf[i1 + 1]; + yy2 = (double) iy2 * __TBL_logf[i2 + 1]; + yy3 = (double) iy3 * __TBL_logf[i3 + 1]; + yy4 = (double) iy4 * __TBL_logf[i4 + 1]; + + yy0 = ((K3 * yy0 + K2) * yy0 + K1) * yy0 * yy0 + yy0; /* step 7: polynomial in yy */ + yy1 = ((K3 * yy1 + K2) * yy1 + K1) * yy1 * yy1 + yy1; + yy2 = 
((K3 * yy2 + K2) * yy2 + K1) * yy2 * yy2 + yy2; + yy3 = ((K3 * yy3 + K2) * yy3 + K1) * yy3 * yy3 + yy3; + yy4 = ((K3 * yy4 + K2) * yy4 + K1) * yy4 * yy4 + yy4; + + y[0] = (float)(yy0 + ty0); /* step 8: round to float and store, lane by lane */ + y += stridey; + y[0] = (float)(yy1 + ty1); + y += stridey; + y[0] = (float)(yy2 + ty2); + y += stridey; + y[0] = (float)(yy3 + ty3); + y += stridey; + y[0] = (float)(yy4 + ty4); /* no y += stridey here: begin: supplies it */ + continue; + +process1: + PROCESS(0) /* lane 0 was prepared, lane 1 was not */ + continue; + +process2: + PROCESS(0) + PROCESS(1) + continue; + +process3: + PROCESS(0) + PROCESS(1) + PROCESS(2) + continue; + +process4: + PROCESS(0) + PROCESS(1) + PROCESS(2) + PROCESS(3) /* falls off the loop body; begin: then steps y past lane 4 */ + } +} diff --git a/usr/src/lib/libmvec/common/__vpow.c b/usr/src/lib/libmvec/common/__vpow.c new file mode 100644 index 0000000000..d4e2eace5d --- /dev/null +++ b/usr/src/lib/libmvec/common/__vpow.c @@ -0,0 +1,1391 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#include <sys/isa_defs.h> +/* + * HI(x) and LO(x) view the object at address x as two consecutive int-sized + * words: HI is the int at index 1 on little-endian builds and index 0 + * otherwise, LO is the other word as unsigned. Presumably x points at a + * double, making HI the sign/exponent word - confirm at the call sites. + * The argument is not parenthesized, so pass a plain pointer expression. + */ +#ifdef _LITTLE_ENDIAN +#define HI(x) *(1+(int*)x) +#define LO(x) *(unsigned*)x +#else +#define HI(x) *(int*)x +#define LO(x) *(1+(unsigned*)x) +#endif +/* Map restrict to the compiler's _Restrict keyword when __RESTRICT is defined; otherwise compile it away. */ +#ifdef __RESTRICT +#define restrict _Restrict +#else +#define restrict +#endif + +/* double pow(double x, double y) + * + * Method : + * 1. Special cases: + * for (anything) ** 0 => 1 + * for (anything) ** NaN => QNaN + invalid + * for NaN ** (anything) => QNaN + invalid + * for +-1 ** +-Inf => QNaN + invalid + * for +-(|x| < 1) ** +Inf => +0 + * for +-(|x| < 1) ** -Inf => +Inf + * for +-(|x| > 1) ** +Inf => +Inf + * for +-(|x| > 1) ** -Inf => +0 + * for +Inf ** (negative) => +0 + * for +Inf ** (positive) => +Inf + * for -Inf ** (negative except odd integer) => +0 + * for -Inf ** (negative odd integer) => -0 + * for -Inf ** (positive except odd integer) => +Inf + * for -Inf ** (positive odd integer) => -Inf + * for (negative) ** (non-integer) => QNaN + invalid + * for +0 ** (negative) => +Inf + overflow + * for +0 ** (positive) => +0 + * for -0 ** (negative except odd integer) => +Inf + overflow + * for -0 ** (negative odd integer) => -Inf + overflow + * for -0 ** (positive except odd integer) => +0 + * for -0 ** (positive odd integer) => -0 + * 2. Computes x**y from: + * x**y = 2**(y*log2(x)) = 2**(w/256), where w = 256*log2(|x|)*y. + * 3. Computes w = 256*log2(|x|)*y from + * |x| = m * 2**n => log2(|x|) = n + log2(m). + * Let m = m0 + dm, where m0 = 1 + k / 256, + * k = [0, 255], + * dm = [-1/512, 1/512]. + * Then 256*log2(m) = 256*log2(m0 + dm) = 256*log2(m0) + 256*log2((1+z)/(1-z)), + * where z = (m-m0)/(m+m0), z = [-1/1025, 1/1025]. + * Then + * 256*log2(m0) is looked up in a table of 256*log2(1+i/256) values + * (the high/low pairs of __TBL_log2). + * 256*log2((1+z)/(1-z)) is computed using + * approximation: 256*log2((1+z)/(1-z)) = a0 * z + a1 * z**3 + a2 * z**5. + * Perform w = 256*log2(|x|)*y = w1 + w2 by simulating multi-precision arithmetic. + * 3. 
For w >= 262144 + * then for (negative) ** (odd integer) => -Inf + overflow + * else => +Inf + overflow + * For w <= -275200 + * then for (negative) ** (odd integer) => -0 + underflow + * else => +0 + underflow + * 4. Computes 2 ** (w/256) from: + * 2 ** (w/256) = 2**a * 2**(k/256) * 2**(r/256) + * Where: + * a = int ( w ) >> 8; + * k = int ( w ) & 0xFF; + * r = frac ( w ). + * Note that: + * k = 0, 1, ..., 255; + * r = (-1, 1). + * Then: + * 2**(k/256) is looked up in a table of 2**0, 2**(1/256), ... + * 2**(r/256) is computed using approximation: + * 2**(r/256) = ((((b5 * r + b4) * r + b3) * r + b2) * r + b1) * r + b0 + * Multiplication by 2**a is done by adding "a" to + * the biased exponent. + * Perform 2 ** (w/256) by simulating multi-precision arithmetic. + * 5. For (negative) ** (odd integer) => -(2**(w/256)) + * otherwise => 2**(w/256) + * + * Accuracy: + * Max. relative approximation error < 2**(-67.94) for 256*log2((1+z)/(1-z)). + * Max. relative approximation error < 2**(-63.15) for 2**(r/256). + * Maximum error observed: less than 0.761 ulp after 1.300.000.000 + * results. 
+ */ + +static void +__vpowx(int n, double * restrict px, double * restrict py, + int stridey, double * restrict pz, int stridez); + +static const double __TBL_exp2[] = { + /* __TBL_exp2[2*i] = high order bits 2^(i/256), i = [0, 255] */ + /* __TBL_exp2[2*i+1] = least bits 2^(i/256), i = [0, 255] */ + 1.000000000000000000e+00, 0.000000000000000000e+00, 1.002711275050202522e+00, +-3.636615928692263944e-17, 1.005429901112802726e+00, 9.499186535455031757e-17, + 1.008155898118417548e+00,-3.252058756084308061e-17, 1.010889286051700475e+00, +-1.523477860336857718e-17, 1.013630084951489430e+00, 9.283599768183567587e-18, + 1.016378314910953096e+00,-5.772170073199660028e-17, 1.019133996077737914e+00, + 3.601904982259661106e-17, 1.021897148654116627e+00, 5.109225028973443894e-17, + 1.024667792897135721e+00,-7.561607868487779440e-17, 1.027445949118763746e+00, +-4.956074174645370440e-17, 1.030231637686040980e+00, 3.319830041080812944e-17, + 1.033024879021228415e+00, 7.600838874027088489e-18, 1.035825693601957198e+00, +-7.806782391337636167e-17, 1.038634101961378731e+00, 5.996273788852510618e-17, + 1.041450124688316103e+00, 3.784830480287576210e-17, 1.044273782427413755e+00, + 8.551889705537964892e-17, 1.047105095879289793e+00, 7.277077243104314749e-17, + 1.049944085800687210e+00, 5.592937848127002586e-17, 1.052790773004626423e+00, +-9.629482899026935739e-17, 1.055645178360557157e+00, 1.759325738772091599e-18, + 1.058507322794512762e+00,-7.152651856637780738e-17, 1.061377227289262093e+00, +-1.197353708536565756e-17, 1.064254912884464499e+00, 5.078754198611230394e-17, + 1.067140400676823697e+00,-7.899853966841582122e-17, 1.070033711820241873e+00, +-9.937162711288919381e-17, 1.072934867525975555e+00,-3.839668843358823807e-18, + 1.075843889062791048e+00,-1.000271615114413611e-17, 1.078760797757119860e+00, +-6.656660436056592603e-17, 1.081685614993215250e+00,-4.782623902997086266e-17, + 1.084618362213309206e+00, 3.166152845816346116e-17, 1.087559060917769660e+00, + 
5.409349307820290759e-18, 1.090507732665257690e+00,-3.046782079812471147e-17, + 1.093464399072885840e+00, 1.441395814726920934e-17, 1.096429081816376883e+00, +-5.919933484449315824e-17, 1.099401802630221914e+00, 7.170459599701923225e-17, + 1.102382583307840891e+00, 5.266036871570694387e-17, 1.105371445701741173e+00, + 8.239288760500213590e-17, 1.108368411723678726e+00,-8.786813845180526616e-17, + 1.111373503344817548e+00, 5.563945026669697643e-17, 1.114386742595892432e+00, + 1.041027845684557095e-16, 1.117408151567369279e+00,-7.976805902628220456e-17, + 1.120437752409606746e+00,-6.201085906554178750e-17, 1.123475567333019898e+00, +-9.699737588987042995e-17, 1.126521618608241848e+00, 5.165856758795456737e-17, + 1.129575928566288079e+00, 6.712805858726256588e-17, 1.132638519598719196e+00, + 3.237356166738000264e-17, 1.135709414157805464e+00, 5.066599926126155859e-17, + 1.138788634756691565e+00, 8.912812676025407778e-17, 1.141876203969561576e+00, + 4.651091177531412387e-17, 1.144972144431804173e+00, 4.641289892170010657e-17, + 1.148076478840178938e+00, 6.897740236627191770e-17, 1.151189229952982673e+00, + 3.250710218863827212e-17, 1.154310420590215935e+00, 1.041712894627326619e-16, + 1.157440073633751121e+00,-9.123871231134400287e-17, 1.160578212027498779e+00, +-3.261040205417393722e-17, 1.163724858777577476e+00, 3.829204836924093499e-17, + 1.166880036952481658e+00,-8.791879579999169742e-17, 1.170043769683250190e+00, +-1.847744201790004694e-18, 1.173216080163637320e+00,-7.287562586584994479e-17, + 1.176396991650281221e+00, 5.554203254218078963e-17, 1.179586527462875845e+00, + 1.009231277510039044e-16, 1.182784710984341014e+00, 1.542975430079076058e-17, + 1.185991565660993841e+00,-9.209506835293105905e-18, 1.189207115002721027e+00, + 3.982015231465646111e-17, 1.192431382583151178e+00, 4.397551415609721443e-17, + 1.195664392039827328e+00, 4.616603670481481397e-17, 1.198906167074380580e+00, +-9.809193356008423118e-17, 1.202156731452703076e+00, 6.644981499252301245e-17, + 
1.205416109005123859e+00,-3.357272193267529634e-17, 1.208684323626581625e+00, +-4.746725945228984097e-17, 1.211961399276801243e+00,-4.890611077521118357e-17, + 1.215247359980468955e+00,-7.712630692681488131e-17, 1.218542229827408452e+00, +-9.006726958363837675e-17, 1.221846032972757623e+00,-1.061102121140269116e-16, + 1.225158793637145527e+00,-8.903533814269983429e-17, 1.228480536106870025e+00, +-1.898781631302529953e-17, 1.231811284734075862e+00, 7.389382471610050247e-17, + 1.235151063936933413e+00,-1.075524434430784138e-16, 1.238499898199816540e+00, + 2.767702055573967430e-17, 1.241857812073484002e+00, 4.658027591836936791e-17, + 1.245224830175257980e+00,-4.677240449846727500e-17, 1.248600977189204819e+00, +-8.261810999021963550e-17, 1.251986277866316222e+00, 4.834167152469897600e-17, + 1.255380757024691096e+00,-6.711389821296878419e-18, 1.258784439549716527e+00, +-8.421782587730599357e-17, 1.262197350394250739e+00,-3.084464887473846465e-17, + 1.265619514578806282e+00, 4.250577003450868637e-17, 1.269050957191733220e+00, + 2.667932131342186095e-18, 1.272491703389402762e+00,-1.057791626721242103e-17, + 1.275941778396392001e+00, 9.915430244214290330e-17, 1.279401207505669325e+00, +-9.759095008356062210e-17, 1.282870016078778264e+00, 1.713594918243560968e-17, + 1.286348229546025568e+00,-3.416955706936181976e-17, 1.289835873406665723e+00, + 8.949257530897591722e-17, 1.293332973229089466e+00,-2.974590443132751646e-17, + 1.296839554651009641e+00, 2.538250279488831496e-17, 1.300355643379650594e+00, + 5.678728102802217422e-17, 1.303881265191935812e+00, 8.647675598267871179e-17, + 1.307416445934677318e+00,-7.336645652878868892e-17, 1.310961211524764414e+00, +-7.181536135519453857e-17, 1.314515587949354636e+00, 2.267543315104585645e-17, + 1.318079601266064049e+00,-5.457955827149153502e-17, 1.321653277603157539e+00, +-2.480638245913021742e-17, 1.325236643159741323e+00,-2.858731210038861373e-17, + 1.328829724205954355e+00, 4.089086223910160052e-17, 1.332432547083161500e+00, 
+-5.101586630916743959e-17, 1.336045138204145832e+00,-5.891866356388801353e-17, + 1.339667524053302916e+00, 8.927282594831731984e-17, 1.343299731186835322e+00, +-5.802580890201437751e-17, 1.346941786232945804e+00, 3.224065101254679169e-17, + 1.350593715892034474e+00,-8.287110381462416533e-17, 1.354255546936892651e+00, + 7.700948379802989462e-17, 1.357927306212901142e+00,-9.529635744825188867e-17, + 1.361609020638224754e+00, 1.533787661270668046e-18, 1.365300717204011915e+00, +-1.000536312597476517e-16, 1.369002422974590516e+00, 9.593797919118848773e-17, + 1.372714165087668414e+00,-4.495960595234841262e-17, 1.376435970754530169e+00, +-6.898588935871801042e-17, 1.380167867260237990e+00, 1.051031457996998395e-16, + 1.383909881963832023e+00,-6.770511658794786287e-17, 1.387662042298529075e+00, + 8.422984274875415318e-17, 1.391424375771926236e+00,-4.906174865288989325e-17, + 1.395196909966200272e+00,-9.329336224225496552e-17, 1.398979672538311236e+00, +-9.614213209051323072e-17, 1.402772691220204759e+00,-5.295783249407989223e-17, + 1.406575993819015435e+00, 7.034914812136422188e-18, 1.410389608217270663e+00, + 4.166548728435062259e-17, 1.414213562373095145e+00,-9.667293313452913451e-17, + 1.418047884320415175e+00, 2.274438542185529452e-17, 1.421892602169165576e+00, +-1.607782891589024413e-17, 1.425747744105494208e+00, 9.880690758500607284e-17, + 1.429613338391970023e+00,-1.203164248905365518e-17, 1.433489413367788901e+00, +-5.802454243926826103e-17, 1.437375997448982368e+00,-4.204034016467556612e-17, + 1.441273119128625657e+00, 5.602503650878985675e-18, 1.445180806977046650e+00, +-3.023758134993987319e-17, 1.449099089642035043e+00,-6.259405000819309254e-17, + 1.453027995849052623e+00,-5.779948609396106102e-17, 1.456967554401443765e+00, + 5.648679453876998140e-17, 1.460917794180647045e+00,-5.600377186075215800e-17, + 1.464878744146405731e+00, 9.530767543587157319e-17, 1.468850433336981842e+00, + 8.465882756533627608e-17, 1.472832890869367528e+00, 6.691774081940589372e-17, 
+ 1.476826145939499346e+00,-3.483994556892795796e-17, 1.480830227822471867e+00, +-9.686952102630618578e-17, 1.484845165872752393e+00, 1.078008676440748076e-16, + 1.488870989524397004e+00, 6.155367157742871330e-17, 1.492907728291264835e+00, + 1.419292015428403577e-17, 1.496955411767235455e+00,-2.861663253899158211e-17, + 1.501014069626425584e+00,-6.413767275790235039e-17, 1.505083731623406473e+00, + 7.074710613582846364e-17, 1.509164427593422841e+00,-1.016455327754295039e-16, + 1.513256187452609813e+00, 8.884497851338712091e-17, 1.517359041198214742e+00, +-4.308699472043340801e-17, 1.521473018908814590e+00,-5.996387675945683420e-18, + 1.525598150744538417e+00,-1.102494171234256094e-16, 1.529734466947286986e+00, + 3.785792115157219653e-17, 1.533881997840955913e+00, 8.875226844438446141e-17, + 1.538040773831656827e+00, 1.017467235116135806e-16, 1.542210825407940744e+00, + 7.949834809697620856e-17, 1.546392183141021448e+00, 1.068396000565721980e-16, + 1.550584877684999974e+00,-1.460070659068938518e-17, 1.554788939777088652e+00, +-8.003161350116035641e-17, 1.559004400237836929e+00, 3.781207053357527502e-17, + 1.563231289971357629e+00, 7.484777645590734389e-17, 1.567469639965552997e+00, +-1.035206176884972199e-16, 1.571719481292341403e+00,-3.342984004687200069e-17, + 1.575980845107886497e+00,-1.013691647127830398e-17, 1.580253762652824578e+00, +-5.163402929554468062e-17, 1.584538265252493749e+00,-1.933771703458570293e-17, + 1.588834384317163950e+00,-5.994950118824479401e-18, 1.593142151342266999e+00, +-1.009440654231196372e-16, 1.597461597908627073e+00, 2.486839279622099613e-17, + 1.601792755682693414e+00,-6.054917453527784343e-17, 1.606135656416771029e+00, +-1.035454528805999526e-16, 1.610490331949254283e+00, 2.470719256979788785e-17, + 1.614856814204860713e+00,-7.316663399125123263e-17, 1.619235135194863728e+00, + 2.094133415422909241e-17, 1.623625327017328868e+00,-3.584512851414474710e-17, + 1.628027421857347834e+00,-6.712955084707084086e-17, 1.632441451987274972e+00, 
+ 9.852819230429992964e-17, 1.636867449766964411e+00, 7.698325071319875575e-17, + 1.641305447644006321e+00,-9.247568737640705508e-17, 1.645755478153964946e+00, +-1.012567991367477260e-16, 1.650217573920617742e+00, 9.133279588729904190e-18, + 1.654691767656194301e+00, 9.643294303196028661e-17, 1.659178092161616158e+00, +-7.275545550823050654e-17, 1.663676580326736376e+00, 5.890992696713099670e-17, + 1.668187265130582464e+00, 4.269178019570615091e-17, 1.672710179641596628e+00, +-5.476715964599563076e-17, 1.677245357017878469e+00, 8.303949509950732785e-17, + 1.681792830507429004e+00, 8.199010020581496520e-17, 1.686352633448393368e+00, +-7.181463278358010675e-17, 1.690924799269305279e+00,-9.669671474394880166e-17, + 1.695509361489332623e+00, 7.238416872845166641e-17, 1.700106353718523478e+00, +-8.023719370397700246e-18, 1.704715809658051251e+00,-2.728883284797281563e-17, + 1.709337763100462926e+00,-9.868779456632931076e-17, 1.713972247929925974e+00, + 6.473975107753367064e-17, 1.718619298122477934e+00,-1.851380418263110988e-17, + 1.723278947746273992e+00,-9.522123800393799963e-17, 1.727951230961837670e+00, +-1.075098186120464245e-16, 1.732636182022311067e+00,-1.698051074315415494e-18, + 1.737333835273706217e+00, 3.164389299292956947e-17, 1.742044225155156445e+00, +-1.525959118950788792e-18, 1.746767386199169048e+00,-1.075229048350751450e-16, + 1.751503353031878207e+00,-5.124450420596724659e-17, 1.756252160373299454e+00, + 2.960140695448873307e-17, 1.761013843037583904e+00,-7.943253125039227711e-17, + 1.765788435933272726e+00, 9.461315018083267867e-17, 1.770575974063554714e+00, + 5.961794510040555848e-17, 1.775376492526521188e+00, 6.429731796556572034e-17, + 1.780190026515424462e+00,-5.284627289091617365e-17, 1.785016611318934965e+00, + 1.533040012103131382e-17, 1.789856282321401038e+00,-4.154354660683350387e-17, + 1.794709075003107168e+00, 1.822745842791208677e-17, 1.799575024940535117e+00, +-2.526889233358897644e-17, 1.804454167806623932e+00,-5.177222408793317883e-17, 
+ 1.809346539371031959e+00,-9.032641402450029682e-17, 1.814252175500398856e+00, +-9.969531538920348820e-17, 1.819171112158608494e+00, 7.402676901145838890e-17, + 1.824103385407053413e+00,-1.015962786227708306e-16, 1.829049031404897274e+00, + 6.889192908835695637e-17, 1.834008086409342431e+00, 3.283107224245627204e-17, + 1.838980586775893711e+00, 6.918969740272511942e-18, 1.843966568958625984e+00, +-5.939742026949964550e-17, 1.848966069510450838e+00, 9.027580446261089288e-17, + 1.853979125083385471e+00, 9.761887490727593538e-17, 1.859005772428820480e+00, +-9.528705461989940687e-17, 1.864046048397788979e+00, 6.540912680620571711e-17, + 1.869099989941238604e+00,-9.938505214255067083e-17, 1.874167634110299963e+00, +-6.122763413004142562e-17, 1.879249018056560194e+00,-1.622631555783584478e-17, + 1.884344179032334532e+00,-8.226593125533710906e-17, 1.889453154390939194e+00, +-9.005168285059126718e-17, 1.894575981586965607e+00, 3.403403535216529671e-17, + 1.899712698176555303e+00,-3.859739769378514323e-17, 1.904863341817674138e+00, + 6.533857514718278629e-17, 1.910027950270389852e+00,-5.909688006744060237e-17, + 1.915206561397147400e+00,-1.061994605619596264e-16, 1.920399213163047403e+00, + 7.116681540630314186e-17, 1.925605943636125028e+00,-9.914963769693740927e-17, + 1.930826790987627106e+00, 6.167149706169109553e-17, 1.936061793492294347e+00, + 1.033238596067632574e-16, 1.941310989528640452e+00,-6.638029891621487990e-17, + 1.946574417579233218e+00, 6.811022349533877184e-17, 1.951852116230978318e+00, +-2.199016969979351086e-17, 1.957144124175400179e+00, 8.960767791036667768e-17, + 1.962450480208927317e+00, 1.097684400091354695e-16, 1.967771223233175881e+00, +-1.031492801153113151e-16, 1.973106392255234320e+00,-7.451617863956037486e-18, + 1.978456026387950928e+00, 4.038875310927816657e-17, 1.983820164850219392e+00, +-2.203454412391062657e-17, 1.989198846967266343e+00, 8.205132638369199416e-18, + 1.994592112170940235e+00, 1.790971035200264509e-17 +}; + +static const double 
__TBL_log2[] = { + /* __TBL_log2[2*i] = high order rounded 32 bits log2(1+i/256)*256, i = [0, 255] */ + /* __TBL_log2[2*i+1] = low order least bits log2(1+i/256)*256, i = [0, 255] */ + 0.000000000000000000e+00, 0.000000000000000000e+00, 1.439884185791015625e+00, + 4.078417797464839152e-07, 2.874177932739257812e+00,-5.443862030060025621e-07, + 4.302921295166015625e+00, 3.525917800357419922e-07, 5.726161956787109375e+00, +-1.821502755258614180e-06, 7.143936157226562500e+00,-1.035336134691423741e-06, + 8.556289672851562500e+00,-1.279264291071495652e-06, 9.963264465332031250e+00, +-3.206502629414843101e-06, 1.136489105224609375e+01, 3.503517986289194222e-06, + 1.276123046875000000e+01,-1.809406249049319022e-06, 1.415230560302734375e+01, +-2.114722805833714926e-06, 1.553816223144531250e+01,-3.719431504776986979e-06, + 1.691883850097656250e+01,-5.743786819670105240e-06, 1.829435729980468750e+01, + 7.514691093524705578e-06, 1.966479492187500000e+01,-2.076862291588726520e-06, + 2.103015136718750000e+01, 3.219403619538604258e-06, 2.239048767089843750e+01, +-3.108115489869591032e-07, 2.374583435058593750e+01,-6.275103710481114264e-06, + 2.509620666503906250e+01, 6.572855776743687178e-06, 2.644168090820312500e+01, +-1.954725505303359537e-06, 2.778225708007812500e+01, 3.855133152759458770e-06, + 2.911799621582031250e+01,-1.707228100041815487e-06, 3.044891357421875000e+01, + 1.042999152333371737e-06, 3.177505493164062500e+01, 8.966313933586820042e-07, + 3.309646606445312500e+01,-1.372654171244005427e-05, 3.441314697265625000e+01, +-8.996099168734074844e-06, 3.572515869140625000e+01,-1.247731510027211536e-05, + 3.703250122070312500e+01, 8.944258749129049106e-06, 3.833526611328125000e+01, +-3.520082642279872716e-06, 3.963342285156250000e+01, 1.306577612991810031e-05, + 4.092706298828125000e+01,-7.730135593513790229e-07, 4.221618652343750000e+01, +-1.329446142304436745e-05, 4.350079345703125000e+01, 6.912200714904314733e-06, + 4.478097534179687500e+01,-6.216230979739182064e-07, 
4.605673217773437500e+01, +-5.133911151040936670e-06, 4.732809448242187500e+01,-6.697901206512330627e-06, + 4.859509277343750000e+01,-5.700153089154811841e-06, 4.985775756835937500e+01, +-2.836263919120346801e-06, 5.111611938476562500e+01, 8.933436604624454391e-07, + 5.237020874023437500e+01, 4.187561748309498307e-06, 5.362005615234375000e+01, + 5.448667394155597532e-06, 5.486569213867187500e+01, 2.786324169943508531e-06, + 5.610714721679687500e+01,-5.978483512667373796e-06, 5.734442138671875000e+01, + 7.207996138368885843e-06, 5.857757568359375000e+01, 9.083351754561760127e-06, + 5.980664062500000000e+01,-3.374516276140515786e-06, 6.103161621093750000e+01, +-2.943717299925017200e-06, 6.225253295898437500e+01, 6.810091060168101732e-06, + 6.346945190429687500e+01,-8.462738988588859704e-06, 6.468237304687500000e+01, +-2.233961135216831566e-05, 6.589129638671875000e+01,-8.657399896582645111e-06, + 6.709625244140625000e+01, 2.797335967336006296e-05, 6.829736328125000000e+01, +-8.863355250907819214e-06, 6.949450683593750000e+01, 2.830758238800374038e-05, + 7.068786621093750000e+01,-1.846073268549083018e-05, 7.187731933593750000e+01, +-2.182503249464459606e-06, 7.306298828125000000e+01,-2.025251442448625989e-05, + 7.424481201171875000e+01, 1.280303154355201204e-05, 7.542291259765625000e+01, +-8.813997363590295654e-07, 7.659722900390625000e+01, 2.370323712746426047e-05, + 7.776788330078125000e+01,-1.176744290134661421e-05, 7.893481445312500000e+01, +-2.273743674288609119e-05, 8.009802246093750000e+01, 1.409185747234803696e-05, + 8.125762939453125000e+01,-2.707246895087010889e-07, 8.241357421875000000e+01, + 1.807241476105480180e-05, 8.356597900390625000e+01,-3.030059664889450720e-05, + 8.471472167968750000e+01,-8.823455531875539245e-07, 8.585992431640625000e+01, + 6.485238524924182146e-06, 8.700158691406250000e+01, 1.382440142980862947e-05, + 8.813977050781250000e+01,-1.808136338482881111e-05, 8.927441406250000000e+01, +-6.579344146543672011e-06, 9.040557861328125000e+01, 
8.714227880222726313e-06, + 9.153332519531250000e+01,-1.201308307454951138e-05, 9.265759277343750000e+01, + 1.330278431878087205e-05, 9.377850341796875000e+01,-1.657103990890600482e-05, + 9.489599609375000000e+01,-1.995110226941163424e-05, 9.601007080078125000e+01, + 2.362403148762806632e-05, 9.712084960937500000e+01, 1.236086810905991142e-05, + 9.822827148437500000e+01, 2.738898236946465744e-05, 9.933239746093750000e+01, + 2.758741700388469572e-05, 1.004332885742187500e+02,-2.834285611604269955e-05, + 1.015308227539062500e+02, 1.228649517068771375e-06, 1.026251220703125000e+02, + 1.361792668612316888e-05, 1.037161865234375000e+02, 2.803946653578170389e-05, + 1.048040771484375000e+02, 2.502814149567842806e-06, 1.058887329101562500e+02, + 1.692003190104140317e-05, 1.069702148437500000e+02, 2.896703985131545672e-05, + 1.080485839843750000e+02,-3.844135045484567362e-06, 1.091237792968750000e+02, +-2.093137927645659717e-06, 1.101958618164062500e+02,-8.590030211185738579e-06, + 1.112648315429687500e+02,-5.267967244023324300e-06, 1.123306884765625000e+02, + 2.578347229232600646e-05, 1.133935546875000000e+02,-1.975022555464358195e-05, + 1.144533081054687500e+02,-2.195797778964440179e-06, 1.155100708007812500e+02, +-2.617170507638525077e-05, 1.165637817382812500e+02,-1.334031370958194516e-05, + 1.176145019531250000e+02,-7.581976902412963145e-06, 1.186622314453125000e+02, + 8.112109654298731037e-06, 1.197070312500000000e+02,-1.042875265529314613e-05, + 1.207488403320312500e+02, 1.455233211877492951e-05, 1.217877807617187500e+02, +-2.243432092472914265e-05, 1.228237304687500000e+02, 1.712269952247034061e-05, + 1.238568115234375000e+02, 2.745621214456745937e-05, 1.248870239257812500e+02, + 2.473291989440979066e-05, 1.259143676757812500e+02, 2.498461547595911484e-05, + 1.269389038085937500e+02,-1.692547797717771941e-05, 1.279605712890625000e+02, +-2.419576192770340594e-05, 1.289793701171875000e+02, 1.880972467762623192e-05, + 1.299954833984375000e+02,-5.550757125543327248e-05, 
1.310086669921875000e+02, + 1.237226167189998996e-05, 1.320191650390625000e+02,-6.438347630770959254e-06, + 1.330268554687500000e+02, 2.525911246920619613e-05, 1.340318603515625000e+02, + 3.990327953073019333e-07, 1.350340576171875000e+02, 5.593427389035480335e-05, + 1.360336914062500000e+02,-3.751407409478960320e-05, 1.370305175781250000e+02, +-2.116319935859897563e-05, 1.380246582031250000e+02,-2.559468964093475045e-06, + 1.390161132812500000e+02, 3.270409087092109593e-05, 1.400050048828125000e+02, +-2.315157751389992129e-05, 1.409912109375000000e+02,-3.387938973438343638e-05, + 1.419747314453125000e+02, 1.458416266727572812e-05, 1.429556884765625000e+02, + 1.412021555596584681e-05, 1.439340820312500000e+02,-2.143065540113838312e-05, + 1.449097900390625000e+02, 4.373273697503468317e-05, 1.458830566406250000e+02, +-2.090790235253405790e-05, 1.468536376953125000e+02, 4.230297794089183646e-05, + 1.478217773437500000e+02, 2.633401664450247309e-06, 1.487873535156250000e+02, +-4.542835986281740771e-06, 1.497503662109375000e+02, 3.397367848245215483e-05, + 1.507109375000000000e+02, 9.209059510146982590e-06, 1.516689453125000000e+02, + 5.622812858742714859e-05, 1.526246337890625000e+02,-5.621609346274134244e-05, + 1.535776367187500000e+02, 5.088115468603551539e-05, 1.545283203125000000e+02, + 2.400396513473623342e-05, 1.554765625000000000e+02,-2.180099663431456814e-06, + 1.564223632812500000e+02,-1.517056781617965675e-05, 1.573657226562500000e+02, +-2.562756696989711716e-06, 1.583066406250000000e+02, 4.795320325388065854e-05, + 1.592452392578125000e+02, 2.652301982429665372e-05, 1.601815185546875000e+02, +-5.473018439029181240e-05, 1.611152343750000000e+02, 6.036538006249134820e-05, + 1.620467529296875000e+02, 1.753890969321481711e-05, 1.629759521484375000e+02, +-4.928926339732922490e-05, 1.639027099609375000e+02,-6.288016979631557560e-06, + 1.648271484375000000e+02, 3.614482952210960361e-05, 1.657493896484375000e+02, +-3.247597790375142114e-05, 1.666691894531250000e+02, 
4.348868072528205213e-05, + 1.675867919921875000e+02, 3.131097214651595330e-05, 1.685021972656250000e+02, +-5.768116554728405733e-05, 1.694151611328125000e+02, 3.189681619086343127e-05, + 1.703260498046875000e+02,-5.500528238559059116e-05, 1.712344970703125000e+02, + 5.890184674174263693e-05, 1.721408691406250000e+02, 1.840407787096519837e-05, + 1.730450439453125000e+02,-4.351222480150346831e-05, 1.739468994140625000e+02, + 6.059331686505290421e-06, 1.748465576171875000e+02, 5.580532332169584454e-05, + 1.757441406250000000e+02,-5.666096094448416139e-06, 1.766395263671875000e+02, +-4.568380948624016041e-05, 1.775327148437500000e+02,-5.372392273978838048e-05, + 1.784237060546875000e+02,-1.933871000131713187e-05, 1.793126220703125000e+02, +-5.422619290693841471e-05, 1.801993408203125000e+02,-2.601847861521447132e-05, + 1.810839843750000000e+02,-4.656229401600182454e-05, 1.819664306640625000e+02, + 1.636297150881445295e-05, 1.828468017578125000e+02, 5.076471489501210225e-05, + 1.837252197265625000e+02,-5.542156510357154555e-05, 1.846014404296875000e+02, +-4.812064810565531807e-05, 1.854755859375000000e+02,-3.953879286781995545e-05, + 1.863476562500000000e+02,-1.988182101010412125e-05, 1.872176513671875000e+02, + 2.057522891062264376e-05, 1.880856933593750000e+02,-3.058156040982771239e-05, + 1.889516601562500000e+02,-4.169340446171797184e-05, 1.898155517578125000e+02, +-3.239118881346662872e-06, 1.906774902343750000e+02,-2.783449132689922134e-05, + 1.915373535156250000e+02, 1.597927683340914293e-05, 1.923952636718750000e+02, + 1.545493412281261116e-05, 1.932512207031250000e+02,-2.014927705264352875e-05, + 1.941051025390625000e+02, 4.043097907577914080e-05, 1.949571533203125000e+02, +-3.781452579504048975e-05, 1.958071289062500000e+02,-1.677810793588779092e-06, + 1.966551513671875000e+02, 3.577570564777057149e-05, 1.975013427734375000e+02, +-3.858128431828155999e-05, 1.983454589843750000e+02, 2.827352539329734468e-05, + 1.991877441406250000e+02, 1.020426695132691908e-06, 
2.000280761718750000e+02, + 1.049043785864183866e-05, 2.008665771484375000e+02,-5.668571223208539910e-05, + 2.017030029296875000e+02, 5.227451898157462205e-05, 2.025377197265625000e+02, +-2.025647781341857894e-05, 2.033704833984375000e+02,-2.161281037339224341e-05, + 2.042012939453125000e+02, 5.667325008632565576e-05, 2.050303955078125000e+02, +-2.112821448834358837e-05, 2.058575439453125000e+02,-2.522383155215216853e-06, + 2.066828613281250000e+02,-1.281378348494855858e-06, 2.075063476562500000e+02, +-9.162516382743561384e-06, 2.083280029296875000e+02,-1.797812601298608335e-05, + 2.091478271484375000e+02,-1.959505997696247453e-05, 2.099658203125000000e+02, +-5.934211946670452627e-06, 2.107819824218750000e+02, 3.102996118252714271e-05, + 2.115964355468750000e+02,-2.280040076415178584e-05, 2.124090576171875000e+02, +-3.743515649437846729e-05, 2.132198486328125000e+02,-5.006638631136701490e-06, + 2.140289306640625000e+02,-3.976919665668718942e-05, 2.148361816406250000e+02, +-1.188780735169185652e-05, 2.156417236328125000e+02,-3.571887766413048520e-05, + 2.164454345703125000e+02, 1.847144755636210490e-05, 2.172474365234375000e+02, + 3.622647302213163157e-05, 2.180477294921875000e+02, 2.511032323154433900e-05, + 2.188463134765625000e+02,-7.361941985081681848e-06, 2.196431884765625000e+02, +-5.372390403709574017e-05, 2.204382324218750000e+02, 1.551294579696132803e-05, + 2.212316894531250000e+02,-3.642162925932327343e-05, 2.220233154296875000e+02, + 4.193598594979618241e-05, 2.228133544921875000e+02, 1.372116405796589833e-05, + 2.236016845703125000e+02, 8.233623894335039537e-06, 2.243883056640625000e+02, + 3.265657742833052654e-05, 2.251733398437500000e+02,-2.794287750390687326e-05, + 2.259566650390625000e+02,-4.440243113774530265e-05, 2.267382812500000000e+02, +-9.675114830058622014e-06, 2.275183105468750000e+02,-3.882892066889445600e-05, + 2.282966308593750000e+02,-2.835487591479255673e-06, 2.290733642578125000e+02, +-1.685097895998181422e-05, 2.298483886718750000e+02, 
4.806553595480019518e-05, + 2.306219482421875000e+02,-4.539911586906436716e-05, 2.313937988281250000e+02, +-4.631966285757620260e-05, 2.321639404296875000e+02, 5.204609324350696002e-05, + 2.329326171875000000e+02, 1.225763073721718197e-05, 2.336997070312500000e+02, +-3.695637982554016382e-05, 2.344650878906250000e+02, 3.309133292926460016e-05, + 2.352290039062500000e+02,-1.516395380482592629e-05, 2.359913330078125000e+02, +-5.311674305290968619e-05, 2.367519531250000000e+02, 4.779807991226078768e-05, + 2.375111083984375000e+02, 4.989464209345647548e-05, 2.382687988281250000e+02, +-4.041202611322311408e-05, 2.390247802734375000e+02, 2.739433433590848536e-05, + 2.397792968750000000e+02, 1.550965806406508966e-05, 2.405322265625000000e+02, + 5.230206142425020257e-05, 2.412836914062500000e+02, 2.196059540790264514e-05, + 2.420335693359375000e+02, 5.277680785141730338e-05, 2.427819824218750000e+02, + 2.886380247947272558e-05, 2.435289306640625000e+02,-4.363251767645384661e-05, + 2.442742919921875000e+02,-3.653314744654563199e-05, 2.450180664062500000e+02, + 5.623369525922526825e-05, 2.457604980468750000e+02,-3.437446279919778004e-06, + 2.465013427734375000e+02, 3.459290119679066472e-05, 2.472407226562500000e+02, + 5.421724428316440202e-05, 2.479787597656250000e+02,-6.070765164808318435e-05, + 2.487152099609375000e+02,-6.014953987030989107e-05, 2.494501953125000000e+02, +-6.032228506450037554e-05, 2.501837158203125000e+02,-5.540433388359054134e-05, + 2.509157714843750000e+02,-3.960875078622925214e-05, 2.516463623046875000e+02, +-7.182944107105660894e-06, 2.523754882812500000e+02, 4.759160516857532540e-05, + 2.531032714843750000e+02, 8.329299458439681639e-06, 2.538295898437500000e+02, + 2.751627995643241118e-06, 2.545544433593750000e+02, 3.647649263201999678e-05, + 2.552779541015625000e+02,-6.981531437649667064e-06 +}; + +static const unsigned long long LCONST[] = { +0x3c90000000000000ULL, /* 2**(-54) = 5.551115123125782702e-17 */ +0x3ff0000000000000ULL, /* DONE = 1.0 */ 
+0x4330000000000000ULL, /* DVAIN52 = 2**52 = 4.503599627370496e15 */ +0xffffffff00000000ULL, /* 0xffffffff00000000 */ +0x000fffffffffffffULL, /* 0x000fffffffffffff */ +0x0000080000000000ULL, /* 0x0000080000000000 */ +0xfffff00000000000ULL, /* 0xfffff00000000000 */ +0x0000000000000000ULL, /* DZERO = 0.0 */ +0x4062776d8ce329bdULL, /* KA5 = 5.77078604860893737986e-01*256 */ +0x406ec709dc39fc99ULL, /* KA3 = 9.61796693925765549423e-01*256 */ +0x3f6d94ae0bf85de6ULL, /* KA1_LO = 1.41052154268147309568e-05*256 */ +0x4087154000000000ULL, /* KA1_HI = 2.8853759765625e+00*256 */ +0x40871547652b82feULL, /* KA1 = 2.885390081777926774e+00*256 */ +0x4110000000000000ULL, /* HTHRESH = 262144.0 */ +0xc110cc0000000000ULL, /* LTHRESH = -275200.0 */ +0x3cd5d52893bc7fecULL, /* KB5 = 1.21195555854068860923e-15 */ +0x3d83b2abc07c93d0ULL, /* KB4 = 2.23939573811855104311e-12 */ +0x3e2c6b08d71f5d1eULL, /* KB3 = 3.30830268126604677436e-09 */ +0x3ecebfbdff82c4edULL, /* KB2 = 3.66556559691003767877e-06 */ +0x3f662e42fefa39efULL, /* KB1 = 2.70760617406228636578e-03 */ +0x01a56e1fc2f8f359ULL, /* _TINY = 1.0e-300 */ +0x7e37e43c8800759cULL /* _HUGE = 1.0e+300 */ +}; + +#define SCALE_ARR ((double*)LCONST + 1) +#define _TINY ((double*)LCONST)[20] /* 1.0e-300 */ +#define _HUGE ((double*)LCONST)[21] /* 1.0e+300 */ + +#define RET_SC(I) \ + px += stridex; \ + py += stridey; \ + pz += stridez; \ + if (--n <= 0) \ + break; \ + goto start##I; + +#define RETURN(I, ret) \ +{ \ + pz[0] = (ret); \ + RET_SC(I) \ +} + +#define PREP(I) \ +hx = HI(px); \ +lx = LO(px); \ +hy = HI(py); \ +ly = LO(py); \ +sx = hx >> 31; \ +sy = hy >> 31; \ +hx &= 0x7fffffff; \ +hy &= 0x7fffffff; \ +ull_y0 = *(unsigned long long*)px; \ + \ +if (hy < 0x3bf00000) /* |Y| < 2^(-64) */ \ +{ \ + y0 = *px; \ + if ((hy | ly) == 0) /* pow(X,0) */ \ + RETURN (I, DONE) \ + if (hx > 0x7ff00000 || (hx == 0x7ff00000 && lx != 0)) /* |X| = Nan */ \ + *pz = y0 + y0; \ + else if ((hx | lx) == 0 || (hx == 0x7ff00000 && lx == 0)) /* X = 0 or Inf */ \ + { \ 
+ HI(pz) = hx; \ + LO(pz) = lx; \ + if (sy) \ + *pz = DONE / *pz; \ + } \ + else \ + *pz = (sx) ? DZERO / DZERO : DONE; \ + RET_SC(I) \ +} \ +yisint##I = 0; /* Y - non-integer */ \ +exp = hy >> 20; /* Y exponent */ \ +ull_y0 &= LMMANT; \ +ull_x##I = (ull_y0 | LDONE); \ +x##I = *(double*)&ull_x##I; \ +ull_ax##I = ((ull_x##I + LMROUND) & LMHI20); \ +ax##I = *(double*)&ull_ax##I; \ +if (hx >= 0x7ff00000 || exp >= 0x43e) /* X=Inf,Nan or |Y|>2^63,Inf,Nan */ \ +{ \ + y0 = *px; \ + if (hx > 0x7ff00000 || (hx == 0x7ff00000 && lx != 0) || \ + hy > 0x7ff00000 || (hy == 0x7ff00000 && ly != 0)) /* |X| or |Y| = Nan */ \ + RETURN (I, y0 + *py) \ + if (hy == 0x7ff00000 && (ly == 0)) /* |Y| = Inf */ \ + { \ + if (hx == 0x3ff00000 && (lx == 0)) /* +-1 ** +-Inf */ \ + *pz = *py - *py; \ + else if ((hx < 0x3ff00000) != sy) \ + *pz = DZERO; \ + else \ + { \ + HI(pz) = hy; \ + LO(pz) = ly; \ + } \ + RET_SC(I) \ + } \ + if (exp < 0x43e) /* |Y| < 2^63 */ \ + { \ + if (sx) /* X = -Inf */ \ + { \ + if (exp >= 0x434) /* |Y| >= 2^53 */ \ + yisint##I = 2; /* Y - even */ \ + else \ + { \ + if (exp >= 0x3ff) /* |Y| >= 1 */ \ + { \ + if (exp > (20 + 0x3ff)) \ + { \ + i0 = ly >> (52 - (exp - 0x3ff)); \ + if ((i0 << (52 - (exp - 0x3ff))) == ly) \ + yisint##I = 2 - (i0 & 1); \ + } \ + else if (ly == 0) \ + { \ + i0 = hy >> (20 - (exp - 0x3ff)); \ + if ((i0 << (20 - (exp - 0x3ff))) == hy) \ + yisint##I = 2 - (i0 & 1); \ + } \ + } \ + } \ + } \ + if (sy) \ + hx = lx = 0; \ + hx += yisint##I << 31; \ + HI(pz) = hx; \ + LO(pz) = lx; \ + RET_SC(I) \ + } \ + else /* |Y| >= 2^63 */ \ + { \ + /* |X| = 0, 1, Inf */ \ + if (lx == 0 && (hx == 0 || hx == 0x3ff00000 || hx == 0x7ff00000)) \ + { \ + HI(pz) = hx; \ + LO(pz) = lx; \ + if (sy) \ + *pz = DONE / *pz; \ + } \ + else \ + { \ + y0 = ((hx < 0x3ff00000) != sy) ? 
_TINY : _HUGE; \ + *pz = y0 * y0; \ + } \ + RET_SC(I) \ + } \ +} \ +/* X <= 0: taken when the sign bit of X is set or X is +-0, so that the */ \ +/* parity of Y (yisint: 0 = non-integer, 1 = odd, 2 = even) is known */ \ +/* before the sign of the result is chosen. */ \ +if (sx || ((hx | lx) == 0)) /* X <= 0 */ \ +{ \ + if (exp >= 0x434) /* |Y| >= 2^53 */ \ + yisint##I = 2; /* Y - even */ \ + else \ + { \ + if (exp >= 0x3ff) /* |Y| >= 1 */ \ + { \ + if (exp > (20 + 0x3ff)) \ + { \ + i0 = ly >> (52 - (exp - 0x3ff)); \ + if ((i0 << (52 - (exp - 0x3ff))) == ly) \ + yisint##I = 2 - (i0 & 1); \ + } \ + else if (ly == 0) \ + { \ + i0 = hy >> (20 - (exp - 0x3ff)); \ + if ((i0 << (20 - (exp - 0x3ff))) == hy) \ + yisint##I = 2 - (i0 & 1); \ + } \ + } \ + } \ + if ((hx | lx) == 0) /* X == 0 */ \ + { \ + y0 = DZERO; \ + if (sy) \ + y0 = DONE / y0; \ + if (sx & yisint##I) \ + y0 = -y0; \ + RETURN (I, y0) \ + } \ + if (yisint##I == 0) /* pow(neg,non-integer) */ \ + RETURN (I, DZERO / DZERO) /* NaN */ \ +} \ +exp = (hx >> 20); \ +exp##I = exp - 2046; \ +py##I = py; \ +pz##I = pz; \ +ux##I = x##I + ax##I; \ +if (!exp) \ +{ \ + ax##I = (double) ull_y0; \ + ull_ax##I = *(unsigned long long*)&ax##I; \ + ull_x##I = ((ull_ax##I & LMMANT) | LDONE); \ + x##I = *(double*)&ull_x##I; \ + exp##I = ((unsigned int*) & ull_ax##I)[0]; \ + exp##I = (exp##I >> 20) - (2046 + 1023 + 51); \ + ull_ax##I = (ull_x##I + (LMROUND & LMHI20)); \ + ax##I = *(double*)&ull_ax##I; \ + ux##I = x##I + ax##I; \ +} \ +ull_x##I = *(unsigned long long *)&ux##I; \ +hx##I = HI(&ull_ax##I); \ +yd##I = DONE / ux##I; + +void +__vpow(int n, double * restrict px, int stridex, double * restrict py, + int stridey, double * restrict pz, int stridez) +{ + double *py0 = 0, *py1 = 0, *py2; + double *pz0 = 0, *pz1 = 0, *pz2; + double y0, yd0 = 0.0L, u0, s0, s_l0, m_h0; + double y1, yd1 = 0.0L, u1, s1, s_l1, m_h1; + double y2, yd2, u2, s2, s_l2, m_h2; + double ax0 = 0.0L, x0 = 0.0L, s_h0, ux0; + double ax1 = 0.0L, x1 = 0.0L, s_h1, ux1; + double ax2, x2, s_h2, ux2; + int eflag0, gflag0, ind0, i0; + int eflag1, gflag1, ind1, i1; + int eflag2, gflag2, ind2, i2; + int hx0 = 0, yisint0 = 0, exp0 = 0; + int hx1 = 0, yisint1 = 0, exp1 = 
0; + int hx2, yisint2, exp2; + int exp, i = 0; + unsigned hx, lx, sx, hy, ly, sy; + unsigned long long ull_y0, ull_x0, ull_x1, ull_x2, ull_ax0, ull_ax1, ull_ax2; + unsigned long long LDONE = ((unsigned long long*)LCONST)[1]; /* 1.0 */ + unsigned long long LMMANT = ((unsigned long long*)LCONST)[4]; /* 0x000fffffffffffff */ + unsigned long long LMROUND = ((unsigned long long*)LCONST)[5]; /* 0x0000080000000000 */ + unsigned long long LMHI20 = ((unsigned long long*)LCONST)[6]; /* 0xfffff00000000000 */ + double DONE = ((double*)LCONST)[1]; /* 1.0 */ + double DZERO = ((double*)LCONST)[7]; /* 0.0 */ + double KA5 = ((double*)LCONST)[8]; /* 5.77078604860893737986e-01*256 */ + double KA3 = ((double*)LCONST)[9]; /* 9.61796693925765549423e-01*256 */ + double KA1_LO = ((double*)LCONST)[10]; /* 1.41052154268147309568e-05*256 */ + double KA1_HI = ((double*)LCONST)[11]; /* 2.8853759765625e+00*256 */ + double KA1 = ((double*)LCONST)[12]; /* 2.885390081777926774e+00*256 */ + double HTHRESH = ((double*)LCONST)[13]; /* 262144.0 */ + double LTHRESH = ((double*)LCONST)[14]; /* -275200.0 */ + double KB5 = ((double*)LCONST)[15]; /* 1.21195555854068860923e-15 */ + double KB4 = ((double*)LCONST)[16]; /* 2.23939573811855104311e-12 */ + double KB3 = ((double*)LCONST)[17]; /* 3.30830268126604677436e-09 */ + double KB2 = ((double*)LCONST)[18]; /* 3.66556559691003767877e-06 */ + double KB1 = ((double*)LCONST)[19]; /* 2.70760617406228636578e-03 */ + + if (stridex == 0) + { + unsigned hx = HI(px); + unsigned lx = LO(px); + + /* if x is a positive normal number not equal to one, + call __vpowx */ + if (hx >= 0x00100000 && hx < 0x7ff00000 && + (hx != 0x3ff00000 || lx != 0)) + { + __vpowx(n, px, py, stridey, pz, stridez); + return; + } + } + + do + { + /* perform si + ydi = 256*log2(xi)*yi */ +start0: + PREP(0) + px += stridex; + py += stridey; + pz += stridez; + i = 1; + if (--n <= 0) + break; + +start1: + PREP(1) + px += stridex; + py += stridey; + pz += stridez; + i = 2; + if (--n <= 0) + break; + 
+start2: + PREP(2) + + u0 = x0 - ax0; + u1 = x1 - ax1; + u2 = x2 - ax2; + + s0 = u0 * yd0; + LO(&ux0) = 0; + s1 = u1 * yd1; + LO(&ux1) = 0; + s2 = u2 * yd2; + LO(&ux2) = 0; + + y0 = s0 * s0; + s_h0 = s0; + LO(&s_h0) = 0; + y1 = s1 * s1; + s_h1 = s1; + LO(&s_h1) = 0; + y2 = s2 * s2; + s_h2 = s2; + LO(&s_h2) = 0; + + s0 = (KA5 * y0 + KA3) * y0 * s0; + s1 = (KA5 * y1 + KA3) * y1 * s1; + s2 = (KA5 * y2 + KA3) * y2 * s2; + + s_l0 = (x0 - (ux0 - ax0)); + s_l1 = (x1 - (ux1 - ax1)); + s_l2 = (x2 - (ux2 - ax2)); + + s_l0 = u0 - s_h0 * ux0 - s_h0 * s_l0; + s_l1 = u1 - s_h1 * ux1 - s_h1 * s_l1; + s_l2 = u2 - s_h2 * ux2 - s_h2 * s_l2; + + s_l0 = KA1 * yd0 * s_l0; + i0 = (hx0 >> 8) & 0xff0; + exp0 += (hx0 >> 20); + + s_l1 = KA1 * yd1 * s_l1; + i1 = (hx1 >> 8) & 0xff0; + exp1 += (hx1 >> 20); + + s_l2 = KA1 * yd2 * s_l2; + i2 = (hx2 >> 8) & 0xff0; + exp2 += (hx2 >> 20); + + yd0 = KA1_HI * s_h0; + yd1 = KA1_HI * s_h1; + yd2 = KA1_HI * s_h2; + + y0 = *(double *)((char*)__TBL_log2 + i0); + y1 = *(double *)((char*)__TBL_log2 + i1); + y2 = *(double *)((char*)__TBL_log2 + i2); + + y0 += (double)(exp0 << 8); + y1 += (double)(exp1 << 8); + y2 += (double)(exp2 << 8); + + m_h0 = y0 + yd0; + m_h1 = y1 + yd1; + m_h2 = y2 + yd2; + + y0 = s0 - ((m_h0 - y0 - yd0) - s_l0); + y1 = s1 - ((m_h1 - y1 - yd1) - s_l1); + y2 = s2 - ((m_h2 - y2 - yd2) - s_l2); + + y0 += *(double *)((char*)__TBL_log2 + i0 + 8) + KA1_LO * s_h0; + y1 += *(double *)((char*)__TBL_log2 + i1 + 8) + KA1_LO * s_h1; + y2 += *(double *)((char*)__TBL_log2 + i2 + 8) + KA1_LO * s_h2; + + s_h0 = y0 + m_h0; + s_h1 = y1 + m_h1; + s_h2 = y2 + m_h2; + + LO(&s_h0) = 0; + LO(&s_h1) = 0; + LO(&s_h2) = 0; + + yd0 = *py0; + yd1 = *py1; + yd2 = *py2; + s0 = yd0; + s1 = yd1; + s2 = yd2; + LO(&s0) = 0; + LO(&s1) = 0; + LO(&s2) = 0; + + y0 = y0 - (s_h0 - m_h0); + y1 = y1 - (s_h1 - m_h1); + y2 = y2 - (s_h2 - m_h2); + + yd0 = (yd0 - s0) * s_h0 + yd0 * y0; + yd1 = (yd1 - s1) * s_h1 + yd1 * y1; + yd2 = (yd2 - s2) * s_h2 + yd2 * y2; + + s0 = s_h0 * s0; 
+ s1 = s_h1 * s1; + s2 = s_h2 * s2; + + /* perform 2 ** ((si+ydi)/256) */ + if (s0 > HTHRESH) + { + s0 = HTHRESH; + yd0 = DZERO; + } + if (s1 > HTHRESH) + { + s1 = HTHRESH; + yd1 = DZERO; + } + if (s2 > HTHRESH) + { + s2 = HTHRESH; + yd2 = DZERO; + } + + if (s0 < LTHRESH) + { + s0 = LTHRESH; + yd0 = DZERO; + } + ind0 = (int) (s0 + yd0); + if (s1 < LTHRESH) + { + s1 = LTHRESH; + yd1 = DZERO; + } + ind1 = (int) (s1 + yd1); + if (s2 < LTHRESH) + { + s2 = LTHRESH; + yd2 = DZERO; + } + ind2 = (int) (s2 + yd2); + + i0 = (ind0 & 0xff) << 4; + u0 = (double) ind0; + ind0 >>= 8; + + i1 = (ind1 & 0xff) << 4; + u1 = (double)ind1; + ind1 >>= 8; + + i2 = (ind2 & 0xff) << 4; + u2 = (double) ind2; + ind2 >>= 8; + + y0 = s0 - u0 + yd0; + y1 = s1 - u1 + yd1; + y2 = s2 - u2 + yd2; + + u0 = *(double*)((char*)__TBL_exp2 + i0); + y0 = ((((KB5 * y0 + KB4) * y0 + KB3) * y0 + KB2) * y0 + KB1) * y0; + u1 = *(double*)((char*)__TBL_exp2 + i1); + y1 = ((((KB5 * y1 + KB4) * y1 + KB3) * y1 + KB2) * y1 + KB1) * y1; + u2 = *(double*)((char*)__TBL_exp2 + i2); + y2 = ((((KB5 * y2 + KB4) * y2 + KB3) * y2 + KB2) * y2 + KB1) * y2; + + eflag0 = (ind0 + 1021) >> 31; + gflag0 = (1022 - ind0) >> 31; + eflag1 = (ind1 + 1021) >> 31; + gflag1 = (1022 - ind1) >> 31; + eflag2 = (ind2 + 1021) >> 31; + gflag2 = (1022 - ind2) >> 31; + + ind0 = (yisint0 << 11) + ind0 + (54 & eflag0) - (52 & gflag0); + ind0 <<= 20; + ind1 = (yisint1 << 11) + ind1 + (54 & eflag1) - (52 & gflag1); + ind1 <<= 20; + ind2 = (yisint2 << 11) + ind2 + (54 & eflag2) - (52 & gflag2); + ind2 <<= 20; + + u0 = *(double*)((char*)__TBL_exp2 + i0 + 8) + u0 * y0 + u0; + u1 = *(double*)((char*)__TBL_exp2 + i1 + 8) + u1 * y1 + u1; + u2 = *(double*)((char*)__TBL_exp2 + i2 + 8) + u2 * y2 + u2; + + ull_x0 = *(unsigned long long*)&u0; + HI(&ull_x0) += ind0; + u0 = *(double*)&ull_x0; + + ull_x1 = *(unsigned long long*)&u1; + HI(&ull_x1) += ind1; + u1 = *(double*)&ull_x1; + + ull_x2 = *(unsigned long long*)&u2; + HI(&ull_x2) += ind2; + u2 = 
*(double*)&ull_x2; + + *pz0 = u0 * SCALE_ARR[eflag0 - gflag0]; + *pz1 = u1 * SCALE_ARR[eflag1 - gflag1]; + *pz2 = u2 * SCALE_ARR[eflag2 - gflag2]; + + px += stridex; + py += stridey; + pz += stridez; + i = 0; + + } while (--n > 0); + + if (i > 0) + { + /* perform si + ydi = 256*log2(xi)*yi */ + u0 = x0 - ax0; + s0 = u0 * yd0; + LO(&ux0) = 0; + y0 = s0 * s0; + s_h0 = s0; + LO(&s_h0) = 0; + s0 = (KA5 * y0 + KA3) * y0 * s0; + s_l0 = (x0 - (ux0 - ax0)); + s_l0 = u0 - s_h0 * ux0 - s_h0 * s_l0; + s_l0 = KA1 * yd0 * s_l0; + i0 = (hx0 >> 8) & 0xff0; + exp0 += (hx0 >> 20); + yd0 = KA1_HI * s_h0; + y0 = *(double *)((char*)__TBL_log2 + i0); + y0 += (double)(exp0 << 8); + m_h0 = y0 + yd0; + y0 = s0 - ((m_h0 - y0 - yd0) - s_l0); + y0 += *(double *)((char*)__TBL_log2 + i0 + 8) + KA1_LO * s_h0; + s_h0 = y0 + m_h0; + LO(&s_h0) = 0; + y0 = y0 - (s_h0 - m_h0); + s0 = yd0 = *py0; + LO(&s0) = 0; + yd0 = (yd0 - s0) * s_h0 + yd0 * y0; + s0 = s_h0 * s0; + + /* perform 2 ** ((si+ydi)/256) */ + if (s0 > HTHRESH) + { + s0 = HTHRESH; + yd0 = DZERO; + } + if (s0 < LTHRESH) + { + s0 = LTHRESH; + yd0 = DZERO; + } + ind0 = (int) (s0 + yd0); + i0 = (ind0 & 0xff) << 4; + u0 = (double) ind0; + ind0 >>= 8; + y0 = s0 - u0 + yd0; + u0 = *(double*)((char*)__TBL_exp2 + i0); + y0 = ((((KB5 * y0 + KB4) * y0 + KB3) * y0 + KB2) * y0 + KB1) * y0; + eflag0 = (ind0 + 1021) >> 31; + gflag0 = (1022 - ind0) >> 31; + u0 = *(double*)((char*)__TBL_exp2 + i0 + 8) + u0 * y0 + u0; + ind0 = (yisint0 << 11) + ind0 + (54 & eflag0) - (52 & gflag0); + ind0 <<= 20; + ull_x0 = *(unsigned long long*)&u0; + HI(&ull_x0) += ind0; + u0 = *(double*)&ull_x0; + + *pz0 = u0 * SCALE_ARR[eflag0 - gflag0]; + + if (i > 1) + { + /* perform si + ydi = 256*log2(xi)*yi */ + u0 = x1 - ax1; + s0 = u0 * yd1; + LO(&ux1) = 0; + y0 = s0 * s0; + s_h0 = s0; + LO(&s_h0) = 0; + s0 = (KA5 * y0 + KA3) * y0 * s0; + s_l0 = (x1 - (ux1 - ax1)); + s_l0 = u0 - s_h0 * ux1 - s_h0 * s_l0; + s_l0 = KA1 * yd1 * s_l0; + i0 = (hx1 >> 8) & 0xff0; + exp1 += (hx1 >> 
20); + yd0 = KA1_HI * s_h0; + y0 = *(double *)((char*)__TBL_log2 + i0); + y0 += (double)(exp1 << 8); + m_h0 = y0 + yd0; + y0 = s0 - ((m_h0 - y0 - yd0) - s_l0); + y0 += *(double *)((char*)__TBL_log2 + i0 + 8) + KA1_LO * s_h0; + s_h0 = y0 + m_h0; + LO(&s_h0) = 0; + y0 = y0 - (s_h0 - m_h0); + s0 = yd0 = *py1; + LO(&s0) = 0; + yd0 = (yd0 - s0) * s_h0 + yd0 * y0; + s0 = s_h0 * s0; + /* perform 2 ** ((si+ydi)/256) */ + if (s0 > HTHRESH) + { + s0 = HTHRESH; + yd0 = DZERO; + } + if (s0 < LTHRESH) + { + s0 = LTHRESH; + yd0 = DZERO; + } + ind0 = (int) (s0 + yd0); + i0 = (ind0 & 0xff) << 4; + u0 = (double) ind0; + ind0 >>= 8; + y0 = s0 - u0 + yd0; + u0 = *(double*)((char*)__TBL_exp2 + i0); + y0 = ((((KB5 * y0 + KB4) * y0 + KB3) * y0 + KB2) * y0 + KB1) * y0; + eflag0 = (ind0 + 1021) >> 31; + gflag0 = (1022 - ind0) >> 31; + u0 = *(double*)((char*)__TBL_exp2 + i0 + 8) + u0 * y0 + u0; + ind0 = (yisint1 << 11) + ind0 + (54 & eflag0) - (52 & gflag0); + ind0 <<= 20; + ull_x0 = *(unsigned long long*)&u0; + HI(&ull_x0) += ind0; + u0 = *(double*)&ull_x0; + *pz1 = u0 * SCALE_ARR[eflag0 - gflag0]; + } + } +} + +#undef RET_SC +#define RET_SC(I) \ + py += stridey; \ + pz += stridez; \ + if (--n <= 0) \ + break; \ + goto start##I; + +#define PREP_X(I) \ +hy = HI(py); \ +ly = LO(py); \ +sy = hy >> 31; \ +hy &= 0x7fffffff; \ +py##I = py; \ + \ +if (hy < 0x3bf00000) /* |Y| < 2^(-64) */ \ + RETURN (I, DONE) \ +pz##I = pz; \ +if (hy >= 0x43e00000) /* |Y|>2^63,Inf,Nan */ \ +{ \ + if (hy >= 0x7ff00000) /* |Y|=Inf,Nan */ \ + { \ + if (hy == 0x7ff00000 && ly == 0) /* |Y|=Inf */ \ + { \ + if ((hx < 0x3ff00000) != sy) \ + *pz = DZERO; \ + else \ + { \ + HI(pz) = hy; \ + LO(pz) = ly; \ + } \ + } \ + else \ + *pz = *px + *py; /* |Y|=Nan */ \ + } \ + else /* |Y|>2^63 */ \ + { \ + y0 = ((hx < 0x3ff00000) != sy) ? 
_TINY : _HUGE; \ + *pz = y0 * y0; \ + } \ + RET_SC(I) \ +} \ + +#define LMMANT ((unsigned long long*)LCONST)[4] /* 0x000fffffffffffff */ +#define LMROUND ((unsigned long long*)LCONST)[5] /* 0x0000080000000000 */ +#define LMHI20 ((unsigned long long*)LCONST)[6] /* 0xfffff00000000000 */ +#define MMANT ((double*)LCONST)[4] /* 0x000fffffffffffff */ +#define MROUND ((double*)LCONST)[5] /* 0x0000080000000000 */ +#define MHI20 ((double*)LCONST)[6] /* 0xfffff00000000000 */ +#define KA5 ((double*)LCONST)[8] /* 5.77078604860893737986e-01*256 */ +#define KA3 ((double*)LCONST)[9] /* 9.61796693925765549423e-01*256 */ +#define KA1_LO ((double*)LCONST)[10] /* 1.41052154268147309568e-05*256 */ +#define KA1_HI ((double*)LCONST)[11] /* 2.8853759765625e+00*256 */ +#define KA1 ((double*)LCONST)[12] /* 2.885390081777926774e+00*256 */ + + +static void +__vpowx(int n, double * restrict px, double * restrict py, + int stridey, double * restrict pz, int stridez) +{ + double *py0, *py1 = 0, *py2; + double *pz0, *pz1 = 0, *pz2; + double ux0, y0, yd0, u0, s0; + double y1, yd1, u1, s1; + double y2, yd2, u2, s2; + double yr, s_h0, s_l0, m_h0, x0, ax0; + unsigned long long ull_y0, ull_x0, ull_x1, ull_x2, ull_ax0; + int eflag0, gflag0, ind0, i0, exp0; + int eflag1, gflag1, ind1, i1; + int eflag2, gflag2, ind2, i2; + int i = 0; + unsigned hx, hx0, hy, ly, sy; + double DONE = ((double*)LCONST)[1]; /* 1.0 */ + unsigned long long LDONE = ((unsigned long long*)LCONST)[1]; /* 1.0 */ + double DZERO = ((double*)LCONST)[7]; /* 0.0 */ + double HTHRESH = ((double*)LCONST)[13]; /* 262144.0 */ + double LTHRESH = ((double*)LCONST)[14]; /* -275200.0 */ + double KB5 = ((double*)LCONST)[15]; /* 1.21195555854068860923e-15 */ + double KB4 = ((double*)LCONST)[16]; /* 2.23939573811855104311e-12 */ + double KB3 = ((double*)LCONST)[17]; /* 3.30830268126604677436e-09 */ + double KB2 = ((double*)LCONST)[18]; /* 3.66556559691003767877e-06 */ + double KB1 = ((double*)LCONST)[19]; /* 2.70760617406228636578e-03 */ + + /* 
perform s_h + yr = 256*log2(x) */ + ull_y0 = *(unsigned long long*)px; + hx = HI(px); + ull_x0 = (ull_y0 & LMMANT) | LDONE; + x0 = *(double*)&ull_x0; + exp0 = (hx >> 20) - 2046; + ull_ax0 = ull_x0 + (LMROUND & LMHI20); + ax0 = *(double*)&ull_ax0; + hx0 = HI(&ax0); + ux0 = x0 + ax0; + yd0 = DONE / ux0; + u0 = x0 - ax0; + s0 = u0 * yd0; + LO(&ux0) = 0; + y0 = s0 * s0; + s_h0 = s0; + LO(&s_h0) = 0; + s0 = (KA5 * y0 + KA3) * y0 * s0; + s_l0 = (x0 - (ux0 - ax0)); + s_l0 = u0 - s_h0 * ux0 - s_h0 * s_l0; + s_l0 = KA1 * yd0 * s_l0; + i0 = (hx0 >> 8) & 0xff0; + exp0 += (hx0 >> 20); + yd0 = KA1_HI * s_h0; + y0 = *(double *)((char*)__TBL_log2 + i0); + y0 += (double)(exp0 << 8); + m_h0 = y0 + yd0; + y0 = s0 - ((m_h0 - y0 - yd0) - s_l0); + y0 += *(double *)((char*)__TBL_log2 + i0 + 8) + KA1_LO * s_h0; + s_h0 = y0 + m_h0; + LO(&s_h0) = 0; + yr = y0 - (s_h0 - m_h0); + + do + { + /* perform 2 ** ((s_h0+yr)*yi/256) */ +start0: + PREP_X(0) + py += stridey; + pz += stridez; + i = 1; + if (--n <= 0) + break; + +start1: + PREP_X(1) + py += stridey; + pz += stridez; + i = 2; + if (--n <= 0) + break; + +start2: + PREP_X(2) + + s0 = yd0 = *py0; + s1 = yd1 = *py1; + s2 = yd2 = *py2; + + LO(&s0) = 0; + LO(&s1) = 0; + LO(&s2) = 0; + + yd0 = (yd0 - s0) * s_h0 + yd0 * yr; + yd1 = (yd1 - s1) * s_h0 + yd1 * yr; + yd2 = (yd2 - s2) * s_h0 + yd2 * yr; + + s0 = s_h0 * s0; + s1 = s_h0 * s1; + s2 = s_h0 * s2; + + if (s0 > HTHRESH) + { + s0 = HTHRESH; + yd0 = DZERO; + } + if (s1 > HTHRESH) + { + s1 = HTHRESH; + yd1 = DZERO; + } + if (s2 > HTHRESH) + { + s2 = HTHRESH; + yd2 = DZERO; + } + + if (s0 < LTHRESH) + { + s0 = LTHRESH; + yd0 = DZERO; + } + ind0 = (int) (s0 + yd0); + if (s1 < LTHRESH) + { + s1 = LTHRESH; + yd1 = DZERO; + } + ind1 = (int) (s1 + yd1); + if (s2 < LTHRESH) + { + s2 = LTHRESH; + yd2 = DZERO; + } + ind2 = (int) (s2 + yd2); + + i0 = (ind0 & 0xff) << 4; + u0 = (double) ind0; + ind0 >>= 8; + + i1 = (ind1 & 0xff) << 4; + u1 = (double) ind1; + ind1 >>= 8; + + i2 = (ind2 & 0xff) << 4; + u2 
= (double) ind2; + ind2 >>= 8; + + y0 = s0 - u0 + yd0; + y1 = s1 - u1 + yd1; + y2 = s2 - u2 + yd2; + + u0 = *(double*)((char*)__TBL_exp2 + i0); + y0 = ((((KB5 * y0 + KB4) * y0 + KB3) * y0 + KB2) * y0 + KB1) * y0; + u1 = *(double*)((char*)__TBL_exp2 + i1); + y1 = ((((KB5 * y1 + KB4) * y1 + KB3) * y1 + KB2) * y1 + KB1) * y1; + u2 = *(double*)((char*)__TBL_exp2 + i2); + y2 = ((((KB5 * y2 + KB4) * y2 + KB3) * y2 + KB2) * y2 + KB1) * y2; + + eflag0 = (ind0 + 1021) >> 31; + gflag0 = (1022 - ind0) >> 31; + eflag1 = (ind1 + 1021) >> 31; + gflag1 = (1022 - ind1) >> 31; + eflag2 = (ind2 + 1021) >> 31; + gflag2 = (1022 - ind2) >> 31; + + u0 = *(double*)((char*)__TBL_exp2 + i0 + 8) + u0 * y0 + u0; + ind0 = ind0 + (54 & eflag0) - (52 & gflag0); + ind0 <<= 20; + ull_x0 = *(unsigned long long*)&u0; + HI(&ull_x0) += ind0; + u0 = *(double*)&ull_x0; + + u1 = *(double*)((char*)__TBL_exp2 + i1 + 8) + u1 * y1 + u1; + ind1 = ind1 + (54 & eflag1) - (52 & gflag1); + ind1 <<= 20; + ull_x1 = *(unsigned long long*)&u1; + HI(&ull_x1) += ind1; + u1 = *(double*)&ull_x1; + + u2 = *(double*)((char*)__TBL_exp2 + i2 + 8) + u2 * y2 + u2; + ind2 = ind2 + (54 & eflag2) - (52 & gflag2); + ind2 <<= 20; + ull_x2 = *(unsigned long long*)&u2; + HI(&ull_x2) += ind2; + u2 = *(double*)&ull_x2; + + *pz0 = u0 * SCALE_ARR[eflag0 - gflag0]; + *pz1 = u1 * SCALE_ARR[eflag1 - gflag1]; + *pz2 = u2 * SCALE_ARR[eflag2 - gflag2]; + + py += stridey; + pz += stridez; + i = 0; + + } while (--n > 0); + + if (i > 0) + { + /* perform 2 ** ((s_h0+yr)*yi/256) */ + s0 = y0 = *py0; + LO(&s0) = 0; + yd0 = (y0 - s0) * s_h0 + y0 * yr; + s0 = s_h0 * s0; + if (s0 > HTHRESH) + { + s0 = HTHRESH; + yd0 = DZERO; + } + if (s0 < LTHRESH) + { + s0 = LTHRESH; + yd0 = DZERO; + } + ind0 = (int) (s0 + yd0); + i0 = (ind0 & 0xff) << 4; + u0 = (double) ind0; + ind0 >>= 8; + y0 = s0 - u0 + yd0; + u0 = *(double*)((char*)__TBL_exp2 + i0); + y0 = ((((KB5 * y0 + KB4) * y0 + KB3) * y0 + KB2) * y0 + KB1) * y0; + eflag0 = (ind0 + 1021) >> 31; + gflag0 = 
(1022 - ind0) >> 31; + u0 = *(double*)((char*)__TBL_exp2 + i0 + 8) + u0 * y0 + u0; + ind0 = ind0 + (54 & eflag0) - (52 & gflag0); + ind0 <<= 20; + ull_x0 = *(unsigned long long*)&u0; + HI(&ull_x0) += ind0; + u0 = *(double*)&ull_x0; + *pz0 = u0 * SCALE_ARR[eflag0 - gflag0]; + + if (i > 1) + { + /* perform 2 ** ((s_h0+yr)*yi/256) */ + s0 = y0 = *py1; + LO(&s0) = 0; + yd0 = (y0 - s0) * s_h0 + y0 * yr; + s0 = s_h0 * s0; + if (s0 > HTHRESH) + { + s0 = HTHRESH; + yd0 = DZERO; + } + if (s0 < LTHRESH) + { + s0 = LTHRESH; + yd0 = DZERO; + } + ind0 = (int) (s0 + yd0); + i0 = (ind0 & 0xff) << 4; + u0 = (double) ind0; + ind0 >>= 8; + y0 = s0 - u0 + yd0; + u0 = *(double*)((char*)__TBL_exp2 + i0); + y0 = ((((KB5 * y0 + KB4) * y0 + KB3) * y0 + KB2) * y0 + KB1) * y0; + eflag0 = (ind0 + 1021) >> 31; + gflag0 = (1022 - ind0) >> 31; + u0 = *(double*)((char*)__TBL_exp2 + i0 + 8) + u0 * y0 + u0; + ind0 = ind0 + (54 & eflag0) - (52 & gflag0); + ind0 <<= 20; + ull_x0 = *(unsigned long long*)&u0; + HI(&ull_x0) += ind0; + u0 = *(double*)&ull_x0; + *pz1 = u0 * SCALE_ARR[eflag0 - gflag0]; + } + } +} diff --git a/usr/src/lib/libmvec/common/__vpowf.c b/usr/src/lib/libmvec/common/__vpowf.c new file mode 100644 index 0000000000..93ba70e7e9 --- /dev/null +++ b/usr/src/lib/libmvec/common/__vpowf.c @@ -0,0 +1,824 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifdef __RESTRICT +#define restrict _Restrict +#else +#define restrict +#endif + +/* float powf(float x, float y) + * + * Method : + * 1. Special cases: + * for (anything) ** 0 => 1 + * for (anything) ** NaN => QNaN + invalid + * for NaN ** (anything) => QNaN + invalid + * for +-1 ** +-Inf => QNaN + invalid + * for +-(|x| < 1) ** +Inf => +0 + * for +-(|x| < 1) ** -Inf => +Inf + * for +-(|x| > 1) ** +Inf => +Inf + * for +-(|x| > 1) ** -Inf => +0 + * for +Inf ** (negative) => +0 + * for +Inf ** (positive) => +Inf + * for -Inf ** (negative except odd integer) => +0 + * for -Inf ** (negative odd integer) => -0 + * for -Inf ** (positive except odd integer) => +Inf + * for -Inf ** (positive odd integer) => -Inf + * for (negative) ** (non-integer) => QNaN + invalid + * for +0 ** (negative) => +Inf + overflow + * for +0 ** (positive) => +0 + * for -0 ** (negative except odd integer) => +Inf + overflow + * for -0 ** (negative odd integer) => -Inf + overflow + * for -0 ** (positive except odd integer) => +0 + * for -0 ** (positive odd integer) => -0 + * 2. Computes x**y from: + * x**y = 2**(y*log2(x)) = 2**(w/256), where w = 256*log2(|x|)*y. + * 3. Computes w = 256 * log2(|x|) * y from + * |x| = m * 2**n => log2(|x|) = n + log2(m). + * Let m = m0 + dm, where m0 = 1 + k / 128, + * k = [0, 128], + * dm = [-1/256, 1/256]. + * Then 256*log2(m) = 256*log2(m0 + dm) = 256*log2(m0) + 256*log2(1+z), + * where z = dm*(1/m0), z = [-1/258, 1/256]. + * Then + * 1/m0 is looked up in a table of 1, 1/(1+1/128), ..., 1/(1+128/128). 
+ * 256*log2(m0) is looked up in a table of 256*log2(1), 256*log2(1+1/128), + * ..., 256*log2(1+128/128). + * 256*log2(1+z) is computed using approximation: + * 256*log2(1+z) = (((a3*z + a2)*z + a1)*z + a0)*z. + * 4. For w >= 32768 + * then for (negative) ** (odd integer) => -Inf + overflow + * else => +Inf + overflow + * For w <= -38400 + * then for (negative) ** (odd integer) => -0 + underflow + * else => +0 + underflow + * 5. Computes 2 ** (w/256) from: + * 2 ** (w/256) = 2**a * 2**(k/256) * 2**(r/256) + * Where: + * a = int ( w ) >> 8; + * k = int ( w ) & 0xFF; + * r = frac ( w ). + * Note that: + * k = 0, 1, ..., 255; + * r = (-1, 1). + * Then: + * 2**(k/256) is looked up in a table of 2**0, 2**(1/256), ... + * 2**(r/256) is computed using approximation: + * 2**(r/256) = a0 + a1 * r + a2 * r**2 + * Multiplication by 2**a is done by adding "a" to + * the biased exponent. + * 6. For (negative) ** (odd integer) => -(2**(w/256)) + * otherwise => 2**(w/256) + * + * Accuracy: + * Max. relative approximation error < 2**(-37.35) for 256*log2(1+z). + * Max. relative approximation error < 2**(-29.18) for 2**(r/256). + * All calculations are done in double precision. + * Maximum error observed: less than 0.528 ulp after 700.000.000 + * results. 
+ */ + +static void __vpowfx(int n, float * restrict px, float * restrict py, + int stridey, float * restrict pz, int stridez); + +static void __vpowf_n(int n, float * restrict px, int stridex, float * restrict py, + int stridey, float * restrict pz, int stridez); + +static void __vpowfx_n(int n, double yy, float * restrict py, + int stridey, float * restrict pz, int stridez); + +#pragma no_inline(__vpowfx) +#pragma no_inline(__vpowf_n) +#pragma no_inline(__vpowfx_n) + +static const double __TBL_exp2f[] = { + /* 2^(i/256), i = [0, 255] */ +1.000000000000000000e+00, 1.002711275050202522e+00, 1.005429901112802726e+00, +1.008155898118417548e+00, 1.010889286051700475e+00, 1.013630084951489430e+00, +1.016378314910953096e+00, 1.019133996077737914e+00, 1.021897148654116627e+00, +1.024667792897135721e+00, 1.027445949118763746e+00, 1.030231637686040980e+00, +1.033024879021228415e+00, 1.035825693601957198e+00, 1.038634101961378731e+00, +1.041450124688316103e+00, 1.044273782427413755e+00, 1.047105095879289793e+00, +1.049944085800687210e+00, 1.052790773004626423e+00, 1.055645178360557157e+00, +1.058507322794512762e+00, 1.061377227289262093e+00, 1.064254912884464499e+00, +1.067140400676823697e+00, 1.070033711820241873e+00, 1.072934867525975555e+00, +1.075843889062791048e+00, 1.078760797757119860e+00, 1.081685614993215250e+00, +1.084618362213309206e+00, 1.087559060917769660e+00, 1.090507732665257690e+00, +1.093464399072885840e+00, 1.096429081816376883e+00, 1.099401802630221914e+00, +1.102382583307840891e+00, 1.105371445701741173e+00, 1.108368411723678726e+00, +1.111373503344817548e+00, 1.114386742595892432e+00, 1.117408151567369279e+00, +1.120437752409606746e+00, 1.123475567333019898e+00, 1.126521618608241848e+00, +1.129575928566288079e+00, 1.132638519598719196e+00, 1.135709414157805464e+00, +1.138788634756691565e+00, 1.141876203969561576e+00, 1.144972144431804173e+00, +1.148076478840178938e+00, 1.151189229952982673e+00, 1.154310420590215935e+00, +1.157440073633751121e+00, 
1.160578212027498779e+00, 1.163724858777577476e+00, +1.166880036952481658e+00, 1.170043769683250190e+00, 1.173216080163637320e+00, +1.176396991650281221e+00, 1.179586527462875845e+00, 1.182784710984341014e+00, +1.185991565660993841e+00, 1.189207115002721027e+00, 1.192431382583151178e+00, +1.195664392039827328e+00, 1.198906167074380580e+00, 1.202156731452703076e+00, +1.205416109005123859e+00, 1.208684323626581625e+00, 1.211961399276801243e+00, +1.215247359980468955e+00, 1.218542229827408452e+00, 1.221846032972757623e+00, +1.225158793637145527e+00, 1.228480536106870025e+00, 1.231811284734075862e+00, +1.235151063936933413e+00, 1.238499898199816540e+00, 1.241857812073484002e+00, +1.245224830175257980e+00, 1.248600977189204819e+00, 1.251986277866316222e+00, +1.255380757024691096e+00, 1.258784439549716527e+00, 1.262197350394250739e+00, +1.265619514578806282e+00, 1.269050957191733220e+00, 1.272491703389402762e+00, +1.275941778396392001e+00, 1.279401207505669325e+00, 1.282870016078778264e+00, +1.286348229546025568e+00, 1.289835873406665723e+00, 1.293332973229089466e+00, +1.296839554651009641e+00, 1.300355643379650594e+00, 1.303881265191935812e+00, +1.307416445934677318e+00, 1.310961211524764414e+00, 1.314515587949354636e+00, +1.318079601266064049e+00, 1.321653277603157539e+00, 1.325236643159741323e+00, +1.328829724205954355e+00, 1.332432547083161500e+00, 1.336045138204145832e+00, +1.339667524053302916e+00, 1.343299731186835322e+00, 1.346941786232945804e+00, +1.350593715892034474e+00, 1.354255546936892651e+00, 1.357927306212901142e+00, +1.361609020638224754e+00, 1.365300717204011915e+00, 1.369002422974590516e+00, +1.372714165087668414e+00, 1.376435970754530169e+00, 1.380167867260237990e+00, +1.383909881963832023e+00, 1.387662042298529075e+00, 1.391424375771926236e+00, +1.395196909966200272e+00, 1.398979672538311236e+00, 1.402772691220204759e+00, +1.406575993819015435e+00, 1.410389608217270663e+00, 1.414213562373095145e+00, +1.418047884320415175e+00, 
1.421892602169165576e+00, 1.425747744105494208e+00, +1.429613338391970023e+00, 1.433489413367788901e+00, 1.437375997448982368e+00, +1.441273119128625657e+00, 1.445180806977046650e+00, 1.449099089642035043e+00, +1.453027995849052623e+00, 1.456967554401443765e+00, 1.460917794180647045e+00, +1.464878744146405731e+00, 1.468850433336981842e+00, 1.472832890869367528e+00, +1.476826145939499346e+00, 1.480830227822471867e+00, 1.484845165872752393e+00, +1.488870989524397004e+00, 1.492907728291264835e+00, 1.496955411767235455e+00, +1.501014069626425584e+00, 1.505083731623406473e+00, 1.509164427593422841e+00, +1.513256187452609813e+00, 1.517359041198214742e+00, 1.521473018908814590e+00, +1.525598150744538417e+00, 1.529734466947286986e+00, 1.533881997840955913e+00, +1.538040773831656827e+00, 1.542210825407940744e+00, 1.546392183141021448e+00, +1.550584877684999974e+00, 1.554788939777088652e+00, 1.559004400237836929e+00, +1.563231289971357629e+00, 1.567469639965552997e+00, 1.571719481292341403e+00, +1.575980845107886497e+00, 1.580253762652824578e+00, 1.584538265252493749e+00, +1.588834384317163950e+00, 1.593142151342266999e+00, 1.597461597908627073e+00, +1.601792755682693414e+00, 1.606135656416771029e+00, 1.610490331949254283e+00, +1.614856814204860713e+00, 1.619235135194863728e+00, 1.623625327017328868e+00, +1.628027421857347834e+00, 1.632441451987274972e+00, 1.636867449766964411e+00, +1.641305447644006321e+00, 1.645755478153964946e+00, 1.650217573920617742e+00, +1.654691767656194301e+00, 1.659178092161616158e+00, 1.663676580326736376e+00, +1.668187265130582464e+00, 1.672710179641596628e+00, 1.677245357017878469e+00, +1.681792830507429004e+00, 1.686352633448393368e+00, 1.690924799269305279e+00, +1.695509361489332623e+00, 1.700106353718523478e+00, 1.704715809658051251e+00, +1.709337763100462926e+00, 1.713972247929925974e+00, 1.718619298122477934e+00, +1.723278947746273992e+00, 1.727951230961837670e+00, 1.732636182022311067e+00, +1.737333835273706217e+00, 
1.742044225155156445e+00, 1.746767386199169048e+00, +1.751503353031878207e+00, 1.756252160373299454e+00, 1.761013843037583904e+00, +1.765788435933272726e+00, 1.770575974063554714e+00, 1.775376492526521188e+00, +1.780190026515424462e+00, 1.785016611318934965e+00, 1.789856282321401038e+00, +1.794709075003107168e+00, 1.799575024940535117e+00, 1.804454167806623932e+00, +1.809346539371031959e+00, 1.814252175500398856e+00, 1.819171112158608494e+00, +1.824103385407053413e+00, 1.829049031404897274e+00, 1.834008086409342431e+00, +1.838980586775893711e+00, 1.843966568958625984e+00, 1.848966069510450838e+00, +1.853979125083385471e+00, 1.859005772428820480e+00, 1.864046048397788979e+00, +1.869099989941238604e+00, 1.874167634110299963e+00, 1.879249018056560194e+00, +1.884344179032334532e+00, 1.889453154390939194e+00, 1.894575981586965607e+00, +1.899712698176555303e+00, 1.904863341817674138e+00, 1.910027950270389852e+00, +1.915206561397147400e+00, 1.920399213163047403e+00, 1.925605943636125028e+00, +1.930826790987627106e+00, 1.936061793492294347e+00, 1.941310989528640452e+00, +1.946574417579233218e+00, 1.951852116230978318e+00, 1.957144124175400179e+00, +1.962450480208927317e+00, 1.967771223233175881e+00, 1.973106392255234320e+00, +1.978456026387950928e+00, 1.983820164850219392e+00, 1.989198846967266343e+00, +1.994592112170940235e+00 +}; + +static const double __TBL_log2f[] = { + /* __TBL_log2f[2*i] = 256*log2(1+i/128), i = [0, 128] */ + /* __TBL_log2f[2*i+1] = 2**(-23)/(1+i/128), i = [0, 128] */ +0.000000000000000000e+00, 1.192092895507812500e-07, 2.874177388353054585e+00, +1.182851865310077503e-07, 5.726160135284354524e+00, 1.173753004807692373e-07, +8.556288393587271557e+00, 1.164793058206106825e-07, 1.136489455576407970e+01, +1.155968868371212153e-07, 1.415230348830453799e+01, 1.147277373120300688e-07, +1.691883275718974389e+01, 1.138715601679104456e-07, 1.966479284501270897e+01, +1.130280671296296339e-07, 2.239048736008688678e+01, 1.121969784007352926e-07, 
+2.509621323789484038e+01, 1.113780223540145949e-07, 2.778226093521127638e+01, +1.105709352355072477e-07, 3.044891461721790193e+01, 1.097754608812949697e-07, +3.309645233791141550e+01, 1.089913504464285680e-07, 3.572514621409114710e+01, +1.082183621453900683e-07, 3.833526259319860685e+01, 1.074562610035211292e-07, +4.092706221526768928e+01, 1.067048186188811188e-07, 4.350080036923196758e+01, +1.059638129340277719e-07, 4.605672704382322280e+01, 1.052330280172413778e-07, +4.859508707328441091e+01, 1.045122538527397202e-07, 5.111612027810928538e+01, +1.038012861394557784e-07, 5.362006160101114460e+01, 1.030999260979729787e-07, +5.610714123831336053e+01, 1.024079802852348971e-07, 5.857758476694550609e+01, +1.017252604166666732e-07, 6.103161326722020164e+01, 1.010515831953642383e-07, +6.346944344155788542e+01, 1.003867701480263102e-07, 6.589128772931884725e+01, +9.973064746732026447e-08, 6.829735441789475203e+01, 9.908304586038961692e-08, +7.068784775020480993e+01, 9.844380040322580637e-08, 7.306296802873558249e+01, +9.781275040064102225e-08, 7.542291171625650748e+01, 9.718973925159236158e-08, +7.776787153333835079e+01, 9.657461431962025166e-08, 8.009803655279496581e+01, +9.596722680817610579e-08, 8.241359229116476115e+01, 9.536743164062500529e-08, +8.471472079734193983e+01, 9.477508734472049048e-08, 8.700160073846393516e+01, +9.419005594135801946e-08, 8.927440748315585495e+01, 9.361220283742331508e-08, +9.153331318222942059e+01, 9.304139672256097884e-08, 9.377848684692884262e+01, +9.247750946969696962e-08, 9.601009442481273481e+01, 9.192041603915663129e-08, +9.822829887335737453e+01, 9.136999438622755046e-08, 1.004332602313626381e+02, +9.082612537202380448e-08, 1.026251356882391832e+02, 9.028869267751479078e-08, +1.048040796512516550e+02, 8.975758272058823405e-08, 1.069702438107898530e+02, +8.923268457602338686e-08, 1.091237772037370775e+02, 8.871388989825581272e-08, +1.112648262750015107e+02, 8.820109284682080489e-08, 1.133935349372744383e+02, 
+8.769419001436781487e-08, 1.155100446290761766e+02, 8.719308035714285707e-08, +1.176144943711480977e+02, 8.669766512784091150e-08, 1.197070208212473403e+02, +8.620784781073446298e-08, 1.217877583273978246e+02, 8.572353405898876167e-08, +1.238568389796496376e+02, 8.524463163407821503e-08, 1.259143926603967287e+02, +8.477105034722222546e-08, 1.279605470933005762e+02, 8.430270200276242743e-08, +1.299954278908662388e+02, 8.383950034340659995e-08, 1.320191586007148601e+02, +8.338136099726775949e-08, 1.340318607505952855e+02, 8.292820142663043248e-08, +1.360336538921758915e+02, 8.247994087837838296e-08, 1.380246556436560468e+02, +8.203650033602151192e-08, 1.400049817312349774e+02, 8.159780247326202734e-08, +1.419747460294751704e+02, 8.116377160904255122e-08, 1.439340606005945915e+02, +8.073433366402115954e-08, 1.458830357327226466e+02, 8.030941611842105082e-08, +1.478217799771516638e+02, 7.988894797120419333e-08, 1.497504001846159838e+02, +7.947285970052082892e-08, 1.516690015406285852e+02, 7.906108322538860398e-08, +1.535776875999046922e+02, 7.865355186855669953e-08, 1.554765603199003294e+02, +7.825020032051282044e-08, 1.573657200934933087e+02, 7.785096460459183052e-08, +1.592452657808323124e+02, 7.745578204314720208e-08, 1.611152947403800511e+02, +7.706459122474748130e-08, 1.629759028591741128e+02, 7.667733197236181018e-08, +1.648271845823295223e+02, 7.629394531250000159e-08, 1.666692329418057170e+02, +7.591437344527363039e-08, 1.685021395844594565e+02, 7.553855971534653557e-08, +1.703259947994051231e+02, 7.516644858374384321e-08, 1.721408875447028777e+02, +7.479798560049019504e-08, 1.739469054733941960e+02, 7.443311737804878042e-08, +1.757441349589039135e+02, 7.407179156553397416e-08, 1.775326611198272531e+02, +7.371395682367149407e-08, 1.793125678441195987e+02, 7.335956280048077330e-08, +1.810839378127059831e+02, 7.300856010765549954e-08, 1.828468525225273993e+02, +7.266090029761905417e-08, 1.846013923090393973e+02, 7.231653584123223301e-08, 
+1.863476363681789962e+02, 7.197542010613207272e-08, 1.880856627778145764e+02, +7.163750733568075279e-08, 1.898155485186936176e+02, 7.130275262850466758e-08, +1.915373694949018386e+02, 7.097111191860465018e-08, 1.932512005538479514e+02, +7.064254195601851460e-08, 1.949571155057867031e+02, 7.031700028801843312e-08, +1.966551871428931406e+02, 6.999444524082569196e-08, 1.983454872579004018e+02, +6.967483590182648015e-08, 2.000280866623128588e+02, 6.935813210227272390e-08, +2.017030552042064926e+02, 6.904429440045249486e-08, 2.033704617856271284e+02, +6.873328406531531472e-08, 2.050303743795980154e+02, 6.842506306053811558e-08, +2.066828600467466401e+02, 6.811959402901785336e-08, 2.083279849515614899e+02, +6.781684027777777772e-08, 2.099658143782880586e+02, 6.751676576327433535e-08, +2.115964127464742432e+02, 6.721933507709251725e-08, 2.132198436261738550e+02, +6.692451343201754014e-08, 2.148361697528176535e+02, 6.663226664847161225e-08, +2.164454530417600608e+02, 6.634256114130434863e-08, 2.180477546025107358e+02, +6.605536390692640687e-08, 2.196431347526584545e+02, 6.577064251077586116e-08, +2.212316530314957390e+02, 6.548836507510729591e-08, 2.228133682133515663e+02, +6.520850026709402365e-08, 2.243883383206399174e+02, 6.493101728723404362e-08, +2.259566206366313565e+02, 6.465588585805084723e-08, 2.275182717179543204e+02, +6.438307621308016336e-08, 2.290733474068335340e+02, 6.411255908613445100e-08, +2.306219028430716378e+02, 6.384430570083681460e-08, 2.321639924757807307e+02, +6.357828776041666578e-08, 2.336996700748701699e+02, 6.331447743775933615e-08, +2.352289887422961954e+02, 6.305284736570248109e-08, 2.367520009230799189e+02, +6.279337062757202180e-08, 2.382687584160988763e+02, 6.253602074795082293e-08, +2.397793123846580556e+02, 6.228077168367347501e-08, 2.412837133668454044e+02, +6.202759781504065697e-08, 2.427820112856774699e+02, 6.177647393724696421e-08, +2.442742554590400630e+02, 6.152737525201612732e-08, 2.457604946094287186e+02, 
+6.128027735943774537e-08, 2.472407768734942692e+02, 6.103515625000000127e-08, +2.487151498113976231e+02, 6.079198829681274795e-08, 2.501836604159786077e+02, +6.055075024801586965e-08, 2.516463551217433974e+02, 6.031141921936758485e-08, +2.531032798136744475e+02, 6.007397268700787318e-08, 2.545544798358676246e+02, +5.983838848039215603e-08, 2.560000000000000000e+02, 5.960464477539062500e-08 +}; + +static const double __TBL_expfb[] = { +7.006492321624085355e-46, 1.401298464324817071e-45, 2.802596928649634142e-45, +5.605193857299268284e-45, 1.121038771459853657e-44, 2.242077542919707313e-44, +4.484155085839414627e-44, 8.968310171678829254e-44, 1.793662034335765851e-43, +3.587324068671531702e-43, 7.174648137343063403e-43, 1.434929627468612681e-42, +2.869859254937225361e-42, 5.739718509874450723e-42, 1.147943701974890145e-41, +2.295887403949780289e-41, 4.591774807899560578e-41, 9.183549615799121156e-41, +1.836709923159824231e-40, 3.673419846319648462e-40, 7.346839692639296925e-40, +1.469367938527859385e-39, 2.938735877055718770e-39, 5.877471754111437540e-39, +1.175494350822287508e-38, 2.350988701644575016e-38, 4.701977403289150032e-38, +9.403954806578300064e-38, 1.880790961315660013e-37, 3.761581922631320025e-37, +7.523163845262640051e-37, 1.504632769052528010e-36, 3.009265538105056020e-36, +6.018531076210112041e-36, 1.203706215242022408e-35, 2.407412430484044816e-35, +4.814824860968089633e-35, 9.629649721936179265e-35, 1.925929944387235853e-34, +3.851859888774471706e-34, 7.703719777548943412e-34, 1.540743955509788682e-33, +3.081487911019577365e-33, 6.162975822039154730e-33, 1.232595164407830946e-32, +2.465190328815661892e-32, 4.930380657631323784e-32, 9.860761315262647568e-32, +1.972152263052529514e-31, 3.944304526105059027e-31, 7.888609052210118054e-31, +1.577721810442023611e-30, 3.155443620884047222e-30, 6.310887241768094443e-30, +1.262177448353618889e-29, 2.524354896707237777e-29, 5.048709793414475555e-29, +1.009741958682895111e-28, 2.019483917365790222e-28, 
4.038967834731580444e-28, +8.077935669463160887e-28, 1.615587133892632177e-27, 3.231174267785264355e-27, +6.462348535570528710e-27, 1.292469707114105742e-26, 2.584939414228211484e-26, +5.169878828456422968e-26, 1.033975765691284594e-25, 2.067951531382569187e-25, +4.135903062765138374e-25, 8.271806125530276749e-25, 1.654361225106055350e-24, +3.308722450212110699e-24, 6.617444900424221399e-24, 1.323488980084844280e-23, +2.646977960169688560e-23, 5.293955920339377119e-23, 1.058791184067875424e-22, +2.117582368135750848e-22, 4.235164736271501695e-22, 8.470329472543003391e-22, +1.694065894508600678e-21, 3.388131789017201356e-21, 6.776263578034402713e-21, +1.355252715606880543e-20, 2.710505431213761085e-20, 5.421010862427522170e-20, +1.084202172485504434e-19, 2.168404344971008868e-19, 4.336808689942017736e-19, +8.673617379884035472e-19, 1.734723475976807094e-18, 3.469446951953614189e-18, +6.938893903907228378e-18, 1.387778780781445676e-17, 2.775557561562891351e-17, +5.551115123125782702e-17, 1.110223024625156540e-16, 2.220446049250313081e-16, +4.440892098500626162e-16, 8.881784197001252323e-16, 1.776356839400250465e-15, +3.552713678800500929e-15, 7.105427357601001859e-15, 1.421085471520200372e-14, +2.842170943040400743e-14, 5.684341886080801487e-14, 1.136868377216160297e-13, +2.273736754432320595e-13, 4.547473508864641190e-13, 9.094947017729282379e-13, +1.818989403545856476e-12, 3.637978807091712952e-12, 7.275957614183425903e-12, +1.455191522836685181e-11, 2.910383045673370361e-11, 5.820766091346740723e-11, +1.164153218269348145e-10, 2.328306436538696289e-10, 4.656612873077392578e-10, +9.313225746154785156e-10, 1.862645149230957031e-09, 3.725290298461914062e-09, +7.450580596923828125e-09, 1.490116119384765625e-08, 2.980232238769531250e-08, +5.960464477539062500e-08, 1.192092895507812500e-07, 2.384185791015625000e-07, +4.768371582031250000e-07, 9.536743164062500000e-07, 1.907348632812500000e-06, +3.814697265625000000e-06, 7.629394531250000000e-06, 
1.525878906250000000e-05, +3.051757812500000000e-05, 6.103515625000000000e-05, 1.220703125000000000e-04, +2.441406250000000000e-04, 4.882812500000000000e-04, 9.765625000000000000e-04, +1.953125000000000000e-03, 3.906250000000000000e-03, 7.812500000000000000e-03, +1.562500000000000000e-02, 3.125000000000000000e-02, 6.250000000000000000e-02, +1.250000000000000000e-01, 2.500000000000000000e-01, 5.000000000000000000e-01, +1.000000000000000000e+00, 2.000000000000000000e+00, 4.000000000000000000e+00, +8.000000000000000000e+00, 1.600000000000000000e+01, 3.200000000000000000e+01, +6.400000000000000000e+01, 1.280000000000000000e+02, 2.560000000000000000e+02, +5.120000000000000000e+02, 1.024000000000000000e+03, 2.048000000000000000e+03, +4.096000000000000000e+03, 8.192000000000000000e+03, 1.638400000000000000e+04, +3.276800000000000000e+04, 6.553600000000000000e+04, 1.310720000000000000e+05, +2.621440000000000000e+05, 5.242880000000000000e+05, 1.048576000000000000e+06, +2.097152000000000000e+06, 4.194304000000000000e+06, 8.388608000000000000e+06, +1.677721600000000000e+07, 3.355443200000000000e+07, 6.710886400000000000e+07, +1.342177280000000000e+08, 2.684354560000000000e+08, 5.368709120000000000e+08, +1.073741824000000000e+09, 2.147483648000000000e+09, 4.294967296000000000e+09, +8.589934592000000000e+09, 1.717986918400000000e+10, 3.435973836800000000e+10, +6.871947673600000000e+10, 1.374389534720000000e+11, 2.748779069440000000e+11, +5.497558138880000000e+11, 1.099511627776000000e+12, 2.199023255552000000e+12, +4.398046511104000000e+12, 8.796093022208000000e+12, 1.759218604441600000e+13, +3.518437208883200000e+13, 7.036874417766400000e+13, 1.407374883553280000e+14, +2.814749767106560000e+14, 5.629499534213120000e+14, 1.125899906842624000e+15, +2.251799813685248000e+15, 4.503599627370496000e+15, 9.007199254740992000e+15, +1.801439850948198400e+16, 3.602879701896396800e+16, 7.205759403792793600e+16, +1.441151880758558720e+17, 2.882303761517117440e+17, 
5.764607523034234880e+17, +1.152921504606846976e+18, 2.305843009213693952e+18, 4.611686018427387904e+18, +9.223372036854775808e+18, 1.844674407370955162e+19, 3.689348814741910323e+19, +7.378697629483820646e+19, 1.475739525896764129e+20, 2.951479051793528259e+20, +5.902958103587056517e+20, 1.180591620717411303e+21, 2.361183241434822607e+21, +4.722366482869645214e+21, 9.444732965739290427e+21, 1.888946593147858085e+22, +3.777893186295716171e+22, 7.555786372591432342e+22, 1.511157274518286468e+23, +3.022314549036572937e+23, 6.044629098073145874e+23, 1.208925819614629175e+24, +2.417851639229258349e+24, 4.835703278458516699e+24, 9.671406556917033398e+24, +1.934281311383406680e+25, 3.868562622766813359e+25, 7.737125245533626718e+25, +1.547425049106725344e+26, 3.094850098213450687e+26, 6.189700196426901374e+26, +1.237940039285380275e+27, 2.475880078570760550e+27, 4.951760157141521100e+27, +9.903520314283042199e+27, 1.980704062856608440e+28, 3.961408125713216880e+28, +7.922816251426433759e+28, 1.584563250285286752e+29, 3.169126500570573504e+29, +6.338253001141147007e+29, 1.267650600228229401e+30, 2.535301200456458803e+30, +5.070602400912917606e+30, 1.014120480182583521e+31, 2.028240960365167042e+31, +4.056481920730334085e+31, 8.112963841460668170e+31, 1.622592768292133634e+32, +3.245185536584267268e+32, 6.490371073168534536e+32, 1.298074214633706907e+33, +2.596148429267413814e+33, 5.192296858534827629e+33, 1.038459371706965526e+34, +2.076918743413931051e+34, 4.153837486827862103e+34, 8.307674973655724206e+34, +1.661534994731144841e+35, 3.323069989462289682e+35, 6.646139978924579365e+35, +1.329227995784915873e+36, 2.658455991569831746e+36, 5.316911983139663492e+36, +1.063382396627932698e+37, 2.126764793255865397e+37, 4.253529586511730793e+37, +8.507059173023461587e+37, 1.701411834604692317e+38, 3.402823669209384635e+38 +}; + +static const double + KA3 = -3.60659926599003171364e-01*256.0, + KA2 = 4.80902715189356683026e-01*256.0, + KA1 = -7.21347520569871841065e-01*256.0, + 
KA0 = 1.44269504088069658645e+00*256.0, + KB2 = 3.66556671660783833261e-06, + KB1 = 2.70760782821392980564e-03, + DONE = 1.0, + HTHRESH = 32768.0, + LTHRESH = -38400.0; + /* RETURN(ret): store ret through pz and step px, py and pz by their strides. If no elements are pending in the current run (n_n == 0) the run start pointers are moved to the new position and the loop continues; otherwise this element is counted off (n--) and the loop is left so the pending run can be handed to __vpowf_n. Relies on the locals of __vpowf and on being expanded inside its inner for loop. */ +#define RETURN(ret) \ +{ \ + *pz = (ret); \ + px += stridex; \ + py += stridey; \ + pz += stridez; \ + if (n_n == 0) \ + { \ + spx = px; spy = py; spz = pz; \ + continue; \ + } \ + n--; \ + break; \ +} + /* __vpowf: for each of n elements compute pz = px ** py in single precision, stepping the three arrays by stridex, stridey and stridez. Inf/NaN operands and zero, subnormal or negative x are resolved element by element here through RETURN; runs of the remaining elements are counted in n_n and passed to __vpowf_n. Unless NOPOWFIX is defined, a call with stridex == 0 and a positive, normal x other than 1 is forwarded to __vpowfx. */ +void +__vpowf(int n, float * restrict px, int stridex, float * restrict py, + int stridey, float * restrict pz, int stridez) +{ + float *spx, *spy, *spz; + double y0, yy0; + long long di0; + unsigned ux, sx, uy, ay, ax0; + int exp, i0, ind0, exp0, yisint0, n_n; + +#ifndef NOPOWFIX + if (stridex == 0) + { + unsigned hx = *(unsigned*)px; + + if ((hx >= 0x00800000) && /* x not zero or subnormal */ + (hx < 0x7f800000) && /* x not inf, nan or negative sign bit */ + (hx != 0x3f800000)) /* x not 1 */ + { + __vpowfx(n, px, py, stridey, pz, stridez); + return; + } + } +#endif + + while (n > 0) + { + n_n = 0; + spx = px; + spy = py; + spz = pz; + for (; n > 0 ; n--) + { + uy = *(unsigned int*)py; + ux = *(unsigned int*)px; + ay = uy & 0x7fffffff; + ax0 = ux & 0x7fffffff; + sx = ux >> 31; + yisint0 = 0; /* Y - non-integer (set below to 1 for an odd integer, 2 for an even integer) */ + + /* |X| or |Y| = Inf or NaN */ + if (ax0 >= 0x7f800000 || ay >= 0x7f800000) + { + if (ay == 0) + RETURN(1.0f) /* pow(X,0) */ + /* |X| or |Y| = NaN */ + if (ax0 > 0x7f800000 || ay > 0x7f800000) + RETURN (*px + *py) + if (ay == 0x7f800000) /* |Y| = Inf */ + { + float fy; + if (ax0 == 0x3f800000) + fy = *py - *py; /* +-1 ** +-Inf = NaN */ + else + fy = ((ax0 < 0x3f800000) != (uy >> 31)) ?
0.0f : *(float*) &ay; + RETURN(fy) + } + if (sx) /* X = -Inf */ + { + exp = ay >> 23; + if (exp >= 0x97) /* |Y| >= 2^24 */ + yisint0 = 2; /* Y - even */ + else if (exp >= 0x7f) /* |Y| >= 1 */ + { + i0 = ay >> ((0x7f + 23) - exp); + if ((i0 << ((0x7f + 23) - exp)) == ay) + yisint0 = 2 - (i0 & 1); + } + } + if (uy >> 31) + ax0 = 0; + /* yisint0 == 1 (odd Y) puts the sign bit on the Inf or zero result */ ax0 += yisint0 << 31; + RETURN(*(float*)&ax0) + } + + if ((int)ux < 0x00800000) /* X = denormal or negative */ + { + if (ay == 0) + RETURN(1.0f) /* pow(X,0) */ + exp0 = (ax0 >> 23) - 127; + + if ((int)ax0 < 0x00800000) /* X = denormal */ + { + *((float*) &ax0) = (float) (int)ax0; + exp0 = (ax0 >> 23) - (127 + 149); + } + + if ((int)ux <= 0) /* X <= 0 */ + { + exp = ay >> 23; + if (exp >= 0x97) /* |Y| >= 2^24 */ + yisint0 = 2; /* Y - even */ + else if (exp >= 0x7f) /* |Y| >= 1 */ + { + i0 = ay >> ((0x7f + 23) - exp); + if ((i0 << ((0x7f + 23) - exp)) == ay) + yisint0 = 2 - (i0 & 1); + } + + if (ax0 == 0) /* pow(0,Y) */ + { + float fy; + fy = (uy >> 31) ? 1.0f / 0.0f : 0.0f; + if (sx & yisint0) + fy = -fy; + RETURN(fy) + } + + if (yisint0 == 0) /* pow(neg,non-integer) */ + RETURN(0.0f / 0.0f) /* NaN */ + } + + /* perform yy0 = 256*log2(xi)*yi */ + ax0 &= 0x007fffff; + i0 = (ax0 + 0x8000) & 0xffff0000; + ind0 = i0 >> 15; + i0 = ax0 - i0; + y0 = (double) i0 * __TBL_log2f[ind0 + 1]; + yy0 = __TBL_log2f[ind0] + (double) (exp0 << 8); + yy0 += (((KA3 * y0 + KA2) * y0 + KA1) * y0 + KA0) * y0; + yy0 = (double)py[0] * yy0; + + /* perform 2 ** (yy0/256) */ + if (yy0 >= HTHRESH) + yy0 = HTHRESH; + if (yy0 <= LTHRESH) + yy0 = LTHRESH; + ind0 = (int) yy0; + y0 = yy0 - (double)ind0; + yy0 = (KB2 * y0 + KB1) * y0 + DONE; + /* (ind0 >> 8) is added to the exponent field (bits 52 and up) of the __TBL_exp2f entry; yisint0 == 1 (odd Y) lands on bit 63, the sign bit */ di0 = ((long long)((ind0 >> 8) + (yisint0 << 11))) << 52; + di0 += ((long long*)__TBL_exp2f)[ind0 & 255]; + RETURN((float) (yy0 * *(double*)&di0)) + } + px += stridex; + py += stridey; + pz += stridez; + n_n++; + } + if (n_n > 0) + __vpowf_n(n_n, spx, stridex, spy, stridey, spz, stridez); + } +} + + /* __vpowf_n: compute pz = px ** py for n elements as 2 ** (w/256) with w = 256*log2(x)*y, three elements per iteration followed by a scalar loop for the remainder. No special-case tests are made here; __vpowf only passes runs of elements it did not resolve itself. */ +static void +__vpowf_n(int n, float
* restrict px, int stridex, float * restrict py, + int stridey, float * restrict pz, int stridez) +{ + double y0, yy0; + double di0; + int ind0, i0, exp0; + unsigned ax0; + double y1, yy1; + double di1; + int ind1, i1, exp1; + unsigned ax1; + double y2, yy2; + double di2; + int ind2, i2, exp2; + unsigned ax2; + + for (; n > 2 ; n -= 3) + { + /* perform yy0 = 256*log2(xi)*yi; the mantissa is rounded to the nearest multiple of 1/128 (0x10000) and ind = 2*k selects the __TBL_log2f pair for m0 = 1 + k/128 */ + ax0 = ((int*)px)[0]; + px += stridex; + ax1 = ((int*)px)[0]; + px += stridex; + ax2 = ((int*)px)[0]; + px += stridex; + exp0 = ((ax0 & 0x7fffffff) >> 23) - 127; + exp1 = ((ax1 & 0x7fffffff) >> 23) - 127; + exp2 = ((ax2 & 0x7fffffff) >> 23) - 127; + ax0 &= 0x007fffff; + ax1 &= 0x007fffff; + ax2 &= 0x007fffff; + i0 = (ax0 + 0x8000) & 0xffff0000; + i1 = (ax1 + 0x8000) & 0xffff0000; + i2 = (ax2 + 0x8000) & 0xffff0000; + ind0 = i0 >> 15; + ind1 = i1 >> 15; + ind2 = i2 >> 15; + i0 = ax0 - i0; + i1 = ax1 - i1; + i2 = ax2 - i2; + y0 = (double) i0 * __TBL_log2f[ind0 + 1]; + y1 = (double) i1 * __TBL_log2f[ind1 + 1]; + y2 = (double) i2 * __TBL_log2f[ind2 + 1]; + yy0 = __TBL_log2f[ind0] + (double) (exp0 << 8); + yy1 = __TBL_log2f[ind1] + (double) (exp1 << 8); + yy2 = __TBL_log2f[ind2] + (double) (exp2 << 8); + yy0 += (((KA3 * y0 + KA2) * y0 + KA1) * y0 + KA0) * y0; + yy1 += (((KA3 * y1 + KA2) * y1 + KA1) * y1 + KA0) * y1; + yy2 += (((KA3 * y2 + KA2) * y2 + KA1) * y2 + KA0) * y2; + yy0 = (double)py[0] * yy0; + py += stridey; + yy1 = (double)py[0] * yy1; + py += stridey; + yy2 = (double)py[0] * yy2; + py += stridey; + + /* perform 2 ** (yy0/256); with an arithmetic right shift the clamp keeps ind >> 8 within [-150, 128], the span addressed through __TBL_expfb + 150 */ + if (yy0 >= HTHRESH) + yy0 = HTHRESH; + if (yy0 <= LTHRESH) + yy0 = LTHRESH; + if (yy1 >= HTHRESH) + yy1 = HTHRESH; + if (yy1 <= LTHRESH) + yy1 = LTHRESH; + if (yy2 >= HTHRESH) + yy2 = HTHRESH; + if (yy2 <= LTHRESH) + yy2 = LTHRESH; + + ind0 = (int) yy0; + ind1 = (int) yy1; + ind2 = (int) yy2; + y0 = yy0 - (double)ind0; + y1 = yy1 - (double)ind1; + y2 = yy2 - (double)ind2; + yy0 = (KB2 * y0 + KB1) * y0 + DONE; + yy1 = (KB2 * y1 + KB1) * y1 + DONE; + yy2 =
(KB2 * y2 + KB1) * y2 + DONE; + /* __TBL_expfb[150] is 1.0, so (__TBL_expfb + 150)[a] is 2**a */ di0 = (__TBL_expfb + 150)[ind0 >> 8]; + di1 = (__TBL_expfb + 150)[ind1 >> 8]; + di2 = (__TBL_expfb + 150)[ind2 >> 8]; + di0 *= __TBL_exp2f[ind0 & 255]; + di1 *= __TBL_exp2f[ind1 & 255]; + di2 *= __TBL_exp2f[ind2 & 255]; + pz[0] = (float) (yy0 * di0); + pz += stridez; + pz[0] = (float) (yy1 * di1); + pz += stridez; + pz[0] = (float) (yy2 * di2); + pz += stridez; + } + + for (; n > 0 ; n--) + { + /* perform yy0 = 256*log2(xi)*yi */ + ax0 = ((int*)px)[0]; + exp0 = ((ax0 & 0x7fffffff) >> 23) - 127; + ax0 &= 0x007fffff; + i0 = (ax0 + 0x8000) & 0xffff0000; + ind0 = i0 >> 15; + i0 = ax0 - i0; + y0 = (double) i0 * __TBL_log2f[ind0 + 1]; + yy0 = __TBL_log2f[ind0] + (double) (exp0 << 8); + yy0 += (((KA3 * y0 + KA2) * y0 + KA1) * y0 + KA0) * y0; + yy0 = (double)py[0] * yy0; + + /* perform 2 ** (yy0/256) */ + if (yy0 >= HTHRESH) + yy0 = HTHRESH; + if (yy0 <= LTHRESH) + yy0 = LTHRESH; + ind0 = (int) yy0; + y0 = yy0 - (double)ind0; + yy0 = (KB2 * y0 + KB1) * y0 + DONE; + di0 = (__TBL_expfb + 150)[ind0 >> 8]; + di0 *= __TBL_exp2f[ind0 & 255]; + pz[0] = (float) (yy0 * di0); + px += stridex; + py += stridey; + pz += stridez; + } +} + + /* __vpowfx: variant of __vpowf for one x value shared by all n elements (only *px is read). 256*log2(x) is computed once into yy; Inf and NaN y values are resolved inline, and runs of the remaining y values are passed to __vpowfx_n. */ +static void +__vpowfx(int n, float * restrict px, float * restrict py, + int stridey, float * restrict pz, int stridez) +{ + float *spy, *spz; + double yy, y0; + int ind0, exp0, i0, n_n; + unsigned ux, ax, ax0, uy, ay; + + /* perform yy = 256*log2(x); the factor yi is applied later in __vpowfx_n */ + ux = *(unsigned int*)px; + ax = ux & 0x7fffffff; + exp0 = (ax >> 23) - 127; + ax0 = ux & 0x007fffff; + i0 = (ax0 + 0x8000) & 0xffff0000; + ind0 = i0 >> 15; + i0 = ax0 - i0; + y0 = (double) i0 * __TBL_log2f[ind0 + 1]; + yy = __TBL_log2f[ind0] + (double) (exp0 << 8); + yy += (((KA3 * y0 + KA2) * y0 + KA1) * y0 + KA0) * y0; + + while (n > 0) + { + n_n = 0; + spy = py; + spz = pz; + for (; n > 0 ; n--) + { + uy = *(unsigned int*)py; + ay = uy & 0x7fffffff; + + if (ay >= 0x7f800000) /* |Y| = Inf or NaN */ + { + float fy; + if (ay > 0x7f800000) + fy = *py + *py; /* |Y|
= NaN */ + else + fy = ((ax < 0x3f800000) != (uy >> 31)) ? 0.0f : *(float*)&ay; + *pz = fy; + py += stridey; + pz += stridez; + if (n_n == 0) + { + spy = py; + spz = pz; + continue; + } + n--; + break; + } + py += stridey; + pz += stridez; + n_n++; + } + if (n_n > 0) + __vpowfx_n(n_n, yy, spy, stridey, spz, stridez); + } +} + + /* __vpowfx_n: compute pz = 2 ** (py*yy/256) for n elements, where yy is the 256*log2(x) value supplied by __vpowfx; three elements per iteration followed by a scalar loop for the remainder. No special-case tests are made here. */ +static void +__vpowfx_n(int n, double yy, float * restrict py, + int stridey, float * restrict pz, int stridez) +{ + double y0, yy0, di0; + double y1, yy1, di1; + double y2, yy2, di2; + int ind0, ind1, ind2; + + for (; n > 2 ; n-= 3) + { + /* perform 2 ** (yi*yy/256) */ + yy0 = (double)py[0] * yy; + py += stridey; + yy1 = (double)py[0] * yy; + py += stridey; + yy2 = (double)py[0] * yy; + py += stridey; + if (yy0 >= HTHRESH) + yy0 = HTHRESH; + if (yy0 <= LTHRESH) + yy0 = LTHRESH; + if (yy1 >= HTHRESH) + yy1 = HTHRESH; + if (yy1 <= LTHRESH) + yy1 = LTHRESH; + if (yy2 >= HTHRESH) + yy2 = HTHRESH; + if (yy2 <= LTHRESH) + yy2 = LTHRESH; + ind0 = (int) yy0; + ind1 = (int) yy1; + ind2 = (int) yy2; + y0 = yy0 - (double)ind0; + y1 = yy1 - (double)ind1; + y2 = yy2 - (double)ind2; + yy0 = (KB2 * y0 + KB1) * y0 + DONE; + yy1 = (KB2 * y1 + KB1) * y1 + DONE; + yy2 = (KB2 * y2 + KB1) * y2 + DONE; + di0 = (__TBL_expfb + 150)[ind0 >> 8]; + di1 = (__TBL_expfb + 150)[ind1 >> 8]; + di2 = (__TBL_expfb + 150)[ind2 >> 8]; + di0 *= __TBL_exp2f[ind0 & 255]; + di1 *= __TBL_exp2f[ind1 & 255]; + di2 *= __TBL_exp2f[ind2 & 255]; + pz[0] = (float) (yy0 * di0); + pz += stridez; + pz[0] = (float) (yy1 * di1); + pz += stridez; + pz[0] = (float) (yy2 * di2); + pz += stridez; + } + for (; n > 0 ; n--) + { + /* perform 2 ** (yi*yy/256) */ + yy0 = (double)py[0] * yy; + if (yy0 >= HTHRESH) + yy0 = HTHRESH; + if (yy0 <= LTHRESH) + yy0 = LTHRESH; + ind0 = (int) yy0; + y0 = yy0 - (double)ind0; + yy0 = (KB2 * y0 + KB1) * y0 + DONE; + di0 = (__TBL_expfb + 150)[ind0 >> 8]; + di0 *= __TBL_exp2f[ind0 & 255]; + pz[0] = (float) (yy0 * di0); + py += stridey; + pz += stridez; + } +} +diff --git
a/usr/src/lib/libmvec/common/__vrem_pio2m.c b/usr/src/lib/libmvec/common/__vrem_pio2m.c new file mode 100644 index 0000000000..7a36e944ab --- /dev/null +++ b/usr/src/lib/libmvec/common/__vrem_pio2m.c @@ -0,0 +1,309 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * Given X, __vlibm_rem_pio2m finds Y and an integer n such that + * Y = X - n*pi/2 and |Y| < pi/2. + * + * On entry, X is represented by x, an array of nx 24-bit integers + * stored in double precision format, and e: + * + * X = sum (x[i] * 2^(e - 24*i)) + * + * nx must be 1, 2, or 3, and e must be >= -24. For example, a + * suitable representation for the double precision number z can + * be computed as follows: + * + * e = ilogb(z)-23 + * z = scalbn(z,-e) + * for i = 0,1,2 + * x[i] = floor(z) + * z = (z-x[i])*2**24 + * + * On exit, Y is approximated by y[0] if prec is 0 and by the un- + * evaluated sum y[0] + y[1] if prec != 0. The approximation is + * accurate to 53 bits in the former case and to at least 72 bits + * in the latter. 
+ * + * __vlibm_rem_pio2m returns n mod 8. + * + * Notes: + * + * As n is the integer nearest X * 2/pi, we approximate the latter + * product to a precision that is determined dynamically so as to + * ensure that the final value Y is approximated accurately enough. + * We don't bother to compute terms in the product that are multiples + * of 8, so the cost of this multiplication is independent of the + * magnitude of X. The variable ip determines the offset into the + * array ipio2 of the first term we need to use. The variable eq0 + * is the corresponding exponent of the first partial product. + * + * The partial products are scaled, summed, and split into an array + * of non-overlapping 24-bit terms (not necessarily having the same + * signs). Each partial product overlaps three elements of the + * resulting array: + * + * q[i] xxxxxxxxxxxxxx + * q[i+1] xxxxxxxxxxxxxx + * q[i+2] xxxxxxxxxxxxxx + * ... ... + * + * + * r[i] xxxxxx + * r[i+1] xxxxxx + * r[i+2] xxxxxx + * ... ... + * + * In order that the last element of the r array have some correct + * bits, we compute an extra term in the q array, but we don't bother + * to split this last term into 24-bit chunks; thus, the final term + * of the r array could have more than 24 bits, but this doesn't + * matter. + * + * After we subtract the nearest integer to the product, we multiply + * the remaining part of r by pi/2 to obtain Y. Before we compute + * this last product, however, we make sure that the remaining part + * of r has at least five nonzero terms, computing more if need be. + * This ensures that even if the first nonzero term is only a single + * bit and the last term is wrong in several trailing bits, we still + * have enough accuracy to obtain 72 bits of Y. + * + * IMPORTANT: This code assumes that the rounding mode is round-to- + * nearest in several key places. First, after we compute X * 2/pi, + * we round to the nearest integer by adding and subtracting a power + * of two. 
This step must be done in round-to-nearest mode to ensure + * that the remainder is less than 1/2 in absolute value. (Because + * we only take two adjacent terms of r into account when we perform + * this rounding, in very rare cases the remainder could be just + * barely greater than 1/2, but this shouldn't matter in practice.) + * + * Second, we also split the partial products of X * 2/pi into 24-bit + * pieces by adding and subtracting a power of two. In this step, + * round-to-nearest mode is important in order to guarantee that + * the index of the first nonzero term in the remainder gives an + * accurate indication of the number of significant terms. For + * example, suppose eq0 = -1, so that r[1] is a multiple of 1/2 and + * |r[2]| < 1/2. After we subtract the nearest integer, r[1] could + * be -1/2, and r[2] could be very nearly 1/2, so that r[1] != 0, + * yet the remainder is much smaller than the least significant bit + * corresponding to r[1]. As long as we use round-to-nearest mode, + * this can't happen; instead, the absolute value of each r[j] will + * be less than 1/2 the least significant bit corresponding to r[j-1], + * so that the entire remainder must be at least half as large as + * the first nonzero term (or perhaps just barely smaller than this). 
+ */ + +#include <sys/isa_defs.h> + +#ifdef _LITTLE_ENDIAN +#define HIWORD 1 +#define LOWORD 0 +#else +#define HIWORD 0 +#define LOWORD 1 +#endif + +/* 396 hex digits of 2/pi, with two leading zeroes to make life easier */ +static const double ipio2[] = { + 0, 0, + 0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62, + 0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A, + 0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129, + 0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41, + 0x3991D6, 0x398353, 0x39F49C, 0x845F8B, 0xBDF928, 0x3B1FF8, + 0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF, + 0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5, + 0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 0x5FB11F, 0x8D5D08, + 0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3, + 0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880, + 0x4D7327, 0x310606, 0x1556CA, 0x73A8C9, 0x60E27B, 0xC08C6B, +}; + +/* pi/2 in 24-bit pieces */ +static const double pio2[] = { + 1.57079625129699707031e+00, + 7.54978941586159635335e-08, + 5.39030252995776476554e-15, + 3.28200341580791294123e-22, + 1.27065575308067607349e-29, +}; + +/* miscellaneous constants */ +static const double + zero = 0.0, + two24 = 16777216.0, + round1 = 6755399441055744.0, /* 3 * 2^51 */ + round24 = 113336795588871485128704.0, /* 3 * 2^75 */ + twon24 = 5.960464477539062500E-8; +/* + * Reduce X modulo pi/2 as described in the block comment at the top of + * this file. + * + * x points to nx (1, 2 or 3) 24-bit pieces of X held as doubles + * e exponent that scales those pieces + * y receives the reduced value in y[0]; when prec is nonzero a + * second correction term is also stored in y[1] + * prec zero for the y[0] result alone, nonzero for y[0] and y[1] + * + * Returns n & 7, the low three bits of the integer multiple of pi/2 that + * was removed. The caller must be running in round-to-nearest mode. + */ +int +__vlibm_rem_pio2m(double *x, double *y, int e, int nx, int prec) +{ + union { + double d; + int i[2]; + } s; + double z, t, p, q[20], r[21], *pr; + int nq, ip, n, i, j, k, eq0, eqnqm1; + + /* determine ip and eq0; note that -48 <= eq0 <= 2 */ + ip = (e - 3) / 24; + if (ip < 0) + ip = 0; + eq0 = e - 24 * (ip + 1); + + /* compute q[0,...,5] = x * ipio2 and initialize nq and eqnqm1 */ + if (nx == 3) { + q[0] = x[0] * ipio2[ip+2] + x[1] * ipio2[ip+1] + x[2] * ipio2[ip]; + q[1] = x[0] * ipio2[ip+3] + x[1] * ipio2[ip+2] + x[2] * ipio2[ip+1]; + q[2] = x[0] * ipio2[ip+4] + x[1] * ipio2[ip+3] + x[2] 
* ipio2[ip+2]; + q[3] = x[0] * ipio2[ip+5] + x[1] * ipio2[ip+4] + x[2] * ipio2[ip+3]; + q[4] = x[0] * ipio2[ip+6] + x[1] * ipio2[ip+5] + x[2] * ipio2[ip+4]; + q[5] = x[0] * ipio2[ip+7] + x[1] * ipio2[ip+6] + x[2] * ipio2[ip+5]; + } else if (nx == 2) { + q[0] = x[0] * ipio2[ip+2] + x[1] * ipio2[ip+1]; + q[1] = x[0] * ipio2[ip+3] + x[1] * ipio2[ip+2]; + q[2] = x[0] * ipio2[ip+4] + x[1] * ipio2[ip+3]; + q[3] = x[0] * ipio2[ip+5] + x[1] * ipio2[ip+4]; + q[4] = x[0] * ipio2[ip+6] + x[1] * ipio2[ip+5]; + q[5] = x[0] * ipio2[ip+7] + x[1] * ipio2[ip+6]; + } else { + q[0] = x[0] * ipio2[ip+2]; + q[1] = x[0] * ipio2[ip+3]; + q[2] = x[0] * ipio2[ip+4]; + q[3] = x[0] * ipio2[ip+5]; + q[4] = x[0] * ipio2[ip+6]; + q[5] = x[0] * ipio2[ip+7]; + } + nq = 5; + eqnqm1 = eq0 - 96; +/* eqnqm1 starts at eq0 - 24 * (nq - 1), which is eq0 - 96 for nq = 5, and is lowered by 24 for every extra q term appended further down */ +recompute: + /* propagate carries and incorporate powers of two; p starts as 2^eqnqm1, built by writing the biased exponent into the high word of s */ + s.i[HIWORD] = (0x3ff + eqnqm1) << 20; + s.i[LOWORD] = 0; + p = s.d; + z = q[nq] * twon24; + for (j = nq-1; j >= 1; j--) { + z += q[j]; + t = (z + round24) - round24; /* must be rounded to nearest */ + r[j+1] = (z - t) * p; + z = t * twon24; + p *= two24; + } + z += q[0]; + t = (z + round24) - round24; /* must be rounded to nearest */ + r[1] = (z - t) * p; + r[0] = t * p; + + /* form n = [r] mod 8 and leave the fractional part of r */ + if (eq0 > 0) { + /* binary point lies within r[2] */ + z = r[2] + r[3]; + t = (z + round1) - round1; /* must be rounded to nearest */ + r[2] -= t; + n = (int)(r[1] + t); + r[0] = r[1] = zero; + } else if (eq0 > -24) { + /* binary point lies within or just to the right of r[1] */ + z = r[1] + r[2]; + t = (z + round1) - round1; /* must be rounded to nearest */ + r[1] -= t; + z = r[0] + t; + /* cut off high part of z so conversion to int doesn't + overflow */ + t = (z + round24) - round24; + n = (int)(z - t); + r[0] = zero; + } else { + /* binary point lies within or just to the right of r[0] */ + z = r[0] + r[1]; + t = (z + round1) - round1; /* must be rounded to nearest */ + r[0] -= t; + n = (int)t; + } + + 
/* count the number of leading zeroes in r */ + for (j = 0; j <= nq; j++) { + if (r[j] != zero) + break; + } + + /* if fewer than 5 terms remain, add more */ + if (nq - j < 4) { + k = 4 - (nq - j); + /* + * compute q[nq+1] to q[nq+k] + * + * For some reason, writing out the nx loop explicitly + * for each of the three possible values (as above) seems + * to run a little slower, so we'll leave this code as is. + */ + for (i = nq + 1; i <= nq + k; i++) { + t = x[0] * ipio2[ip+2+i]; + for (j = 1; j < nx; j++) + t += x[j] * ipio2[ip+2+i-j]; + q[i] = t; + eqnqm1 -= 24; + } + nq += k; + goto recompute; + } + + /* set pr and nq so that pr[0,...,nq] is the part of r remaining */ + pr = &r[j]; + nq = nq - j; + + /* compute pio2 * pr[0,...,nq]; note that nq >= 4 here */ + q[0] = pio2[0] * pr[0]; + q[1] = pio2[0] * pr[1] + pio2[1] * pr[0]; + q[2] = pio2[0] * pr[2] + pio2[1] * pr[1] + pio2[2] * pr[0]; + q[3] = pio2[0] * pr[3] + pio2[1] * pr[2] + pio2[2] * pr[1] + + pio2[3] * pr[0]; + for (i = 4; i <= nq; i++) { + q[i] = pio2[0] * pr[i] + pio2[1] * pr[i-1] + pio2[2] * pr[i-2] + + pio2[3] * pr[i-3] + pio2[4] * pr[i-4]; + } + + /* sum q in increasing order to obtain the first term of y */ + t = q[nq]; + for (i = nq - 1; i >= 0; i--) + t += q[i]; + y[0] = t; + if (prec) { + /* subtract and sum again in decreasing order + to obtain the second term */ + t = q[0] - t; + for (i = 1; i <= nq; i++) + t += q[i]; + y[1] = t; + } + + return (n & 7); +} diff --git a/usr/src/lib/libmvec/common/__vrhypot.c b/usr/src/lib/libmvec/common/__vrhypot.c new file mode 100644 index 0000000000..dd5b7b6fba --- /dev/null +++ b/usr/src/lib/libmvec/common/__vrhypot.c @@ -0,0 +1,431 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License.
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include <sys/isa_defs.h> +#include "libm_synonyms.h" +#include "libm_inlines.h" + +#ifdef _LITTLE_ENDIAN +#define HI(x) *(1+(int*)x) +#define LO(x) *(unsigned*)x +#else +#define HI(x) *(int*)x +#define LO(x) *(1+(unsigned*)x) +#endif + +#ifdef __RESTRICT +#define restrict _Restrict +#else +#define restrict +#endif + +/* double rhypot(double x, double y) + * + * Method : + * 1. Special cases: + * x or y = Inf => 0 + * x or y = NaN => QNaN + * x and y = 0 => Inf + divide-by-zero + * 2. Computes rhypot(x,y): + * rhypot(x,y) = m * sqrt(1/(xnm * xnm + ynm * ynm)) + * Where: + * m = 1/max(|x|,|y|) + * xnm = x * m + * ynm = y * m + * + * Compute 1/(xnm * xnm + ynm * ynm) by simulating + * multi-precision arithmetic. + * + * Accuracy: + * Maximum error observed: less than 0.869 ulp after 1.000.000.000 + * results.
+ */ + +#define sqrt __sqrt + +extern double sqrt(double); + +extern double fabs(double); + +static const int __vlibm_TBL_rhypot[] = { +/* i = [0,127] + * TBL[i] = 0x3ff00000 + *(int*)&(1.0 / *(double*)&(0x3ff0000000000000ULL + (i << 45))); */ + 0x7fe00000, 0x7fdfc07f, 0x7fdf81f8, 0x7fdf4465, + 0x7fdf07c1, 0x7fdecc07, 0x7fde9131, 0x7fde573a, + 0x7fde1e1e, 0x7fdde5d6, 0x7fddae60, 0x7fdd77b6, + 0x7fdd41d4, 0x7fdd0cb5, 0x7fdcd856, 0x7fdca4b3, + 0x7fdc71c7, 0x7fdc3f8f, 0x7fdc0e07, 0x7fdbdd2b, + 0x7fdbacf9, 0x7fdb7d6c, 0x7fdb4e81, 0x7fdb2036, + 0x7fdaf286, 0x7fdac570, 0x7fda98ef, 0x7fda6d01, + 0x7fda41a4, 0x7fda16d3, 0x7fd9ec8e, 0x7fd9c2d1, + 0x7fd99999, 0x7fd970e4, 0x7fd948b0, 0x7fd920fb, + 0x7fd8f9c1, 0x7fd8d301, 0x7fd8acb9, 0x7fd886e5, + 0x7fd86186, 0x7fd83c97, 0x7fd81818, 0x7fd7f405, + 0x7fd7d05f, 0x7fd7ad22, 0x7fd78a4c, 0x7fd767dc, + 0x7fd745d1, 0x7fd72428, 0x7fd702e0, 0x7fd6e1f7, + 0x7fd6c16c, 0x7fd6a13c, 0x7fd68168, 0x7fd661ec, + 0x7fd642c8, 0x7fd623fa, 0x7fd60581, 0x7fd5e75b, + 0x7fd5c988, 0x7fd5ac05, 0x7fd58ed2, 0x7fd571ed, + 0x7fd55555, 0x7fd53909, 0x7fd51d07, 0x7fd50150, + 0x7fd4e5e0, 0x7fd4cab8, 0x7fd4afd6, 0x7fd49539, + 0x7fd47ae1, 0x7fd460cb, 0x7fd446f8, 0x7fd42d66, + 0x7fd41414, 0x7fd3fb01, 0x7fd3e22c, 0x7fd3c995, + 0x7fd3b13b, 0x7fd3991c, 0x7fd38138, 0x7fd3698d, + 0x7fd3521c, 0x7fd33ae4, 0x7fd323e3, 0x7fd30d19, + 0x7fd2f684, 0x7fd2e025, 0x7fd2c9fb, 0x7fd2b404, + 0x7fd29e41, 0x7fd288b0, 0x7fd27350, 0x7fd25e22, + 0x7fd24924, 0x7fd23456, 0x7fd21fb7, 0x7fd20b47, + 0x7fd1f704, 0x7fd1e2ef, 0x7fd1cf06, 0x7fd1bb4a, + 0x7fd1a7b9, 0x7fd19453, 0x7fd18118, 0x7fd16e06, + 0x7fd15b1e, 0x7fd1485f, 0x7fd135c8, 0x7fd12358, + 0x7fd11111, 0x7fd0fef0, 0x7fd0ecf5, 0x7fd0db20, + 0x7fd0c971, 0x7fd0b7e6, 0x7fd0a681, 0x7fd0953f, + 0x7fd08421, 0x7fd07326, 0x7fd0624d, 0x7fd05197, + 0x7fd04104, 0x7fd03091, 0x7fd02040, 0x7fd01010, +}; + +static const unsigned long long LCONST[] = { +0x3ff0000000000000ULL, /* DONE = 1.0 */ +0x4000000000000000ULL, /* DTWO = 2.0 */ 
+0x4230000000000000ULL, /* D2ON36 = 2**36 */ +0x7fd0000000000000ULL, /* D2ON1022 = 2**1022 */ +0x3cb0000000000000ULL, /* D2ONM52 = 2**-52 */ +}; + +#define RET_SC(I) \ + px += stridex; \ + py += stridey; \ + pz += stridez; \ + if (--n <= 0) \ + break; \ + goto start##I; + +#define RETURN(I, ret) \ +{ \ + pz[0] = (ret); \ + RET_SC(I) \ +} + +#define PREP(I) \ +hx##I = HI(px); \ +hy##I = HI(py); \ +hx##I &= 0x7fffffff; \ +hy##I &= 0x7fffffff; \ +pz##I = pz; \ +if (hx##I >= 0x7ff00000 || hy##I >= 0x7ff00000) /* |X| or |Y| = Inf,NaN */ \ +{ \ + lx = LO(px); \ + ly = LO(py); \ + x = *px; \ + y = *py; \ + if (hx##I == 0x7ff00000 && lx == 0) res0 = 0.0; /* |X| = Inf */ \ + else if (hy##I == 0x7ff00000 && ly == 0) res0 = 0.0; /* |Y| = Inf */ \ + else res0 = fabs(x) + fabs(y); \ + \ + RETURN (I, res0) \ +} \ +x##I = *px; \ +y##I = *py; \ +diff0 = hy##I - hx##I; \ +j0 = diff0 >> 31; \ +if (hx##I < 0x00100000 && hy##I < 0x00100000) /* |X| and |Y| = subnormal or zero */ \ +{ \ + lx = LO(px); \ + ly = LO(py); \ + x = x##I; \ + y = y##I; \ + \ + if ((hx##I | hy##I | lx | ly) == 0) /* |X| and |Y| = 0 */ \ + RETURN (I, DONE / 0.0) \ + \ + x = fabs(x); \ + y = fabs(y); \ + \ + x = *(long long*)&x; \ + y = *(long long*)&y; \ + \ + x *= D2ONM52; \ + y *= D2ONM52; \ + \ + x_hi0 = (x + D2ON36) - D2ON36; \ + y_hi0 = (y + D2ON36) - D2ON36; \ + x_lo0 = x - x_hi0; \ + y_lo0 = y - y_hi0; \ + res0_hi = (x_hi0 * x_hi0 + y_hi0 * y_hi0); \ + res0_lo = ((x + x_hi0) * x_lo0 + (y + y_hi0) * y_lo0); \ + \ + dres0 = res0_hi + res0_lo; \ + \ + iarr0 = HI(&dres0); \ + iexp0 = iarr0 & 0xfff00000; \ + \ + iarr0 = (iarr0 >> 11) & 0x1fc; \ + itbl0 = ((int*)((char*)__vlibm_TBL_rhypot + iarr0))[0]; \ + itbl0 -= iexp0; \ + HI(&dd0) = itbl0; \ + LO(&dd0) = 0; \ + \ + dd0 = dd0 * (DTWO - dd0 * dres0); \ + dd0 = dd0 * (DTWO - dd0 * dres0); \ + dres0 = dd0 * (DTWO - dd0 * dres0); \ + \ + HI(&res0) = HI(&dres0) & 0xffffff00; \ + LO(&res0) = 0; \ + res0 += (DONE - res0_hi * res0 - res0_lo * res0) * dres0; \ + res0 
= sqrt (res0); \ + \ + res0 = D2ON1022 * res0; \ + RETURN (I, res0) \ +} \ +j0 = hy##I - (diff0 & j0); \ +j0 &= 0x7ff00000; \ +HI(&scl##I) = 0x7ff00000 - j0; + +/* + * Vector reciprocal hypotenuse. For each of the n elements, read x from + * px and y from py and store the reciprocal of the hypotenuse of x and y + * through pz; the three pointers advance by stridex, stridey and stridez + * elements per step. + * + * Elements are taken three at a time. PREP(I) loads element I, writes + * the result itself for the special cases (an Inf or NaN operand, or both + * operands zero or subnormal) and otherwise leaves x##I, y##I and the + * scale factor scl##I ready for the common arithmetic. The counter i + * records how many prepared elements are still pending when the loop + * ends, so that the tail code can finish them. + * + * A count of zero or less returns at once without touching the arrays. + */ +void +__vrhypot(int n, double * restrict px, int stridex, double * restrict py, + int stridey, double * restrict pz, int stridez) +{ + int i = 0; + double x, y; + double x_hi0, x_lo0, y_hi0, y_lo0, scl0 = 0; + double x0, y0, res0, dd0; + double res0_hi,res0_lo, dres0; + double x_hi1, x_lo1, y_hi1, y_lo1, scl1 = 0; + double x1 = 0.0L, y1 = 0.0L, res1, dd1; + double res1_hi,res1_lo, dres1; + double x_hi2, x_lo2, y_hi2, y_lo2, scl2 = 0; + double x2, y2, res2, dd2; + double res2_hi,res2_lo, dres2; + + int hx0, hy0, j0, diff0; + int iarr0, iexp0, itbl0; + int hx1, hy1; + int iarr1, iexp1, itbl1; + int hx2, hy2; + int iarr2, iexp2, itbl2; + + int lx, ly; + + double DONE = ((double*)LCONST)[0]; + double DTWO = ((double*)LCONST)[1]; + double D2ON36 = ((double*)LCONST)[2]; + double D2ON1022 = ((double*)LCONST)[3]; + double D2ONM52 = ((double*)LCONST)[4]; + + double *pz0, *pz1 = 0, *pz2; + + /* + * The loop below runs PREP(0) before it ever tests n, so without + * this check an empty request would still read px and py and + * store one result through pz. + */ + if (n <= 0) + return; + + do + { +start0: + PREP(0) + px += stridex; + py += stridey; + pz += stridez; + i = 1; + if (--n <= 0) + break; + +start1: + PREP(1) + px += stridex; + py += stridey; + pz += stridez; + i = 2; + if (--n <= 0) + break; + +start2: + PREP(2) + + x0 *= scl0; + y0 *= scl0; + x1 *= scl1; + y1 *= scl1; + x2 *= scl2; + y2 *= scl2; + + x_hi0 = (x0 + D2ON36) - D2ON36; + y_hi0 = (y0 + D2ON36) - D2ON36; + x_hi1 = (x1 + D2ON36) - D2ON36; + y_hi1 = (y1 + D2ON36) - D2ON36; + x_hi2 = (x2 + D2ON36) - D2ON36; + y_hi2 = (y2 + D2ON36) - D2ON36; + x_lo0 = x0 - x_hi0; + y_lo0 = y0 - y_hi0; + x_lo1 = x1 - x_hi1; + y_lo1 = y1 - y_hi1; + x_lo2 = x2 - x_hi2; + y_lo2 = y2 - y_hi2; + res0_hi = (x_hi0 * x_hi0 + y_hi0 * y_hi0); + res1_hi = (x_hi1 * x_hi1 + y_hi1 * y_hi1); + res2_hi = (x_hi2 * x_hi2 + y_hi2 * y_hi2); + res0_lo = ((x0 + x_hi0) * x_lo0 + (y0 + y_hi0) * y_lo0); + res1_lo = ((x1 + x_hi1) * x_lo1 + (y1 + y_hi1) * y_lo1); + res2_lo = ((x2 + x_hi2) * 
x_lo2 + (y2 + y_hi2) * y_lo2); + + dres0 = res0_hi + res0_lo; + dres1 = res1_hi + res1_lo; + dres2 = res2_hi + res2_lo; + + iarr0 = HI(&dres0); + iarr1 = HI(&dres1); + iarr2 = HI(&dres2); + iexp0 = iarr0 & 0xfff00000; + iexp1 = iarr1 & 0xfff00000; + iexp2 = iarr2 & 0xfff00000; + + iarr0 = (iarr0 >> 11) & 0x1fc; + iarr1 = (iarr1 >> 11) & 0x1fc; + iarr2 = (iarr2 >> 11) & 0x1fc; + itbl0 = ((int*)((char*)__vlibm_TBL_rhypot + iarr0))[0]; + itbl1 = ((int*)((char*)__vlibm_TBL_rhypot + iarr1))[0]; + itbl2 = ((int*)((char*)__vlibm_TBL_rhypot + iarr2))[0]; + itbl0 -= iexp0; + itbl1 -= iexp1; + itbl2 -= iexp2; + HI(&dd0) = itbl0; + HI(&dd1) = itbl1; + HI(&dd2) = itbl2; + LO(&dd0) = 0; + LO(&dd1) = 0; + LO(&dd2) = 0; + + /* three Newton-Raphson steps sharpen the table estimate of 1/dres */ + dd0 = dd0 * (DTWO - dd0 * dres0); + dd1 = dd1 * (DTWO - dd1 * dres1); + dd2 = dd2 * (DTWO - dd2 * dres2); + dd0 = dd0 * (DTWO - dd0 * dres0); + dd1 = dd1 * (DTWO - dd1 * dres1); + dd2 = dd2 * (DTWO - dd2 * dres2); + dres0 = dd0 * (DTWO - dd0 * dres0); + dres1 = dd1 * (DTWO - dd1 * dres1); + dres2 = dd2 * (DTWO - dd2 * dres2); + + HI(&res0) = HI(&dres0) & 0xffffff00; + HI(&res1) = HI(&dres1) & 0xffffff00; + HI(&res2) = HI(&dres2) & 0xffffff00; + LO(&res0) = 0; + LO(&res1) = 0; + LO(&res2) = 0; + res0 += (DONE - res0_hi * res0 - res0_lo * res0) * dres0; + res1 += (DONE - res1_hi * res1 - res1_lo * res1) * dres1; + res2 += (DONE - res2_hi * res2 - res2_lo * res2) * dres2; + res0 = sqrt (res0); + res1 = sqrt (res1); + res2 = sqrt (res2); + + res0 = scl0 * res0; + res1 = scl1 * res1; + res2 = scl2 * res2; + + *pz0 = res0; + *pz1 = res1; + *pz2 = res2; + + px += stridex; + py += stridey; + pz += stridez; + i = 0; + + } while (--n > 0); + + /* finish the one or two elements prepared before the loop ended */ + if (i > 0) + { + x0 *= scl0; + y0 *= scl0; + + x_hi0 = (x0 + D2ON36) - D2ON36; + y_hi0 = (y0 + D2ON36) - D2ON36; + x_lo0 = x0 - x_hi0; + y_lo0 = y0 - y_hi0; + res0_hi = (x_hi0 * x_hi0 + y_hi0 * y_hi0); + res0_lo = ((x0 + x_hi0) * x_lo0 + (y0 + y_hi0) * y_lo0); + + dres0 = res0_hi + res0_lo; + + iarr0 = HI(&dres0); + iexp0 = iarr0 
& 0xfff00000; + + iarr0 = (iarr0 >> 11) & 0x1fc; + itbl0 = ((int*)((char*)__vlibm_TBL_rhypot + iarr0))[0]; + itbl0 -= iexp0; + HI(&dd0) = itbl0; + LO(&dd0) = 0; + + dd0 = dd0 * (DTWO - dd0 * dres0); + dd0 = dd0 * (DTWO - dd0 * dres0); + dres0 = dd0 * (DTWO - dd0 * dres0); + + HI(&res0) = HI(&dres0) & 0xffffff00; + LO(&res0) = 0; + res0 += (DONE - res0_hi * res0 - res0_lo * res0) * dres0; + res0 = sqrt (res0); + + res0 = scl0 * res0; + + *pz0 = res0; + + if (i > 1) + { + x1 *= scl1; + y1 *= scl1; + + x_hi1 = (x1 + D2ON36) - D2ON36; + y_hi1 = (y1 + D2ON36) - D2ON36; + x_lo1 = x1 - x_hi1; + y_lo1 = y1 - y_hi1; + res1_hi = (x_hi1 * x_hi1 + y_hi1 * y_hi1); + res1_lo = ((x1 + x_hi1) * x_lo1 + (y1 + y_hi1) * y_lo1); + + dres1 = res1_hi + res1_lo; + + iarr1 = HI(&dres1); + iexp1 = iarr1 & 0xfff00000; + + iarr1 = (iarr1 >> 11) & 0x1fc; + itbl1 = ((int*)((char*)__vlibm_TBL_rhypot + iarr1))[0]; + itbl1 -= iexp1; + HI(&dd1) = itbl1; + LO(&dd1) = 0; + + dd1 = dd1 * (DTWO - dd1 * dres1); + dd1 = dd1 * (DTWO - dd1 * dres1); + dres1 = dd1 * (DTWO - dd1 * dres1); + + HI(&res1) = HI(&dres1) & 0xffffff00; + LO(&res1) = 0; + res1 += (DONE - res1_hi * res1 - res1_lo * res1) * dres1; + res1 = sqrt (res1); + + res1 = scl1 * res1; + + *pz1 = res1; + } + } +} + diff --git a/usr/src/lib/libmvec/common/__vrhypotf.c b/usr/src/lib/libmvec/common/__vrhypotf.c new file mode 100644 index 0000000000..2b68991294 --- /dev/null +++ b/usr/src/lib/libmvec/common/__vrhypotf.c @@ -0,0 +1,465 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License.
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include <sys/isa_defs.h> +#include "libm_synonyms.h" +#include "libm_inlines.h" + +#ifdef _LITTLE_ENDIAN +#define HI(x) *(1+(int*)x) +#define LO(x) *(unsigned*)x +#else +#define HI(x) *(int*)x +#define LO(x) *(1+(unsigned*)x) +#endif + +#ifdef __RESTRICT +#define restrict _Restrict +#else +#define restrict +#endif + +/* float rhypotf(float x, float y) + * + * Method : + * 1. Special cases: + * for x or y = Inf => 0; + * for x or y = NaN => QNaN; + * for x and y = 0 => +Inf + divide-by-zero; + * 2. Computes d = x * x + y * y; + * 3. Computes reciprocal square root from: + * d = m * 2**n + * Where: + * m = [0.5, 2), + * n = ((exponent + 1) & ~1). + * Then: + * rsqrtf(d) = 1/sqrt( m * 2**n ) = (2 ** (-n/2)) * (1/sqrt(m)) + * 4. Computes 1/sqrt(m) from: + * 1/sqrt(m) = (1/sqrt(m0)) * (1/sqrt(1 + (1/m0)*dm)) + * Where: + * m = m0 + dm, + * m0 = 0.5 * (1 + k/64) for m = [0.5, 0.5+127/256), k = [0, 63]; + * m0 = 1.0 * (0 + k/64) for m = [0.5+127/256, 1.0+127/128), k = [64, 127]; + * Then: + * 1/sqrt(m0), 1/m0 are looked up in a table, + * 1/sqrt(1 + (1/m0)*dm) is computed using approximation: + * 1/sqrt(1 + z) = ((a3 * z + a2) * z + a1) * z + a0 + * where z = [-1/64, 1/64]. + * + * Accuracy: + * The maximum relative error for the approximating + * polynomial is 2**(-27.87). + * Maximum error observed: less than 0.535 ulp after 3.000.000.000 + * results. 
+ */ + +#pragma align 32 (__vlibm_TBL_rhypotf) + +static const double __vlibm_TBL_rhypotf[] = { +/* + i = [0,63] + TBL[2*i+0] = 1.0 / (*(double*)&(0x3ff0000000000000LL + (i << 46))); + TBL[2*i+1] = (double)(0.5/sqrtl(2) / sqrtl(*(double*)&(0x3ff0000000000000LL + (i << 46)))); + TBL[128+2*i+0] = 1.0 / (*(double*)&(0x3ff0000000000000LL + (i << 46))); + TBL[128+2*i+1] = (double)(0.25 / sqrtl(*(double*)&(0x3ff0000000000000LL + (i << 46)))); +*/ + 1.0000000000000000000e+00, 3.5355339059327378637e-01, + 9.8461538461538467004e-01, 3.5082320772281166965e-01, + 9.6969696969696972388e-01, 3.4815531191139570399e-01, + 9.5522388059701490715e-01, 3.4554737023254405992e-01, + 9.4117647058823528106e-01, 3.4299717028501769400e-01, + 9.2753623188405798228e-01, 3.4050261230349943009e-01, + 9.1428571428571425717e-01, 3.3806170189140660742e-01, + 9.0140845070422537244e-01, 3.3567254331867563133e-01, + 8.8888888888888883955e-01, 3.3333333333333331483e-01, + 8.7671232876712323900e-01, 3.3104235544094717802e-01, + 8.6486486486486491287e-01, 3.2879797461071458287e-01, + 8.5333333333333338810e-01, 3.2659863237109043599e-01, + 8.4210526315789469010e-01, 3.2444284226152508843e-01, + 8.3116883116883122362e-01, 3.2232918561015211356e-01, + 8.2051282051282048435e-01, 3.2025630761017426229e-01, + 8.1012658227848100001e-01, 3.1822291367029204023e-01, + 8.0000000000000004441e-01, 3.1622776601683794118e-01, + 7.9012345679012341293e-01, 3.1426968052735443360e-01, + 7.8048780487804880757e-01, 3.1234752377721214378e-01, + 7.7108433734939763049e-01, 3.1046021028253312224e-01, + 7.6190476190476186247e-01, 3.0860669992418382490e-01, + 7.5294117647058822484e-01, 3.0678599553894819740e-01, + 7.4418604651162789665e-01, 3.0499714066520933198e-01, + 7.3563218390804596680e-01, 3.0323921743156134756e-01, + 7.2727272727272729291e-01, 3.0151134457776362918e-01, + 7.1910112359550559802e-01, 2.9981267559834456904e-01, + 7.1111111111111113825e-01, 2.9814239699997197031e-01, + 7.0329670329670335160e-01, 
2.9649972666444046610e-01, + 6.9565217391304345895e-01, 2.9488391230979427160e-01, + 6.8817204301075274309e-01, 2.9329423004270660513e-01, + 6.8085106382978721751e-01, 2.9172998299578911663e-01, + 6.7368421052631577428e-01, 2.9019050004400465115e-01, + 6.6666666666666662966e-01, 2.8867513459481286553e-01, + 6.5979381443298967813e-01, 2.8718326344709527165e-01, + 6.5306122448979586625e-01, 2.8571428571428569843e-01, + 6.4646464646464651960e-01, 2.8426762180748055275e-01, + 6.4000000000000001332e-01, 2.8284271247461900689e-01, + 6.3366336633663367106e-01, 2.8143901789211672737e-01, + 6.2745098039215685404e-01, 2.8005601680560193723e-01, + 6.2135922330097081989e-01, 2.7869320571664707442e-01, + 6.1538461538461541878e-01, 2.7735009811261457369e-01, + 6.0952380952380957879e-01, 2.7602622373694168934e-01, + 6.0377358490566035432e-01, 2.7472112789737807015e-01, + 5.9813084112149528249e-01, 2.7343437080986532361e-01, + 5.9259259259259255970e-01, 2.7216552697590867815e-01, + 5.8715596330275232617e-01, 2.7091418459143856712e-01, + 5.8181818181818178992e-01, 2.6967994498529684888e-01, + 5.7657657657657657158e-01, 2.6846242208560971987e-01, + 5.7142857142857139685e-01, 2.6726124191242439654e-01, + 5.6637168141592919568e-01, 2.6607604209509572168e-01, + 5.6140350877192979340e-01, 2.6490647141300877054e-01, + 5.5652173913043478937e-01, 2.6375218935831479250e-01, + 5.5172413793103447510e-01, 2.6261286571944508772e-01, + 5.4700854700854706358e-01, 2.6148818018424535570e-01, + 5.4237288135593220151e-01, 2.6037782196164771520e-01, + 5.3781512605042014474e-01, 2.5928148942086576278e-01, + 5.3333333333333332593e-01, 2.5819888974716115326e-01, + 5.2892561983471075848e-01, 2.5712973861329002645e-01, + 5.2459016393442625681e-01, 2.5607375986579195004e-01, + 5.2032520325203257539e-01, 2.5503068522533534068e-01, + 5.1612903225806450180e-01, 2.5400025400038100942e-01, + 5.1200000000000001066e-01, 2.5298221281347033074e-01, + 5.0793650793650790831e-01, 2.5197631533948483540e-01, + 
5.0393700787401574104e-01, 2.5098232205526344041e-01, + 1.0000000000000000000e+00, 2.5000000000000000000e-01, + 9.8461538461538467004e-01, 2.4806946917841690703e-01, + 9.6969696969696972388e-01, 2.4618298195866547551e-01, + 9.5522388059701490715e-01, 2.4433888871261044695e-01, + 9.4117647058823528106e-01, 2.4253562503633296910e-01, + 9.2753623188405798228e-01, 2.4077170617153839660e-01, + 9.1428571428571425717e-01, 2.3904572186687872426e-01, + 9.0140845070422537244e-01, 2.3735633163877067897e-01, + 8.8888888888888883955e-01, 2.3570226039551583908e-01, + 8.7671232876712323900e-01, 2.3408229439226113655e-01, + 8.6486486486486491287e-01, 2.3249527748763856860e-01, + 8.5333333333333338810e-01, 2.3094010767585029797e-01, + 8.4210526315789469010e-01, 2.2941573387056177213e-01, + 8.3116883116883122362e-01, 2.2792115291927589338e-01, + 8.2051282051282048435e-01, 2.2645540682891915352e-01, + 8.1012658227848100001e-01, 2.2501758018520479077e-01, + 8.0000000000000004441e-01, 2.2360679774997896385e-01, + 7.9012345679012341293e-01, 2.2222222222222220989e-01, + 7.8048780487804880757e-01, 2.2086305214969309541e-01, + 7.7108433734939763049e-01, 2.1952851997938069295e-01, + 7.6190476190476186247e-01, 2.1821789023599238999e-01, + 7.5294117647058822484e-01, 2.1693045781865616384e-01, + 7.4418604651162789665e-01, 2.1566554640687682354e-01, + 7.3563218390804596680e-01, 2.1442250696755896233e-01, + 7.2727272727272729291e-01, 2.1320071635561044232e-01, + 7.1910112359550559802e-01, 2.1199957600127200541e-01, + 7.1111111111111113825e-01, 2.1081851067789195153e-01, + 7.0329670329670335160e-01, 2.0965696734438366011e-01, + 6.9565217391304345895e-01, 2.0851441405707477061e-01, + 6.8817204301075274309e-01, 2.0739033894608505104e-01, + 6.8085106382978721751e-01, 2.0628424925175867233e-01, + 6.7368421052631577428e-01, 2.0519567041703082322e-01, + 6.6666666666666662966e-01, 2.0412414523193150862e-01, + 6.5979381443298967813e-01, 2.0306923302672380549e-01, + 6.5306122448979586625e-01, 
2.0203050891044216364e-01, + 6.4646464646464651960e-01, 2.0100756305184241945e-01, + 6.4000000000000001332e-01, 2.0000000000000001110e-01, + 6.3366336633663367106e-01, 1.9900743804199783060e-01, + 6.2745098039215685404e-01, 1.9802950859533485772e-01, + 6.2135922330097081989e-01, 1.9706585563285863860e-01, + 6.1538461538461541878e-01, 1.9611613513818404453e-01, + 6.0952380952380957879e-01, 1.9518001458970662965e-01, + 6.0377358490566035432e-01, 1.9425717247145282696e-01, + 5.9813084112149528249e-01, 1.9334729780913270658e-01, + 5.9259259259259255970e-01, 1.9245008972987526219e-01, + 5.8715596330275232617e-01, 1.9156525704423027490e-01, + 5.8181818181818178992e-01, 1.9069251784911847580e-01, + 5.7657657657657657158e-01, 1.8983159915049979682e-01, + 5.7142857142857139685e-01, 1.8898223650461362655e-01, + 5.6637168141592919568e-01, 1.8814417367671945613e-01, + 5.6140350877192979340e-01, 1.8731716231633879777e-01, + 5.5652173913043478937e-01, 1.8650096164806276300e-01, + 5.5172413793103447510e-01, 1.8569533817705186074e-01, + 5.4700854700854706358e-01, 1.8490006540840969729e-01, + 5.4237288135593220151e-01, 1.8411492357966466327e-01, + 5.3781512605042014474e-01, 1.8333969940564226464e-01, + 5.3333333333333332593e-01, 1.8257418583505535814e-01, + 5.2892561983471075848e-01, 1.8181818181818182323e-01, + 5.2459016393442625681e-01, 1.8107149208503706128e-01, + 5.2032520325203257539e-01, 1.8033392693348646030e-01, + 5.1612903225806450180e-01, 1.7960530202677491007e-01, + 5.1200000000000001066e-01, 1.7888543819998317663e-01, + 5.0793650793650790831e-01, 1.7817416127494958844e-01, + 5.0393700787401574104e-01, 1.7747130188322274291e-01, +}; + +#define fabsf __fabsf + +extern float fabsf(float); + +static const double + A0 = 9.99999997962321453275e-01, + A1 =-4.99999998166077580600e-01, + A2 = 3.75066768969515586277e-01, + A3 =-3.12560092408808548438e-01; + +static void +__vrhypotf_n(int n, float * restrict px, int stridex, float * restrict py, + int stridey, float * restrict pz, 
int stridez); + +#pragma no_inline(__vrhypotf_n) +/* + * RETURN(ret): used in the scan loop of __vrhypotf() for an element that is + * handled inline. Stores ret through pz and advances pz. With no pending + * run (n_n == 0) the run start moves to the current position, ay0 is + * reloaded and the scan continues; otherwise the element is consumed (n--) + * and the loop is left so the pending run can go to __vrhypotf_n(). + */ +#define RETURN(ret) \ +{ \ + *pz = (ret); \ + pz += stridez; \ + if (n_n == 0) \ + { \ + spx = px; spy = py; spz = pz; \ + ay0 = *(int*)py; \ + continue; \ + } \ + n--; \ + break; \ +} + +/* + * __vrhypotf: for each of n elements, reads x from px and y from py + * (advanced by stridex and stridey) and stores one float through pz + * (advanced by stridez). Judging by the name and the sum of squares formed + * in hyp0, the value is presumably the reciprocal of the hypotenuse of x + * and y. + * + * Handled inline: if x or y is NaN or Inf the result is the sum of |x| and + * |y|, replaced by 0.0f when either one is Inf; if both are zero the result + * is 1.0f / 0.0f. Every other element is counted into a run (n_n) that is + * passed to __vrhypotf_n(). A single leftover element, if any, is + * evaluated after the loop. + */ +void +__vrhypotf(int n, float * restrict px, int stridex, float * restrict py, + int stridey, float * restrict pz, int stridez) +{ + float *spx, *spy, *spz; + int ax0, ay0, n_n; + float res, x0, y0; + + while (n > 1) + { + n_n = 0; + spx = px; + spy = py; + spz = pz; + ax0 = *(int*)px; + ay0 = *(int*)py; + for (; n > 1 ; n--) + { + ax0 &= 0x7fffffff; + ay0 &= 0x7fffffff; /* sign bits cleared: only magnitudes are tested */ + + px += stridex; + + if (ax0 >= 0x7f800000 || ay0 >= 0x7f800000) /* X or Y = NaN or Inf */ + { + x0 = *(px - stridex); + y0 = *py; + res = fabsf(x0) + fabsf(y0); + if (ax0 == 0x7f800000) res = 0.0f; + else if (ay0 == 0x7f800000) res = 0.0f; + ax0 = *(int*)px; + py += stridey; + RETURN (res) + } + ax0 = *(int*)px; /* bits of the next X; px was advanced above */ + py += stridey; + if (ay0 == 0) /* Y = 0 */ + { + int tx = *(int*)(px - stridex) & 0x7fffffff; + if (tx == 0) /* X = 0 */ + { + RETURN (1.0f / 0.0f) + } + } + pz += stridez; + n_n++; + ay0 = *(int*)py; + } + if (n_n > 0) + __vrhypotf_n(n_n, spx, stridex, spy, stridey, spz, stridez); + } + if (n > 0) + { + ax0 = *(int*)px; + ay0 = *(int*)py; + x0 = *px; + y0 = *py; + + ax0 &= 0x7fffffff; + ay0 &= 0x7fffffff; + + if (ax0 >= 0x7f800000 || ay0 >= 0x7f800000) /* X or Y = NaN or Inf */ + { + res = fabsf(x0) + fabsf(y0); + if (ax0 == 0x7f800000) res = 0.0f; + else if (ay0 == 0x7f800000) res = 0.0f; + *pz = res; + } + else if (ax0 == 0 && ay0 == 0) /* X and Y = 0 */ + { + *pz = 1.0f / 0.0f; + } + else + { + double xx0, res0, hyp0, h_hi0 = 0, dbase0 = 0; + int ibase0, si0, hyp0h; + + hyp0 = x0 * (double)x0 + y0 * (double)y0; + + ibase0 = HI(&hyp0); + + HI(&dbase0) = (0x60000000 - ((ibase0 & 0x7fe00000) >> 1)); + + hyp0h = (ibase0 & 0x000fffff) | 0x3ff00000; + HI(&hyp0) = hyp0h; + HI(&h_hi0) = hyp0h & 0x7fffc000; + + ibase0 >>= 10; + si0 = ibase0 & 0x7f0; +
xx0 = ((double*)((char*)__vlibm_TBL_rhypotf + si0))[0]; + + xx0 = (hyp0 - h_hi0) * xx0; + res0 = ((double*)((char*)__vlibm_TBL_rhypotf + si0))[1]; + res0 *= (((A3 * xx0 + A2) * xx0 + A1) * xx0 + A0); + res0 *= dbase0; + *pz = res0; + } + } +} +/* + * __vrhypotf_n: evaluates n elements without any NaN, Inf or zero checks; + * __vrhypotf() only passes runs it has already screened. The first loop + * handles three elements per iteration, the second the remainder one at a + * time. Per element: hyp is the sum of squares in double precision, dbase + * is a scale whose high word is built from exponent bits of hyp, si is a + * byte offset into __vlibm_TBL_rhypotf selecting a pair of doubles, and the + * result is pair[1] times the cubic in A0..A3 evaluated at xx, times dbase. + */ +static void +__vrhypotf_n(int n, float * restrict px, int stridex, float * restrict py, + int stridey, float * restrict pz, int stridez) +{ + double xx0, res0, hyp0, h_hi0 = 0, dbase0 = 0; + double xx1, res1, hyp1, h_hi1 = 0, dbase1 = 0; + double xx2, res2, hyp2, h_hi2 = 0, dbase2 = 0; + float x0, y0; + float x1, y1; + float x2, y2; + int ibase0, si0, hyp0h; + int ibase1, si1, hyp1h; + int ibase2, si2, hyp2h; + + for (; n > 2 ; n -= 3) + { + x0 = *px; + px += stridex; + x1 = *px; + px += stridex; + x2 = *px; + px += stridex; + + y0 = *py; + py += stridey; + y1 = *py; + py += stridey; + y2 = *py; + py += stridey; + + hyp0 = x0 * (double)x0 + y0 * (double)y0; + hyp1 = x1 * (double)x1 + y1 * (double)y1; + hyp2 = x2 * (double)x2 + y2 * (double)y2; + + ibase0 = HI(&hyp0); + ibase1 = HI(&hyp1); + ibase2 = HI(&hyp2); + + HI(&dbase0) = (0x60000000 - ((ibase0 & 0x7fe00000) >> 1)); + HI(&dbase1) = (0x60000000 - ((ibase1 & 0x7fe00000) >> 1)); + HI(&dbase2) = (0x60000000 - ((ibase2 & 0x7fe00000) >> 1)); + + hyp0h = (ibase0 & 0x000fffff) | 0x3ff00000; + hyp1h = (ibase1 & 0x000fffff) | 0x3ff00000; + hyp2h = (ibase2 & 0x000fffff) | 0x3ff00000; + HI(&hyp0) = hyp0h; + HI(&hyp1) = hyp1h; + HI(&hyp2) = hyp2h; + HI(&h_hi0) = hyp0h & 0x7fffc000; + HI(&h_hi1) = hyp1h & 0x7fffc000; + HI(&h_hi2) = hyp2h & 0x7fffc000; + + ibase0 >>= 10; + ibase1 >>= 10; + ibase2 >>= 10; + si0 = ibase0 & 0x7f0; + si1 = ibase1 & 0x7f0; + si2 = ibase2 & 0x7f0; + xx0 = ((double*)((char*)__vlibm_TBL_rhypotf + si0))[0]; + xx1 = ((double*)((char*)__vlibm_TBL_rhypotf + si1))[0]; + xx2 = ((double*)((char*)__vlibm_TBL_rhypotf + si2))[0]; + + xx0 = (hyp0 - h_hi0) * xx0; + xx1 = (hyp1 - h_hi1) * xx1; + xx2 = (hyp2 - h_hi2) * xx2; + res0 = ((double*)((char*)__vlibm_TBL_rhypotf
+ si0))[1]; + res1 = ((double*)((char*)__vlibm_TBL_rhypotf + si1))[1]; + res2 = ((double*)((char*)__vlibm_TBL_rhypotf + si2))[1]; + res0 *= (((A3 * xx0 + A2) * xx0 + A1) * xx0 + A0); + res1 *= (((A3 * xx1 + A2) * xx1 + A1) * xx1 + A0); + res2 *= (((A3 * xx2 + A2) * xx2 + A1) * xx2 + A0); + res0 *= dbase0; + res1 *= dbase1; + res2 *= dbase2; + *pz = res0; + pz += stridez; + *pz = res1; + pz += stridez; + *pz = res2; + pz += stridez; + } + + for (; n > 0 ; n--) /* leftover elements, one per iteration */ + { + x0 = *px; + px += stridex; + + y0 = *py; + py += stridey; + + hyp0 = x0 * (double)x0 + y0 * (double)y0; + + ibase0 = HI(&hyp0); + + HI(&dbase0) = (0x60000000 - ((ibase0 & 0x7fe00000) >> 1)); + + hyp0h = (ibase0 & 0x000fffff) | 0x3ff00000; + HI(&hyp0) = hyp0h; + HI(&h_hi0) = hyp0h & 0x7fffc000; + + ibase0 >>= 10; + si0 = ibase0 & 0x7f0; + xx0 = ((double*)((char*)__vlibm_TBL_rhypotf + si0))[0]; + + xx0 = (hyp0 - h_hi0) * xx0; + res0 = ((double*)((char*)__vlibm_TBL_rhypotf + si0))[1]; + res0 *= (((A3 * xx0 + A2) * xx0 + A1) * xx0 + A0); + res0 *= dbase0; + *pz = res0; + pz += stridez; + } +} + diff --git a/usr/src/lib/libmvec/common/__vrsqrt.c b/usr/src/lib/libmvec/common/__vrsqrt.c new file mode 100644 index 0000000000..6fb9cd7414 --- /dev/null +++ b/usr/src/lib/libmvec/common/__vrsqrt.c @@ -0,0 +1,415 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include <sys/isa_defs.h> +#include "libm_synonyms.h" +#include "libm_inlines.h" + +#ifdef _LITTLE_ENDIAN +#define HI(x) *(1+(int*)x) +#define LO(x) *(unsigned*)x +#else +#define HI(x) *(int*)x +#define LO(x) *(1+(unsigned*)x) +#endif + +#ifdef __RESTRICT +#define restrict _Restrict +#else +#define restrict +#endif + +/* double rsqrt(double x) + * + * Method : + * 1. Special cases: + * for x = NaN => QNaN; + * for x = +Inf => 0; + * for x is negative, -Inf => QNaN + invalid; + * for x = +0 => +Inf + divide-by-zero; + * for x = -0 => -Inf + divide-by-zero. + * 2. Computes reciprocal square root from: + * x = m * 2**n + * Where: + * m = [0.5, 2), + * n = ((exponent + 1) & ~1). + * Then: + * rsqrt(x) = 1/sqrt( m * 2**n ) = (2 ** (-n/2)) * (1/sqrt(m)) + * 3. Computes 1/sqrt(m) from: + * 1/sqrt(m) = (1/sqrt(m0)) * (1/sqrt(1 + (1/m0)*dm)) + * Where: + * m = m0 + dm, + * m0 = 0.5 * (1 + k/64) for m = [0.5, 0.5+127/256), k = [0, 63]; + * m0 = 1.0 * (0 + k/64) for m = [0.5+127/256, 1.0+127/128), k = [64, 127]; + * m0 = 2.0 for m = [1.0+127/128, 2.0), k = 128. + * Then: + * 1/sqrt(m0) is looked up in a table, + * 1/m0 is computed as (1/sqrt(m0)) * (1/sqrt(m0)). + * 1/sqrt(1 + (1/m0)*dm) is computed using approximation: + * 1/sqrt(1 + z) = (((((a6 * z + a5) * z + a4) * z + a3) + * * z + a2) * z + a1) * z + a0 + * where z = [-1/128, 1/128]. + * + * Accuracy: + * The maximum relative error for the approximating + * polynomial is 2**(-56.26). + * Maximum error observed: less than 0.563 ulp after 1.500.000.000 + * results.
+ */ + +#define sqrt __sqrt + +extern double sqrt (double); +extern const double __vlibm_TBL_rsqrt[]; + +static void +__vrsqrt_n(int n, double * restrict px, int stridex, double * restrict py, int stridey); + +#pragma no_inline(__vrsqrt_n) +/* + * RETURN(ret): used in the scan loop of __vrsqrt() for an element that is + * handled inline. Stores ret through py and advances py. With no pending + * run (n_n == 0) the run start moves to the current position, hx is + * reloaded and the scan continues; otherwise the element is consumed (n--) + * and the loop is left so the pending run can go to __vrsqrt_n(). + */ +#define RETURN(ret) \ +{ \ + *py = (ret); \ + py += stridey; \ + if (n_n == 0) \ + { \ + spx = px; spy = py; \ + hx = HI(px); \ + continue; \ + } \ + n--; \ + break; \ +} +/* + * DONE is 1.0. K1..K6 are the coefficients of the polynomial in xx + * evaluated below; it has no constant term, dexp_hi is added separately. + */ +static const double + DONE = 1.0, + K1 = -5.00000000000005209867e-01, + K2 = 3.75000000000004884257e-01, + K3 = -3.12499999317136886551e-01, + K4 = 2.73437499359815081532e-01, + K5 = -2.46116125605037803130e-01, + K6 = 2.25606914648617522896e-01; +/* + * __vrsqrt: for each of n doubles read from px (advanced by stridex), + * stores the reciprocal square root through py (advanced by stridey). + * + * Elements are classified by their high word hx, compared as a signed int. + * hx >= 0x7ff00000 (NaN or Inf with the sign bit clear) gives 1.0 / x, as + * does a zero of either sign. A positive denormal is rescaled and + * evaluated inline. Any other input with the sign bit set gives sqrt(x). + * All remaining elements are counted into a run (n_n) that is passed to + * __vrsqrt_n(). A single leftover element, if any, is evaluated after the + * loop. + */ +void +__vrsqrt(int n, double * restrict px, int stridex, double * restrict py, int stridey) +{ + double *spx, *spy; + int ax, lx, hx, n_n; + double res; + + while (n > 1) + { + n_n = 0; + spx = px; + spy = py; + hx = HI(px); + for (; n > 1 ; n--) + { + px += stridex; + if (hx >= 0x7ff00000) /* X = NaN or Inf */ + { + res = *(px - stridex); + RETURN (DONE / res) + } + + py += stridey; + + if (hx < 0x00100000) /* X = denormal, zero or negative */ + { + py -= stridey; + ax = hx & 0x7fffffff; + lx = LO((px - stridex)); + res = *(px - stridex); + + if ((ax | lx) == 0) /* |X| = zero */ + { + RETURN (DONE / res) + } + else if (hx >= 0) /* X = denormal */ + { + double res_c0, dsqrt_exp0; + int ind0, sqrt_exp0; + double xx0, dexp_hi0, dexp_lo0; + int hx0, resh0, res_ch0; + + res = *(long long*)&res; /* bits read as an integer, converted to double; presumably why 0x817 replaces 0x5fe below */ + + hx0 = HI(&res); + sqrt_exp0 = (0x817 - (hx0 >> 21)) << 20; + ind0 = (((hx0 >> 10) & 0x7f8) + 8) & -16; + + resh0 = (hx0 & 0x001fffff) | 0x3fe00000; + res_ch0 = (resh0 + 0x00002000) & 0x7fffc000; + HI(&res) = resh0; + HI(&res_c0) = res_ch0; + LO(&res_c0) = 0; + + dexp_hi0 = ((double*)((char*)__vlibm_TBL_rsqrt + ind0))[0]; + dexp_lo0 = ((double*)((char*)__vlibm_TBL_rsqrt + ind0))[1]; + xx0 = dexp_hi0 * dexp_hi0; + xx0 = (res - res_c0) * xx0; + res = (((((K6 * xx0 + K5) * xx0 + K4) * xx0 + K3) * xx0 + K2) * xx0 + K1) * xx0;
+ + res = dexp_hi0 * res + dexp_lo0 + dexp_hi0; + + HI(&dsqrt_exp0) = sqrt_exp0; + LO(&dsqrt_exp0) = 0; + res *= dsqrt_exp0; + + RETURN (res) + } + else /* X = negative */ + { + RETURN (sqrt(res)) + } + } + n_n++; + hx = HI(px); + } + if (n_n > 0) + __vrsqrt_n(n_n, spx, stridex, spy, stridey); + } + if (n > 0) + { + hx = HI(px); + + if (hx >= 0x7ff00000) /* X = NaN or Inf */ + { + res = *px; + *py = DONE / res; + } + else if (hx < 0x00100000) /* X = denormal, zero or negative */ + { + ax = hx & 0x7fffffff; + lx = LO(px); + res = *px; + + if ((ax | lx) == 0) /* |X| = zero */ + { + *py = DONE / res; + } + else if (hx >= 0) /* X = denormal */ + { + double res_c0, dsqrt_exp0; + int ind0, sqrt_exp0; + double xx0, dexp_hi0, dexp_lo0; + int hx0, resh0, res_ch0; + + res = *(long long*)&res; + + hx0 = HI(&res); + sqrt_exp0 = (0x817 - (hx0 >> 21)) << 20; + ind0 = (((hx0 >> 10) & 0x7f8) + 8) & -16; + + resh0 = (hx0 & 0x001fffff) | 0x3fe00000; + res_ch0 = (resh0 + 0x00002000) & 0x7fffc000; + HI(&res) = resh0; + HI(&res_c0) = res_ch0; + LO(&res_c0) = 0; + + dexp_hi0 = ((double*)((char*)__vlibm_TBL_rsqrt + ind0))[0]; + dexp_lo0 = ((double*)((char*)__vlibm_TBL_rsqrt + ind0))[1]; + xx0 = dexp_hi0 * dexp_hi0; + xx0 = (res - res_c0) * xx0; + res = (((((K6 * xx0 + K5) * xx0 + K4) * xx0 + K3) * xx0 + K2) * xx0 + K1) * xx0; + + res = dexp_hi0 * res + dexp_lo0 + dexp_hi0; + + HI(&dsqrt_exp0) = sqrt_exp0; + LO(&dsqrt_exp0) = 0; + res *= dsqrt_exp0; + + *py = res; + } + else /* X = negative */ + { + *py = sqrt(res); + } + } + else + { + double res_c0, dsqrt_exp0; + int ind0, sqrt_exp0; + double xx0, dexp_hi0, dexp_lo0; + int resh0, res_ch0; + + sqrt_exp0 = (0x5fe - (hx >> 21)) << 20; + ind0 = (((hx >> 10) & 0x7f8) + 8) & -16; + + resh0 = (hx & 0x001fffff) | 0x3fe00000; + res_ch0 = (resh0 + 0x00002000) & 0x7fffc000; + HI(&res) = resh0; + LO(&res) = LO(px); + HI(&res_c0) = res_ch0; + LO(&res_c0) = 0; + + dexp_hi0 = ((double*)((char*)__vlibm_TBL_rsqrt + ind0))[0]; + dexp_lo0 =
((double*)((char*)__vlibm_TBL_rsqrt + ind0))[1]; + xx0 = dexp_hi0 * dexp_hi0; + xx0 = (res - res_c0) * xx0; + res = (((((K6 * xx0 + K5) * xx0 + K4) * xx0 + K3) * xx0 + K2) * xx0 + K1) * xx0; + + res = dexp_hi0 * res + dexp_lo0 + dexp_hi0; + + HI(&dsqrt_exp0) = sqrt_exp0; + LO(&dsqrt_exp0) = 0; + res *= dsqrt_exp0; + + *py = res; + } + } +} +/* + * __vrsqrt_n: evaluates n elements without any special-case checks; + * __vrsqrt() only passes runs it has already classified. The first loop + * handles three elements per iteration, the second the remainder one at a + * time. Per element: the high word hx gives a scale (sqrt_exp, stored as + * the high word of dsqrt_exp) and a byte offset ind into __vlibm_TBL_rsqrt + * selecting the pair dexp_hi, dexp_lo. res is rebuilt from the low 21 bits + * of hx under a fixed 0x3fe00000 prefix, and res_c is its high word rounded + * to a multiple of 0x4000 with a zero low word. + */ +static void +__vrsqrt_n(int n, double * restrict px, int stridex, double * restrict py, int stridey) +{ + double res0, res_c0, dsqrt_exp0; + double res1, res_c1, dsqrt_exp1; + double res2, res_c2, dsqrt_exp2; + int ind0, sqrt_exp0; + int ind1, sqrt_exp1; + int ind2, sqrt_exp2; + double xx0, dexp_hi0, dexp_lo0; + double xx1, dexp_hi1, dexp_lo1; + double xx2, dexp_hi2, dexp_lo2; + int hx0, resh0, res_ch0; + int hx1, resh1, res_ch1; + int hx2, resh2, res_ch2; + + LO(&dsqrt_exp0) = 0; + LO(&dsqrt_exp1) = 0; + LO(&dsqrt_exp2) = 0; + LO(&res_c0) = 0; + LO(&res_c1) = 0; + LO(&res_c2) = 0; + + for(; n > 2 ; n -= 3) + { + hx0 = HI(px); + LO(&res0) = LO(px); + px += stridex; + + hx1 = HI(px); + LO(&res1) = LO(px); + px += stridex; + + hx2 = HI(px); + LO(&res2) = LO(px); + px += stridex; + + sqrt_exp0 = (0x5fe - (hx0 >> 21)) << 20; + sqrt_exp1 = (0x5fe - (hx1 >> 21)) << 20; + sqrt_exp2 = (0x5fe - (hx2 >> 21)) << 20; + ind0 = (((hx0 >> 10) & 0x7f8) + 8) & -16; + ind1 = (((hx1 >> 10) & 0x7f8) + 8) & -16; + ind2 = (((hx2 >> 10) & 0x7f8) + 8) & -16; + + resh0 = (hx0 & 0x001fffff) | 0x3fe00000; + resh1 = (hx1 & 0x001fffff) | 0x3fe00000; + resh2 = (hx2 & 0x001fffff) | 0x3fe00000; + res_ch0 = (resh0 + 0x00002000) & 0x7fffc000; + res_ch1 = (resh1 + 0x00002000) & 0x7fffc000; + res_ch2 = (resh2 + 0x00002000) & 0x7fffc000; + HI(&res0) = resh0; + HI(&res1) = resh1; + HI(&res2) = resh2; + HI(&res_c0) = res_ch0; + HI(&res_c1) = res_ch1; + HI(&res_c2) = res_ch2; + + dexp_hi0 = ((double*)((char*)__vlibm_TBL_rsqrt + ind0))[0]; + dexp_hi1 = ((double*)((char*)__vlibm_TBL_rsqrt + ind1))[0]; + dexp_hi2 = ((double*)((char*)__vlibm_TBL_rsqrt + ind2))[0]; +
dexp_lo0 = ((double*)((char*)__vlibm_TBL_rsqrt + ind0))[1]; + dexp_lo1 = ((double*)((char*)__vlibm_TBL_rsqrt + ind1))[1]; + dexp_lo2 = ((double*)((char*)__vlibm_TBL_rsqrt + ind2))[1]; + xx0 = dexp_hi0 * dexp_hi0; + xx1 = dexp_hi1 * dexp_hi1; + xx2 = dexp_hi2 * dexp_hi2; + xx0 = (res0 - res_c0) * xx0; + xx1 = (res1 - res_c1) * xx1; + xx2 = (res2 - res_c2) * xx2; + res0 = (((((K6 * xx0 + K5) * xx0 + K4) * xx0 + K3) * xx0 + K2) * xx0 + K1) * xx0; + res1 = (((((K6 * xx1 + K5) * xx1 + K4) * xx1 + K3) * xx1 + K2) * xx1 + K1) * xx1; + res2 = (((((K6 * xx2 + K5) * xx2 + K4) * xx2 + K3) * xx2 + K2) * xx2 + K1) * xx2; + + res0 = dexp_hi0 * res0 + dexp_lo0 + dexp_hi0; + res1 = dexp_hi1 * res1 + dexp_lo1 + dexp_hi1; + res2 = dexp_hi2 * res2 + dexp_lo2 + dexp_hi2; + + HI(&dsqrt_exp0) = sqrt_exp0; + HI(&dsqrt_exp1) = sqrt_exp1; + HI(&dsqrt_exp2) = sqrt_exp2; + res0 *= dsqrt_exp0; + res1 *= dsqrt_exp1; + res2 *= dsqrt_exp2; + + *py = res0; + py += stridey; + + *py = res1; + py += stridey; + + *py = res2; + py += stridey; + } + + for(; n > 0 ; n--) /* leftover elements, one per iteration */ + { + hx0 = HI(px); + + sqrt_exp0 = (0x5fe - (hx0 >> 21)) << 20; + ind0 = (((hx0 >> 10) & 0x7f8) + 8) & -16; + + resh0 = (hx0 & 0x001fffff) | 0x3fe00000; + res_ch0 = (resh0 + 0x00002000) & 0x7fffc000; + HI(&res0) = resh0; + LO(&res0) = LO(px); + HI(&res_c0) = res_ch0; + LO(&res_c0) = 0; + + px += stridex; + + dexp_hi0 = ((double*)((char*)__vlibm_TBL_rsqrt + ind0))[0]; + dexp_lo0 = ((double*)((char*)__vlibm_TBL_rsqrt + ind0))[1]; + xx0 = dexp_hi0 * dexp_hi0; + xx0 = (res0 - res_c0) * xx0; + res0 = (((((K6 * xx0 + K5) * xx0 + K4) * xx0 + K3) * xx0 + K2) * xx0 + K1) * xx0; + + res0 = dexp_hi0 * res0 + dexp_lo0 + dexp_hi0; + + HI(&dsqrt_exp0) = sqrt_exp0; + LO(&dsqrt_exp0) = 0; + res0 *= dsqrt_exp0; + + *py = res0; + py += stridey; + } +} + diff --git a/usr/src/lib/libmvec/common/__vrsqrtf.c b/usr/src/lib/libmvec/common/__vrsqrtf.c new file mode 100644 index 0000000000..54572a8a33 --- /dev/null +++ b/usr/src/lib/libmvec/common/__vrsqrtf.c
@@ -0,0 +1,506 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include "libm_synonyms.h" +#include "libm_inlines.h" + +#ifdef __RESTRICT +#define restrict _Restrict +#else +#define restrict +#endif + +/* float rsqrtf(float x) + * + * Method : + * 1. Special cases: + * for x = NaN => QNaN; + * for x = +Inf => 0; + * for x is negative, -Inf => QNaN + invalid; + * for x = +0 => +Inf + divide-by-zero; + * for x = -0 => -Inf + divide-by-zero. + * 2. Computes reciprocal square root from: + * x = m * 2**n + * Where: + * m = [0.5, 2), + * n = ((exponent + 1) & ~1). + * Then: + * rsqrtf(x) = 1/sqrt( m * 2**n ) = (2 ** (-n/2)) * (1/sqrt(m)) + * 3.
Computes 1/sqrt(m) from: + * 1/sqrt(m) = (1/sqrt(m0)) * (1/sqrt(1 + (1/m0)*dm)) + * Where: + * m = m0 + dm, + * m0 = 0.5 * (1 + k/64) for m = [0.5, 0.5+127/256), k = [0, 63]; + * m0 = 1.0 * (0 + k/64) for m = [0.5+127/256, 1.0+127/128), k = [64, 127]; + * Then: + * 1/sqrt(m0), 1/m0 are looked up in a table, + * 1/sqrt(1 + (1/m0)*dm) is computed using approximation: + * 1/sqrt(1 + z) = ((a3 * z + a2) * z + a1) * z + a0 + * where z = [-1/64, 1/64]. + * + * Accuracy: + * The maximum relative error for the approximating + * polynomial is 2**(-27.87). + * Maximum error observed: less than 0.534 ulp for the + * whole float type range. + */ + +#define sqrtf __sqrtf + +extern float sqrtf(float); + +static const double __TBL_rsqrtf[] = { +/* +i = [0,63] + TBL[2*i ] = 1 / (*(double*)&(0x3fe0000000000000ULL + (i << 46))) * 2**-24; + TBL[2*i+1] = 1 / sqrtl(*(double*)&(0x3fe0000000000000ULL + (i << 46))); +i = [64,127] + TBL[2*i ] = 1 / (*(double*)&(0x3fe0000000000000ULL + (i << 46))) * 2**-23; + TBL[2*i+1] = 1 / sqrtl(*(double*)&(0x3fe0000000000000ULL + (i << 46))); +*/ + 1.1920928955078125000e-07, 1.4142135623730951455e+00, + 1.1737530048076923728e-07, 1.4032928308912466786e+00, + 1.1559688683712121533e-07, 1.3926212476455828160e+00, + 1.1387156016791044559e-07, 1.3821894809301762397e+00, + 1.1219697840073529256e-07, 1.3719886811400707760e+00, + 1.1057093523550724772e-07, 1.3620104492139977204e+00, + 1.0899135044642856803e-07, 1.3522468075656264297e+00, + 1.0745626100352112918e-07, 1.3426901732747025253e+00, + 1.0596381293402777190e-07, 1.3333333333333332593e+00, + 1.0451225385273972023e-07, 1.3241694217637887121e+00, + 1.0309992609797297870e-07, 1.3151918984428583315e+00, + 1.0172526041666667320e-07, 1.3063945294843617440e+00, + 1.0038677014802631022e-07, 1.2977713690461003537e+00, + 9.9083045860389616921e-08, 1.2893167424406084542e+00, + 9.7812750400641022247e-08, 1.2810252304406970492e+00, + 9.6574614319620251657e-08, 1.2728916546811681609e+00, + 9.5367431640625005294e-08, 
1.2649110640673517647e+00, + 9.4190055941358019463e-08, 1.2570787221094177344e+00, + 9.3041396722560978838e-08, 1.2493900951088485751e+00, + 9.1920416039156631290e-08, 1.2418408411301324890e+00, + 9.0826125372023804482e-08, 1.2344267996967352996e+00, + 8.9757582720588234048e-08, 1.2271439821557927896e+00, + 8.8713889898255812722e-08, 1.2199885626608373279e+00, + 8.7694190014367814875e-08, 1.2129568697262453902e+00, + 8.6697665127840911497e-08, 1.2060453783110545167e+00, + 8.5723534058988761666e-08, 1.1992507023933782762e+00, + 8.4771050347222225457e-08, 1.1925695879998878812e+00, + 8.3839500343406599951e-08, 1.1859989066577618644e+00, + 8.2928201426630432481e-08, 1.1795356492391770864e+00, + 8.2036500336021511923e-08, 1.1731769201708264205e+00, + 8.1163771609042551220e-08, 1.1669199319831564665e+00, + 8.0309416118421050820e-08, 1.1607620001760186046e+00, + 7.9472859700520828922e-08, 1.1547005383792514621e+00, + 7.8653551868556699530e-08, 1.1487330537883810866e+00, + 7.7850964604591830522e-08, 1.1428571428571427937e+00, + 7.7064591224747481298e-08, 1.1370704872299222110e+00, + 7.6293945312500001588e-08, 1.1313708498984760276e+00, + 7.5538559715346535571e-08, 1.1257560715684669095e+00, + 7.4797985600490195040e-08, 1.1202240672224077489e+00, + 7.4071791565533974158e-08, 1.1147728228665882977e+00, + 7.3359562800480773303e-08, 1.1094003924504582947e+00, + 7.2660900297619054173e-08, 1.1041048949477667573e+00, + 7.1975420106132072725e-08, 1.0988845115895122806e+00, + 7.1302752628504667579e-08, 1.0937374832394612945e+00, + 7.0642541956018514597e-08, 1.0886621079036347126e+00, + 6.9994445240825691959e-08, 1.0836567383657542685e+00, + 6.9358132102272723904e-08, 1.0787197799411873955e+00, + 6.8733284065315314719e-08, 1.0738496883424388795e+00, + 6.8119594029017853361e-08, 1.0690449676496975862e+00, + 6.7516765763274335346e-08, 1.0643041683803828867e+00, + 6.6924513432017540145e-08, 1.0596258856520350822e+00, + 6.6342561141304348632e-08, 1.0550087574332591700e+00, + 
6.5770642510775861156e-08, 1.0504514628777803509e+00, + 6.5208500267094023655e-08, 1.0459527207369814228e+00, + 6.4655885858050847233e-08, 1.0415112878465908608e+00, + 6.4112559086134451001e-08, 1.0371259576834630511e+00, + 6.3578287760416665784e-08, 1.0327955589886446131e+00, + 6.3052847365702481089e-08, 1.0285189544531601058e+00, + 6.2536020747950822927e-08, 1.0242950394631678002e+00, + 6.2027597815040656970e-08, 1.0201227409013413627e+00, + 6.1527375252016127325e-08, 1.0160010160015240377e+00, + 6.1035156250000001271e-08, 1.0119288512538813229e+00, + 6.0550750248015869655e-08, 1.0079052613579393416e+00, + 6.0073972687007873182e-08, 1.0039292882210537616e+00, + 1.1920928955078125000e-07, 1.0000000000000000000e+00, + 1.1737530048076923728e-07, 9.9227787671366762812e-01, + 1.1559688683712121533e-07, 9.8473192783466190203e-01, + 1.1387156016791044559e-07, 9.7735555485044178781e-01, + 1.1219697840073529256e-07, 9.7014250014533187638e-01, + 1.1057093523550724772e-07, 9.6308682468615358641e-01, + 1.0899135044642856803e-07, 9.5618288746751489704e-01, + 1.0745626100352112918e-07, 9.4942532655508271588e-01, + 1.0596381293402777190e-07, 9.4280904158206335630e-01, + 1.0451225385273972023e-07, 9.3632917756904454620e-01, + 1.0309992609797297870e-07, 9.2998110995055427441e-01, + 1.0172526041666667320e-07, 9.2376043070340119190e-01, + 1.0038677014802631022e-07, 9.1766293548224708854e-01, + 9.9083045860389616921e-08, 9.1168461167710357351e-01, + 9.7812750400641022247e-08, 9.0582162731567661407e-01, + 9.6574614319620251657e-08, 9.0007032074081916306e-01, + 9.5367431640625005294e-08, 8.9442719099991585541e-01, + 9.4190055941358019463e-08, 8.8888888888888883955e-01, + 9.3041396722560978838e-08, 8.8345220859877238162e-01, + 9.1920416039156631290e-08, 8.7811407991752277180e-01, + 9.0826125372023804482e-08, 8.7287156094396955996e-01, + 8.9757582720588234048e-08, 8.6772183127462465535e-01, + 8.8713889898255812722e-08, 8.6266218562750729415e-01, + 8.7694190014367814875e-08, 
8.5769002787023584933e-01, + 8.6697665127840911497e-08, 8.5280286542244176928e-01, + 8.5723534058988761666e-08, 8.4799830400508802164e-01, + 8.4771050347222225457e-08, 8.4327404271156780613e-01, + 8.3839500343406599951e-08, 8.3862786937753464045e-01, + 8.2928201426630432481e-08, 8.3405765622829908246e-01, + 8.2036500336021511923e-08, 8.2956135578434020417e-01, + 8.1163771609042551220e-08, 8.2513699700703468931e-01, + 8.0309416118421050820e-08, 8.2078268166812329287e-01, + 7.9472859700520828922e-08, 8.1649658092772603446e-01, + 7.8653551868556699530e-08, 8.1227693210689522196e-01, + 7.7850964604591830522e-08, 8.0812203564176865456e-01, + 7.7064591224747481298e-08, 8.0403025220736967782e-01, + 7.6293945312500001588e-08, 8.0000000000000004441e-01, + 7.5538559715346535571e-08, 7.9602975216799132241e-01, + 7.4797985600490195040e-08, 7.9211803438133943089e-01, + 7.4071791565533974158e-08, 7.8826342253143455441e-01, + 7.3359562800480773303e-08, 7.8446454055273617811e-01, + 7.2660900297619054173e-08, 7.8072005835882651859e-01, + 7.1975420106132072725e-08, 7.7702868988581130782e-01, + 7.1302752628504667579e-08, 7.7338919123653082632e-01, + 7.0642541956018514597e-08, 7.6980035891950104876e-01, + 6.9994445240825691959e-08, 7.6626102817692109959e-01, + 6.9358132102272723904e-08, 7.6277007139647390321e-01, + 6.8733284065315314719e-08, 7.5932639660199918730e-01, + 6.8119594029017853361e-08, 7.5592894601845450619e-01, + 6.7516765763274335346e-08, 7.5257669470687782454e-01, + 6.6924513432017540145e-08, 7.4926864926535519107e-01, + 6.6342561141304348632e-08, 7.4600384659225105199e-01, + 6.5770642510775861156e-08, 7.4278135270820744296e-01, + 6.5208500267094023655e-08, 7.3960026163363878915e-01, + 6.4655885858050847233e-08, 7.3645969431865865307e-01, + 6.4112559086134451001e-08, 7.3335879762256905856e-01, + 6.3578287760416665784e-08, 7.3029674334022143256e-01, + 6.3052847365702481089e-08, 7.2727272727272729291e-01, + 6.2536020747950822927e-08, 7.2428596834014824513e-01, + 
6.2027597815040656970e-08, 7.2133570773394584119e-01, + 6.1527375252016127325e-08, 7.1842120810709964029e-01, + 6.1035156250000001271e-08, 7.1554175279993270653e-01, + 6.0550750248015869655e-08, 7.1269664509979835376e-01, + 6.0073972687007873182e-08, 7.0988520753289097165e-01, +}; + +static const unsigned long long LCONST[] = { +0x3feffffffee7f18fULL, /* A0 = 9.99999997962321453275e-01 */ +0xbfdffffffe07e52fULL, /* A1 =-4.99999998166077580600e-01 */ +0x3fd801180ca296d9ULL, /* A2 = 3.75066768969515586277e-01 */ +0xbfd400fc0bbb8e78ULL, /* A3 =-3.12560092408808548438e-01 */ +}; + +static void +__vrsqrtf_n(int n, float * restrict px, int stridex, float * restrict py, int stridey); + +#pragma no_inline(__vrsqrtf_n) +/* + * RETURN(ret): used in the scan loop of __vrsqrtf() for an element that is + * handled inline. Stores ret through py and advances py. With no pending + * run (n_n == 0) the run start moves to the current position, ax0 is + * reloaded and the scan continues; otherwise the element is consumed (n--) + * and the loop is left so the pending run can go to __vrsqrtf_n(). + */ +#define RETURN(ret) \ +{ \ + *py = (ret); \ + py += stridey; \ + if (n_n == 0) \ + { \ + spx = px; spy = py; \ + ax0 = *(int*)px; \ + continue; \ + } \ + n--; \ + break; \ +} +/* + * __vrsqrtf: for each of n floats read from px (advanced by stridex), stores + * the reciprocal square root through py (advanced by stridey). + * + * Elements are classified by their bit pattern ax0, compared as a signed + * int. ax0 >= 0x7f800000 (NaN or Inf with the sign bit clear) gives + * 1.0f / x, as does a zero of either sign. A positive denormal is rescaled + * and evaluated inline. Any other input with the sign bit set gives + * sqrtf(x). All remaining elements are counted into a run (n_n) that is + * passed to __vrsqrtf_n(). A single leftover element, if any, is evaluated + * after the loop. + */ +void +__vrsqrtf(int n, float * restrict px, int stridex, float * restrict py, int stridey) +{ + float *spx, *spy; + int ax0, n_n; + float res; + float FONE = 1.0f, FTWO = 2.0f; + + while (n > 1) + { + n_n = 0; + spx = px; + spy = py; + ax0 = *(int*)px; + for (; n > 1 ; n--) + { + px += stridex; + if (ax0 >= 0x7f800000) /* X = NaN or Inf */ + { + res = *(px - stridex); + RETURN (FONE / res) + } + + py += stridey; + + if (ax0 < 0x00800000) /* X = denormal, zero or negative */ + { + py -= stridey; + res = *(px - stridex); + + if ((ax0 & 0x7fffffff) == 0) /* |X| = zero */ + { + RETURN (FONE / res) + } + else if (ax0 >= 0) /* X = denormal */ + { + double A0 = ((double*)LCONST)[0]; /* 9.99999997962321453275e-01 */ + double A1 = ((double*)LCONST)[1]; /* -4.99999998166077580600e-01 */ + double A2 = ((double*)LCONST)[2]; /* 3.75066768969515586277e-01 */ + double A3 = ((double*)LCONST)[3]; /* -3.12560092408808548438e-01 */ + + double res0, xx0, tbl_div0, tbl_sqrt0; + float fres0; + int iax0, si0, iexp0; + + res = *(int*)&res; /* bits read as an integer, converted to float; presumably why 0x4b is added to the exponent term below */ + res *= FTWO; + ax0 = *(int*)&res; + iexp0 = ax0 >> 24; + iexp0 =
0x3f + 0x4b - iexp0; + iexp0 = iexp0 << 23; + + si0 = (ax0 >> 13) & 0x7f0; + + tbl_div0 = ((double*)((char*)__TBL_rsqrtf + si0))[0]; + tbl_sqrt0 = ((double*)((char*)__TBL_rsqrtf + si0))[1]; + iax0 = ax0 & 0x7ffe0000; + iax0 = ax0 - iax0; + xx0 = iax0 * tbl_div0; + res0 = tbl_sqrt0 * (((A3 * xx0 + A2) * xx0 + A1) * xx0 + A0); + + fres0 = res0; + iexp0 += *(int*)&fres0; + RETURN(*(float*)&iexp0) + } + else /* X = negative */ + { + RETURN (sqrtf(res)) + } + } + n_n++; + ax0 = *(int*)px; + } + if (n_n > 0) + __vrsqrtf_n(n_n, spx, stridex, spy, stridey); + } + + if (n > 0) + { + ax0 = *(int*)px; + + if (ax0 >= 0x7f800000) /* X = NaN or Inf */ + { + res = *px; + *py = FONE / res; + } + else if (ax0 < 0x00800000) /* X = denormal, zero or negative */ + { + res = *px; + + if ((ax0 & 0x7fffffff) == 0) /* |X| = zero */ + { + *py = FONE / res; + } + else if (ax0 >= 0) /* X = denormal */ + { + double A0 = ((double*)LCONST)[0]; /* 9.99999997962321453275e-01 */ + double A1 = ((double*)LCONST)[1]; /* -4.99999998166077580600e-01 */ + double A2 = ((double*)LCONST)[2]; /* 3.75066768969515586277e-01 */ + double A3 = ((double*)LCONST)[3]; /* -3.12560092408808548438e-01 */ + double res0, xx0, tbl_div0, tbl_sqrt0; + float fres0; + int iax0, si0, iexp0; + + res = *(int*)&res; + res *= FTWO; + ax0 = *(int*)&res; + iexp0 = ax0 >> 24; + iexp0 = 0x3f + 0x4b - iexp0; + iexp0 = iexp0 << 23; + + si0 = (ax0 >> 13) & 0x7f0; + + tbl_div0 = ((double*)((char*)__TBL_rsqrtf + si0))[0]; + tbl_sqrt0 = ((double*)((char*)__TBL_rsqrtf + si0))[1]; + iax0 = ax0 & 0x7ffe0000; + iax0 = ax0 - iax0; + xx0 = iax0 * tbl_div0; + res0 = tbl_sqrt0 * (((A3 * xx0 + A2) * xx0 + A1) * xx0 + A0); + + fres0 = res0; + iexp0 += *(int*)&fres0; + + *(int*)py = iexp0; + } + else /* X = negative */ + { + *py = sqrtf(res); + } + } + else + { + double A0 = ((double*)LCONST)[0]; /* 9.99999997962321453275e-01 */ + double A1 = ((double*)LCONST)[1]; /* -4.99999998166077580600e-01 */ + double A2 = ((double*)LCONST)[2]; /*
3.75066768969515586277e-01 */ + double A3 = ((double*)LCONST)[3]; /* -3.12560092408808548438e-01 */ + double res0, xx0, tbl_div0, tbl_sqrt0; + float fres0; + int iax0, si0, iexp0; + + iexp0 = ax0 >> 24; + iexp0 = 0x3f - iexp0; + iexp0 = iexp0 << 23; + + si0 = (ax0 >> 13) & 0x7f0; + + tbl_div0 = ((double*)((char*)__TBL_rsqrtf + si0))[0]; + tbl_sqrt0 = ((double*)((char*)__TBL_rsqrtf + si0))[1]; + iax0 = ax0 & 0x7ffe0000; + iax0 = ax0 - iax0; + xx0 = iax0 * tbl_div0; + res0 = tbl_sqrt0 * (((A3 * xx0 + A2) * xx0 + A1) * xx0 + A0); + + fres0 = res0; + iexp0 += *(int*)&fres0; + + *(int*)py = iexp0; + } + } +} +/* + * __vrsqrtf_n: evaluates n elements without any special-case checks; + * __vrsqrtf() only passes runs it has already classified. The earlier + * prototype is static, so this definition has internal linkage even though + * the keyword is not repeated here. The three-at-a-time loop is compiled + * only when ARCH_v7 or ARCH_v8 is defined; otherwise every element goes + * through the final one-at-a-time loop. + * + * NOTE(review): 0x3f - (ax0 >> 24) is negative once ax0 >> 24 exceeds 0x3f + * and is then shifted left by 23; the code depends on how the compiler + * treats a left shift of a negative int. + */ +void +__vrsqrtf_n(int n, float * restrict px, int stridex, float * restrict py, int stridey) +{ + double A0 = ((double*)LCONST)[0]; /* 9.99999997962321453275e-01 */ + double A1 = ((double*)LCONST)[1]; /* -4.99999998166077580600e-01 */ + double A2 = ((double*)LCONST)[2]; /* 3.75066768969515586277e-01 */ + double A3 = ((double*)LCONST)[3]; /* -3.12560092408808548438e-01 */ + double res0, xx0, tbl_div0, tbl_sqrt0; + float fres0; + int iax0, ax0, si0, iexp0; + +#if defined(ARCH_v7) || defined(ARCH_v8) + double res1, xx1, tbl_div1, tbl_sqrt1; + double res2, xx2, tbl_div2, tbl_sqrt2; + float fres1, fres2; + int iax1, ax1, si1, iexp1; + int iax2, ax2, si2, iexp2; + + for(; n > 2 ; n -= 3) + { + ax0 = *(int*)px; + px += stridex; + + ax1 = *(int*)px; + px += stridex; + + ax2 = *(int*)px; + px += stridex; + + iexp0 = ax0 >> 24; + iexp1 = ax1 >> 24; + iexp2 = ax2 >> 24; + iexp0 = 0x3f - iexp0; + iexp1 = 0x3f - iexp1; + iexp2 = 0x3f - iexp2; + + iexp0 = iexp0 << 23; + iexp1 = iexp1 << 23; + iexp2 = iexp2 << 23; + + si0 = (ax0 >> 13) & 0x7f0; + si1 = (ax1 >> 13) & 0x7f0; + si2 = (ax2 >> 13) & 0x7f0; + + tbl_div0 = ((double*)((char*)__TBL_rsqrtf + si0))[0]; + tbl_div1 = ((double*)((char*)__TBL_rsqrtf + si1))[0]; + tbl_div2 = ((double*)((char*)__TBL_rsqrtf + si2))[0]; + tbl_sqrt0 = ((double*)((char*)__TBL_rsqrtf + si0))[1]; + tbl_sqrt1 = ((double*)((char*)__TBL_rsqrtf +
si1))[1]; + tbl_sqrt2 = ((double*)((char*)__TBL_rsqrtf + si2))[1]; + iax0 = ax0 & 0x7ffe0000; + iax1 = ax1 & 0x7ffe0000; + iax2 = ax2 & 0x7ffe0000; + iax0 = ax0 - iax0; + iax1 = ax1 - iax1; + iax2 = ax2 - iax2; + xx0 = iax0 * tbl_div0; + xx1 = iax1 * tbl_div1; + xx2 = iax2 * tbl_div2; + res0 = tbl_sqrt0 * (((A3 * xx0 + A2) * xx0 + A1) * xx0 + A0); + res1 = tbl_sqrt1 * (((A3 * xx1 + A2) * xx1 + A1) * xx1 + A0); + res2 = tbl_sqrt2 * (((A3 * xx2 + A2) * xx2 + A1) * xx2 + A0); + + fres0 = res0; + fres1 = res1; + fres2 = res2; + + iexp0 += *(int*)&fres0; + iexp1 += *(int*)&fres1; + iexp2 += *(int*)&fres2; + *(int*)py = iexp0; + py += stridey; + *(int*)py = iexp1; + py += stridey; + *(int*)py = iexp2; + py += stridey; + } +#endif + for(; n > 0 ; n--) /* leftover elements, one per iteration */ + { + ax0 = *(int*)px; + px += stridex; + + iexp0 = ax0 >> 24; + iexp0 = 0x3f - iexp0; + iexp0 = iexp0 << 23; + + si0 = (ax0 >> 13) & 0x7f0; + + tbl_div0 = ((double*)((char*)__TBL_rsqrtf + si0))[0]; + tbl_sqrt0 = ((double*)((char*)__TBL_rsqrtf + si0))[1]; + iax0 = ax0 & 0x7ffe0000; + iax0 = ax0 - iax0; + xx0 = iax0 * tbl_div0; + res0 = tbl_sqrt0 * (((A3 * xx0 + A2) * xx0 + A1) * xx0 + A0); + + fres0 = res0; + iexp0 += *(int*)&fres0; + *(int*)py = iexp0; + py += stridey; + } +} + diff --git a/usr/src/lib/libmvec/common/__vsin.c b/usr/src/lib/libmvec/common/__vsin.c new file mode 100644 index 0000000000..3b024aa2cc --- /dev/null +++ b/usr/src/lib/libmvec/common/__vsin.c @@ -0,0 +1,1108 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License.
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include <sys/isa_defs.h> +#include <sys/ccompile.h> + +#ifdef _LITTLE_ENDIAN +#define HI(x) *(1+(int*)x) +#define LO(x) *(unsigned*)x +#else +#define HI(x) *(int*)x +#define LO(x) *(1+(unsigned*)x) +#endif + +#ifdef __RESTRICT +#define restrict _Restrict +#else +#define restrict +#endif + +extern const double __vlibm_TBL_sincos_hi[], __vlibm_TBL_sincos_lo[]; + +static const double + half[2] = { 0.5, -0.5 }, + one = 1.0, + invpio2 = 0.636619772367581343075535, + pio2_1 = 1.570796326734125614166, + pio2_2 = 6.077100506303965976596e-11, + pio2_3 = 2.022266248711166455796e-21, + pio2_3t = 8.478427660368899643959e-32, + pp1 = -1.666666666605760465276263943134982554676e-0001, + pp2 = 8.333261209690963126718376566146180944442e-0003, + qq1 = -4.999999999977710986407023955908711557870e-0001, + qq2 = 4.166654863857219350645055881018842089580e-0002, + poly1[2]= { -1.666666666666629669805215138920301589656e-0001, + -4.999999999999931701464060878888294524481e-0001 }, + poly2[2]= { 8.333333332390951295683993455280336376663e-0003, + 4.166666666394861917535640593963708222319e-0002 }, + poly3[2]= { -1.984126237997976692791551778230098403960e-0004, + -1.388888552656142867832756687736851681462e-0003 }, + poly4[2]= { 2.753403624854277237649987622848330351110e-0006, + 2.478519423681460796618128289454530524759e-0005 }; + +static const unsigned thresh[2] = { 0x3fc90000, 0x3fc40000 }; + +/* Don't __ the following; acomp will 
handle it */ +extern double fabs(double); +extern void __vlibm_vsin_big(int, double *, int, double *, int, int); + +void +__vsin(int n, double * restrict x, int stridex, double * restrict y, + int stridey) +{ + double x0_or_one[4], x1_or_one[4], x2_or_one[4]; + double y0_or_zero[4], y1_or_zero[4], y2_or_zero[4]; + double x0, x1, x2, *py0 = 0, *py1 = 0, *py2, *xsave, *ysave; + unsigned hx0, hx1, hx2, xsb0, xsb1 = 0, xsb2; + int i, biguns, nsave, sxsave, sysave; + volatile int v __GNU_UNUSED; + nsave = n; + xsave = x; + sxsave = stridex; + ysave = y; + sysave = stridey; + biguns = 0; + + do + { +LOOP0: + xsb0 = HI(x); + hx0 = xsb0 & ~0x80000000; + if (hx0 > 0x3fe921fb) + { + biguns = 1; + goto MEDIUM; + } + if (hx0 < 0x3e400000) + { + v = *x; + *y = *x; + x += stridex; + y += stridey; + i = 0; + if (--n <= 0) + break; + goto LOOP0; + } + x0 = *x; + py0 = y; + x += stridex; + y += stridey; + i = 1; + if (--n <= 0) + break; + +LOOP1: + xsb1 = HI(x); + hx1 = xsb1 & ~0x80000000; + if (hx1 > 0x3fe921fb) + { + biguns = 2; + goto MEDIUM; + } + if (hx1 < 0x3e400000) + { + v = *x; + *y = *x; + x += stridex; + y += stridey; + i = 1; + if (--n <= 0) + break; + goto LOOP1; + } + x1 = *x; + py1 = y; + x += stridex; + y += stridey; + i = 2; + if (--n <= 0) + break; + +LOOP2: + xsb2 = HI(x); + hx2 = xsb2 & ~0x80000000; + if (hx2 > 0x3fe921fb) + { + biguns = 3; + goto MEDIUM; + } + if (hx2 < 0x3e400000) + { + v = *x; + *y = *x; + x += stridex; + y += stridey; + i = 2; + if (--n <= 0) + break; + goto LOOP2; + } + x2 = *x; + py2 = y; + + i = (hx0 - 0x3fc90000) >> 31; + i |= ((hx1 - 0x3fc90000) >> 30) & 2; + i |= ((hx2 - 0x3fc90000) >> 29) & 4; + switch (i) + { + double a0, a1, a2, w0, w1, w2; + double t0, t1, t2, z0, z1, z2; + unsigned j0, j1, j2; + + case 0: + j0 = (xsb0 + 0x4000) & 0xffff8000; + j1 = (xsb1 + 0x4000) & 0xffff8000; + j2 = (xsb2 + 0x4000) & 0xffff8000; + HI(&t0) = j0; + HI(&t1) = j1; + HI(&t2) = j2; + LO(&t0) = 0; + LO(&t1) = 0; + LO(&t2) = 0; + x0 -= t0; + x1 -= t1; + 
x2 -= t2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * (qq1 + z0 * qq2); + t1 = z1 * (qq1 + z1 * qq2); + t2 = z2 * (qq1 + z2 * qq2); + w0 = x0 * (one + z0 * (pp1 + z0 * pp2)); + w1 = x1 * (one + z1 * (pp1 + z1 * pp2)); + w2 = x2 * (one + z2 * (pp1 + z2 * pp2)); + j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + xsb0 = (xsb0 >> 30) & 2; + xsb1 = (xsb1 >> 30) & 2; + xsb2 = (xsb2 >> 30) & 2; + a0 = __vlibm_TBL_sincos_hi[j0+xsb0]; + a1 = __vlibm_TBL_sincos_hi[j1+xsb1]; + a2 = __vlibm_TBL_sincos_hi[j2+xsb2]; + t0 = (__vlibm_TBL_sincos_hi[j0+1] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+xsb0]; + t1 = (__vlibm_TBL_sincos_hi[j1+1] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+xsb1]; + t2 = (__vlibm_TBL_sincos_hi[j2+1] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+xsb2]; + *py0 = a0 + t0; + *py1 = a1 + t1; + *py2 = a2 + t2; + break; + + case 1: + j1 = (xsb1 + 0x4000) & 0xffff8000; + j2 = (xsb2 + 0x4000) & 0xffff8000; + HI(&t1) = j1; + HI(&t2) = j2; + LO(&t1) = 0; + LO(&t2) = 0; + x1 -= t1; + x2 -= t2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * (poly3[0] + z0 * poly4[0]); + t1 = z1 * (qq1 + z1 * qq2); + t2 = z2 * (qq1 + z2 * qq2); + t0 = z0 * (poly1[0] + z0 * (poly2[0] + t0)); + w1 = x1 * (one + z1 * (pp1 + z1 * pp2)); + w2 = x2 * (one + z2 * (pp1 + z2 * pp2)); + j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + xsb1 = (xsb1 >> 30) & 2; + xsb2 = (xsb2 >> 30) & 2; + a1 = __vlibm_TBL_sincos_hi[j1+xsb1]; + a2 = __vlibm_TBL_sincos_hi[j2+xsb2]; + t0 = x0 + x0 * t0; + t1 = (__vlibm_TBL_sincos_hi[j1+1] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+xsb1]; + t2 = (__vlibm_TBL_sincos_hi[j2+1] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+xsb2]; + *py0 = t0; + *py1 = a1 + t1; + *py2 = a2 + t2; + break; + + case 2: + j0 = (xsb0 + 0x4000) & 0xffff8000; + j2 = (xsb2 + 0x4000) & 
0xffff8000; + HI(&t0) = j0; + HI(&t2) = j2; + LO(&t0) = 0; + LO(&t2) = 0; + x0 -= t0; + x2 -= t2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * (qq1 + z0 * qq2); + t1 = z1 * (poly3[0] + z1 * poly4[0]); + t2 = z2 * (qq1 + z2 * qq2); + w0 = x0 * (one + z0 * (pp1 + z0 * pp2)); + t1 = z1 * (poly1[0] + z1 * (poly2[0] + t1)); + w2 = x2 * (one + z2 * (pp1 + z2 * pp2)); + j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + xsb0 = (xsb0 >> 30) & 2; + xsb2 = (xsb2 >> 30) & 2; + a0 = __vlibm_TBL_sincos_hi[j0+xsb0]; + a2 = __vlibm_TBL_sincos_hi[j2+xsb2]; + t0 = (__vlibm_TBL_sincos_hi[j0+1] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+xsb0]; + t1 = x1 + x1 * t1; + t2 = (__vlibm_TBL_sincos_hi[j2+1] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+xsb2]; + *py0 = a0 + t0; + *py1 = t1; + *py2 = a2 + t2; + break; + + case 3: + j2 = (xsb2 + 0x4000) & 0xffff8000; + HI(&t2) = j2; + LO(&t2) = 0; + x2 -= t2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * (poly3[0] + z0 * poly4[0]); + t1 = z1 * (poly3[0] + z1 * poly4[0]); + t2 = z2 * (qq1 + z2 * qq2); + t0 = z0 * (poly1[0] + z0 * (poly2[0] + t0)); + t1 = z1 * (poly1[0] + z1 * (poly2[0] + t1)); + w2 = x2 * (one + z2 * (pp1 + z2 * pp2)); + j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + xsb2 = (xsb2 >> 30) & 2; + a2 = __vlibm_TBL_sincos_hi[j2+xsb2]; + t0 = x0 + x0 * t0; + t1 = x1 + x1 * t1; + t2 = (__vlibm_TBL_sincos_hi[j2+1] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+xsb2]; + *py0 = t0; + *py1 = t1; + *py2 = a2 + t2; + break; + + case 4: + j0 = (xsb0 + 0x4000) & 0xffff8000; + j1 = (xsb1 + 0x4000) & 0xffff8000; + HI(&t0) = j0; + HI(&t1) = j1; + LO(&t0) = 0; + LO(&t1) = 0; + x0 -= t0; + x1 -= t1; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * (qq1 + z0 * qq2); + t1 = z1 * (qq1 + z1 * qq2); + t2 = z2 * (poly3[0] + z2 * poly4[0]); + w0 = x0 * (one + z0 * (pp1 + z0 * pp2)); + w1 = x1 * (one + z1 * (pp1 + z1 * pp2)); + t2 = z2 * (poly1[0] + z2 
* (poly2[0] + t2)); + j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + xsb0 = (xsb0 >> 30) & 2; + xsb1 = (xsb1 >> 30) & 2; + a0 = __vlibm_TBL_sincos_hi[j0+xsb0]; + a1 = __vlibm_TBL_sincos_hi[j1+xsb1]; + t0 = (__vlibm_TBL_sincos_hi[j0+1] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+xsb0]; + t1 = (__vlibm_TBL_sincos_hi[j1+1] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+xsb1]; + t2 = x2 + x2 * t2; + *py0 = a0 + t0; + *py1 = a1 + t1; + *py2 = t2; + break; + + case 5: + j1 = (xsb1 + 0x4000) & 0xffff8000; + HI(&t1) = j1; + LO(&t1) = 0; + x1 -= t1; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * (poly3[0] + z0 * poly4[0]); + t1 = z1 * (qq1 + z1 * qq2); + t2 = z2 * (poly3[0] + z2 * poly4[0]); + t0 = z0 * (poly1[0] + z0 * (poly2[0] + t0)); + w1 = x1 * (one + z1 * (pp1 + z1 * pp2)); + t2 = z2 * (poly1[0] + z2 * (poly2[0] + t2)); + j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + xsb1 = (xsb1 >> 30) & 2; + a1 = __vlibm_TBL_sincos_hi[j1+xsb1]; + t0 = x0 + x0 * t0; + t1 = (__vlibm_TBL_sincos_hi[j1+1] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+xsb1]; + t2 = x2 + x2 * t2; + *py0 = t0; + *py1 = a1 + t1; + *py2 = t2; + break; + + case 6: + j0 = (xsb0 + 0x4000) & 0xffff8000; + HI(&t0) = j0; + LO(&t0) = 0; + x0 -= t0; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * (qq1 + z0 * qq2); + t1 = z1 * (poly3[0] + z1 * poly4[0]); + t2 = z2 * (poly3[0] + z2 * poly4[0]); + w0 = x0 * (one + z0 * (pp1 + z0 * pp2)); + t1 = z1 * (poly1[0] + z1 * (poly2[0] + t1)); + t2 = z2 * (poly1[0] + z2 * (poly2[0] + t2)); + j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + xsb0 = (xsb0 >> 30) & 2; + a0 = __vlibm_TBL_sincos_hi[j0+xsb0]; + t0 = (__vlibm_TBL_sincos_hi[j0+1] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+xsb0]; + t1 = x1 + x1 * t1; + t2 = x2 + x2 * t2; + *py0 = a0 + t0; + *py1 = t1; + *py2 = t2; + break; + + case 7: + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * (poly3[0] + z0 * poly4[0]); 
+ t1 = z1 * (poly3[0] + z1 * poly4[0]); + t2 = z2 * (poly3[0] + z2 * poly4[0]); + t0 = z0 * (poly1[0] + z0 * (poly2[0] + t0)); + t1 = z1 * (poly1[0] + z1 * (poly2[0] + t1)); + t2 = z2 * (poly1[0] + z2 * (poly2[0] + t2)); + t0 = x0 + x0 * t0; + t1 = x1 + x1 * t1; + t2 = x2 + x2 * t2; + *py0 = t0; + *py1 = t1; + *py2 = t2; + break; + } + + x += stridex; + y += stridey; + i = 0; + } while (--n > 0); + + if (i > 0) + { + double a0, a1, w0, w1; + double t0, t1, z0, z1; + unsigned j0, j1; + + if (i > 1) + { + if (hx1 < 0x3fc90000) + { + z1 = x1 * x1; + t1 = z1 * (poly3[0] + z1 * poly4[0]); + t1 = z1 * (poly1[0] + z1 * (poly2[0] + t1)); + t1 = x1 + x1 * t1; + *py1 = t1; + } + else + { + j1 = (xsb1 + 0x4000) & 0xffff8000; + HI(&t1) = j1; + LO(&t1) = 0; + x1 -= t1; + z1 = x1 * x1; + t1 = z1 * (qq1 + z1 * qq2); + w1 = x1 * (one + z1 * (pp1 + z1 * pp2)); + j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + xsb1 = (xsb1 >> 30) & 2; + a1 = __vlibm_TBL_sincos_hi[j1+xsb1]; + t1 = (__vlibm_TBL_sincos_hi[j1+1] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+xsb1]; + *py1 = a1 + t1; + } + } + if (hx0 < 0x3fc90000) + { + z0 = x0 * x0; + t0 = z0 * (poly3[0] + z0 * poly4[0]); + t0 = z0 * (poly1[0] + z0 * (poly2[0] + t0)); + t0 = x0 + x0 * t0; + *py0 = t0; + } + else + { + j0 = (xsb0 + 0x4000) & 0xffff8000; + HI(&t0) = j0; + LO(&t0) = 0; + x0 -= t0; + z0 = x0 * x0; + t0 = z0 * (qq1 + z0 * qq2); + w0 = x0 * (one + z0 * (pp1 + z0 * pp2)); + j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + xsb0 = (xsb0 >> 30) & 2; + a0 = __vlibm_TBL_sincos_hi[j0+xsb0]; + t0 = (__vlibm_TBL_sincos_hi[j0+1] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+xsb0]; + *py0 = a0 + t0; + } + } + + return; + + /* + * MEDIUM RANGE PROCESSING + * Jump here at first sign of medium range argument. We are a bit + * confused due to the jump.. fix up several variables and jump into + * the nth loop, same as was being processed above. 
+ */ + +MEDIUM: + + x0_or_one[1] = 1.0; + x1_or_one[1] = 1.0; + x2_or_one[1] = 1.0; + x0_or_one[3] = -1.0; + x1_or_one[3] = -1.0; + x2_or_one[3] = -1.0; + y0_or_zero[1] = 0.0; + y1_or_zero[1] = 0.0; + y2_or_zero[1] = 0.0; + y0_or_zero[3] = 0.0; + y1_or_zero[3] = 0.0; + y2_or_zero[3] = 0.0; + + if (biguns == 3) + { + biguns = 0; + xsb0 = xsb0 >> 31; + xsb1 = xsb1 >> 31; + goto loop2; + } + else if (biguns == 2) + { + xsb0 = xsb0 >> 31; + biguns = 0; + goto loop1; + } + biguns = 0; + + do + { + double fn0, fn1, fn2, a0, a1, a2, w0, w1, w2, y0, y1, y2; + unsigned hx; + int n0, n1, n2; + +loop0: + hx = HI(x); + xsb0 = hx >> 31; + hx &= ~0x80000000; + if (hx < 0x3e400000) + { + v = *x; + *y = *x; + x += stridex; + y += stridey; + i = 0; + if (--n <= 0) + break; + goto loop0; + } + if (hx > 0x413921fb) + { + if (hx >= 0x7ff00000) + { + x0 = *x; + *y = x0 - x0; + } + else + biguns = 1; + x += stridex; + y += stridey; + i = 0; + if (--n <= 0) + break; + goto loop0; + } + x0 = *x; + py0 = y; + x += stridex; + y += stridey; + i = 1; + if (--n <= 0) + break; + +loop1: + hx = HI(x); + xsb1 = hx >> 31; + hx &= ~0x80000000; + if (hx < 0x3e400000) + { + v = *x; + *y = *x; + x += stridex; + y += stridey; + i = 1; + if (--n <= 0) + break; + goto loop1; + } + if (hx > 0x413921fb) + { + if (hx >= 0x7ff00000) + { + x1 = *x; + *y = x1 - x1; + } + else + biguns = 1; + x += stridex; + y += stridey; + i = 1; + if (--n <= 0) + break; + goto loop1; + } + x1 = *x; + py1 = y; + x += stridex; + y += stridey; + i = 2; + if (--n <= 0) + break; + +loop2: + hx = HI(x); + xsb2 = hx >> 31; + hx &= ~0x80000000; + if (hx < 0x3e400000) + { + v = *x; + *y = *x; + x += stridex; + y += stridey; + i = 2; + if (--n <= 0) + break; + goto loop2; + } + if (hx > 0x413921fb) + { + if (hx >= 0x7ff00000) + { + x2 = *x; + *y = x2 - x2; + } + else + biguns = 1; + x += stridex; + y += stridey; + i = 2; + if (--n <= 0) + break; + goto loop2; + } + x2 = *x; + py2 = y; + + n0 = (int) (x0 * invpio2 + half[xsb0]); + n1 = 
(int) (x1 * invpio2 + half[xsb1]); + n2 = (int) (x2 * invpio2 + half[xsb2]); + fn0 = (double) n0; + fn1 = (double) n1; + fn2 = (double) n2; + n0 &= 3; + n1 &= 3; + n2 &= 3; + a0 = x0 - fn0 * pio2_1; + a1 = x1 - fn1 * pio2_1; + a2 = x2 - fn2 * pio2_1; + w0 = fn0 * pio2_2; + w1 = fn1 * pio2_2; + w2 = fn2 * pio2_2; + x0 = a0 - w0; + x1 = a1 - w1; + x2 = a2 - w2; + y0 = (a0 - x0) - w0; + y1 = (a1 - x1) - w1; + y2 = (a2 - x2) - w2; + a0 = x0; + a1 = x1; + a2 = x2; + w0 = fn0 * pio2_3 - y0; + w1 = fn1 * pio2_3 - y1; + w2 = fn2 * pio2_3 - y2; + x0 = a0 - w0; + x1 = a1 - w1; + x2 = a2 - w2; + y0 = (a0 - x0) - w0; + y1 = (a1 - x1) - w1; + y2 = (a2 - x2) - w2; + a0 = x0; + a1 = x1; + a2 = x2; + w0 = fn0 * pio2_3t - y0; + w1 = fn1 * pio2_3t - y1; + w2 = fn2 * pio2_3t - y2; + x0 = a0 - w0; + x1 = a1 - w1; + x2 = a2 - w2; + y0 = (a0 - x0) - w0; + y1 = (a1 - x1) - w1; + y2 = (a2 - x2) - w2; + xsb0 = HI(&x0); + i = ((xsb0 & ~0x80000000) - thresh[n0&1]) >> 31; + xsb1 = HI(&x1); + i |= (((xsb1 & ~0x80000000) - thresh[n1&1]) >> 30) & 2; + xsb2 = HI(&x2); + i |= (((xsb2 & ~0x80000000) - thresh[n2&1]) >> 29) & 4; + switch (i) + { + double t0, t1, t2, z0, z1, z2; + unsigned j0, j1, j2; + + case 0: + j0 = (xsb0 + 0x4000) & 0xffff8000; + j1 = (xsb1 + 0x4000) & 0xffff8000; + j2 = (xsb2 + 0x4000) & 0xffff8000; + HI(&t0) = j0; + HI(&t1) = j1; + HI(&t2) = j2; + LO(&t0) = 0; + LO(&t1) = 0; + LO(&t2) = 0; + x0 = (x0 - t0) + y0; + x1 = (x1 - t1) + y1; + x2 = (x2 - t2) + y2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * (qq1 + z0 * qq2); + t1 = z1 * (qq1 + z1 * qq2); + t2 = z2 * (qq1 + z2 * qq2); + w0 = x0 * (one + z0 * (pp1 + z0 * pp2)); + w1 = x1 * (one + z1 * (pp1 + z1 * pp2)); + w2 = x2 * (one + z2 * (pp1 + z2 * pp2)); + j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + xsb0 = (xsb0 >> 30) & 2; + xsb1 = (xsb1 >> 30) & 2; + xsb2 = (xsb2 >> 30) & 2; + n0 ^= 
(xsb0 & ~(n0 << 1)); + n1 ^= (xsb1 & ~(n1 << 1)); + n2 ^= (xsb2 & ~(n2 << 1)); + xsb0 |= 1; + xsb1 |= 1; + xsb2 |= 1; + a0 = __vlibm_TBL_sincos_hi[j0+n0]; + a1 = __vlibm_TBL_sincos_hi[j1+n1]; + a2 = __vlibm_TBL_sincos_hi[j2+n2]; + t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0]; + t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1]; + t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2]; + *py0 = ( a0 + t0 ); + *py1 = ( a1 + t1 ); + *py2 = ( a2 + t2 ); + break; + + case 1: + j0 = n0 & 1; + j1 = (xsb1 + 0x4000) & 0xffff8000; + j2 = (xsb2 + 0x4000) & 0xffff8000; + HI(&t1) = j1; + HI(&t2) = j2; + LO(&t1) = 0; + LO(&t2) = 0; + x0_or_one[0] = x0; + x0_or_one[2] = -x0; + y0_or_zero[0] = y0; + y0_or_zero[2] = -y0; + x1 = (x1 - t1) + y1; + x2 = (x2 - t2) + y2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * (poly3[j0] + z0 * poly4[j0]); + t1 = z1 * (qq1 + z1 * qq2); + t2 = z2 * (qq1 + z2 * qq2); + t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0)); + w1 = x1 * (one + z1 * (pp1 + z1 * pp2)); + w2 = x2 * (one + z2 * (pp1 + z2 * pp2)); + j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + xsb1 = (xsb1 >> 30) & 2; + xsb2 = (xsb2 >> 30) & 2; + n1 ^= (xsb1 & ~(n1 << 1)); + n2 ^= (xsb2 & ~(n2 << 1)); + xsb1 |= 1; + xsb2 |= 1; + a1 = __vlibm_TBL_sincos_hi[j1+n1]; + a2 = __vlibm_TBL_sincos_hi[j2+n2]; + t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0); + t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1]; + t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2]; + *py0 = t0; + *py1 = ( a1 + t1 ); + *py2 = ( a2 + t2 ); + break; + + case 2: + j0 = (xsb0 + 0x4000) & 0xffff8000; + j1 = n1 & 1; + j2 = (xsb2 + 0x4000) & 0xffff8000; + HI(&t0) = j0; + HI(&t2) = j2; + LO(&t0) = 0; + LO(&t2) = 0; + x1_or_one[0] = 
x1; + x1_or_one[2] = -x1; + x0 = (x0 - t0) + y0; + y1_or_zero[0] = y1; + y1_or_zero[2] = -y1; + x2 = (x2 - t2) + y2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * (qq1 + z0 * qq2); + t1 = z1 * (poly3[j1] + z1 * poly4[j1]); + t2 = z2 * (qq1 + z2 * qq2); + w0 = x0 * (one + z0 * (pp1 + z0 * pp2)); + t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1)); + w2 = x2 * (one + z2 * (pp1 + z2 * pp2)); + j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + xsb0 = (xsb0 >> 30) & 2; + xsb2 = (xsb2 >> 30) & 2; + n0 ^= (xsb0 & ~(n0 << 1)); + n2 ^= (xsb2 & ~(n2 << 1)); + xsb0 |= 1; + xsb2 |= 1; + a0 = __vlibm_TBL_sincos_hi[j0+n0]; + a2 = __vlibm_TBL_sincos_hi[j2+n2]; + t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0]; + t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1); + t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2]; + *py0 = ( a0 + t0 ); + *py1 = t1; + *py2 = ( a2 + t2 ); + break; + + case 3: + j0 = n0 & 1; + j1 = n1 & 1; + j2 = (xsb2 + 0x4000) & 0xffff8000; + HI(&t2) = j2; + LO(&t2) = 0; + x0_or_one[0] = x0; + x0_or_one[2] = -x0; + x1_or_one[0] = x1; + x1_or_one[2] = -x1; + y0_or_zero[0] = y0; + y0_or_zero[2] = -y0; + y1_or_zero[0] = y1; + y1_or_zero[2] = -y1; + x2 = (x2 - t2) + y2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * (poly3[j0] + z0 * poly4[j0]); + t1 = z1 * (poly3[j1] + z1 * poly4[j1]); + t2 = z2 * (qq1 + z2 * qq2); + t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0)); + t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1)); + w2 = x2 * (one + z2 * (pp1 + z2 * pp2)); + j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + xsb2 = (xsb2 >> 30) & 2; + n2 ^= (xsb2 & ~(n2 << 1)); + xsb2 |= 1; + a2 = __vlibm_TBL_sincos_hi[j2+n2]; + t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0); + t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1); + t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 
+ a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2]; + *py0 = t0; + *py1 = t1; + *py2 = ( a2 + t2 ); + break; + + case 4: + j0 = (xsb0 + 0x4000) & 0xffff8000; + j1 = (xsb1 + 0x4000) & 0xffff8000; + j2 = n2 & 1; + HI(&t0) = j0; + HI(&t1) = j1; + LO(&t0) = 0; + LO(&t1) = 0; + x2_or_one[0] = x2; + x2_or_one[2] = -x2; + x0 = (x0 - t0) + y0; + x1 = (x1 - t1) + y1; + y2_or_zero[0] = y2; + y2_or_zero[2] = -y2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * (qq1 + z0 * qq2); + t1 = z1 * (qq1 + z1 * qq2); + t2 = z2 * (poly3[j2] + z2 * poly4[j2]); + w0 = x0 * (one + z0 * (pp1 + z0 * pp2)); + w1 = x1 * (one + z1 * (pp1 + z1 * pp2)); + t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2)); + j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + xsb0 = (xsb0 >> 30) & 2; + xsb1 = (xsb1 >> 30) & 2; + n0 ^= (xsb0 & ~(n0 << 1)); + n1 ^= (xsb1 & ~(n1 << 1)); + xsb0 |= 1; + xsb1 |= 1; + a0 = __vlibm_TBL_sincos_hi[j0+n0]; + a1 = __vlibm_TBL_sincos_hi[j1+n1]; + t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0]; + t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1]; + t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2); + *py0 = ( a0 + t0 ); + *py1 = ( a1 + t1 ); + *py2 = t2; + break; + + case 5: + j0 = n0 & 1; + j1 = (xsb1 + 0x4000) & 0xffff8000; + j2 = n2 & 1; + HI(&t1) = j1; + LO(&t1) = 0; + x0_or_one[0] = x0; + x0_or_one[2] = -x0; + x2_or_one[0] = x2; + x2_or_one[2] = -x2; + y0_or_zero[0] = y0; + y0_or_zero[2] = -y0; + x1 = (x1 - t1) + y1; + y2_or_zero[0] = y2; + y2_or_zero[2] = -y2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * (poly3[j0] + z0 * poly4[j0]); + t1 = z1 * (qq1 + z1 * qq2); + t2 = z2 * (poly3[j2] + z2 * poly4[j2]); + t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0)); + w1 = x1 * (one + z1 * (pp1 + z1 * pp2)); + t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2)); + j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + 
xsb1 = (xsb1 >> 30) & 2; + n1 ^= (xsb1 & ~(n1 << 1)); + xsb1 |= 1; + a1 = __vlibm_TBL_sincos_hi[j1+n1]; + t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0); + t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1]; + t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2); + *py0 = t0; + *py1 = ( a1 + t1 ); + *py2 = t2; + break; + + case 6: + j0 = (xsb0 + 0x4000) & 0xffff8000; + j1 = n1 & 1; + j2 = n2 & 1; + HI(&t0) = j0; + LO(&t0) = 0; + x1_or_one[0] = x1; + x1_or_one[2] = -x1; + x2_or_one[0] = x2; + x2_or_one[2] = -x2; + x0 = (x0 - t0) + y0; + y1_or_zero[0] = y1; + y1_or_zero[2] = -y1; + y2_or_zero[0] = y2; + y2_or_zero[2] = -y2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * (qq1 + z0 * qq2); + t1 = z1 * (poly3[j1] + z1 * poly4[j1]); + t2 = z2 * (poly3[j2] + z2 * poly4[j2]); + w0 = x0 * (one + z0 * (pp1 + z0 * pp2)); + t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1)); + t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2)); + j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + xsb0 = (xsb0 >> 30) & 2; + n0 ^= (xsb0 & ~(n0 << 1)); + xsb0 |= 1; + a0 = __vlibm_TBL_sincos_hi[j0+n0]; + t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0]; + t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1); + t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2); + *py0 = ( a0 + t0 ); + *py1 = t1; + *py2 = t2; + break; + + case 7: + j0 = n0 & 1; + j1 = n1 & 1; + j2 = n2 & 1; + x0_or_one[0] = x0; + x0_or_one[2] = -x0; + x1_or_one[0] = x1; + x1_or_one[2] = -x1; + x2_or_one[0] = x2; + x2_or_one[2] = -x2; + y0_or_zero[0] = y0; + y0_or_zero[2] = -y0; + y1_or_zero[0] = y1; + y1_or_zero[2] = -y1; + y2_or_zero[0] = y2; + y2_or_zero[2] = -y2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * (poly3[j0] + z0 * poly4[j0]); + t1 = z1 * (poly3[j1] + z1 * poly4[j1]); + t2 = z2 * (poly3[j2] + z2 * poly4[j2]); + t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0)); + t1 = z1 * (poly1[j1] 
+ z1 * (poly2[j1] + t1)); + t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2)); + t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0); + t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1); + t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2); + *py0 = t0; + *py1 = t1; + *py2 = t2; + break; + } + + x += stridex; + y += stridey; + i = 0; + } while (--n > 0); + + if (i > 0) + { + double fn0, fn1, a0, a1, w0, w1, y0, y1; + double t0, t1, z0, z1; + unsigned j0, j1; + int n0, n1; + + if (i > 1) + { + n1 = (int) (x1 * invpio2 + half[xsb1]); + fn1 = (double) n1; + n1 &= 3; + a1 = x1 - fn1 * pio2_1; + w1 = fn1 * pio2_2; + x1 = a1 - w1; + y1 = (a1 - x1) - w1; + a1 = x1; + w1 = fn1 * pio2_3 - y1; + x1 = a1 - w1; + y1 = (a1 - x1) - w1; + a1 = x1; + w1 = fn1 * pio2_3t - y1; + x1 = a1 - w1; + y1 = (a1 - x1) - w1; + xsb1 = HI(&x1); + if ((xsb1 & ~0x80000000) < thresh[n1&1]) + { + j1 = n1 & 1; + x1_or_one[0] = x1; + x1_or_one[2] = -x1; + y1_or_zero[0] = y1; + y1_or_zero[2] = -y1; + z1 = x1 * x1; + t1 = z1 * (poly3[j1] + z1 * poly4[j1]); + t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1)); + t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1); + *py1 = t1; + } + else + { + j1 = (xsb1 + 0x4000) & 0xffff8000; + HI(&t1) = j1; + LO(&t1) = 0; + x1 = (x1 - t1) + y1; + z1 = x1 * x1; + t1 = z1 * (qq1 + z1 * qq2); + w1 = x1 * (one + z1 * (pp1 + z1 * pp2)); + j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + xsb1 = (xsb1 >> 30) & 2; + n1 ^= (xsb1 & ~(n1 << 1)); + xsb1 |= 1; + a1 = __vlibm_TBL_sincos_hi[j1+n1]; + t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1]; + *py1 = ( a1 + t1 ); + } + } + n0 = (int) (x0 * invpio2 + half[xsb0]); + fn0 = (double) n0; + n0 &= 3; + a0 = x0 - fn0 * pio2_1; + w0 = fn0 * pio2_2; + x0 = a0 - w0; + y0 = (a0 - x0) - w0; + a0 = x0; + w0 = fn0 * pio2_3 - y0; + x0 = a0 - w0; + y0 = (a0 - x0) - w0; + a0 = x0; + w0 = fn0 * pio2_3t - y0; + x0 = a0 - w0; + y0 = (a0 - x0) - w0; + xsb0 = HI(&x0); + if 
((xsb0 & ~0x80000000) < thresh[n0&1]) + { + j0 = n0 & 1; + x0_or_one[0] = x0; + x0_or_one[2] = -x0; + y0_or_zero[0] = y0; + y0_or_zero[2] = -y0; + z0 = x0 * x0; + t0 = z0 * (poly3[j0] + z0 * poly4[j0]); + t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0)); + t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0); + *py0 = t0; + } + else + { + j0 = (xsb0 + 0x4000) & 0xffff8000; + HI(&t0) = j0; + LO(&t0) = 0; + x0 = (x0 - t0) + y0; + z0 = x0 * x0; + t0 = z0 * (qq1 + z0 * qq2); + w0 = x0 * (one + z0 * (pp1 + z0 * pp2)); + j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + xsb0 = (xsb0 >> 30) & 2; + n0 ^= (xsb0 & ~(n0 << 1)); + xsb0 |= 1; + a0 = __vlibm_TBL_sincos_hi[j0+n0]; + t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0]; + *py0 = ( a0 + t0 ); + } + } + + if (biguns) + __vlibm_vsin_big(nsave, xsave, sxsave, ysave, sysave, 0x413921fb); +} diff --git a/usr/src/lib/libmvec/common/__vsinbig.c b/usr/src/lib/libmvec/common/__vsinbig.c new file mode 100644 index 0000000000..0c9c381bd6 --- /dev/null +++ b/usr/src/lib/libmvec/common/__vsinbig.c @@ -0,0 +1,172 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include <sys/isa_defs.h> + +#ifdef _LITTLE_ENDIAN +#define HI(x) *(1+(int*)x) +#define LO(x) *(unsigned*)x +#else +#define HI(x) *(int*)x +#define LO(x) *(1+(unsigned*)x) +#endif + +#ifdef __RESTRICT +#define restrict _Restrict +#else +#define restrict +#endif + +extern const double __vlibm_TBL_sincos_hi[], __vlibm_TBL_sincos_lo[]; +extern int __vlibm_rem_pio2m(double *, double *, int, int, int); + +static const double + zero = 0.0, + one = 1.0, + two24 = 16777216.0, + pp1 = -1.666666666605760465276263943134982554676e-0001, + pp2 = 8.333261209690963126718376566146180944442e-0003, + p1 = -1.666666666666629669805215138920301589656e-0001, + p2 = 8.333333332390951295683993455280336376663e-0003, + p3 = -1.984126237997976692791551778230098403960e-0004, + p4 = 2.753403624854277237649987622848330351110e-0006, + qq1 = -4.999999999977710986407023955908711557870e-0001, + qq2 = 4.166654863857219350645055881018842089580e-0002, + q1 = -4.999999999999931701464060878888294524481e-0001, + q2 = 4.166666666394861917535640593963708222319e-0002, + q3 = -1.388888552656142867832756687736851681462e-0003, + q4 = 2.478519423681460796618128289454530524759e-0005; + +void +__vlibm_vsin_big(int n, double * restrict x, int stridex, double * restrict y, + int stridey, int thresh) +{ + for (; n--; x += stridex, y += stridey) + { + double tx, tt[3], ty[2], t, w, z, a; + unsigned hx, xsb; + int e0, nx, j; + + hx = HI(x); + xsb = hx & 0x80000000; + hx &= ~0x80000000; + if (hx <= thresh || hx >= 0x7ff00000) + continue; + e0 = (hx >> 20) - 1046; + HI(&tx) = 0x41600000 | (hx & 
0xfffff); + LO(&tx) = LO(x); + tt[0] = (double)((int) tx); + tx = (tx - tt[0]) * two24; + if (tx != zero) + { + nx = 2; + tt[1] = (double)((int) tx); + tt[2] = (tx - tt[1]) * two24; + if (tt[2] != zero) + nx = 3; + } + else + { + nx = 1; + tt[1] = tt[2] = zero; + } + nx = __vlibm_rem_pio2m(tt, ty, e0, nx, 2); + if (xsb) + { + nx = -nx; + ty[0] = -ty[0]; + ty[1] = -ty[1]; + } + + /* now nx and ty[*] are the quadrant and reduced arg */ + xsb = (nx & 2) << 30; + hx = HI(&ty[0]); + if (nx & 1) + { + if (hx & 0x80000000) + { + ty[0] = -ty[0]; + ty[1] = -ty[1]; + hx &= ~0x80000000; + } + if (hx < 0x3fc40000) + { + z = ty[0] * ty[0]; + t = z * (q1 + z * (q2 + z * (q3 + z * q4))); + a = one + t; + } + else + { + j = (hx + 0x4000) & 0x7fff8000; + HI(&t) = j; + LO(&t) = 0; + ty[0] = (ty[0] - t) + ty[1]; + z = ty[0] * ty[0]; + t = z * (qq1 + z * qq2); + w = ty[0] * (one + z * (pp1 + z * pp2)); + j = ((j - 0x3fc40000) >> 13) & ~3; + a = __vlibm_TBL_sincos_hi[j+1]; + t = __vlibm_TBL_sincos_lo[j+1] - (__vlibm_TBL_sincos_hi[j] * w - a * t); + a += t; + } + } + else + { + if (hx & 0x80000000) + { + ty[0] = -ty[0]; + ty[1] = -ty[1]; + hx &= ~0x80000000; + xsb ^= 0x80000000; + } + if (hx < 0x3fc90000) + { + z = ty[0] * ty[0]; + t = z * (p1 + z * (p2 + z * (p3 + z * p4))); + a = ty[0] + (ty[1] + ty[0] * t); + } + else + { + j = (hx + 0x4000) & 0x7fff8000; + HI(&t) = j; + LO(&t) = 0; + ty[0] = (ty[0] - t) + ty[1]; + z = ty[0] * ty[0]; + t = z * (qq1 + z * qq2); + w = ty[0] * (one + z * (pp1 + z * pp2)); + j = ((j - 0x3fc40000) >> 13) & ~3; + a = __vlibm_TBL_sincos_hi[j]; + t = (__vlibm_TBL_sincos_hi[j+1] * w + a * t) + __vlibm_TBL_sincos_lo[j]; + a += t; + } + } + if (xsb) a = -a; + *y = a; + } +} diff --git a/usr/src/lib/libmvec/common/__vsinbig_ultra3.c b/usr/src/lib/libmvec/common/__vsinbig_ultra3.c new file mode 100644 index 0000000000..63ecc325f7 --- /dev/null +++ b/usr/src/lib/libmvec/common/__vsinbig_ultra3.c @@ -0,0 +1,653 @@ +/* + * CDDL HEADER START + * + * The contents of 
this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include <sys/isa_defs.h> +/* HI(x) and LO(x) access the high and low 32-bit words of the double at address x. */ +#ifdef _LITTLE_ENDIAN +#define HI(x) *(1+(int*)x) +#define LO(x) *(unsigned*)x +#else +#define HI(x) *(int*)x +#define LO(x) *(1+(unsigned*)x) +#endif + +#ifdef __RESTRICT +#define restrict _Restrict +#else +#define restrict +#endif + +extern const double __vlibm_TBL_sincos_hi[], __vlibm_TBL_sincos_lo[]; + +static const double + half[2] = { 0.5, -0.5 }, /* rounding bias, indexed by the sign bit of x */ + one = 1.0, + invpio2 = 0.636619772367581343075535, /* 2/pi */ + pio2_1 = 1.570796326734125614166, /* leading bits of pi/2 */ + pio2_2 = 6.077100506303965976596e-11, /* next bits of pi/2 after pio2_1 */ + pio2_3 = 2.022266248711166455796e-21, /* next bits of pi/2 after pio2_2 */ + pio2_3t = 8.478427660368899643959e-32, /* remainder of pi/2 after the three pieces above */ + pp1 = -1.666666666605760465276263943134982554676e-0001, + pp2 = 8.333261209690963126718376566146180944442e-0003, + qq1 = -4.999999999977710986407023955908711557870e-0001, + qq2 = 4.166654863857219350645055881018842089580e-0002, + poly1[2]= { -1.666666666666629669805215138920301589656e-0001, + -4.999999999999931701464060878888294524481e-0001 }, + poly2[2]= { 8.333333332390951295683993455280336376663e-0003, + 
4.166666666394861917535640593963708222319e-0002 }, + poly3[2]= { -1.984126237997976692791551778230098403960e-0004, + -1.388888552656142867832756687736851681462e-0003 }, + poly4[2]= { 2.753403624854277237649987622848330351110e-0006, + 2.478519423681460796618128289454530524759e-0005 }; +/* + * thresh[n & 1]: if the high word of the reduced argument (sign cleared) is + * below this bound, the poly1..poly4 polynomial path is used; otherwise the + * __vlibm_TBL_sincos table path is used. n is the quadrant count. + */ +static const unsigned thresh[2] = { 0x3fc90000, 0x3fc40000 }; + +extern void __vlibm_vsin_big(int, double *, int, double *, int, int); +/* + * Vector sine for medium-sized arguments. For each of the n elements + * x[i*stridex] whose high word (sign cleared) is greater than pthresh and + * at most 0x413921fb, the argument is reduced by a multiple of pi/2 using + * pio2_1, pio2_2, pio2_3 and pio2_3t in turn, and the sine is stored in + * y[i*stridey]. + * + * Elements outside that range are not written here. If any element has a + * high word above 0x413921fb and below 0x7ff00000, the whole original + * vector is passed to __vlibm_vsin_big() afterwards with 0x413921fb as its + * threshold argument. + * + * In-range elements are gathered three at a time (slots 0, 1 and 2); one + * or two leftover elements are finished after the main loop. + * + * NOTE(review): the loop body runs once before n is tested, so callers + * presumably guarantee n > 0 -- confirm. + */ +void +__vlibm_vsin_big_ultra3(int n, double * restrict x, int stridex, double * restrict y, + int stridey, int pthresh) +{ + double x0_or_one[4], x1_or_one[4], x2_or_one[4]; + double y0_or_zero[4], y1_or_zero[4], y2_or_zero[4]; + double x0, x1, x2, *py0, *py1, *py2, *xsave, *ysave; + unsigned xsb0, xsb1, xsb2; + int i, biguns, nsave, sxsave, sysave; + + nsave = n; + xsave = x; + sxsave = stridex; + ysave = y; + sysave = stridey; + biguns = 0; + /* Entries 1 and 3 of the quadrant-indexed tables are constant; entries 0 and 2 are filled per element with +/-x and +/-y. */ + x0_or_one[1] = 1.0; + x1_or_one[1] = 1.0; + x2_or_one[1] = 1.0; + x0_or_one[3] = -1.0; + x1_or_one[3] = -1.0; + x2_or_one[3] = -1.0; + y0_or_zero[1] = 0.0; + y1_or_zero[1] = 0.0; + y2_or_zero[1] = 0.0; + y0_or_zero[3] = 0.0; + y1_or_zero[3] = 0.0; + y2_or_zero[3] = 0.0; + + do + { + double fn0, fn1, fn2, a0, a1, a2, w0, w1, w2, y0, y1, y2; + unsigned hx; + int n0, n1, n2; + +loop0: /* find the next in-range element for slot 0; i records how many slots are filled */ + hx = HI(x); + xsb0 = hx >> 31; + hx &= ~0x80000000; + if (hx <= pthresh || hx > 0x413921fb) + { + if (hx > 0x413921fb && hx < 0x7ff00000) + biguns = 1; + x += stridex; + y += stridey; + i = 0; + if (--n <= 0) + break; + goto loop0; + } + x0 = *x; + py0 = y; + x += stridex; + y += stridey; + i = 1; + if (--n <= 0) + break; + +loop1: /* same search for slot 1 */ + hx = HI(x); + xsb1 = hx >> 31; + hx &= ~0x80000000; + if (hx <= pthresh || hx > 0x413921fb) + { + if (hx > 0x413921fb && hx < 0x7ff00000) + biguns = 1; + x += stridex; + y += stridey; + i = 1; + if (--n <= 0) + break; + goto loop1; + } + x1 = *x; + py1 = y; + x += stridex; + y += stridey; + i = 2; + if (--n <= 0) + break; + +loop2: /* same search for slot 2 */ + hx = HI(x); + xsb2 = hx >> 31; + hx &= ~0x80000000; + if (hx <= 
pthresh || hx > 0x413921fb) + { + if (hx > 0x413921fb && hx < 0x7ff00000) + biguns = 1; + x += stridex; + y += stridey; + i = 2; + if (--n <= 0) + break; + goto loop2; + } + x2 = *x; + py2 = y; + /* n_k = x_k * 2/pi rounded to the nearest integer; the low two bits are kept as the quadrant */ + n0 = (int) (x0 * invpio2 + half[xsb0]); + n1 = (int) (x1 * invpio2 + half[xsb1]); + n2 = (int) (x2 * invpio2 + half[xsb2]); + fn0 = (double) n0; + fn1 = (double) n1; + fn2 = (double) n2; + n0 &= 3; + n1 &= 3; + n2 &= 3; + a0 = x0 - fn0 * pio2_1; + a1 = x1 - fn1 * pio2_1; + a2 = x2 - fn2 * pio2_1; + w0 = fn0 * pio2_2; + w1 = fn1 * pio2_2; + w2 = fn2 * pio2_2; + x0 = a0 - w0; + x1 = a1 - w1; + x2 = a2 - w2; + y0 = (a0 - x0) - w0; + y1 = (a1 - x1) - w1; + y2 = (a2 - x2) - w2; + a0 = x0; + a1 = x1; + a2 = x2; + w0 = fn0 * pio2_3 - y0; + w1 = fn1 * pio2_3 - y1; + w2 = fn2 * pio2_3 - y2; + x0 = a0 - w0; + x1 = a1 - w1; + x2 = a2 - w2; + y0 = (a0 - x0) - w0; + y1 = (a1 - x1) - w1; + y2 = (a2 - x2) - w2; + a0 = x0; + a1 = x1; + a2 = x2; + w0 = fn0 * pio2_3t - y0; + w1 = fn1 * pio2_3t - y1; + w2 = fn2 * pio2_3t - y2; + x0 = a0 - w0; + x1 = a1 - w1; + x2 = a2 - w2; + y0 = (a0 - x0) - w0; + y1 = (a1 - x1) - w1; + y2 = (a2 - x2) - w2; + xsb0 = HI(&x0); + i = ((xsb0 & ~0x80000000) - thresh[n0&1]) >> 31; + xsb1 = HI(&x1); + i |= (((xsb1 & ~0x80000000) - thresh[n1&1]) >> 30) & 2; + xsb2 = HI(&x2); + i |= (((xsb2 & ~0x80000000) - thresh[n2&1]) >> 29) & 4; /* bit k of i set: slot k is below its threshold and uses the polynomial path */ + switch (i) + { + double t0, t1, t2, z0, z1, z2; + unsigned j0, j1, j2; + + case 0: /* all three slots use the table path */ + j0 = (xsb0 + 0x4000) & 0xffff8000; + j1 = (xsb1 + 0x4000) & 0xffff8000; + j2 = (xsb2 + 0x4000) & 0xffff8000; + HI(&t0) = j0; + HI(&t1) = j1; + HI(&t2) = j2; + LO(&t0) = 0; + LO(&t1) = 0; + LO(&t2) = 0; + x0 = (x0 - t0) + y0; + x1 = (x1 - t1) + y1; + x2 = (x2 - t2) + y2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * (qq1 + z0 * qq2); + t1 = z1 * (qq1 + z1 * qq2); + t2 = z2 * (qq1 + z2 * qq2); + w0 = x0 * (one + z0 * (pp1 + z0 * pp2)); + w1 = x1 * (one + z1 * (pp1 + z1 * pp2)); + w2 = x2 * (one + z2 * (pp1 + z2 * pp2)); + j0 = (((j0 & 
~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + xsb0 = (xsb0 >> 30) & 2; + xsb1 = (xsb1 >> 30) & 2; + xsb2 = (xsb2 >> 30) & 2; + n0 ^= (xsb0 & ~(n0 << 1)); + n1 ^= (xsb1 & ~(n1 << 1)); + n2 ^= (xsb2 & ~(n2 << 1)); + xsb0 |= 1; + xsb1 |= 1; + xsb2 |= 1; + a0 = __vlibm_TBL_sincos_hi[j0+n0]; + a1 = __vlibm_TBL_sincos_hi[j1+n1]; + a2 = __vlibm_TBL_sincos_hi[j2+n2]; + t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0]; + t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1]; + t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2]; + *py0 = ( a0 + t0 ); + *py1 = ( a1 + t1 ); + *py2 = ( a2 + t2 ); + break; + + case 1: /* slot 0 polynomial; slots 1 and 2 table */ + j0 = n0 & 1; + j1 = (xsb1 + 0x4000) & 0xffff8000; + j2 = (xsb2 + 0x4000) & 0xffff8000; + HI(&t1) = j1; + HI(&t2) = j2; + LO(&t1) = 0; + LO(&t2) = 0; + x0_or_one[0] = x0; + x0_or_one[2] = -x0; + y0_or_zero[0] = y0; + y0_or_zero[2] = -y0; + x1 = (x1 - t1) + y1; + x2 = (x2 - t2) + y2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * (poly3[j0] + z0 * poly4[j0]); + t1 = z1 * (qq1 + z1 * qq2); + t2 = z2 * (qq1 + z2 * qq2); + t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0)); + w1 = x1 * (one + z1 * (pp1 + z1 * pp2)); + w2 = x2 * (one + z2 * (pp1 + z2 * pp2)); + j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + xsb1 = (xsb1 >> 30) & 2; + xsb2 = (xsb2 >> 30) & 2; + n1 ^= (xsb1 & ~(n1 << 1)); + n2 ^= (xsb2 & ~(n2 << 1)); + xsb1 |= 1; + xsb2 |= 1; + a1 = __vlibm_TBL_sincos_hi[j1+n1]; + a2 = __vlibm_TBL_sincos_hi[j2+n2]; + t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0); + t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1]; + t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2]; 
+ *py0 = t0; + *py1 = ( a1 + t1 ); + *py2 = ( a2 + t2 ); + break; + + case 2: + j0 = (xsb0 + 0x4000) & 0xffff8000; + j1 = n1 & 1; + j2 = (xsb2 + 0x4000) & 0xffff8000; + HI(&t0) = j0; + HI(&t2) = j2; + LO(&t0) = 0; + LO(&t2) = 0; + x1_or_one[0] = x1; + x1_or_one[2] = -x1; + x0 = (x0 - t0) + y0; + y1_or_zero[0] = y1; + y1_or_zero[2] = -y1; + x2 = (x2 - t2) + y2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * (qq1 + z0 * qq2); + t1 = z1 * (poly3[j1] + z1 * poly4[j1]); + t2 = z2 * (qq1 + z2 * qq2); + w0 = x0 * (one + z0 * (pp1 + z0 * pp2)); + t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1)); + w2 = x2 * (one + z2 * (pp1 + z2 * pp2)); + j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + xsb0 = (xsb0 >> 30) & 2; + xsb2 = (xsb2 >> 30) & 2; + n0 ^= (xsb0 & ~(n0 << 1)); + n2 ^= (xsb2 & ~(n2 << 1)); + xsb0 |= 1; + xsb2 |= 1; + a0 = __vlibm_TBL_sincos_hi[j0+n0]; + a2 = __vlibm_TBL_sincos_hi[j2+n2]; + t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0]; + t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1); + t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2]; + *py0 = ( a0 + t0 ); + *py1 = t1; + *py2 = ( a2 + t2 ); + break; + + case 3: + j0 = n0 & 1; + j1 = n1 & 1; + j2 = (xsb2 + 0x4000) & 0xffff8000; + HI(&t2) = j2; + LO(&t2) = 0; + x0_or_one[0] = x0; + x0_or_one[2] = -x0; + x1_or_one[0] = x1; + x1_or_one[2] = -x1; + y0_or_zero[0] = y0; + y0_or_zero[2] = -y0; + y1_or_zero[0] = y1; + y1_or_zero[2] = -y1; + x2 = (x2 - t2) + y2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * (poly3[j0] + z0 * poly4[j0]); + t1 = z1 * (poly3[j1] + z1 * poly4[j1]); + t2 = z2 * (qq1 + z2 * qq2); + t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0)); + t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1)); + w2 = x2 * (one + z2 * (pp1 + z2 * pp2)); + j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + xsb2 = (xsb2 >> 30) & 2; + n2 ^= 
(xsb2 & ~(n2 << 1)); + xsb2 |= 1; + a2 = __vlibm_TBL_sincos_hi[j2+n2]; + t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0); + t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1); + t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2]; + *py0 = t0; + *py1 = t1; + *py2 = ( a2 + t2 ); + break; + + case 4: + j0 = (xsb0 + 0x4000) & 0xffff8000; + j1 = (xsb1 + 0x4000) & 0xffff8000; + j2 = n2 & 1; + HI(&t0) = j0; + HI(&t1) = j1; + LO(&t0) = 0; + LO(&t1) = 0; + x2_or_one[0] = x2; + x2_or_one[2] = -x2; + x0 = (x0 - t0) + y0; + x1 = (x1 - t1) + y1; + y2_or_zero[0] = y2; + y2_or_zero[2] = -y2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * (qq1 + z0 * qq2); + t1 = z1 * (qq1 + z1 * qq2); + t2 = z2 * (poly3[j2] + z2 * poly4[j2]); + w0 = x0 * (one + z0 * (pp1 + z0 * pp2)); + w1 = x1 * (one + z1 * (pp1 + z1 * pp2)); + t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2)); + j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + xsb0 = (xsb0 >> 30) & 2; + xsb1 = (xsb1 >> 30) & 2; + n0 ^= (xsb0 & ~(n0 << 1)); + n1 ^= (xsb1 & ~(n1 << 1)); + xsb0 |= 1; + xsb1 |= 1; + a0 = __vlibm_TBL_sincos_hi[j0+n0]; + a1 = __vlibm_TBL_sincos_hi[j1+n1]; + t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0]; + t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1]; + t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2); + *py0 = ( a0 + t0 ); + *py1 = ( a1 + t1 ); + *py2 = t2; + break; + + case 5: + j0 = n0 & 1; + j1 = (xsb1 + 0x4000) & 0xffff8000; + j2 = n2 & 1; + HI(&t1) = j1; + LO(&t1) = 0; + x0_or_one[0] = x0; + x0_or_one[2] = -x0; + x2_or_one[0] = x2; + x2_or_one[2] = -x2; + y0_or_zero[0] = y0; + y0_or_zero[2] = -y0; + x1 = (x1 - t1) + y1; + y2_or_zero[0] = y2; + y2_or_zero[2] = -y2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * (poly3[j0] + z0 * poly4[j0]); + t1 = z1 * (qq1 + z1 * 
qq2); + t2 = z2 * (poly3[j2] + z2 * poly4[j2]); + t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0)); + w1 = x1 * (one + z1 * (pp1 + z1 * pp2)); + t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2)); + j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + xsb1 = (xsb1 >> 30) & 2; + n1 ^= (xsb1 & ~(n1 << 1)); + xsb1 |= 1; + a1 = __vlibm_TBL_sincos_hi[j1+n1]; + t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0); + t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1]; + t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2); + *py0 = t0; + *py1 = ( a1 + t1 ); + *py2 = t2; + break; + + case 6: + j0 = (xsb0 + 0x4000) & 0xffff8000; + j1 = n1 & 1; + j2 = n2 & 1; + HI(&t0) = j0; + LO(&t0) = 0; + x1_or_one[0] = x1; + x1_or_one[2] = -x1; + x2_or_one[0] = x2; + x2_or_one[2] = -x2; + x0 = (x0 - t0) + y0; + y1_or_zero[0] = y1; + y1_or_zero[2] = -y1; + y2_or_zero[0] = y2; + y2_or_zero[2] = -y2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * (qq1 + z0 * qq2); + t1 = z1 * (poly3[j1] + z1 * poly4[j1]); + t2 = z2 * (poly3[j2] + z2 * poly4[j2]); + w0 = x0 * (one + z0 * (pp1 + z0 * pp2)); + t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1)); + t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2)); + j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + xsb0 = (xsb0 >> 30) & 2; + n0 ^= (xsb0 & ~(n0 << 1)); + xsb0 |= 1; + a0 = __vlibm_TBL_sincos_hi[j0+n0]; + t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0]; + t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1); + t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2); + *py0 = ( a0 + t0 ); + *py1 = t1; + *py2 = t2; + break; + + case 7: /* all three slots use the polynomial path */ + j0 = n0 & 1; + j1 = n1 & 1; + j2 = n2 & 1; + x0_or_one[0] = x0; + x0_or_one[2] = -x0; + x1_or_one[0] = x1; + x1_or_one[2] = -x1; + x2_or_one[0] = x2; + x2_or_one[2] = -x2; + y0_or_zero[0] = y0; + y0_or_zero[2] = -y0; + y1_or_zero[0] = y1; + y1_or_zero[2] = -y1; + y2_or_zero[0] = y2; + y2_or_zero[2] = 
-y2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * (poly3[j0] + z0 * poly4[j0]); + t1 = z1 * (poly3[j1] + z1 * poly4[j1]); + t2 = z2 * (poly3[j2] + z2 * poly4[j2]); + t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0)); + t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1)); + t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2)); + t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0); + t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1); + t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2); + *py0 = t0; + *py1 = t1; + *py2 = t2; + break; + } + + x += stridex; + y += stridey; + i = 0; + } while (--n > 0); + /* i is the number of gathered elements not yet processed: slot 1 (if i > 1), then slot 0. */ + if (i > 0) + { + double fn0, fn1, a0, a1, w0, w1, y0, y1; + double t0, t1, z0, z1; + unsigned j0, j1; + int n0, n1; + + if (i > 1) + { + n1 = (int) (x1 * invpio2 + half[xsb1]); + fn1 = (double) n1; + n1 &= 3; + a1 = x1 - fn1 * pio2_1; + w1 = fn1 * pio2_2; + x1 = a1 - w1; + y1 = (a1 - x1) - w1; + a1 = x1; + w1 = fn1 * pio2_3 - y1; + x1 = a1 - w1; + y1 = (a1 - x1) - w1; + a1 = x1; + w1 = fn1 * pio2_3t - y1; + x1 = a1 - w1; + y1 = (a1 - x1) - w1; + xsb1 = HI(&x1); + if ((xsb1 & ~0x80000000) < thresh[n1&1]) + { + j1 = n1 & 1; + x1_or_one[0] = x1; + x1_or_one[2] = -x1; + y1_or_zero[0] = y1; + y1_or_zero[2] = -y1; + z1 = x1 * x1; + t1 = z1 * (poly3[j1] + z1 * poly4[j1]); + t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1)); + t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1); + *py1 = t1; + } + else + { + j1 = (xsb1 + 0x4000) & 0xffff8000; + HI(&t1) = j1; + LO(&t1) = 0; + x1 = (x1 - t1) + y1; + z1 = x1 * x1; + t1 = z1 * (qq1 + z1 * qq2); + w1 = x1 * (one + z1 * (pp1 + z1 * pp2)); + j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + xsb1 = (xsb1 >> 30) & 2; + n1 ^= (xsb1 & ~(n1 << 1)); + xsb1 |= 1; + a1 = __vlibm_TBL_sincos_hi[j1+n1]; + t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1]; + *py1 = ( a1 + t1 ); + } + } + n0 = (int) (x0 * invpio2 + half[xsb0]); + fn0 = (double) n0; + n0 &= 3; + a0 = x0 - 
fn0 * pio2_1; + w0 = fn0 * pio2_2; + x0 = a0 - w0; + y0 = (a0 - x0) - w0; + a0 = x0; + w0 = fn0 * pio2_3 - y0; + x0 = a0 - w0; + y0 = (a0 - x0) - w0; + a0 = x0; + w0 = fn0 * pio2_3t - y0; + x0 = a0 - w0; + y0 = (a0 - x0) - w0; + xsb0 = HI(&x0); + if ((xsb0 & ~0x80000000) < thresh[n0&1]) + { + j0 = n0 & 1; + x0_or_one[0] = x0; + x0_or_one[2] = -x0; + y0_or_zero[0] = y0; + y0_or_zero[2] = -y0; + z0 = x0 * x0; + t0 = z0 * (poly3[j0] + z0 * poly4[j0]); + t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0)); + t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0); + *py0 = t0; + } + else + { + j0 = (xsb0 + 0x4000) & 0xffff8000; + HI(&t0) = j0; + LO(&t0) = 0; + x0 = (x0 - t0) + y0; + z0 = x0 * x0; + t0 = z0 * (qq1 + z0 * qq2); + w0 = x0 * (one + z0 * (pp1 + z0 * pp2)); + j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + xsb0 = (xsb0 >> 30) & 2; + n0 ^= (xsb0 & ~(n0 << 1)); + xsb0 |= 1; + a0 = __vlibm_TBL_sincos_hi[j0+n0]; + t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0]; + *py0 = ( a0 + t0 ); + } + } + /* At least one element was above 0x413921fb and below 0x7ff00000: pass the whole original vector on. */ + if (biguns) + __vlibm_vsin_big(nsave, xsave, sxsave, ysave, sysave, 0x413921fb); +} diff --git a/usr/src/lib/libmvec/common/__vsinbigf.c b/usr/src/lib/libmvec/common/__vsinbigf.c new file mode 100644 index 0000000000..17a8655217 --- /dev/null +++ b/usr/src/lib/libmvec/common/__vsinbigf.c @@ -0,0 +1,173 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include <sys/isa_defs.h> + +/* HI(x) and LO(x) access the high and low 32-bit words of the double at address x. */ +#ifdef _LITTLE_ENDIAN +#define HI(x) *(1+(int*)x) +#define LO(x) *(unsigned*)x +#else +#define HI(x) *(int*)x +#define LO(x) *(1+(unsigned*)x) +#endif + +#ifdef __RESTRICT +#define restrict _Restrict +#else +#define restrict +#endif + +extern const double __vlibm_TBL_sincos_hi[], __vlibm_TBL_sincos_lo[]; +extern int __vlibm_rem_pio2m(double *, double *, int, int, int); + +static const double + zero = 0.0, + one = 1.0, + two24 = 16777216.0, + pp1 = -1.666666666605760465276263943134982554676e-0001, + pp2 = 8.333261209690963126718376566146180944442e-0003, + p1 = -1.666666666666629669805215138920301589656e-0001, + p2 = 8.333333332390951295683993455280336376663e-0003, + p3 = -1.984126237997976692791551778230098403960e-0004, + p4 = 2.753403624854277237649987622848330351110e-0006, + qq1 = -4.999999999977710986407023955908711557870e-0001, + qq2 = 4.166654863857219350645055881018842089580e-0002, + q1 = -4.999999999999931701464060878888294524481e-0001, + q2 = 4.166666666394861917535640593963708222319e-0002, + q3 = -1.388888552656142867832756687736851681462e-0003, + q4 = 2.478519423681460796618128289454530524759e-0005; + +/* + * Vector sine for large single-precision arguments. + * + * For each of the n elements x[i*stridex], the float is widened to double + * and, if the high word of that double (sign cleared) is above 0x413921fb + * and below 0x7ff00000, its sine is stored in y[i*stridey]. All other + * elements are skipped and their y slots are left untouched. + * + * The magnitude is split into up to three 24-bit integer pieces tt[] and + * reduced by __vlibm_rem_pio2m(), which yields the quadrant nx and the + * reduced argument ty[0] + ty[1]. Odd quadrants evaluate the cosine of the + * reduced argument and even quadrants the sine; each uses a short + * polynomial when the reduced argument is small and the + * __vlibm_TBL_sincos tables otherwise. + * + * A non-positive n processes nothing. + */ +void +__vlibm_vsin_bigf(int n, float * restrict x, int stridex, float * restrict y, + int stridey) +{ + /* test n > 0 rather than n-- so a negative count cannot run away */ + for (; n > 0; n--, x += stridex, y += stridey) + { + double tx, tt[3], ty[2], t, w, z, a; + unsigned hx, xsb; + int e0, nx, j; + + tx = *x; + hx = HI(&tx); + xsb = hx & 0x80000000; + hx &= ~0x80000000; + if (hx <= 0x413921fb || hx >= 0x7ff00000) + continue; + /* rescale |x| to exponent 0x416 so its integer part fits in 24 bits; e0 carries the scale */ + e0 = (hx >> 20) - 1046; + HI(&tx) = 0x41600000 | (hx & 
0xfffff); + + tt[0] = (double)((int) tx); + tx = (tx - tt[0]) * two24; + if (tx != zero) + { + nx = 2; + tt[1] = (double)((int) tx); + tt[2] = (tx - tt[1]) * two24; + if (tt[2] != zero) + nx = 3; + } + else + { + nx = 1; + tt[1] = tt[2] = zero; + } + nx = __vlibm_rem_pio2m(tt, ty, e0, nx, 2); + if (xsb) + { + nx = -nx; + ty[0] = -ty[0]; + ty[1] = -ty[1]; + } + + /* now nx and ty[*] are the quadrant and reduced arg */ + /* convert to unsigned first: (nx & 2) << 30 on a signed int overflows into the sign bit */ + xsb = ((unsigned) nx & 2) << 30; + hx = HI(&ty[0]); + if (nx & 1) + { + if (hx & 0x80000000) + { + ty[0] = -ty[0]; + ty[1] = -ty[1]; + hx &= ~0x80000000; + } + if (hx < 0x3fc40000) + { + z = ty[0] * ty[0]; + t = z * (q1 + z * (q2 + z * (q3 + z * q4))); + a = one + t; + } + else + { + j = (hx + 0x4000) & 0x7fff8000; + HI(&t) = j; + LO(&t) = 0; + ty[0] = (ty[0] - t) + ty[1]; + z = ty[0] * ty[0]; + t = z * (qq1 + z * qq2); + w = ty[0] * (one + z * (pp1 + z * pp2)); + j = ((j - 0x3fc40000) >> 13) & ~3; + a = __vlibm_TBL_sincos_hi[j+1]; + t = __vlibm_TBL_sincos_lo[j+1] - (__vlibm_TBL_sincos_hi[j] * w - a * t); + a += t; + } + } + else + { + if (hx & 0x80000000) + { + ty[0] = -ty[0]; + ty[1] = -ty[1]; + hx &= ~0x80000000; + xsb ^= 0x80000000; + } + if (hx < 0x3fc90000) + { + z = ty[0] * ty[0]; + t = z * (p1 + z * (p2 + z * (p3 + z * p4))); + a = ty[0] + (ty[1] + ty[0] * t); + } + else + { + j = (hx + 0x4000) & 0x7fff8000; + HI(&t) = j; + LO(&t) = 0; + ty[0] = (ty[0] - t) + ty[1]; + z = ty[0] * ty[0]; + t = z * (qq1 + z * qq2); + w = ty[0] * (one + z * (pp1 + z * pp2)); + j = ((j - 0x3fc40000) >> 13) & ~3; + a = __vlibm_TBL_sincos_hi[j]; + t = (__vlibm_TBL_sincos_hi[j+1] * w + a * t) + __vlibm_TBL_sincos_lo[j]; + a += t; + } + } + if (xsb) a = -a; + *y = a; + } +} diff --git a/usr/src/lib/libmvec/common/__vsincos.c b/usr/src/lib/libmvec/common/__vsincos.c new file mode 100644 index 0000000000..23fc1b5889 --- /dev/null +++ b/usr/src/lib/libmvec/common/__vsincos.c @@ -0,0 +1,1547 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of 
the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include <sys/isa_defs.h> +#include <sys/ccompile.h> + +#ifdef _LITTLE_ENDIAN +#define HI(x) *(1+(int*)x) +#define LO(x) *(unsigned*)x +#else +#define HI(x) *(int*)x +#define LO(x) *(1+(unsigned*)x) +#endif + +#ifdef __RESTRICT +#define restrict _Restrict +#else +#define restrict +#endif + +/* + * vsincos.c + * + * Vector sine and cosine function. Just slight modifications to vcos.c. 
+ */ + +extern const double __vlibm_TBL_sincos_hi[], __vlibm_TBL_sincos_lo[]; + +static const double + half[2] = { 0.5, -0.5 }, + one = 1.0, + invpio2 = 0.636619772367581343075535, /* 53 bits of 2/pi */ + pio2_1 = 1.570796326734125614166, /* first 33 bits of pi/2 */ + pio2_2 = 6.077100506303965976596e-11, /* second 33 bits of pi/2 */ + pio2_3 = 2.022266248711166455796e-21, /* third 33 bits of pi/2 */ + pio2_3t = 8.478427660368899643959e-32, /* pi/2 - (pio2_1 + pio2_2 + pio2_3) */ + pp1 = -1.666666666605760465276263943134982554676e-0001, + pp2 = 8.333261209690963126718376566146180944442e-0003, + qq1 = -4.999999999977710986407023955908711557870e-0001, + qq2 = 4.166654863857219350645055881018842089580e-0002, + poly1[2]= { -1.666666666666629669805215138920301589656e-0001, + -4.999999999999931701464060878888294524481e-0001 }, + poly2[2]= { 8.333333332390951295683993455280336376663e-0003, + 4.166666666394861917535640593963708222319e-0002 }, + poly3[2]= { -1.984126237997976692791551778230098403960e-0004, + -1.388888552656142867832756687736851681462e-0003 }, + poly4[2]= { 2.753403624854277237649987622848330351110e-0006, + 2.478519423681460796618128289454530524759e-0005 }; + +/* Don't __ the following; acomp will handle it */ +extern double fabs(double); +extern void __vlibm_vsincos_big(int, double *, int, double *, int, double *, int, int); + +/* + * y[i*stridey] := sin( x[i*stridex] ), for i = 0..n-1. + * c[i*stridec] := cos( x[i*stridex] ), for i = 0..n-1. + * + * Calls __vlibm_vsincos_big to handle all elts which have abs >~ 1.647e+06. + * Argument reduction is done here for elts pi/4 < arg < 1.647e+06. + * + * elts with abs < 2^-27 use the approximations cos(x) ~ 1.0 and sin(x) ~ x. 
+ */ +void +__vsincos(int n, double * restrict x, int stridex, + double * restrict y, int stridey, + double * restrict c, int stridec) +{ + double x0_or_one[4], x1_or_one[4], x2_or_one[4]; + double y0_or_zero[4], y1_or_zero[4], y2_or_zero[4]; + double x0, x1, x2, + *py0, *py1, *py2, + *pc0, *pc1, *pc2, + *xsave, *ysave, *csave; + unsigned hx0, hx1, hx2, xsb0, xsb1, xsb2; + int i, biguns, nsave, sxsave, sysave, scsave; + volatile int v __GNU_UNUSED; + nsave = n; + xsave = x; + sxsave = stridex; + ysave = y; + sysave = stridey; + csave = c; + scsave = stridec; + biguns = 0; + + do /* MAIN LOOP */ + { + + /* Gotos here so _break_ exits MAIN LOOP. */ +LOOP0: /* Find first arg in right range. */ + xsb0 = HI(x); /* get most significant word */ + hx0 = xsb0 & ~0x80000000; /* mask off sign bit */ + if (hx0 > 0x3fe921fb) { + /* Too big: arg reduction needed, so leave for second part */ + biguns = 1; + x += stridex; + y += stridey; + c += stridec; + i = 0; + if (--n <= 0) + break; + goto LOOP0; + } + if (hx0 < 0x3e400000) { + /* Too small. cos x ~ 1, sin x ~ x. */ + v = *x; + *c = 1.0; + *y = *x; + x += stridex; + y += stridey; + c += stridec; + i = 0; + if (--n <= 0) + break; + goto LOOP0; + } + x0 = *x; + py0 = y; + pc0 = c; + x += stridex; + y += stridey; + c += stridec; + i = 1; + if (--n <= 0) + break; + +LOOP1: /* Get second arg, same as above. */ + xsb1 = HI(x); + hx1 = xsb1 & ~0x80000000; + if (hx1 > 0x3fe921fb) + { + biguns = 1; + x += stridex; + y += stridey; + c += stridec; + i = 1; + if (--n <= 0) + break; + goto LOOP1; + } + if (hx1 < 0x3e400000) + { + v = *x; + *c = 1.0; + *y = *x; + x += stridex; + y += stridey; + c += stridec; + i = 1; + if (--n <= 0) + break; + goto LOOP1; + } + x1 = *x; + py1 = y; + pc1 = c; + x += stridex; + y += stridey; + c += stridec; + i = 2; + if (--n <= 0) + break; + +LOOP2: /* Get third arg, same as above. 
*/ + xsb2 = HI(x); + hx2 = xsb2 & ~0x80000000; + if (hx2 > 0x3fe921fb) + { + biguns = 1; + x += stridex; + y += stridey; + c += stridec; + i = 2; + if (--n <= 0) + break; + goto LOOP2; + } + if (hx2 < 0x3e400000) + { + v = *x; + *c = 1.0; + *y = *x; + x += stridex; + y += stridey; + c += stridec; + i = 2; + if (--n <= 0) + break; + goto LOOP2; + } + x2 = *x; + py2 = y; + pc2 = c; + + /* + * 0x3fc40000 = 5/32 ~ 0.15625 + * Get msb after subtraction. Will be 1 only if + * hx0 - 5/32 is negative. + */ + i = (hx2 - 0x3fc40000) >> 31; + i |= ((hx1 - 0x3fc40000) >> 30) & 2; + i |= ((hx0 - 0x3fc40000) >> 29) & 4; + switch (i) + { + double a1_0, a1_1, a1_2, a2_0, a2_1, a2_2; + double w0, w1, w2; + double t0, t1, t2, t1_0, t1_1, t1_2, t2_0, t2_1, t2_2; + double z0, z1, z2; + unsigned j0, j1, j2; + + case 0: /* All are > 5/32 */ + j0 = (xsb0 + 0x4000) & 0xffff8000; + j1 = (xsb1 + 0x4000) & 0xffff8000; + j2 = (xsb2 + 0x4000) & 0xffff8000; + + HI(&t0) = j0; + HI(&t1) = j1; + HI(&t2) = j2; + LO(&t0) = 0; + LO(&t1) = 0; + LO(&t2) = 0; + + x0 -= t0; + x1 -= t1; + x2 -= t2; + + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + + t0 = z0 * (qq1 + z0 * qq2); + t1 = z1 * (qq1 + z1 * qq2); + t2 = z2 * (qq1 + z2 * qq2); + + w0 = x0 * (one + z0 * (pp1 + z0 * pp2)); + w1 = x1 * (one + z1 * (pp1 + z1 * pp2)); + w2 = x2 * (one + z2 * (pp1 + z2 * pp2)); + + j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + + xsb0 = (xsb0 >> 30) & 2; + xsb1 = (xsb1 >> 30) & 2; + xsb2 = (xsb2 >> 30) & 2; + + a1_0 = __vlibm_TBL_sincos_hi[j0+xsb0]; /* sin_hi(t) */ + a1_1 = __vlibm_TBL_sincos_hi[j1+xsb1]; + a1_2 = __vlibm_TBL_sincos_hi[j2+xsb2]; + + a2_0 = __vlibm_TBL_sincos_hi[j0+1]; /* cos_hi(t) */ + a2_1 = __vlibm_TBL_sincos_hi[j1+1]; + a2_2 = __vlibm_TBL_sincos_hi[j2+1]; + /* cos_lo(t) */ + t2_0 = __vlibm_TBL_sincos_lo[j0+1] - (a1_0*w0 - a2_0*t0); + t2_1 = __vlibm_TBL_sincos_lo[j1+1] - 
(a1_1*w1 - a2_1*t1); + t2_2 = __vlibm_TBL_sincos_lo[j2+1] - (a1_2*w2 - a2_2*t2); + + *pc0 = a2_0 + t2_0; + *pc1 = a2_1 + t2_1; + *pc2 = a2_2 + t2_2; + + t1_0 = a2_0*w0 + a1_0*t0; + t1_1 = a2_1*w1 + a1_1*t1; + t1_2 = a2_2*w2 + a1_2*t2; + + t1_0 += __vlibm_TBL_sincos_lo[j0+xsb0]; /* sin_lo(t) */ + t1_1 += __vlibm_TBL_sincos_lo[j1+xsb1]; + t1_2 += __vlibm_TBL_sincos_lo[j2+xsb2]; + + *py0 = a1_0 + t1_0; + *py1 = a1_1 + t1_1; + *py2 = a1_2 + t1_2; + + break; + + case 1: + j0 = (xsb0 + 0x4000) & 0xffff8000; + j1 = (xsb1 + 0x4000) & 0xffff8000; + HI(&t0) = j0; + HI(&t1) = j1; + LO(&t0) = 0; + LO(&t1) = 0; + x0 -= t0; + x1 -= t1; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * (qq1 + z0 * qq2); + t1 = z1 * (qq1 + z1 * qq2); + t2 = z2 * (poly3[1] + z2 * poly4[1]); + w0 = x0 * (one + z0 * (pp1 + z0 * pp2)); + w1 = x1 * (one + z1 * (pp1 + z1 * pp2)); + t2 = z2 * (poly1[1] + z2 * (poly2[1] + t2)); + j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + xsb0 = (xsb0 >> 30) & 2; + xsb1 = (xsb1 >> 30) & 2; + + a1_0 = __vlibm_TBL_sincos_hi[j0+xsb0]; /* sin_hi(t) */ + a1_1 = __vlibm_TBL_sincos_hi[j1+xsb1]; + + a2_0 = __vlibm_TBL_sincos_hi[j0+1]; /* cos_hi(t) */ + a2_1 = __vlibm_TBL_sincos_hi[j1+1]; + /* cos_lo(t) */ + t2_0 = __vlibm_TBL_sincos_lo[j0+1] - (a1_0*w0 - a2_0*t0); + t2_1 = __vlibm_TBL_sincos_lo[j1+1] - (a1_1*w1 - a2_1*t1); + + *pc0 = a2_0 + t2_0; + *pc1 = a2_1 + t2_1; + *pc2 = one + t2; + + t1_0 = a2_0*w0 + a1_0*t0; + t1_1 = a2_1*w1 + a1_1*t1; + t2 = z2 * (poly3[0] + z2 * poly4[0]); + + t1_0 += __vlibm_TBL_sincos_lo[j0+xsb0]; /* sin_lo(t) */ + t1_1 += __vlibm_TBL_sincos_lo[j1+xsb1]; + t2 = z2 * (poly1[0] + z2 * (poly2[0] + t2)); + + *py0 = a1_0 + t1_0; + *py1 = a1_1 + t1_1; + t2 = x2 + x2 * t2; + *py2 = t2; + + break; + + case 2: + j0 = (xsb0 + 0x4000) & 0xffff8000; + j2 = (xsb2 + 0x4000) & 0xffff8000; + HI(&t0) = j0; + HI(&t2) = j2; + LO(&t0) = 0; + LO(&t2) = 0; + x0 -= t0; + x2 -= t2; + z0 = x0 * 
x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * (qq1 + z0 * qq2); + t1 = z1 * (poly3[1] + z1 * poly4[1]); + t2 = z2 * (qq1 + z2 * qq2); + w0 = x0 * (one + z0 * (pp1 + z0 * pp2)); + t1 = z1 * (poly1[1] + z1 * (poly2[1] + t1)); + w2 = x2 * (one + z2 * (pp1 + z2 * pp2)); + j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + xsb0 = (xsb0 >> 30) & 2; + xsb2 = (xsb2 >> 30) & 2; + + a1_0 = __vlibm_TBL_sincos_hi[j0+xsb0]; /* sin_hi(t) */ + a1_2 = __vlibm_TBL_sincos_hi[j2+xsb2]; + + a2_0 = __vlibm_TBL_sincos_hi[j0+1]; /* cos_hi(t) */ + a2_2 = __vlibm_TBL_sincos_hi[j2+1]; + /* cos_lo(t) */ + t2_0 = __vlibm_TBL_sincos_lo[j0+1] - (a1_0*w0 - a2_0*t0); + t2_2 = __vlibm_TBL_sincos_lo[j2+1] - (a1_2*w2 - a2_2*t2); + + *pc0 = a2_0 + t2_0; + *pc1 = one + t1; + *pc2 = a2_2 + t2_2; + + t1_0 = a2_0*w0 + a1_0*t0; + t1 = z1 * (poly3[0] + z1 * poly4[0]); + t1_2 = a2_2*w2 + a1_2*t2; + + t1_0 += __vlibm_TBL_sincos_lo[j0+xsb0]; /* sin_lo(t) */ + t1 = z1 * (poly1[0] + z1 * (poly2[0] + t1)); + t1_2 += __vlibm_TBL_sincos_lo[j2+xsb2]; + + *py0 = a1_0 + t1_0; + t1 = x1 + x1 * t1; + *py1 = t1; + *py2 = a1_2 + t1_2; + + break; + + case 3: + j0 = (xsb0 + 0x4000) & 0xffff8000; + HI(&t0) = j0; + LO(&t0) = 0; + x0 -= t0; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * (qq1 + z0 * qq2); + t1 = z1 * (poly3[1] + z1 * poly4[1]); + t2 = z2 * (poly3[1] + z2 * poly4[1]); + w0 = x0 * (one + z0 * (pp1 + z0 * pp2)); + t1 = z1 * (poly1[1] + z1 * (poly2[1] + t1)); + t2 = z2 * (poly1[1] + z2 * (poly2[1] + t2)); + j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + xsb0 = (xsb0 >> 30) & 2; + a1_0 = __vlibm_TBL_sincos_hi[j0+xsb0]; /* sin_hi(t) */ + + a2_0 = __vlibm_TBL_sincos_hi[j0+1]; /* cos_hi(t) */ + + t2_0 = __vlibm_TBL_sincos_lo[j0+1] - (a1_0*w0 - a2_0*t0); + + *pc0 = a2_0 + t2_0; + *pc1 = one + t1; + *pc2 = one + t2; + + t1_0 = a2_0*w0 + a1_0*t0; + t1 = z1 * (poly3[0] + z1 * poly4[0]); + t2 = z2 * (poly3[0] + z2 * poly4[0]); + + t1_0 
+= __vlibm_TBL_sincos_lo[j0+xsb0]; /* sin_lo(t) */ + t1 = z1 * (poly1[0] + z1 * (poly2[0] + t1)); + t2 = z2 * (poly1[0] + z2 * (poly2[0] + t2)); + + *py0 = a1_0 + t1_0; + t1 = x1 + x1 * t1; + *py1 = t1; + t2 = x2 + x2 * t2; + *py2 = t2; + + break; + + case 4: + j1 = (xsb1 + 0x4000) & 0xffff8000; + j2 = (xsb2 + 0x4000) & 0xffff8000; + HI(&t1) = j1; + HI(&t2) = j2; + LO(&t1) = 0; + LO(&t2) = 0; + x1 -= t1; + x2 -= t2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * (poly3[1] + z0 * poly4[1]); + t1 = z1 * (qq1 + z1 * qq2); + t2 = z2 * (qq1 + z2 * qq2); + t0 = z0 * (poly1[1] + z0 * (poly2[1] + t0)); + w1 = x1 * (one + z1 * (pp1 + z1 * pp2)); + w2 = x2 * (one + z2 * (pp1 + z2 * pp2)); + j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + xsb1 = (xsb1 >> 30) & 2; + xsb2 = (xsb2 >> 30) & 2; + + a1_1 = __vlibm_TBL_sincos_hi[j1+xsb1]; + a1_2 = __vlibm_TBL_sincos_hi[j2+xsb2]; + + a2_1 = __vlibm_TBL_sincos_hi[j1+1]; + a2_2 = __vlibm_TBL_sincos_hi[j2+1]; + /* cos_lo(t) */ + t2_1 = __vlibm_TBL_sincos_lo[j1+1] - (a1_1*w1 - a2_1*t1); + t2_2 = __vlibm_TBL_sincos_lo[j2+1] - (a1_2*w2 - a2_2*t2); + + *pc0 = one + t0; + *pc1 = a2_1 + t2_1; + *pc2 = a2_2 + t2_2; + + t0 = z0 * (poly3[0] + z0 * poly4[0]); + t1_1 = a2_1*w1 + a1_1*t1; + t1_2 = a2_2*w2 + a1_2*t2; + + t0 = z0 * (poly1[0] + z0 * (poly2[0] + t0)); + t1_1 += __vlibm_TBL_sincos_lo[j1+xsb1]; + t1_2 += __vlibm_TBL_sincos_lo[j2+xsb2]; + + t0 = x0 + x0 * t0; + *py0 = t0; + *py1 = a1_1 + t1_1; + *py2 = a1_2 + t1_2; + + break; + + case 5: + j1 = (xsb1 + 0x4000) & 0xffff8000; + HI(&t1) = j1; + LO(&t1) = 0; + x1 -= t1; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * (poly3[1] + z0 * poly4[1]); + t1 = z1 * (qq1 + z1 * qq2); + t2 = z2 * (poly3[1] + z2 * poly4[1]); + t0 = z0 * (poly1[1] + z0 * (poly2[1] + t0)); + w1 = x1 * (one + z1 * (pp1 + z1 * pp2)); + t2 = z2 * (poly1[1] + z2 * (poly2[1] + t2)); + j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; 
+ xsb1 = (xsb1 >> 30) & 2; + + a1_1 = __vlibm_TBL_sincos_hi[j1+xsb1]; + + a2_1 = __vlibm_TBL_sincos_hi[j1+1]; + + t2_1 = __vlibm_TBL_sincos_lo[j1+1] - (a1_1*w1 - a2_1*t1); + + *pc0 = one + t0; + *pc1 = a2_1 + t2_1; + *pc2 = one + t2; + + t0 = z0 * (poly3[0] + z0 * poly4[0]); + t1_1 = a2_1*w1 + a1_1*t1; + t2 = z2 * (poly3[0] + z2 * poly4[0]); + + t0 = z0 * (poly1[0] + z0 * (poly2[0] + t0)); + t1_1 += __vlibm_TBL_sincos_lo[j1+xsb1]; + t2 = z2 * (poly1[0] + z2 * (poly2[0] + t2)); + + t0 = x0 + x0 * t0; + *py0 = t0; + *py1 = a1_1 + t1_1; + t2 = x2 + x2 * t2; + *py2 = t2; + + break; + + case 6: + j2 = (xsb2 + 0x4000) & 0xffff8000; + HI(&t2) = j2; + LO(&t2) = 0; + x2 -= t2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * (poly3[1] + z0 * poly4[1]); + t1 = z1 * (poly3[1] + z1 * poly4[1]); + t2 = z2 * (qq1 + z2 * qq2); + t0 = z0 * (poly1[1] + z0 * (poly2[1] + t0)); + t1 = z1 * (poly1[1] + z1 * (poly2[1] + t1)); + w2 = x2 * (one + z2 * (pp1 + z2 * pp2)); + j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + xsb2 = (xsb2 >> 30) & 2; + a1_2 = __vlibm_TBL_sincos_hi[j2+xsb2]; + + a2_2 = __vlibm_TBL_sincos_hi[j2+1]; + + t2_2 = __vlibm_TBL_sincos_lo[j2+1] - (a1_2*w2 - a2_2*t2); + + *pc0 = one + t0; + *pc1 = one + t1; + *pc2 = a2_2 + t2_2; + + t0 = z0 * (poly3[0] + z0 * poly4[0]); + t1 = z1 * (poly3[0] + z1 * poly4[0]); + t1_2 = a2_2*w2 + a1_2*t2; + + t0 = z0 * (poly1[0] + z0 * (poly2[0] + t0)); + t1 = z1 * (poly1[0] + z1 * (poly2[0] + t1)); + t1_2 += __vlibm_TBL_sincos_lo[j2+xsb2]; + + t0 = x0 + x0 * t0; + *py0 = t0; + t1 = x1 + x1 * t1; + *py1 = t1; + *py2 = a1_2 + t1_2; + + break; + + case 7: /* All are < 5/32 */ + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * (poly3[1] + z0 * poly4[1]); + t1 = z1 * (poly3[1] + z1 * poly4[1]); + t2 = z2 * (poly3[1] + z2 * poly4[1]); + t0 = z0 * (poly1[1] + z0 * (poly2[1] + t0)); + t1 = z1 * (poly1[1] + z1 * (poly2[1] + t1)); + t2 = z2 * (poly1[1] + z2 * (poly2[1] + t2)); + *pc0 = one + t0; + *pc1 = one + t1; + *pc2 
= one + t2; + t0 = z0 * (poly3[0] + z0 * poly4[0]); + t1 = z1 * (poly3[0] + z1 * poly4[0]); + t2 = z2 * (poly3[0] + z2 * poly4[0]); + t0 = z0 * (poly1[0] + z0 * (poly2[0] + t0)); + t1 = z1 * (poly1[0] + z1 * (poly2[0] + t1)); + t2 = z2 * (poly1[0] + z2 * (poly2[0] + t2)); + t0 = x0 + x0 * t0; + t1 = x1 + x1 * t1; + t2 = x2 + x2 * t2; + *py0 = t0; + *py1 = t1; + *py2 = t2; + break; + } + + x += stridex; + y += stridey; + c += stridec; + i = 0; + } while (--n > 0); /* END MAIN LOOP */ + + /* + * CLEAN UP last 0, 1, or 2 elts. + */ + if (i > 0) /* Clean up elts at tail. i < 3. */ + { + double a1_0, a1_1, a2_0, a2_1; + double w0, w1; + double t0, t1, t1_0, t1_1, t2_0, t2_1; + double z0, z1; + unsigned j0, j1; + + if (i > 1) + { + if (hx1 < 0x3fc40000) + { + z1 = x1 * x1; + t1 = z1 * (poly3[1] + z1 * poly4[1]); + t1 = z1 * (poly1[1] + z1 * (poly2[1] + t1)); + t1 = one + t1; + *pc1 = t1; + t1 = z1 * (poly3[0] + z1 * poly4[0]); + t1 = z1 * (poly1[0] + z1 * (poly2[0] + t1)); + t1 = x1 + x1 * t1; + *py1 = t1; + } + else + { + j1 = (xsb1 + 0x4000) & 0xffff8000; + HI(&t1) = j1; + LO(&t1) = 0; + x1 -= t1; + z1 = x1 * x1; + t1 = z1 * (qq1 + z1 * qq2); + w1 = x1 * (one + z1 * (pp1 + z1 * pp2)); + j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + xsb1 = (xsb1 >> 30) & 2; + a1_1 = __vlibm_TBL_sincos_hi[j1+xsb1]; + a2_1 = __vlibm_TBL_sincos_hi[j1+1]; + t2_1 = __vlibm_TBL_sincos_lo[j1+1] - (a1_1*w1 - a2_1*t1); + *pc1 = a2_1 + t2_1; + t1_1 = a2_1*w1 + a1_1*t1; + t1_1 += __vlibm_TBL_sincos_lo[j1+xsb1]; + *py1 = a1_1 + t1_1; + } + } + if (hx0 < 0x3fc40000) + { + z0 = x0 * x0; + t0 = z0 * (poly3[1] + z0 * poly4[1]); + t0 = z0 * (poly1[1] + z0 * (poly2[1] + t0)); + t0 = one + t0; + *pc0 = t0; + t0 = z0 * (poly3[0] + z0 * poly4[0]); + t0 = z0 * (poly1[0] + z0 * (poly2[0] + t0)); + t0 = x0 + x0 * t0; + *py0 = t0; + } + else + { + j0 = (xsb0 + 0x4000) & 0xffff8000; + HI(&t0) = j0; + LO(&t0) = 0; + x0 -= t0; + z0 = x0 * x0; + t0 = z0 * (qq1 + z0 * qq2); + w0 = x0 * (one + z0 * (pp1 + 
z0 * pp2)); + j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + xsb0 = (xsb0 >> 30) & 2; + a1_0 = __vlibm_TBL_sincos_hi[j0+xsb0]; /* sin_hi(t) */ + a2_0 = __vlibm_TBL_sincos_hi[j0+1]; /* cos_hi(t) */ + t2_0 = __vlibm_TBL_sincos_lo[j0+1] - (a1_0*w0 - a2_0*t0); + *pc0 = a2_0 + t2_0; + t1_0 = a2_0*w0 + a1_0*t0; + t1_0 += __vlibm_TBL_sincos_lo[j0+xsb0]; /* sin_lo(t) */ + *py0 = a1_0 + t1_0; + } + } /* END CLEAN UP */ + + if (!biguns) + return; + + /* + * Take care of BIGUNS. + */ + n = nsave; + x = xsave; + stridex = sxsave; + y = ysave; + stridey = sysave; + c = csave; + stridec = scsave; + biguns = 0; + + x0_or_one[1] = 1.0; + x1_or_one[1] = 1.0; + x2_or_one[1] = 1.0; + x0_or_one[3] = -1.0; + x1_or_one[3] = -1.0; + x2_or_one[3] = -1.0; + y0_or_zero[1] = 0.0; + y1_or_zero[1] = 0.0; + y2_or_zero[1] = 0.0; + y0_or_zero[3] = 0.0; + y1_or_zero[3] = 0.0; + y2_or_zero[3] = 0.0; + + do + { + double fn0, fn1, fn2, a0, a1, a2, w0, w1, w2, y0, y1, y2; + unsigned hx; + int n0, n1, n2; + + /* + * Find 3 more to work on: Not already done, not too big. + */ +loop0: + hx = HI(x); + xsb0 = hx >> 31; + hx &= ~0x80000000; + if (hx <= 0x3fe921fb) /* Done above. */ + { + x += stridex; + y += stridey; + c += stridec; + i = 0; + if (--n <= 0) + break; + goto loop0; + } + if (hx > 0x413921fb) /* (1.6471e+06) Too big: leave it. 
*/ + { + if (hx >= 0x7ff00000) /* Inf or NaN */ + { + x0 = *x; + *y = x0 - x0; + *c = x0 - x0; + } + else { + biguns = 1; + } + x += stridex; + y += stridey; + c += stridec; + i = 0; + if (--n <= 0) + break; + goto loop0; + } + x0 = *x; + py0 = y; + pc0 = c; + x += stridex; + y += stridey; + c += stridec; + i = 1; + if (--n <= 0) + break; + +loop1: + hx = HI(x); + xsb1 = hx >> 31; + hx &= ~0x80000000; + if (hx <= 0x3fe921fb) + { + x += stridex; + y += stridey; + c += stridec; + i = 1; + if (--n <= 0) + break; + goto loop1; + } + if (hx > 0x413921fb) + { + if (hx >= 0x7ff00000) + { + x1 = *x; + *y = x1 - x1; + *c = x1 - x1; + } + else { + biguns = 1; + } + x += stridex; + y += stridey; + c += stridec; + i = 1; + if (--n <= 0) + break; + goto loop1; + } + x1 = *x; + py1 = y; + pc1 = c; + x += stridex; + y += stridey; + c += stridec; + i = 2; + if (--n <= 0) + break; + +loop2: + hx = HI(x); + xsb2 = hx >> 31; + hx &= ~0x80000000; + if (hx <= 0x3fe921fb) + { + x += stridex; + y += stridey; + c += stridec; + i = 2; + if (--n <= 0) + break; + goto loop2; + } + if (hx > 0x413921fb) + { + if (hx >= 0x7ff00000) + { + x2 = *x; + *y = x2 - x2; + *c = x2 - x2; + } + else { + biguns = 1; + } + x += stridex; + y += stridey; + c += stridec; + i = 2; + if (--n <= 0) + break; + goto loop2; + } + x2 = *x; + py2 = y; + pc2 = c; + + n0 = (int) (x0 * invpio2 + half[xsb0]); + n1 = (int) (x1 * invpio2 + half[xsb1]); + n2 = (int) (x2 * invpio2 + half[xsb2]); + fn0 = (double) n0; + fn1 = (double) n1; + fn2 = (double) n2; + n0 &= 3; + n1 &= 3; + n2 &= 3; + a0 = x0 - fn0 * pio2_1; + a1 = x1 - fn1 * pio2_1; + a2 = x2 - fn2 * pio2_1; + w0 = fn0 * pio2_2; + w1 = fn1 * pio2_2; + w2 = fn2 * pio2_2; + x0 = a0 - w0; + x1 = a1 - w1; + x2 = a2 - w2; + y0 = (a0 - x0) - w0; + y1 = (a1 - x1) - w1; + y2 = (a2 - x2) - w2; + a0 = x0; + a1 = x1; + a2 = x2; + w0 = fn0 * pio2_3 - y0; + w1 = fn1 * pio2_3 - y1; + w2 = fn2 * pio2_3 - y2; + x0 = a0 - w0; + x1 = a1 - w1; + x2 = a2 - w2; + y0 = (a0 - x0) - w0; + y1 
= (a1 - x1) - w1; + y2 = (a2 - x2) - w2; + a0 = x0; + a1 = x1; + a2 = x2; + w0 = fn0 * pio2_3t - y0; + w1 = fn1 * pio2_3t - y1; + w2 = fn2 * pio2_3t - y2; + x0 = a0 - w0; + x1 = a1 - w1; + x2 = a2 - w2; + y0 = (a0 - x0) - w0; + y1 = (a1 - x1) - w1; + y2 = (a2 - x2) - w2; + xsb2 = HI(&x2); + i = ((xsb2 & ~0x80000000) - 0x3fc40000) >> 31; + xsb1 = HI(&x1); + i |= (((xsb1 & ~0x80000000) - 0x3fc40000) >> 30) & 2; + xsb0 = HI(&x0); + i |= (((xsb0 & ~0x80000000) - 0x3fc40000) >> 29) & 4; + switch (i) + { + double a1_0, a1_1, a1_2, a2_0, a2_1, a2_2; + double t0, t1, t2, t1_0, t1_1, t1_2, t2_0, t2_1, t2_2; + double z0, z1, z2; + unsigned j0, j1, j2; + + case 0: + j0 = (xsb0 + 0x4000) & 0xffff8000; + j1 = (xsb1 + 0x4000) & 0xffff8000; + j2 = (xsb2 + 0x4000) & 0xffff8000; + HI(&t0) = j0; + HI(&t1) = j1; + HI(&t2) = j2; + LO(&t0) = 0; + LO(&t1) = 0; + LO(&t2) = 0; + x0 = (x0 - t0) + y0; + x1 = (x1 - t1) + y1; + x2 = (x2 - t2) + y2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * (qq1 + z0 * qq2); + t1 = z1 * (qq1 + z1 * qq2); + t2 = z2 * (qq1 + z2 * qq2); + w0 = x0 * (one + z0 * (pp1 + z0 * pp2)); + w1 = x1 * (one + z1 * (pp1 + z1 * pp2)); + w2 = x2 * (one + z2 * (pp1 + z2 * pp2)); + j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + xsb0 = (xsb0 >> 30) & 2; + xsb1 = (xsb1 >> 30) & 2; + xsb2 = (xsb2 >> 30) & 2; + n0 ^= (xsb0 & ~(n0 << 1)); + n1 ^= (xsb1 & ~(n1 << 1)); + n2 ^= (xsb2 & ~(n2 << 1)); + xsb0 |= 1; + xsb1 |= 1; + xsb2 |= 1; + + a1_0 = __vlibm_TBL_sincos_hi[j0+n0]; + a1_1 = __vlibm_TBL_sincos_hi[j1+n1]; + a1_2 = __vlibm_TBL_sincos_hi[j2+n2]; + + a2_0 = __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)]; + a2_1 = __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)]; + a2_2 = __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)]; + + t2_0 = __vlibm_TBL_sincos_lo[j0+((n0+xsb0)&3)] - (a1_0*w0 - a2_0*t0); + t2_1 = __vlibm_TBL_sincos_lo[j1+((n1+xsb1)&3)] - (a1_1*w1 - a2_1*t1); + 
t2_2 = __vlibm_TBL_sincos_lo[j2+((n2+xsb2)&3)] - (a1_2*w2 - a2_2*t2); + + w0 *= a2_0; + w1 *= a2_1; + w2 *= a2_2; + + *pc0 = a2_0 + t2_0; + *pc1 = a2_1 + t2_1; + *pc2 = a2_2 + t2_2; + + t1_0 = w0 + a1_0*t0; + t1_1 = w1 + a1_1*t1; + t1_2 = w2 + a1_2*t2; + + t1_0 += __vlibm_TBL_sincos_lo[j0+n0]; + t1_1 += __vlibm_TBL_sincos_lo[j1+n1]; + t1_2 += __vlibm_TBL_sincos_lo[j2+n2]; + + *py0 = a1_0 + t1_0; + *py1 = a1_1 + t1_1; + *py2 = a1_2 + t1_2; + + break; + + case 1: + j0 = (xsb0 + 0x4000) & 0xffff8000; + j1 = (xsb1 + 0x4000) & 0xffff8000; + j2 = n2 & 1; + HI(&t0) = j0; + HI(&t1) = j1; + LO(&t0) = 0; + LO(&t1) = 0; + x2_or_one[0] = x2; + x2_or_one[2] = -x2; + x0 = (x0 - t0) + y0; + x1 = (x1 - t1) + y1; + y2_or_zero[0] = y2; + y2_or_zero[2] = -y2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * (qq1 + z0 * qq2); + t1 = z1 * (qq1 + z1 * qq2); + t2 = z2 * (poly3[j2] + z2 * poly4[j2]); + w0 = x0 * (one + z0 * (pp1 + z0 * pp2)); + w1 = x1 * (one + z1 * (pp1 + z1 * pp2)); + t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2)); + j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + xsb0 = (xsb0 >> 30) & 2; + xsb1 = (xsb1 >> 30) & 2; + n0 ^= (xsb0 & ~(n0 << 1)); + n1 ^= (xsb1 & ~(n1 << 1)); + xsb0 |= 1; + xsb1 |= 1; + a1_0 = __vlibm_TBL_sincos_hi[j0+n0]; + a1_1 = __vlibm_TBL_sincos_hi[j1+n1]; + + a2_0 = __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)]; + a2_1 = __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)]; + + t2_0 = __vlibm_TBL_sincos_lo[j0+((n0+xsb0)&3)] - (a1_0*w0 - a2_0*t0); + t2_1 = __vlibm_TBL_sincos_lo[j1+((n1+xsb1)&3)] - (a1_1*w1 - a2_1*t1); + t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2); + + *pc0 = a2_0 + t2_0; + *pc1 = a2_1 + t2_1; + *py2 = t2; + + n2 = (n2 + 1) & 3; + j2 = (j2 + 1) & 1; + t2 = z2 * (poly3[j2] + z2 * poly4[j2]); + + t1_0 = a2_0*w0 + a1_0*t0; + t1_1 = a2_1*w1 + a1_1*t1; + t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2)); + + t1_0 += __vlibm_TBL_sincos_lo[j0+n0]; + t1_1 += 
__vlibm_TBL_sincos_lo[j1+n1]; + t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2); + + *py0 = a1_0 + t1_0; + *py1 = a1_1 + t1_1; + *pc2 = t2; + + break; + + case 2: + j0 = (xsb0 + 0x4000) & 0xffff8000; + j1 = n1 & 1; + j2 = (xsb2 + 0x4000) & 0xffff8000; + HI(&t0) = j0; + HI(&t2) = j2; + LO(&t0) = 0; + LO(&t2) = 0; + x1_or_one[0] = x1; + x1_or_one[2] = -x1; + x0 = (x0 - t0) + y0; + y1_or_zero[0] = y1; + y1_or_zero[2] = -y1; + x2 = (x2 - t2) + y2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * (qq1 + z0 * qq2); + t1 = z1 * (poly3[j1] + z1 * poly4[j1]); + t2 = z2 * (qq1 + z2 * qq2); + w0 = x0 * (one + z0 * (pp1 + z0 * pp2)); + t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1)); + w2 = x2 * (one + z2 * (pp1 + z2 * pp2)); + j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + xsb0 = (xsb0 >> 30) & 2; + xsb2 = (xsb2 >> 30) & 2; + n0 ^= (xsb0 & ~(n0 << 1)); + n2 ^= (xsb2 & ~(n2 << 1)); + xsb0 |= 1; + xsb2 |= 1; + + a1_0 = __vlibm_TBL_sincos_hi[j0+n0]; + a1_2 = __vlibm_TBL_sincos_hi[j2+n2]; + + a2_0 = __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)]; + a2_2 = __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)]; + + t2_0 = __vlibm_TBL_sincos_lo[j0+((n0+xsb0)&3)] - (a1_0*w0 - a2_0*t0); + t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1); + t2_2 = __vlibm_TBL_sincos_lo[j2+((n2+xsb2)&3)] - (a1_2*w2 - a2_2*t2); + + *pc0 = a2_0 + t2_0; + *py1 = t1; + *pc2 = a2_2 + t2_2; + + n1 = (n1 + 1) & 3; + j1 = (j1 + 1) & 1; + t1 = z1 * (poly3[j1] + z1 * poly4[j1]); + + t1_0 = a2_0*w0 + a1_0*t0; + t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1)); + t1_2 = a2_2*w2 + a1_2*t2; + + t1_0 += __vlibm_TBL_sincos_lo[j0+n0]; + t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1); + t1_2 += __vlibm_TBL_sincos_lo[j2+n2]; + + *py0 = a1_0 + t1_0; + *pc1 = t1; + *py2 = a1_2 + t1_2; + + break; + + case 3: + j0 = (xsb0 + 0x4000) & 0xffff8000; + j1 = n1 & 1; + j2 = n2 & 1; + HI(&t0) = j0; + LO(&t0) = 0; + x1_or_one[0] = x1; + x1_or_one[2] = 
-x1; + x2_or_one[0] = x2; + x2_or_one[2] = -x2; + x0 = (x0 - t0) + y0; + y1_or_zero[0] = y1; + y1_or_zero[2] = -y1; + y2_or_zero[0] = y2; + y2_or_zero[2] = -y2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * (qq1 + z0 * qq2); + t1 = z1 * (poly3[j1] + z1 * poly4[j1]); + t2 = z2 * (poly3[j2] + z2 * poly4[j2]); + w0 = x0 * (one + z0 * (pp1 + z0 * pp2)); + t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1)); + t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2)); + j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + xsb0 = (xsb0 >> 30) & 2; + n0 ^= (xsb0 & ~(n0 << 1)); + xsb0 |= 1; + + a1_0 = __vlibm_TBL_sincos_hi[j0+n0]; + a2_0 = __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)]; + + t2_0 = __vlibm_TBL_sincos_lo[j0+((n0+xsb0)&3)] - (a1_0*w0 - a2_0*t0); + t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1); + t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2); + + *pc0 = a2_0 + t2_0; + *py1 = t1; + *py2 = t2; + + n1 = (n1 + 1) & 3; + n2 = (n2 + 1) & 3; + j1 = (j1 + 1) & 1; + j2 = (j2 + 1) & 1; + + t1_0 = a2_0*w0 + a1_0*t0; + t1 = z1 * (poly3[j1] + z1 * poly4[j1]); + t2 = z2 * (poly3[j2] + z2 * poly4[j2]); + + t1_0 += __vlibm_TBL_sincos_lo[j0+n0]; + t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1)); + t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2)); + + t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1); + t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2); + + *py0 = a1_0 + t1_0; + *pc1 = t1; + *pc2 = t2; + + break; + + case 4: + j0 = n0 & 1; + j1 = (xsb1 + 0x4000) & 0xffff8000; + j2 = (xsb2 + 0x4000) & 0xffff8000; + HI(&t1) = j1; + HI(&t2) = j2; + LO(&t1) = 0; + LO(&t2) = 0; + x0_or_one[0] = x0; + x0_or_one[2] = -x0; + y0_or_zero[0] = y0; + y0_or_zero[2] = -y0; + x1 = (x1 - t1) + y1; + x2 = (x2 - t2) + y2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * (poly3[j0] + z0 * poly4[j0]); + t1 = z1 * (qq1 + z1 * qq2); + t2 = z2 * (qq1 + z2 * qq2); + t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0)); + w1 = x1 * (one + z1 * (pp1 + z1 * pp2)); + w2 
= x2 * (one + z2 * (pp1 + z2 * pp2)); + j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + xsb1 = (xsb1 >> 30) & 2; + xsb2 = (xsb2 >> 30) & 2; + n1 ^= (xsb1 & ~(n1 << 1)); + n2 ^= (xsb2 & ~(n2 << 1)); + xsb1 |= 1; + xsb2 |= 1; + + a1_1 = __vlibm_TBL_sincos_hi[j1+n1]; + a1_2 = __vlibm_TBL_sincos_hi[j2+n2]; + + a2_1 = __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)]; + a2_2 = __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)]; + + t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0); + t2_1 = __vlibm_TBL_sincos_lo[j1+((n1+xsb1)&3)] - (a1_1*w1 - a2_1*t1); + t2_2 = __vlibm_TBL_sincos_lo[j2+((n2+xsb2)&3)] - (a1_2*w2 - a2_2*t2); + + *py0 = t0; + *pc1 = a2_1 + t2_1; + *pc2 = a2_2 + t2_2; + + n0 = (n0 + 1) & 3; + j0 = (j0 + 1) & 1; + t0 = z0 * (poly3[j0] + z0 * poly4[j0]); + + t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0)); + t1_1 = a2_1*w1 + a1_1*t1; + t1_2 = a2_2*w2 + a1_2*t2; + + t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0); + t1_1 += __vlibm_TBL_sincos_lo[j1+n1]; + t1_2 += __vlibm_TBL_sincos_lo[j2+n2]; + + *py1 = a1_1 + t1_1; + *py2 = a1_2 + t1_2; + *pc0 = t0; + + break; + + case 5: + j0 = n0 & 1; + j1 = (xsb1 + 0x4000) & 0xffff8000; + j2 = n2 & 1; + HI(&t1) = j1; + LO(&t1) = 0; + x0_or_one[0] = x0; + x0_or_one[2] = -x0; + x2_or_one[0] = x2; + x2_or_one[2] = -x2; + y0_or_zero[0] = y0; + y0_or_zero[2] = -y0; + x1 = (x1 - t1) + y1; + y2_or_zero[0] = y2; + y2_or_zero[2] = -y2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * (poly3[j0] + z0 * poly4[j0]); + t1 = z1 * (qq1 + z1 * qq2); + t2 = z2 * (poly3[j2] + z2 * poly4[j2]); + t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0)); + w1 = x1 * (one + z1 * (pp1 + z1 * pp2)); + t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2)); + j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + xsb1 = (xsb1 >> 30) & 2; + n1 ^= (xsb1 & ~(n1 << 1)); + xsb1 |= 1; + + a1_1 = __vlibm_TBL_sincos_hi[j1+n1]; + a2_1 = __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)]; + + t0 = x0_or_one[n0] + 
(y0_or_zero[n0] + x0_or_one[n0] * t0); + t2_1 = __vlibm_TBL_sincos_lo[j1+((n1+xsb1)&3)] - (a1_1*w1 - a2_1*t1); + t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2); + + *py0 = t0; + *pc1 = a2_1 + t2_1; + *py2 = t2; + + n0 = (n0 + 1) & 3; + n2 = (n2 + 1) & 3; + j0 = (j0 + 1) & 1; + j2 = (j2 + 1) & 1; + + t0 = z0 * (poly3[j0] + z0 * poly4[j0]); + t1_1 = a2_1*w1 + a1_1*t1; + t2 = z2 * (poly3[j2] + z2 * poly4[j2]); + + t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0)); + t1_1 += __vlibm_TBL_sincos_lo[j1+n1]; + t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2)); + + t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0); + t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2); + + *pc0 = t0; + *py1 = a1_1 + t1_1; + *pc2 = t2; + + break; + + case 6: + j0 = n0 & 1; + j1 = n1 & 1; + j2 = (xsb2 + 0x4000) & 0xffff8000; + HI(&t2) = j2; + LO(&t2) = 0; + x0_or_one[0] = x0; + x0_or_one[2] = -x0; + x1_or_one[0] = x1; + x1_or_one[2] = -x1; + y0_or_zero[0] = y0; + y0_or_zero[2] = -y0; + y1_or_zero[0] = y1; + y1_or_zero[2] = -y1; + x2 = (x2 - t2) + y2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * (poly3[j0] + z0 * poly4[j0]); + t1 = z1 * (poly3[j1] + z1 * poly4[j1]); + t2 = z2 * (qq1 + z2 * qq2); + t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0)); + t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1)); + w2 = x2 * (one + z2 * (pp1 + z2 * pp2)); + j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + xsb2 = (xsb2 >> 30) & 2; + n2 ^= (xsb2 & ~(n2 << 1)); + xsb2 |= 1; + + a1_2 = __vlibm_TBL_sincos_hi[j2+n2]; + a2_2 = __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)]; + + t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0); + t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1); + t2_2 = __vlibm_TBL_sincos_lo[j2+((n2+xsb2)&3)] - (a1_2*w2 - a2_2*t2); + + *py0 = t0; + *py1 = t1; + *pc2 = a2_2 + t2_2; + + n0 = (n0 + 1) & 3; + n1 = (n1 + 1) & 3; + j0 = (j0 + 1) & 1; + j1 = (j1 + 1) & 1; + + t0 = z0 * (poly3[j0] + z0 * poly4[j0]); + t1 = z1 * (poly3[j1] + z1 * poly4[j1]); 
+ t1_2 = a2_2*w2 + a1_2*t2; + + t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0)); + t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1)); + t1_2 += __vlibm_TBL_sincos_lo[j2+n2]; + + t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0); + t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1); + + *pc0 = t0; + *pc1 = t1; + *py2 = a1_2 + t1_2; + + break; + + case 7: + j0 = n0 & 1; + j1 = n1 & 1; + j2 = n2 & 1; + x0_or_one[0] = x0; + x0_or_one[2] = -x0; + x1_or_one[0] = x1; + x1_or_one[2] = -x1; + x2_or_one[0] = x2; + x2_or_one[2] = -x2; + y0_or_zero[0] = y0; + y0_or_zero[2] = -y0; + y1_or_zero[0] = y1; + y1_or_zero[2] = -y1; + y2_or_zero[0] = y2; + y2_or_zero[2] = -y2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * (poly3[j0] + z0 * poly4[j0]); + t1 = z1 * (poly3[j1] + z1 * poly4[j1]); + t2 = z2 * (poly3[j2] + z2 * poly4[j2]); + t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0)); + t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1)); + t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2)); + t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0); + t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1); + t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2); + *py0 = t0; + *py1 = t1; + *py2 = t2; + + n0 = (n0 + 1) & 3; + n1 = (n1 + 1) & 3; + n2 = (n2 + 1) & 3; + j0 = (j0 + 1) & 1; + j1 = (j1 + 1) & 1; + j2 = (j2 + 1) & 1; + t0 = z0 * (poly3[j0] + z0 * poly4[j0]); + t1 = z1 * (poly3[j1] + z1 * poly4[j1]); + t2 = z2 * (poly3[j2] + z2 * poly4[j2]); + t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0)); + t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1)); + t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2)); + t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0); + t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1); + t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2); + *pc0 = t0; + *pc1 = t1; + *pc2 = t2; + break; + } + + x += stridex; + y += stridey; + c += stridec; + i = 0; + } while (--n > 0); + + if (i > 0) + { + double a1_0, a1_1, a2_0, a2_1; + 
double t0, t1, t1_0, t1_1, t2_0, t2_1; + double fn0, fn1, a0, a1, w0, w1, y0, y1; + double z0, z1; + unsigned j0, j1; + int n0, n1; + + if (i > 1) + { + n1 = (int) (x1 * invpio2 + half[xsb1]); + fn1 = (double) n1; + n1 &= 3; + a1 = x1 - fn1 * pio2_1; + w1 = fn1 * pio2_2; + x1 = a1 - w1; + y1 = (a1 - x1) - w1; + a1 = x1; + w1 = fn1 * pio2_3 - y1; + x1 = a1 - w1; + y1 = (a1 - x1) - w1; + a1 = x1; + w1 = fn1 * pio2_3t - y1; + x1 = a1 - w1; + y1 = (a1 - x1) - w1; + xsb1 = HI(&x1); + if ((xsb1 & ~0x80000000) < 0x3fc40000) + { + j1 = n1 & 1; + x1_or_one[0] = x1; + x1_or_one[2] = -x1; + y1_or_zero[0] = y1; + y1_or_zero[2] = -y1; + z1 = x1 * x1; + t1 = z1 * (poly3[j1] + z1 * poly4[j1]); + t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1)); + t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1); + *py1 = t1; + n1 = (n1 + 1) & 3; + j1 = (j1 + 1) & 1; + t1 = z1 * (poly3[j1] + z1 * poly4[j1]); + t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1)); + t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1); + *pc1 = t1; + } + else + { + j1 = (xsb1 + 0x4000) & 0xffff8000; + HI(&t1) = j1; + LO(&t1) = 0; + x1 = (x1 - t1) + y1; + z1 = x1 * x1; + t1 = z1 * (qq1 + z1 * qq2); + w1 = x1 * (one + z1 * (pp1 + z1 * pp2)); + j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + xsb1 = (xsb1 >> 30) & 2; + n1 ^= (xsb1 & ~(n1 << 1)); + xsb1 |= 1; + a1_1 = __vlibm_TBL_sincos_hi[j1+n1]; + a2_1 = __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)]; + t2_1 = __vlibm_TBL_sincos_lo[j1+((n1+xsb1)&3)] - (a1_1*w1 - a2_1*t1); + *pc1 = a2_1 + t2_1; + t1_1 = a2_1*w1 + a1_1*t1; + t1_1 += __vlibm_TBL_sincos_lo[j1+n1]; + *py1 = a1_1 + t1_1; + } + } + n0 = (int) (x0 * invpio2 + half[xsb0]); + fn0 = (double) n0; + n0 &= 3; + a0 = x0 - fn0 * pio2_1; + w0 = fn0 * pio2_2; + x0 = a0 - w0; + y0 = (a0 - x0) - w0; + a0 = x0; + w0 = fn0 * pio2_3 - y0; + x0 = a0 - w0; + y0 = (a0 - x0) - w0; + a0 = x0; + w0 = fn0 * pio2_3t - y0; + x0 = a0 - w0; + y0 = (a0 - x0) - w0; + xsb0 = HI(&x0); + if ((xsb0 & ~0x80000000) < 0x3fc40000) + { + 
j0 = n0 & 1; + x0_or_one[0] = x0; + x0_or_one[2] = -x0; + y0_or_zero[0] = y0; + y0_or_zero[2] = -y0; + z0 = x0 * x0; + t0 = z0 * (poly3[j0] + z0 * poly4[j0]); + t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0)); + t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0); + *py0 = t0; + n0 = (n0 + 1) & 3; + j0 = (j0 + 1) & 1; + t0 = z0 * (poly3[j0] + z0 * poly4[j0]); + t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0)); + t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0); + *pc0 = t0; + } + else + { + j0 = (xsb0 + 0x4000) & 0xffff8000; + HI(&t0) = j0; + LO(&t0) = 0; + x0 = (x0 - t0) + y0; + z0 = x0 * x0; + t0 = z0 * (qq1 + z0 * qq2); + w0 = x0 * (one + z0 * (pp1 + z0 * pp2)); + j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; + xsb0 = (xsb0 >> 30) & 2; + n0 ^= (xsb0 & ~(n0 << 1)); + xsb0 |= 1; + a1_0 = __vlibm_TBL_sincos_hi[j0+n0]; + a2_0 = __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)]; + t2_0 = __vlibm_TBL_sincos_lo[j0+((n0+xsb0)&3)] - (a1_0*w0 - a2_0*t0); + *pc0 = a2_0 + t2_0; + t1_0 = a2_0*w0 + a1_0*t0; + t1_0 += __vlibm_TBL_sincos_lo[j0+n0]; + *py0 = a1_0 + t1_0; + } + } + + if (biguns) { + __vlibm_vsincos_big(nsave, xsave, sxsave, ysave, sysave, csave, scsave, 0x413921fb); + } +} diff --git a/usr/src/lib/libmvec/common/__vsincosbig.c b/usr/src/lib/libmvec/common/__vsincosbig.c new file mode 100644 index 0000000000..dea6e37985 --- /dev/null +++ b/usr/src/lib/libmvec/common/__vsincosbig.c @@ -0,0 +1,174 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/isa_defs.h>
+/*
+ * HI(p) and LO(p) name the two 32-bit words of the double that p points at:
+ * HI is the int at the most significant end, LO the unsigned at the other
+ * end, with the word order selected by _LITTLE_ENDIAN.
+ */
+#ifdef _LITTLE_ENDIAN
+#define HI(x)	*(1+(int*)x)
+#define LO(x)	*(unsigned*)x
+#else
+#define HI(x)	*(int*)x
+#define LO(x)	*(1+(unsigned*)x)
+#endif
+
+#ifdef __RESTRICT
+#define restrict _Restrict
+#else
+#define restrict
+#endif
+
+extern const double __vlibm_TBL_sincos_hi[], __vlibm_TBL_sincos_lo[];
+extern int __vlibm_rem_pio2m(double *, double *, int, int, int);
+/*
+ * two24 is 2^24.  p1..p4 and q1..q4 are the polynomial coefficients used
+ * when the reduced argument is below 5/32; pp1..pp2 and qq1..qq2 are the
+ * shorter sets applied to the small offset from a table breakpoint.
+ */
+static const double
+	zero = 0.0,
+	one = 1.0,
+	two24 = 16777216.0,
+	pp1 = -1.666666666605760465276263943134982554676e-0001,
+	pp2 = 8.333261209690963126718376566146180944442e-0003,
+	p1 = -1.666666666666629669805215138920301589656e-0001,
+	p2 = 8.333333332390951295683993455280336376663e-0003,
+	p3 = -1.984126237997976692791551778230098403960e-0004,
+	p4 = 2.753403624854277237649987622848330351110e-0006,
+	qq1 = -4.999999999977710986407023955908711557870e-0001,
+	qq2 = 4.166654863857219350645055881018842089580e-0002,
+	q1 = -4.999999999999931701464060878888294524481e-0001,
+	q2 = 4.166666666394861917535640593963708222319e-0002,
+	q3 = -1.388888552656142867832756687736851681462e-0003,
+	q4 = 2.478519423681460796618128289454530524759e-0005;
+/*
+ * __vlibm_vsincos_big: sine and cosine of the large-magnitude elements of
+ * a strided vector.
+ *
+ * Walks n elements of x.  For every finite element whose |x| has a high
+ * 32-bit word strictly greater than thresh, the sine is stored through ss
+ * and the cosine through cc.  Every other element (high word of |x| at or
+ * below thresh, or Inf/NaN) is skipped and its output slots are left
+ * untouched.
+ *
+ *	n		element count; the loop tests n--, so it must not
+ *			be negative
+ *	x, stridex	input vector and the distance, in doubles, between
+ *			consecutive inputs
+ *	ss, stridess	sine outputs and their spacing in doubles
+ *	cc, stridecc	cosine outputs and their spacing in doubles
+ *	thresh		bound compared against the high word of |x|
+ *
+ * NOTE(review): HI()/LO() read and write doubles through int/unsigned
+ * pointers; this presumably relies on the build tolerating that type
+ * punning -- confirm against the compiler flags used for libmvec.
+ */
+void
+__vlibm_vsincos_big(int n, double * restrict x, int stridex,
+				double * restrict ss, int stridess,
+				double * restrict cc, int stridecc, int thresh)
+{
+	for (; n--; x += stridex, ss += stridess, cc += stridecc)
+	{
+		double		ts, tc, tx, tt[3], ty[2], t, w, z, c, s;
+		unsigned	hx, xsb;
+		int		e0, nx, j;
+
+		hx = HI(x);			/* high 32 bits of *x */
+		xsb = hx & 0x80000000;		/* sign bit of x */
+		hx &= ~0x80000000;		/* high word of |x| */
+		if (hx <= thresh || hx >= 0x7ff00000)
+			continue;		/* at/below thresh, or Inf/NaN */
+
+		/*
+		 * Argument reduction part.
+		 *
+		 * |x| is rewritten as tx * 2^e0: tx keeps the mantissa bits
+		 * of x but is given exponent field 0x416 (1046), which puts
+		 * it in [2^23, 2^24), and e0 is the biased exponent of x
+		 * minus 1046.  tx is then cut into up to three pieces
+		 * tt[0..2] by repeatedly truncating to an integer and
+		 * scaling the remainder by 2^24; nx counts the pieces up to
+		 * the last nonzero one.  The pieces go to
+		 * __vlibm_rem_pio2m(), which is defined elsewhere.
+		 * NOTE(review): the meaning of its trailing argument (2) is
+		 * not visible here -- confirm against its definition.
+		 */
+		e0 = (hx >> 20) - 1046;
+		HI(&tx) = 0x41600000 | (hx & 0xfffff);
+		LO(&tx) = LO(x);
+		tt[0] = (double)((int) tx);
+		tx = (tx - tt[0]) * two24;
+		if (tx != zero)
+		{
+			nx = 2;
+			tt[1] = (double)((int) tx);
+			tt[2] = (tx - tt[1]) * two24;
+			if (tt[2] != zero)
+				nx = 3;
+		}
+		else
+		{
+			nx = 1;
+			tt[1] = tt[2] = zero;
+		}
+		nx = __vlibm_rem_pio2m(tt, ty, e0, nx, 2);
+		if (xsb)	/* x was negative: mirror the reduction result */
+		{
+			nx = -nx;
+			ty[0] = -ty[0];
+			ty[1] = -ty[1];
+		}
+
+		/*
+		 * now nx and ty[*] are the quadrant and reduced arg; make
+		 * the reduced arg nonnegative and remember its sign in xsb
+		 */
+		hx = HI(&ty[0]);
+		xsb = 0;
+		if (hx & 0x80000000)
+		{
+			ty[0] = -ty[0];
+			ty[1] = -ty[1];
+			hx &= ~0x80000000;
+			xsb = 1;
+		}
+		if (hx < 0x3fc40000)	/* ty[0] < 5/32: polynomials alone */
+		{
+			z = ty[0] * ty[0];
+			t = z * (q1 + z * (q2 + z * (q3 + z * q4)));
+			c = one + t;
+			t = z * (p1 + z * (p2 + z * (p3 + z * p4)));
+			/* ty[1], presumably the low-order tail, is folded in */
+			s = ty[0] + (ty[1] + ty[0] * t);
+		}
+		else {	/* table values at a nearby breakpoint t, plus a correction */
+			j = (hx + 0x4000) & 0x7fff8000;	/* round high word to a multiple of 0x8000 */
+			HI(&t) = j;			/* t: the double with that high word ... */
+			LO(&t) = 0;			/* ... and a zero low word */
+			ty[0] = (ty[0] - t) + ty[1];	/* offset d of the reduced arg from t */
+			z = ty[0] * ty[0];
+			t = z * (qq1 + z * qq2);	/* approximates cos(d) - 1 */
+			w = ty[0] * (one + z * (pp1 + z * pp2));	/* approximates sin(d) */
+			j = ((j - 0x3fc40000) >> 13) & ~3;	/* table index: 4 slots per 0x8000 step */
+
+			c = __vlibm_TBL_sincos_hi[j+1];	/* cosine entry for t */
+			tc = __vlibm_TBL_sincos_lo[j+1] - (__vlibm_TBL_sincos_hi[j] * w - c * t);
+			c += tc;	/* cos(t)cos(d) - sin(t)sin(d) */
+
+			s = __vlibm_TBL_sincos_hi[j];	/* sine entry for t */
+			ts = (__vlibm_TBL_sincos_hi[j+1] * w + s * t) + __vlibm_TBL_sincos_lo[j];
+			s += ts;	/* sin(t)cos(d) + cos(t)sin(d) */
+		}
+		if (xsb) {
+			s = -s;	/* reduced arg was negative: only the sine flips */
+		}
+
+		switch (nx & 3) {	/* rotate (s, c) by the quadrant */
+		case 0:
+			*ss = s;
+			*cc = c;
+			break;
+
+		case 1:
+			*ss = c;
+			*cc = -s;
+			break;
+
+		case 2:
+			*ss = -s;
+			*cc = -c;
+			break;
+
+		case 3:
+			*ss = -c;
+			*cc = s;
+			break;
+		}
+	}
+}
diff --git a/usr/src/lib/libmvec/common/__vsincosbigf.c b/usr/src/lib/libmvec/common/__vsincosbigf.c
new file mode 100644
index 0000000000..15d436b945
--- /dev/null
+++ b/usr/src/lib/libmvec/common/__vsincosbigf.c
@@ -0,0 +1,171 @@
+/*
+ * CDDL HEADER START
+ *
+ * The
#include <sys/isa_defs.h>

/*
 * HI(x) and LO(x) give access to the two 32-bit words of the double that
 * the pointer x refers to.  HI is the word the code below treats as holding
 * the sign bit, the exponent and the top 20 mantissa bits; LO is the other
 * word.  Which word comes first in memory is chosen by _LITTLE_ENDIAN.
 *
 * NOTE(review): these macros reinterpret a double through int/unsigned
 * pointers, which is outside ISO C's aliasing rules; presumably the library
 * is built with strict-aliasing optimizations disabled -- confirm.
 */
#ifdef _LITTLE_ENDIAN
#define HI(x)	*(1+(int*)x)
#define LO(x)	*(unsigned*)x
#else
#define HI(x)	*(int*)x
#define LO(x)	*(1+(unsigned*)x)
#endif

#ifdef __RESTRICT
#define restrict _Restrict
#else
#define restrict
#endif

/* Breakpoint tables and the multi-word pi/2 reduction live elsewhere. */
extern const double __vlibm_TBL_sincos_hi[], __vlibm_TBL_sincos_lo[];
extern int __vlibm_rem_pio2m(double *, double *, int, int, int);

/*
 * p1..p4 / q1..q4 are the sine / cosine polynomial coefficients used when
 * the reduced argument is small; pp1..pp2 / qq1..qq2 are the shorter
 * polynomials used around a table breakpoint.
 */
static const double
	zero	= 0.0,
	one	= 1.0,
	two24	= 16777216.0,
	pp1	= -1.666666666605760465276263943134982554676e-0001,
	pp2	=  8.333261209690963126718376566146180944442e-0003,
	p1	= -1.666666666666629669805215138920301589656e-0001,
	p2	=  8.333333332390951295683993455280336376663e-0003,
	p3	= -1.984126237997976692791551778230098403960e-0004,
	p4	=  2.753403624854277237649987622848330351110e-0006,
	qq1	= -4.999999999977710986407023955908711557870e-0001,
	qq2	=  4.166654863857219350645055881018842089580e-0002,
	q1	= -4.999999999999931701464060878888294524481e-0001,
	q2	=  4.166666666394861917535640593963708222319e-0002,
	q3	= -1.388888552656142867832756687736851681462e-0003,
	q4	=  2.478519423681460796618128289454530524759e-0005;

/*
 * Large-argument pass of the single precision vector sincos.
 *
 * Walks n elements, reading the float x (step stridex) and writing the
 * sine to ss (step stridess) and the cosine to cc (step stridecc).  Each
 * input is widened to double; it is only processed when the high word of
 * its magnitude is above 0x413921fb (about 2^19 * pi) and below 0x7ff00000
 * (exponent field all ones, i.e. inf/NaN).  Every other element is skipped
 * and its ss/cc slots are left untouched -- presumably the caller has
 * already produced those results.
 *
 * The work is done in double precision and the results are narrowed to
 * float by the final assignments.
 */
void
__vlibm_vsincos_bigf(int n, float * restrict x, int stridex,
	float * restrict ss, int stridess, float * restrict cc, int stridecc)
{
	for (; n--; x += stridex, ss += stridess, cc += stridecc)
	{
		double		ts, tc, tx, tt[3], ty[2], t, w, z, c, s;
		unsigned	hx, xsb;
		int		e0, nx, j;

		/* separate the sign bit from the magnitude of the high word */
		tx = *x;
		hx = HI(&tx);
		xsb = hx & 0x80000000;
		hx &= ~0x80000000;
		if (hx <= 0x413921fb || hx >= 0x7ff00000)
			continue;
		/*
		 * Replace the exponent by 0x416 so that tx lies in
		 * [2^23, 2^24); e0 remembers the scaling.  The low word of
		 * tx is left as produced by the float-to-double conversion.
		 */
		e0 = (hx >> 20) - 1046;
		HI(&tx) = 0x41600000 | (hx & 0xfffff);

		/*
		 * Peel off up to three 24-bit integer pieces of the
		 * mantissa; nx counts how many are passed to the reduction.
		 */
		tt[0] = (double)((int) tx);
		tx = (tx - tt[0]) * two24;
		if (tx != zero)
		{
			nx = 2;
			tt[1] = (double)((int) tx);
			tt[2] = (tx - tt[1]) * two24;
			if (tt[2] != zero)
				nx = 3;
		}
		else
		{
			nx = 1;
			tt[1] = tt[2] = zero;
		}
		nx = __vlibm_rem_pio2m(tt, ty, e0, nx, 2);
		/* the reduction worked on |x|; mirror the result for x < 0 */
		if (xsb)
		{
			nx = -nx;
			ty[0] = -ty[0];
			ty[1] = -ty[1];
		}

		/* now nx and ty[*] are the quadrant and reduced arg */
		xsb = 0;
		hx = HI(&ty[0]);
		/* work on |ty| and remember that the sine must be negated */
		if (hx & 0x80000000)
		{
			ty[0] = -ty[0];
			ty[1] = -ty[1];
			hx &= ~0x80000000;
			xsb = 1;
		}
		if (hx < 0x3fc40000)
		{
			/* |ty| < 0.15625: plain polynomials, ty[1] is the tail */
			z = ty[0] * ty[0];
			t = z * (q1 + z * (q2 + z * (q3 + z * q4)));
			c = one + t;

			t = z * (p1 + z * (p2 + z * (p3 + z * p4)));
			s = ty[0] + (ty[1] + ty[0] * t);
		}
		else {
			/*
			 * Round the high word to a table breakpoint t, form
			 * the small offset ty[0] from that breakpoint, and
			 * combine the tabulated values (hi[j] is used as the
			 * sine, hi[j+1] as the cosine, lo[] as their
			 * low-order parts) with short polynomials in the
			 * offset.
			 */
			j = (hx + 0x4000) & 0x7fff8000;
			HI(&t) = j;
			LO(&t) = 0;
			ty[0] = (ty[0] - t) + ty[1];
			z = ty[0] * ty[0];
			t = z * (qq1 + z * qq2);
			w = ty[0] * (one + z * (pp1 + z * pp2));
			j = ((j - 0x3fc40000) >> 13) & ~3;

			c = __vlibm_TBL_sincos_hi[j+1];
			tc = __vlibm_TBL_sincos_lo[j+1] - (__vlibm_TBL_sincos_hi[j] * w - c * t);
			c += tc;

			s = __vlibm_TBL_sincos_hi[j];
			ts = (__vlibm_TBL_sincos_hi[j+1] * w + s * t) + __vlibm_TBL_sincos_lo[j];
			s += ts;
		}
		if (xsb) {
			s = -s;
		}

		/* rotate (s, c) by the quadrant count */
		switch (nx & 3) {
		case 0:
			*ss = s;
			*cc = c;
			break;

		case 1:
			*ss = c;
			*cc = -s;
			break;

		case 2:
			*ss = -s;
			*cc = -c;
			break;

		case 3:
			*ss = -c;
			*cc = s;
			break;
		}
	}
}
/*
 * __vsincosf: single precision vector sincos
 *
 * Algorithm:
 *
 * For |x| < pi/4, approximate sin(x) by a polynomial x+x*z*(S0+
 * z*(S1+z*S2)) and cos(x) by a polynomial 1+z*(-1/2+z*(C0+z*(C1+
 * z*C2))), where z = x*x, all evaluated in double precision.
 *
 * Accuracy:
 *
 * The largest error is less than 0.6 ulps.
 */

#include <sys/isa_defs.h>

/*
 * HI(x) / LO(x) name the two 32-bit words of the double lvalue x; HI is the
 * word the code treats as holding sign and exponent.  Word order in memory
 * is selected by _LITTLE_ENDIAN.
 *
 * NOTE(review): these macros (and the "*(int *)x" read in PREPROCESS)
 * reinterpret floating-point objects through integer pointers, which is
 * outside ISO C's aliasing rules; presumably the library is built with
 * strict-aliasing optimizations disabled -- confirm.
 */
#ifdef _LITTLE_ENDIAN
#define HI(x)	*(1+(int *)&x)
#define LO(x)	*(unsigned *)&x
#else
#define HI(x)	*(int *)&x
#define LO(x)	*(1+(unsigned *)&x)
#endif

#ifdef __RESTRICT
#define restrict _Restrict
#else
#define restrict
#endif

extern int __vlibm_rem_pio2m(double *, double *, int, int, int);

/* All numeric constants, addressed through the symbolic names below. */
static const double C[] = {
	-1.66666552424430847168e-01,	/* 2^ -3 * -1.5555460000000 */
	8.33219196647405624390e-03,	/* 2^ -7 *  1.11077E0000000 */
	-1.95187909412197768688e-04,	/* 2^-13 * -1.9956B60000000 */
	1.0,
	-0.5,
	4.16666455566883087158e-02,	/* 2^ -5 *  1.55554A0000000 */
	-1.38873036485165357590e-03,	/* 2^-10 * -1.6C0C1E0000000 */
	2.44309903791872784495e-05,	/* 2^-16 *  1.99E24E0000000 */
	0.636619772367581343075535,	/* 2^ -1 *  1.45F306DC9C883 */
	6755399441055744.0,		/* 2^ 52 *  1.8000000000000 */
	1.570796326734125614166,	/* 2^  0 *  1.921FB54400000 */
	6.077100506506192601475e-11,	/* 2^-34 *  1.0B4611A626331 */
};

#define S0	C[0]
#define S1	C[1]
#define S2	C[2]
#define one	C[3]
#define mhalf	C[4]
#define C0	C[5]
#define C1	C[6]
#define C2	C[7]
#define invpio2	C[8]
#define c3two51	C[9]
#define pio2_1	C[10]
#define pio2_t	C[11]

/*
 * PREPROCESS(N, sindex, cindex, label) fetches the next input, advances x,
 * and classifies the value by its magnitude bits ix:
 *
 *  - zero: stores t as the sine and 1 as the cosine at s[sindex] /
 *    c[cindex] and jumps to label;
 *  - |x| < pi/4: y<N> = x, n<N> = 0, evaluation is left to the caller;
 *  - |x| < 2^19*pi: y<N> = x and the shared flag "medium" is raised so the
 *    caller performs the pi/2 reduction;
 *  - inf or nan: stores t / t in both outputs and jumps to label;
 *  - anything larger: reduces with __vlibm_rem_pio2m, evaluates both
 *    polynomials, stores the results at s[sindex] / c[cindex] and jumps to
 *    label.
 *
 * "Jumps to label" means this slot is already finished; label is the place
 * that completes the slots gathered before it.  The macro relies on the
 * caller's locals hx, ix, t, medium, y<N>, z<N>, n<N>, f<N>, g<N>.
 */
#define PREPROCESS(N, sindex, cindex, label)				\
	hx = *(int *)x;							\
	ix = hx & 0x7fffffff;						\
	t = *x;								\
	x += stridex;							\
	if (ix <= 0x3f490fdb) { /* |x| < pi/4 */			\
		if (ix == 0) {						\
			s[sindex] = t;					\
			c[cindex] = one;				\
			goto label;					\
		}							\
		y##N = (double)t;					\
		n##N = 0;						\
	} else if (ix <= 0x49c90fdb) { /* |x| < 2^19*pi */		\
		y##N = (double)t;					\
		medium = 1;						\
	} else {							\
		if (ix >= 0x7f800000) { /* inf or nan */		\
			s[sindex] = c[cindex] = t / t;			\
			goto label;					\
		}							\
		z##N = y##N = (double)t;				\
		hx = HI(y##N);						\
		n##N = ((hx >> 20) & 0x7ff) - 1046;			\
		HI(z##N) = (hx & 0xfffff) | 0x41600000;			\
		n##N = __vlibm_rem_pio2m(&z##N, &y##N, n##N, 1, 0);	\
		if (hx < 0) {						\
			y##N = -y##N;					\
			n##N = -n##N;					\
		}							\
		z##N = y##N * y##N;					\
		f##N = (float)(y##N + y##N * z##N * (S0 + z##N *	\
		    (S1 + z##N * S2)));					\
		g##N = (float)(one + z##N * (mhalf + z##N * (C0 +	\
		    z##N * (C1 + z##N * C2))));				\
		if (n##N & 2) {						\
			f##N = -f##N;					\
			g##N = -g##N;					\
		}							\
		if (n##N & 1) {						\
			s[sindex] = g##N;				\
			c[cindex] = -f##N;				\
		} else {						\
			s[sindex] = f##N;				\
			c[cindex] = g##N;				\
		}							\
		goto label;						\
	}

/*
 * PROCESS(N) finishes one gathered slot: if "medium" is set it first
 * reduces y<N> by the nearest multiple of pi/2 (adding c3two51 rounds the
 * quotient to an integer whose low word becomes n<N>), then evaluates the
 * sine polynomial into f<N> and the cosine polynomial into g<N>, applies
 * the quadrant in n<N>, stores to *s and *c and advances both pointers.
 * There is deliberately no trailing semicolon; the call site supplies it.
 */
#define PROCESS(N)							\
	if (medium) {							\
		z##N = y##N * invpio2 + c3two51;			\
		n##N = LO(z##N);					\
		z##N -= c3two51;					\
		y##N = (y##N - z##N * pio2_1) - z##N * pio2_t;		\
	}								\
	z##N = y##N * y##N;						\
	f##N = (float)(y##N + y##N * z##N * (S0 + z##N * (S1 + z##N * S2)));\
	g##N = (float)(one + z##N * (mhalf + z##N * (C0 + z##N *	\
	    (C1 + z##N * C2))));					\
	if (n##N & 2) {							\
		f##N = -f##N;						\
		g##N = -g##N;						\
	}								\
	if (n##N & 1) {							\
		*s = g##N;						\
		*c = -f##N;						\
	} else {							\
		*s = f##N;						\
		*c = g##N;						\
	}								\
	s += strides;							\
	c += stridec

/*
 * Compute the sine and cosine of n floats.
 *
 * Inputs are read from x with step stridex; sines are written to s with
 * step strides and cosines to c with step stridec.  The loop gathers up to
 * four inputs per pass with PREPROCESS and evaluates them together.  When
 * the input runs out, or a slot is finished inside PREPROCESS, control
 * goes to process1/2/3 (or begin), which completes the slots gathered so
 * far and restarts the loop.
 */
void
__vsincosf(int n, float *restrict x, int stridex,
    float *restrict s, int strides, float *restrict c, int stridec)
{
	double		y0, y1, y2, y3;
	double		z0, z1, z2, z3;
	float		f0, f1, f2, f3, t;
	float		g0, g1, g2, g3;
	int		n0 = 0, n1 = 0, n2 = 0, n3, hx, ix, medium;

	/* pre-compensate for the unconditional advance at "begin" */
	s -= strides;
	c -= stridec;

	for (;;) {
begin:
		/* step past the last output slot written by the prior pass */
		s += strides;
		c += stridec;

		if (--n < 0)
			break;

		medium = 0;
		PREPROCESS(0, 0, 0, begin);

		if (--n < 0)
			goto process1;

		PREPROCESS(1, strides, stridec, process1);

		if (--n < 0)
			goto process2;

		PREPROCESS(2, (strides << 1), (stridec << 1), process2);

		if (--n < 0)
			goto process3;

		PREPROCESS(3, (strides << 1) + strides,
		    (stridec << 1) + stridec, process3);

		/*
		 * Four slots gathered.  If any of them was "medium", run
		 * the pi/2 reduction on all four; for the slots that were
		 * already below pi/4 this recomputes n and y as well.
		 */
		if (medium) {
			z0 = y0 * invpio2 + c3two51;
			z1 = y1 * invpio2 + c3two51;
			z2 = y2 * invpio2 + c3two51;
			z3 = y3 * invpio2 + c3two51;

			n0 = LO(z0);
			n1 = LO(z1);
			n2 = LO(z2);
			n3 = LO(z3);

			z0 -= c3two51;
			z1 -= c3two51;
			z2 -= c3two51;
			z3 -= c3two51;

			y0 = (y0 - z0 * pio2_1) - z0 * pio2_t;
			y1 = (y1 - z1 * pio2_1) - z1 * pio2_t;
			y2 = (y2 - z2 * pio2_1) - z2 * pio2_t;
			y3 = (y3 - z3 * pio2_1) - z3 * pio2_t;
		}

		z0 = y0 * y0;
		z1 = y1 * y1;
		z2 = y2 * y2;
		z3 = y3 * y3;

		/* f = sine polynomial, g = cosine polynomial */
		f0 = (float)(y0 + y0 * z0 * (S0 + z0 * (S1 + z0 * S2)));
		f1 = (float)(y1 + y1 * z1 * (S0 + z1 * (S1 + z1 * S2)));
		f2 = (float)(y2 + y2 * z2 * (S0 + z2 * (S1 + z2 * S2)));
		f3 = (float)(y3 + y3 * z3 * (S0 + z3 * (S1 + z3 * S2)));

		g0 = (float)(one + z0 * (mhalf + z0 * (C0 + z0 *
		    (C1 + z0 * C2))));
		g1 = (float)(one + z1 * (mhalf + z1 * (C0 + z1 *
		    (C1 + z1 * C2))));
		g2 = (float)(one + z2 * (mhalf + z2 * (C0 + z2 *
		    (C1 + z2 * C2))));
		g3 = (float)(one + z3 * (mhalf + z3 * (C0 + z3 *
		    (C1 + z3 * C2))));

		/* bit 1 of the quadrant negates both results */
		if (n0 & 2) {
			f0 = -f0;
			g0 = -g0;
		}
		if (n1 & 2) {
			f1 = -f1;
			g1 = -g1;
		}
		if (n2 & 2) {
			f2 = -f2;
			g2 = -g2;
		}
		if (n3 & 2) {
			f3 = -f3;
			g3 = -g3;
		}

		/* bit 0 of the quadrant swaps the roles of f and g */
		if (n0 & 1) {
			*s = g0;
			*c = -f0;
		} else {
			*s = f0;
			*c = g0;
		}
		s += strides;
		c += stridec;

		if (n1 & 1) {
			*s = g1;
			*c = -f1;
		} else {
			*s = f1;
			*c = g1;
		}
		s += strides;
		c += stridec;

		if (n2 & 1) {
			*s = g2;
			*c = -f2;
		} else {
			*s = f2;
			*c = g2;
		}
		s += strides;
		c += stridec;

		if (n3 & 1) {
			*s = g3;
			*c = -f3;
		} else {
			*s = f3;
			*c = g3;
		}
		/* the fourth advance happens at "begin" */
		continue;

process1:
		PROCESS(0);
		continue;

process2:
		PROCESS(0);
		PROCESS(1);
		continue;

process3:
		PROCESS(0);
		PROCESS(1);
		PROCESS(2);
	}
}
/*
 * __vsinf: single precision vector sin
 *
 * Algorithm:
 *
 * For |x| < pi/4, approximate sin(x) by a polynomial x+x*z*(S0+
 * z*(S1+z*S2)) and cos(x) by a polynomial 1+z*(-1/2+z*(C0+z*(C1+
 * z*C2))), where z = x*x, all evaluated in double precision.
 *
 * Accuracy:
 *
 * The largest error is less than 0.6 ulps.
 */

#include <sys/isa_defs.h>

/*
 * HI(x) / LO(x) name the two 32-bit words of the double lvalue x; HI is the
 * word the code treats as holding sign and exponent.  Word order in memory
 * is selected by _LITTLE_ENDIAN.
 *
 * NOTE(review): these macros (and the "*(int *)x" read in PREPROCESS)
 * reinterpret floating-point objects through integer pointers, which is
 * outside ISO C's aliasing rules; presumably the library is built with
 * strict-aliasing optimizations disabled -- confirm.
 */
#ifdef _LITTLE_ENDIAN
#define HI(x)	*(1+(int *)&x)
#define LO(x)	*(unsigned *)&x
#else
#define HI(x)	*(int *)&x
#define LO(x)	*(1+(unsigned *)&x)
#endif

#ifdef __RESTRICT
#define restrict _Restrict
#else
#define restrict
#endif

extern int __vlibm_rem_pio2m(double *, double *, int, int, int);

/* All numeric constants, addressed through the symbolic names below. */
static const double C[] = {
	-1.66666552424430847168e-01,	/* 2^ -3 * -1.5555460000000 */
	8.33219196647405624390e-03,	/* 2^ -7 *  1.11077E0000000 */
	-1.95187909412197768688e-04,	/* 2^-13 * -1.9956B60000000 */
	1.0,
	-0.5,
	4.16666455566883087158e-02,	/* 2^ -5 *  1.55554A0000000 */
	-1.38873036485165357590e-03,	/* 2^-10 * -1.6C0C1E0000000 */
	2.44309903791872784495e-05,	/* 2^-16 *  1.99E24E0000000 */
	0.636619772367581343075535,	/* 2^ -1 *  1.45F306DC9C883 */
	6755399441055744.0,		/* 2^ 52 *  1.8000000000000 */
	1.570796326734125614166,	/* 2^  0 *  1.921FB54400000 */
	6.077100506506192601475e-11,	/* 2^-34 *  1.0B4611A626331 */
};

#define S0	C[0]
#define S1	C[1]
#define S2	C[2]
#define one	C[3]
#define mhalf	C[4]
#define C0	C[5]
#define C1	C[6]
#define C2	C[7]
#define invpio2	C[8]
#define c3two51	C[9]
#define pio2_1	C[10]
#define pio2_t	C[11]

/*
 * PREPROCESS(N, index, label) fetches the next input, advances x, and
 * classifies the value by its magnitude bits ix:
 *
 *  - zero: stores t itself at y[index] and jumps to label;
 *  - |x| < pi/4: y<N> = x, n<N> = 0, evaluation is left to the caller;
 *  - |x| < 2^19*pi: y<N> = x and the shared flag "medium" is raised so the
 *    caller performs the pi/2 reduction;
 *  - inf or nan: stores t / t at y[index] and jumps to label;
 *  - anything larger: reduces with __vlibm_rem_pio2m, evaluates the sine or
 *    cosine polynomial according to the quadrant, stores the result at
 *    y[index] and jumps to label.
 *
 * "Jumps to label" means this slot is already finished; label is the place
 * that completes the slots gathered before it.  The macro relies on the
 * caller's locals hx, ix, t, medium, y<N>, z<N>, n<N>, f<N>.
 */
#define PREPROCESS(N, index, label)					\
	hx = *(int *)x;							\
	ix = hx & 0x7fffffff;						\
	t = *x;								\
	x += stridex;							\
	if (ix <= 0x3f490fdb) { /* |x| < pi/4 */			\
		if (ix == 0) {						\
			y[index] = t;					\
			goto label;					\
		}							\
		y##N = (double)t;					\
		n##N = 0;						\
	} else if (ix <= 0x49c90fdb) { /* |x| < 2^19*pi */		\
		y##N = (double)t;					\
		medium = 1;						\
	} else {							\
		if (ix >= 0x7f800000) { /* inf or nan */		\
			y[index] = t / t;				\
			goto label;					\
		}							\
		z##N = y##N = (double)t;				\
		hx = HI(y##N);						\
		n##N = ((hx >> 20) & 0x7ff) - 1046;			\
		HI(z##N) = (hx & 0xfffff) | 0x41600000;			\
		n##N = __vlibm_rem_pio2m(&z##N, &y##N, n##N, 1, 0);	\
		if (hx < 0) {						\
			y##N = -y##N;					\
			n##N = -n##N;					\
		}							\
		z##N = y##N * y##N;					\
		if (n##N & 1) { /* compute cos y */			\
			f##N = (float)(one + z##N * (mhalf + z##N *	\
			    (C0 + z##N * (C1 + z##N * C2))));		\
		} else { /* compute sin y */				\
			f##N = (float)(y##N + y##N * z##N * (S0 +	\
			    z##N * (S1 + z##N * S2)));			\
		}							\
		y[index] = (n##N & 2)? -f##N : f##N;			\
		goto label;						\
	}

/*
 * PROCESS(N) finishes one gathered slot: if "medium" is set it first
 * reduces y<N> by the nearest multiple of pi/2 (adding c3two51 rounds the
 * quotient to an integer whose low word becomes n<N>), then evaluates the
 * cosine or sine polynomial selected by bit 0 of n<N>, negates it when bit
 * 1 is set, stores to *y and advances y.  There is deliberately no trailing
 * semicolon; the call site supplies it.
 */
#define PROCESS(N)							\
	if (medium) {							\
		z##N = y##N * invpio2 + c3two51;			\
		n##N = LO(z##N);					\
		z##N -= c3two51;					\
		y##N = (y##N - z##N * pio2_1) - z##N * pio2_t;		\
	}								\
	z##N = y##N * y##N;						\
	if (n##N & 1) { /* compute cos y */				\
		f##N = (float)(one + z##N * (mhalf + z##N * (C0 +	\
		    z##N * (C1 + z##N * C2))));				\
	} else { /* compute sin y */					\
		f##N = (float)(y##N + y##N * z##N * (S0 + z##N * (S1 +	\
		    z##N * S2)));					\
	}								\
	*y = (n##N & 2)? -f##N : f##N;					\
	y += stridey

/*
 * Compute the sine of n floats.
 *
 * Inputs are read from x with step stridex and results are written to y
 * with step stridey.  The loop gathers up to four inputs per pass with
 * PREPROCESS and evaluates them together.  When the input runs out, or a
 * slot is finished inside PREPROCESS, control goes to process1/2/3 (or
 * begin), which completes the slots gathered so far and restarts the loop.
 */
void
__vsinf(int n, float *restrict x, int stridex, float *restrict y,
    int stridey)
{
	double		y0, y1, y2, y3;
	double		z0, z1, z2, z3;
	float		f0, f1, f2, f3, t;
	int		n0 = 0, n1 = 0, n2 = 0, n3, hx, ix, medium;

	/* pre-compensate for the unconditional advance at "begin" */
	y -= stridey;

	for (;;) {
begin:
		/* step past the last output slot written by the prior pass */
		y += stridey;

		if (--n < 0)
			break;

		medium = 0;
		PREPROCESS(0, 0, begin);

		if (--n < 0)
			goto process1;

		PREPROCESS(1, stridey, process1);

		if (--n < 0)
			goto process2;

		PREPROCESS(2, (stridey << 1), process2);

		if (--n < 0)
			goto process3;

		PREPROCESS(3, (stridey << 1) + stridey, process3);

		/*
		 * Four slots gathered.  If any of them was "medium", run
		 * the pi/2 reduction on all four; for the slots that were
		 * already below pi/4 this recomputes n and y as well.
		 */
		if (medium) {
			z0 = y0 * invpio2 + c3two51;
			z1 = y1 * invpio2 + c3two51;
			z2 = y2 * invpio2 + c3two51;
			z3 = y3 * invpio2 + c3two51;

			n0 = LO(z0);
			n1 = LO(z1);
			n2 = LO(z2);
			n3 = LO(z3);

			z0 -= c3two51;
			z1 -= c3two51;
			z2 -= c3two51;
			z3 -= c3two51;

			y0 = (y0 - z0 * pio2_1) - z0 * pio2_t;
			y1 = (y1 - z1 * pio2_1) - z1 * pio2_t;
			y2 = (y2 - z2 * pio2_1) - z2 * pio2_t;
			y3 = (y3 - z3 * pio2_1) - z3 * pio2_t;
		}

		z0 = y0 * y0;
		z1 = y1 * y1;
		z2 = y2 * y2;
		z3 = y3 * y3;

		/*
		 * Pack bit 0 of each quadrant into a 4-bit selector: bit k
		 * set means slot k needs the cosine polynomial, clear means
		 * the sine polynomial.  Each case below spells out one of
		 * the sixteen combinations.
		 */
		hx = (n0 & 1) | ((n1 & 1) << 1) | ((n2 & 1) << 2) |
		    ((n3 & 1) << 3);
		switch (hx) {
		case 0:
			f0 = (float)(y0 + y0 * z0 * (S0 + z0 * (S1 + z0 * S2)));
			f1 = (float)(y1 + y1 * z1 * (S0 + z1 * (S1 + z1 * S2)));
			f2 = (float)(y2 + y2 * z2 * (S0 + z2 * (S1 + z2 * S2)));
			f3 = (float)(y3 + y3 * z3 * (S0 + z3 * (S1 + z3 * S2)));
			break;

		case 1:
			f0 = (float)(one + z0 * (mhalf + z0 * (C0 +
			    z0 * (C1 + z0 * C2))));
			f1 = (float)(y1 + y1 * z1 * (S0 + z1 * (S1 + z1 * S2)));
			f2 = (float)(y2 + y2 * z2 * (S0 + z2 * (S1 + z2 * S2)));
			f3 = (float)(y3 + y3 * z3 * (S0 + z3 * (S1 + z3 * S2)));
			break;

		case 2:
			f0 = (float)(y0 + y0 * z0 * (S0 + z0 * (S1 + z0 * S2)));
			f1 = (float)(one + z1 * (mhalf + z1 * (C0 +
			    z1 * (C1 + z1 * C2))));
			f2 = (float)(y2 + y2 * z2 * (S0 + z2 * (S1 + z2 * S2)));
			f3 = (float)(y3 + y3 * z3 * (S0 + z3 * (S1 + z3 * S2)));
			break;

		case 3:
			f0 = (float)(one + z0 * (mhalf + z0 * (C0 +
			    z0 * (C1 + z0 * C2))));
			f1 = (float)(one + z1 * (mhalf + z1 * (C0 +
			    z1 * (C1 + z1 * C2))));
			f2 = (float)(y2 + y2 * z2 * (S0 + z2 * (S1 + z2 * S2)));
			f3 = (float)(y3 + y3 * z3 * (S0 + z3 * (S1 + z3 * S2)));
			break;

		case 4:
			f0 = (float)(y0 + y0 * z0 * (S0 + z0 * (S1 + z0 * S2)));
			f1 = (float)(y1 + y1 * z1 * (S0 + z1 * (S1 + z1 * S2)));
			f2 = (float)(one + z2 * (mhalf + z2 * (C0 +
			    z2 * (C1 + z2 * C2))));
			f3 = (float)(y3 + y3 * z3 * (S0 + z3 * (S1 + z3 * S2)));
			break;

		case 5:
			f0 = (float)(one + z0 * (mhalf + z0 * (C0 +
			    z0 * (C1 + z0 * C2))));
			f1 = (float)(y1 + y1 * z1 * (S0 + z1 * (S1 + z1 * S2)));
			f2 = (float)(one + z2 * (mhalf + z2 * (C0 +
			    z2 * (C1 + z2 * C2))));
			f3 = (float)(y3 + y3 * z3 * (S0 + z3 * (S1 + z3 * S2)));
			break;

		case 6:
			f0 = (float)(y0 + y0 * z0 * (S0 + z0 * (S1 + z0 * S2)));
			f1 = (float)(one + z1 * (mhalf + z1 * (C0 +
			    z1 * (C1 + z1 * C2))));
			f2 = (float)(one + z2 * (mhalf + z2 * (C0 +
			    z2 * (C1 + z2 * C2))));
			f3 = (float)(y3 + y3 * z3 * (S0 + z3 * (S1 + z3 * S2)));
			break;

		case 7:
			f0 = (float)(one + z0 * (mhalf + z0 * (C0 +
			    z0 * (C1 + z0 * C2))));
			f1 = (float)(one + z1 * (mhalf + z1 * (C0 +
			    z1 * (C1 + z1 * C2))));
			f2 = (float)(one + z2 * (mhalf + z2 * (C0 +
			    z2 * (C1 + z2 * C2))));
			f3 = (float)(y3 + y3 * z3 * (S0 + z3 * (S1 + z3 * S2)));
			break;

		case 8:
			f0 = (float)(y0 + y0 * z0 * (S0 + z0 * (S1 + z0 * S2)));
			f1 = (float)(y1 + y1 * z1 * (S0 + z1 * (S1 + z1 * S2)));
			f2 = (float)(y2 + y2 * z2 * (S0 + z2 * (S1 + z2 * S2)));
			f3 = (float)(one + z3 * (mhalf + z3 * (C0 +
			    z3 * (C1 + z3 * C2))));
			break;

		case 9:
			f0 = (float)(one + z0 * (mhalf + z0 * (C0 +
			    z0 * (C1 + z0 * C2))));
			f1 = (float)(y1 + y1 * z1 * (S0 + z1 * (S1 + z1 * S2)));
			f2 = (float)(y2 + y2 * z2 * (S0 + z2 * (S1 + z2 * S2)));
			f3 = (float)(one + z3 * (mhalf + z3 * (C0 +
			    z3 * (C1 + z3 * C2))));
			break;

		case 10:
			f0 = (float)(y0 + y0 * z0 * (S0 + z0 * (S1 + z0 * S2)));
			f1 = (float)(one + z1 * (mhalf + z1 * (C0 +
			    z1 * (C1 + z1 * C2))));
			f2 = (float)(y2 + y2 * z2 * (S0 + z2 * (S1 + z2 * S2)));
			f3 = (float)(one + z3 * (mhalf + z3 * (C0 +
			    z3 * (C1 + z3 * C2))));
			break;

		case 11:
			f0 = (float)(one + z0 * (mhalf + z0 * (C0 +
			    z0 * (C1 + z0 * C2))));
			f1 = (float)(one + z1 * (mhalf + z1 * (C0 +
			    z1 * (C1 + z1 * C2))));
			f2 = (float)(y2 + y2 * z2 * (S0 + z2 * (S1 + z2 * S2)));
			f3 = (float)(one + z3 * (mhalf + z3 * (C0 +
			    z3 * (C1 + z3 * C2))));
			break;

		case 12:
			f0 = (float)(y0 + y0 * z0 * (S0 + z0 * (S1 + z0 * S2)));
			f1 = (float)(y1 + y1 * z1 * (S0 + z1 * (S1 + z1 * S2)));
			f2 = (float)(one + z2 * (mhalf + z2 * (C0 +
			    z2 * (C1 + z2 * C2))));
			f3 = (float)(one + z3 * (mhalf + z3 * (C0 +
			    z3 * (C1 + z3 * C2))));
			break;

		case 13:
			f0 = (float)(one + z0 * (mhalf + z0 * (C0 +
			    z0 * (C1 + z0 * C2))));
			f1 = (float)(y1 + y1 * z1 * (S0 + z1 * (S1 + z1 * S2)));
			f2 = (float)(one + z2 * (mhalf + z2 * (C0 +
			    z2 * (C1 + z2 * C2))));
			f3 = (float)(one + z3 * (mhalf + z3 * (C0 +
			    z3 * (C1 + z3 * C2))));
			break;

		case 14:
			f0 = (float)(y0 + y0 * z0 * (S0 + z0 * (S1 + z0 * S2)));
			f1 = (float)(one + z1 * (mhalf + z1 * (C0 +
			    z1 * (C1 + z1 * C2))));
			f2 = (float)(one + z2 * (mhalf + z2 * (C0 +
			    z2 * (C1 + z2 * C2))));
			f3 = (float)(one + z3 * (mhalf + z3 * (C0 +
			    z3 * (C1 + z3 * C2))));
			break;

		default:
			/* selector 15: all four slots need the cosine */
			f0 = (float)(one + z0 * (mhalf + z0 * (C0 +
			    z0 * (C1 + z0 * C2))));
			f1 = (float)(one + z1 * (mhalf + z1 * (C0 +
			    z1 * (C1 + z1 * C2))));
			f2 = (float)(one + z2 * (mhalf + z2 * (C0 +
			    z2 * (C1 + z2 * C2))));
			f3 = (float)(one + z3 * (mhalf + z3 * (C0 +
			    z3 * (C1 + z3 * C2))));
		}

		/* bit 1 of each quadrant negates the result */
		*y = (n0 & 2)? -f0 : f0;
		y += stridey;
		*y = (n1 & 2)? -f1 : f1;
		y += stridey;
		*y = (n2 & 2)? -f2 : f2;
		y += stridey;
		*y = (n3 & 2)? -f3 : f3;
		/* the fourth advance happens at "begin" */
		continue;

process1:
		PROCESS(0);
		continue;

process2:
		PROCESS(0);
		PROCESS(1);
		continue;

process3:
		PROCESS(0);
		PROCESS(1);
		PROCESS(2);
	}
}
+ * Use is subject to license terms. + */ + +#include "libm_synonyms.h" +#include "libm_inlines.h" + +#ifdef __RESTRICT +#define restrict _Restrict +#else +#define restrict +#endif + +#define sqrt __sqrt + +extern double sqrt(double); + +void +__vsqrt(int n, double * restrict x, int stridex, double * restrict y, int stridey) +{ + for(; n > 0 ; n--) + { + *y = sqrt(*x); + x += stridex; + y += stridey; + } +} + diff --git a/usr/src/lib/libmvec/common/__vsqrtf.c b/usr/src/lib/libmvec/common/__vsqrtf.c new file mode 100644 index 0000000000..c0baefecc7 --- /dev/null +++ b/usr/src/lib/libmvec/common/__vsqrtf.c @@ -0,0 +1,53 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifdef __RESTRICT +#define restrict _Restrict +#else +#define restrict +#endif + +#include "libm_synonyms.h" +#include "libm_inlines.h" + +#define sqrtf __sqrtf + +extern float sqrtf(float); + +void +__vsqrtf(int n, float * restrict x, int stridex, float * restrict y, int stridey) +{ + for(; n > 0 ; n--) + { + *y = sqrtf(*x); + x += stridex; + y += stridey; + } +} + diff --git a/usr/src/lib/libmvec/common/__vz_abs.c b/usr/src/lib/libmvec/common/__vz_abs.c new file mode 100644 index 0000000000..4617877960 --- /dev/null +++ b/usr/src/lib/libmvec/common/__vz_abs.c @@ -0,0 +1,44 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
#ifdef __RESTRICT
#define restrict _Restrict
#else
#define restrict
#endif

extern void __vhypot(int, double *, int, double *, int, double *, int);

/*
 * Vector absolute value of double-complex numbers.
 *
 * x is treated as pairs of doubles: the first member of each pair starts
 * at x and the second at x + 1, and both are stepped by twice stridex.
 * The two component streams are passed to __vhypot, which writes n results
 * to y with step stridey.
 *
 * NOTE(review): that the pairs are (real, imaginary) and that __vhypot
 * uses the (n, a, stride_a, b, stride_b, out, stride_out) convention is
 * inferred from the call shape -- confirm against __vhypot.
 */
void
__vz_abs(int n, double * restrict x, int stridex, double * restrict y,
    int stridey)
{
	double	*first = x;
	double	*second = x + 1;
	int	pairstep = stridex << 1;

	__vhypot(n, first, pairstep, second, pairstep, y, stridey);
}
#ifdef __RESTRICT
#define restrict _Restrict
#else
#define restrict
#endif

extern void __vexp(int, double *, int, double *, int);
extern void __vsincos(int, double *, int, double *, int, double *, int);

/*
 * Vector exponential of double-complex numbers.
 *
 * x and y are treated as pairs of doubles, stepped by twice stridex and
 * twice stridey respectively.  tmp is scratch space indexed 0..n-1.
 *
 *  1. __vexp of the first member of each input pair goes to tmp[0..n-1].
 *  2. __vsincos of the second member writes its first output stream to the
 *     second member of each y pair and its second output stream to the
 *     first member.
 *  3. Both members of every y pair are scaled by the matching tmp entry.
 */
void
__vz_exp(int n, double * restrict x, int stridex, double * restrict y,
    int stridey, double * restrict tmp)
{
	int	k, off;

	stridex <<= 1;
	stridey <<= 1;

	__vexp(n, x, stridex, tmp, 1);
	__vsincos(n, x + 1, stridex, y + 1, stridey, y, stridey);

	off = 0;
	k = 0;
	while (k < n) {
		y[off] *= tmp[k];
		y[off + 1] *= tmp[k];
		k++;
		off += stridey;
	}
}
#ifdef __RESTRICT
#define restrict _Restrict
#else
#define restrict
#endif

extern void __vatan2(int, double *, int, double *, int, double *, int);
extern void __vhypot(int, double *, int, double *, int, double *, int);
extern void __vlog(int, double *, int, double *, int);

/*
 * Vector logarithm of double-complex numbers.
 *
 * x and y are treated as pairs of doubles, stepped by twice stridex and
 * twice stridey.  The order of the three calls matters because the second
 * slot of each y pair is reused as scratch:
 *
 *  1. __vhypot of the two input members is parked in the second y slot;
 *  2. __vlog of that value is written to the first y slot;
 *  3. __vatan2(second input member, first input member) then overwrites
 *     the second y slot.
 */
void
__vz_log(int n, double * restrict x, int stridex, double * restrict y,
    int stridey)
{
	double	*in_first, *in_second, *out_first, *out_second;

	stridex <<= 1;
	stridey <<= 1;

	in_first = x;
	in_second = x + 1;
	out_first = y;
	out_second = y + 1;

	__vhypot(n, in_first, stridex, in_second, stridex, out_second,
	    stridey);
	__vlog(n, out_second, stridey, out_first, stridey);
	__vatan2(n, in_second, stridex, in_first, stridex, out_second,
	    stridey);
}
+ */ + +#ifdef __RESTRICT +#define restrict _Restrict +#else +#define restrict +#endif + +extern void __vz_exp(int, double *, int, double *, int, double *); +extern void __vz_log(int, double *, int, double *, int); + +void +__vz_pow(int n, double * restrict x, int stridex, double * restrict y, + int stridey, double * restrict z, int stridez, double * restrict tmp) +{ + double r; + int i, j, k; + + __vz_log(n, x, stridex, tmp, 1); + stridey <<= 1; + for (i = j = 0; i < n; i++, j += stridey) + { + k = i << 1; + r = y[j] * tmp[k] - y[j+1] * tmp[k+1]; + tmp[k+1] = y[j+1] * tmp[k] + y[j] * tmp[k+1]; + tmp[k] = r; + } + __vz_exp(n, tmp, 1, z, stridez, tmp + n + n); +} diff --git a/usr/src/lib/libmvec/common/mapfile-vers b/usr/src/lib/libmvec/common/mapfile-vers new file mode 100644 index 0000000000..de0c7877e5 --- /dev/null +++ b/usr/src/lib/libmvec/common/mapfile-vers @@ -0,0 +1,160 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# Copyright 2011 Nexenta Systems, Inc. All rights reserved. +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# Interface definition for libmvec.so.1 + +# +# MAPFILE HEADER START +# +# WARNING: STOP NOW. DO NOT MODIFY THIS FILE. 
+# Object versioning must comply with the rules detailed in +# +# usr/src/lib/README.mapfiles +# +# You should not be making modifications here until you've read the most current +# copy of that file. If you need help, contact a gatekeeper for guidance. +# +# MAPFILE HEADER END +# + +$mapfile_version 2 + +$if _ELF32 +$add lf64 +$endif +$if _sparc && _ELF32 +$add sparc32 +$endif +$if _sparc && _ELF64 +$add sparcv9 +$endif +$if _x86 && _ELF32 +$add i386 +$endif +$if _x86 && _ELF64 +$add amd64 +$endif + +SYMBOL_VERSION SUNW_1.1 { + global: + __vatan2; #LSARC/2003/737 + __vatan2_; #LSARC/2003/737 + __vatan2f; #LSARC/2003/737 + __vatan2f_; #LSARC/2003/737 + __vatan; #LSARC/2003/737 + __vatan_; #LSARC/2003/737 + __vatanf; #LSARC/2003/737 + __vatanf_; #LSARC/2003/737 + __vc_abs; #LSARC/2003/737 + __vc_abs_; #LSARC/2003/737 + __vc_exp; #LSARC/2003/737 + __vc_exp_; #LSARC/2003/737 + __vc_log; #LSARC/2003/737 + __vc_log_; #LSARC/2003/737 + __vc_pow; #LSARC/2003/737 + __vc_pow_; #LSARC/2003/737 + __vcos; #LSARC/2003/737 + __vcos_; #LSARC/2003/737 + __vcosf; #LSARC/2003/737 + __vcosf_; #LSARC/2003/737 + __vexp; #LSARC/2003/737 + __vexp_; #LSARC/2003/737 + __vexpf; #LSARC/2003/737 + __vexpf_; #LSARC/2003/737 + __vhypot; #LSARC/2003/737 + __vhypot_; #LSARC/2003/737 + __vhypotf; #LSARC/2003/737 + __vhypotf_; #LSARC/2003/737 + __vlog; #LSARC/2003/737 + __vlog_; #LSARC/2003/737 + __vlogf; #LSARC/2003/737 + __vlogf_; #LSARC/2003/737 + __vpow; #LSARC/2003/737 + __vpow_; #LSARC/2003/737 + __vpowf; #LSARC/2003/737 + __vpowf_; #LSARC/2003/737 + __vrhypot; #LSARC/2003/737 + __vrhypot_; #LSARC/2003/737 + __vrhypotf; #LSARC/2003/737 + __vrhypotf_; #LSARC/2003/737 + __vrsqrt; #LSARC/2003/737 + __vrsqrt_; #LSARC/2003/737 + __vrsqrtf; #LSARC/2003/737 + __vrsqrtf_; #LSARC/2003/737 + __vsin; #LSARC/2003/737 + __vsin_; #LSARC/2003/737 + __vsincos; #LSARC/2003/737 + __vsincos_; #LSARC/2003/737 + __vsincosf; #LSARC/2003/737 + __vsincosf_; #LSARC/2003/737 + __vsinf; #LSARC/2003/737 + __vsinf_; 
#LSARC/2003/737 + __vsqrt; #LSARC/2003/737 + __vsqrt_; #LSARC/2003/737 + __vsqrtf; #LSARC/2003/737 + __vsqrtf_; #LSARC/2003/737 + __vz_abs; #LSARC/2003/737 + __vz_abs_; #LSARC/2003/737 + __vz_exp; #LSARC/2003/737 + __vz_exp_; #LSARC/2003/737 + __vz_log; #LSARC/2003/737 + __vz_log_; #LSARC/2003/737 + __vz_pow; #LSARC/2003/737 + __vz_pow_; #LSARC/2003/737 + vatan2_; #LSARC/2003/737 + vatan2f_; #LSARC/2003/737 + vatan_; #LSARC/2003/737 + vatanf_; #LSARC/2003/737 + vc_abs_; #LSARC/2003/737 + vc_exp_; #LSARC/2003/737 + vc_log_; #LSARC/2003/737 + vc_pow_; #LSARC/2003/737 + vcos_; #LSARC/2003/737 + vcosf_; #LSARC/2003/737 + vexp_; #LSARC/2003/737 + vexpf_; #LSARC/2003/737 + vhypot_; #LSARC/2003/737 + vhypotf_; #LSARC/2003/737 + vlog_; #LSARC/2003/737 + vlogf_; #LSARC/2003/737 + vpow_; #LSARC/2003/737 + vpowf_; #LSARC/2003/737 + vrhypot_; #LSARC/2003/737 + vrhypotf_; #LSARC/2003/737 + vrsqrt_; #LSARC/2003/737 + vrsqrtf_; #LSARC/2003/737 + vsin_; #LSARC/2003/737 + vsincos_; #LSARC/2003/737 + vsincosf_; #LSARC/2003/737 + vsinf_; #LSARC/2003/737 + vsqrt_; #LSARC/2003/737 + vsqrtf_; #LSARC/2003/737 + vz_abs_; #LSARC/2003/737 + vz_exp_; #LSARC/2003/737 + vz_log_; #LSARC/2003/737 + vz_pow_; #LSARC/2003/737 + local: + *; +}; diff --git a/usr/src/lib/libmvec/common/mapfilevis-vers b/usr/src/lib/libmvec/common/mapfilevis-vers new file mode 100644 index 0000000000..f7f7bb6dc2 --- /dev/null +++ b/usr/src/lib/libmvec/common/mapfilevis-vers @@ -0,0 +1,72 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. 
+# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# Copyright 2011 Nexenta Systems, Inc. All rights reserved. +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# Interface definition for cpu/sparcv8plus+vis/libmvec_isa.so.1 + +# +# MAPFILE HEADER START +# +# WARNING: STOP NOW. DO NOT MODIFY THIS FILE. +# Object versioning must comply with the rules detailed in +# +# usr/src/lib/README.mapfiles +# +# You should not be making modifications here until you've read the most current +# copy of that file. If you need help, contact a gatekeeper for guidance. +# +# MAPFILE HEADER END +# + +$mapfile_version 2 + +SYMBOL_VERSION SUNW_1.1 { + global: + __vatan; #LSARC/2003/737 + __vatan2; #LSARC/2003/737 + __vatan2f; #LSARC/2003/737 + __vatanf; #LSARC/2003/737 + __vcos; #LSARC/2003/737 + __vcosf; #LSARC/2003/737 + __vexp; #LSARC/2003/737 + __vexpf; #LSARC/2003/737 + __vhypot; #LSARC/2003/737 + __vhypotf; #LSARC/2003/737 + __vlog; #LSARC/2003/737 + __vlogf; #LSARC/2003/737 + __vpow; #LSARC/2003/737 + __vpowf; #LSARC/2003/737 + __vrhypot; #LSARC/2003/737 + __vrhypotf; #LSARC/2003/737 + __vrsqrt; #LSARC/2003/737 + __vrsqrtf; #LSARC/2003/737 + __vsin; #LSARC/2003/737 + __vsincos; #LSARC/2003/737 + __vsincosf; #LSARC/2003/737 + __vsinf; #LSARC/2003/737 + __vsqrt; #LSARC/2003/737 + __vsqrtf; #LSARC/2003/737 + local: + *; +}; diff --git a/usr/src/lib/libmvec/common/mapfilevis2-vers b/usr/src/lib/libmvec/common/mapfilevis2-vers new file mode 100644 index 0000000000..6c754c055e --- /dev/null +++ b/usr/src/lib/libmvec/common/mapfilevis2-vers @@ -0,0 +1,52 @@ +# +# CDDL HEADER START +# +# The contents of this file 
are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# Copyright 2011 Nexenta Systems, Inc. All rights reserved. +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# Interface definition for cpu/sparcv9+vis2/libmvec_isa.so.1 + +# +# MAPFILE HEADER START +# +# WARNING: STOP NOW. DO NOT MODIFY THIS FILE. +# Object versioning must comply with the rules detailed in +# +# usr/src/lib/README.mapfiles +# +# You should not be making modifications here until you've read the most current +# copy of that file. If you need help, contact a gatekeeper for guidance. +# +# MAPFILE HEADER END +# + +$mapfile_version 2 + +SYMBOL_VERSION SUNW_1.1 { + global: + __vcos; #LSARC/2003/737 + __vlog; #LSARC/2003/737 + __vsin; #LSARC/2003/737 + __vsqrtf; #LSARC/2003/737 + local: + *; +}; diff --git a/usr/src/lib/libmvec/common/vatan2_.c b/usr/src/lib/libmvec/common/vatan2_.c new file mode 100644 index 0000000000..3ca34c2847 --- /dev/null +++ b/usr/src/lib/libmvec/common/vatan2_.c @@ -0,0 +1,40 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +extern void __vatan2(int, double *, int, double *, int, double *, int); + +#pragma weak vatan2_ = __vatan2_ + +/* just invoke the serial function */ +void +__vatan2_(int *n, double *y, int *stridey, double *x, int *stridex, + double *z, int *stridez) +{ + __vatan2(*n, y, *stridey, x, *stridex, z, *stridez); +} diff --git a/usr/src/lib/libmvec/common/vatan2f_.c b/usr/src/lib/libmvec/common/vatan2f_.c new file mode 100644 index 0000000000..de847ef763 --- /dev/null +++ b/usr/src/lib/libmvec/common/vatan2f_.c @@ -0,0 +1,40 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +extern void __vatan2f(int, float *, int, float *, int, float *, int); + +#pragma weak vatan2f_ = __vatan2f_ + +/* just invoke the serial function */ +void +__vatan2f_(int *n, float *y, int *stridey, float *x, int *stridex, + float *z, int *stridez) +{ + __vatan2f(*n, y, *stridey, x, *stridex, z, *stridez); +} diff --git a/usr/src/lib/libmvec/common/vatan_.c b/usr/src/lib/libmvec/common/vatan_.c new file mode 100644 index 0000000000..e983958841 --- /dev/null +++ b/usr/src/lib/libmvec/common/vatan_.c @@ -0,0 +1,39 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +extern void __vatan(int, double *, int, double *, int); + +#pragma weak vatan_ = __vatan_ + +/* just invoke the serial function */ +void +__vatan_(int *n, double *x, int *stridex, double *y, int *stridey) +{ + __vatan(*n, x, *stridex, y, *stridey); +} diff --git a/usr/src/lib/libmvec/common/vatanf_.c b/usr/src/lib/libmvec/common/vatanf_.c new file mode 100644 index 0000000000..2917885fc2 --- /dev/null +++ b/usr/src/lib/libmvec/common/vatanf_.c @@ -0,0 +1,39 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +extern void __vatanf(int, float *, int, float *, int); + +#pragma weak vatanf_ = __vatanf_ + +/* just invoke the serial function */ +void +__vatanf_(int *n, float *x, int *stridex, float *y, int *stridey) +{ + __vatanf(*n, x, *stridex, y, *stridey); +} diff --git a/usr/src/lib/libmvec/common/vc_abs_.c b/usr/src/lib/libmvec/common/vc_abs_.c new file mode 100644 index 0000000000..3a9b078829 --- /dev/null +++ b/usr/src/lib/libmvec/common/vc_abs_.c @@ -0,0 +1,39 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +extern void __vc_abs(int, float *, int, float *, int); + +#pragma weak vc_abs_ = __vc_abs_ + +/* just invoke the serial function */ +void +__vc_abs_(int *n, float *x, int *stridex, float *y, int *stridey) +{ + __vc_abs(*n, x, *stridex, y, *stridey); +} diff --git a/usr/src/lib/libmvec/common/vc_exp_.c b/usr/src/lib/libmvec/common/vc_exp_.c new file mode 100644 index 0000000000..ebce9dc584 --- /dev/null +++ b/usr/src/lib/libmvec/common/vc_exp_.c @@ -0,0 +1,40 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +extern void __vc_exp(int, float *, int, float *, int, float *); + +#pragma weak vc_exp_ = __vc_exp_ + +/* just invoke the serial function */ +void +__vc_exp_(int *n, float *x, int *stridex, float *y, int *stridey, + float *tmp) +{ + __vc_exp(*n, x, *stridex, y, *stridey, tmp); +} diff --git a/usr/src/lib/libmvec/common/vc_log_.c b/usr/src/lib/libmvec/common/vc_log_.c new file mode 100644 index 0000000000..25cb3df6fd --- /dev/null +++ b/usr/src/lib/libmvec/common/vc_log_.c @@ -0,0 +1,39 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +extern void __vc_log(int, float *, int, float *, int); + +#pragma weak vc_log_ = __vc_log_ + +/* just invoke the serial function */ +void +__vc_log_(int *n, float *x, int *stridex, float *y, int *stridey) +{ + __vc_log(*n, x, *stridex, y, *stridey); +} diff --git a/usr/src/lib/libmvec/common/vc_pow_.c b/usr/src/lib/libmvec/common/vc_pow_.c new file mode 100644 index 0000000000..821952a7af --- /dev/null +++ b/usr/src/lib/libmvec/common/vc_pow_.c @@ -0,0 +1,41 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +extern void __vc_pow(int, float *, int, float *, int, float *, int, + float *); + +#pragma weak vc_pow_ = __vc_pow_ + +/* just invoke the serial function */ +void +__vc_pow_(int *n, float *x, int *stridex, float *y, int *stridey, + float *z, int *stridez, float *tmp) +{ + __vc_pow(*n, x, *stridex, y, *stridey, z, *stridez, tmp); +} diff --git a/usr/src/lib/libmvec/common/vcos_.c b/usr/src/lib/libmvec/common/vcos_.c new file mode 100644 index 0000000000..7549290aaf --- /dev/null +++ b/usr/src/lib/libmvec/common/vcos_.c @@ -0,0 +1,74 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +extern void __vcos(int, double *, int, double *, int); + +#if !defined(LIBMVEC_SO_BUILD) +#if defined(ARCH_v8plusa) || defined(ARCH_v8plusb) || defined(ARCH_v9a) || defined(ARCH_v9b) +#define CHECK_ULTRA3 +#endif +#endif /* !defined(LIBMVEC_SO_BUILD) */ + +#ifdef CHECK_ULTRA3 +#include <strings.h> +#define sysinfo _sysinfo +#include <sys/systeminfo.h> + +#define BUFLEN 257 + +static int use_ultra3 = 0; + +extern void __vcos_ultra3(int, double *, int, double *, int); +#endif + +#pragma weak vcos_ = __vcos_ + +/* just invoke the serial function */ +void +__vcos_(int *n, double *x, int *stridex, double *y, int *stridey) +{ +#ifdef CHECK_ULTRA3 + int u; + char buf[BUFLEN]; + + u = use_ultra3; + if (!u) { + /* use __vcos_ultra3 on Cheetah (and ???) */ + if (sysinfo(SI_ISALIST, buf, BUFLEN) > 0 && !strncmp(buf, "sparcv9+vis2", 12)) + u = 3; + else + u = 1; + use_ultra3 = u; + } + if (u & 2) + __vcos_ultra3(*n, x, *stridex, y, *stridey); + else +#endif + __vcos(*n, x, *stridex, y, *stridey); +} diff --git a/usr/src/lib/libmvec/common/vcosf_.c b/usr/src/lib/libmvec/common/vcosf_.c new file mode 100644 index 0000000000..1c3abadd7b --- /dev/null +++ b/usr/src/lib/libmvec/common/vcosf_.c @@ -0,0 +1,39 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +extern void __vcosf(int, float *, int, float *, int); + +#pragma weak vcosf_ = __vcosf_ + +/* just invoke the serial function */ +void +__vcosf_(int *n, float *x, int *stridex, float *y, int *stridey) +{ + __vcosf(*n, x, *stridex, y, *stridey); +} diff --git a/usr/src/lib/libmvec/common/vexp_.c b/usr/src/lib/libmvec/common/vexp_.c new file mode 100644 index 0000000000..19812d5d7d --- /dev/null +++ b/usr/src/lib/libmvec/common/vexp_.c @@ -0,0 +1,39 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +extern void __vexp(int, double *, int, double *, int); + +#pragma weak vexp_ = __vexp_ + +/* just invoke the serial function */ +void +__vexp_(int *n, double *x, int *stridex, double *y, int *stridey) +{ + __vexp(*n, x, *stridex, y, *stridey); +} diff --git a/usr/src/lib/libmvec/common/vexpf_.c b/usr/src/lib/libmvec/common/vexpf_.c new file mode 100644 index 0000000000..73a8cc2b1c --- /dev/null +++ b/usr/src/lib/libmvec/common/vexpf_.c @@ -0,0 +1,39 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +extern void __vexpf(int, float *, int, float *, int); + +#pragma weak vexpf_ = __vexpf_ + +/* just invoke the serial function */ +void +__vexpf_(int *n, float *x, int *stridex, float *y, int *stridey) +{ + __vexpf(*n, x, *stridex, y, *stridey); +} diff --git a/usr/src/lib/libmvec/common/vhypot_.c b/usr/src/lib/libmvec/common/vhypot_.c new file mode 100644 index 0000000000..e7a46566af --- /dev/null +++ b/usr/src/lib/libmvec/common/vhypot_.c @@ -0,0 +1,40 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +extern void __vhypot(int, double *, int, double *, int, double *, int); + +#pragma weak vhypot_ = __vhypot_ + +/* just invoke the serial function */ +void +__vhypot_(int *n, double *x, int *stridex, double *y, int *stridey, + double *z, int *stridez) +{ + __vhypot(*n, x, *stridex, y, *stridey, z, *stridez); +} diff --git a/usr/src/lib/libmvec/common/vhypotf_.c b/usr/src/lib/libmvec/common/vhypotf_.c new file mode 100644 index 0000000000..f9d919b6a9 --- /dev/null +++ b/usr/src/lib/libmvec/common/vhypotf_.c @@ -0,0 +1,40 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +extern void __vhypotf(int, float *, int, float *, int, float *, int); + +#pragma weak vhypotf_ = __vhypotf_ + +/* just invoke the serial function */ +void +__vhypotf_(int *n, float *x, int *stridex, float *y, int *stridey, + float *z, int *stridez) +{ + __vhypotf(*n, x, *stridex, y, *stridey, z, *stridez); +} diff --git a/usr/src/lib/libmvec/common/vis/__vatan.S b/usr/src/lib/libmvec/common/vis/__vatan.S new file mode 100644 index 0000000000..b5b7b1d8d1 --- /dev/null +++ b/usr/src/lib/libmvec/common/vis/__vatan.S @@ -0,0 +1,572 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .file "__vatan.S" + +#include "libm.h" + + RO_DATA + +! following is the C version of the ATAN algorithm +! #include <math.h> +! #include <stdio.h> +! double jkatan(double *x) +! { +! double f, z, ans, ansu, ansl, tmp, poly, conup, conlo, dummy; +! int index, sign, intf, intz; +! extern const double __vlibm_TBL_atan1[]; +! long *pf = (long *) &f, *pz = (long *) &z; +! +! 
/* Power series atan(x) = x + p1*x**3 + p2*x**5 + p3*x**7 +! * Error = -3.08254E-18 On the interval |x| < 1/64 */ +! +! /* define dummy names for readability. Use parray to help compiler optimize loads */ +! #define p3 parray[0] +! #define p2 parray[1] +! #define p1 parray[2] +! #define soffset 3 +! +! static const double parray[] = { +! -1.428029046844299722E-01, /* p[3] */ +! 1.999999917247000615E-01, /* p[2] */ +! -3.333333333329292858E-01, /* p[1] */ +! 1.0, /* not used for p[0], though */ +! -1.0, /* used to flip sign of answer */ +! }; +! +! f = *x; /* fetch argument */ +! intf = pf[0]; /* grab upper half */ +! sign = intf & 0x80000000; /* sign of argument */ +! intf ^= sign; /* abs(upper argument) */ +! sign = (unsigned) sign >> 31; /* sign bit = 0 or 1 */ +! pf[0] = intf; +! +! if( (intf > 0x43600000) || (intf < 0x3e300000) ) /* filter out special cases */ +! { +! if( (intf > 0x7ff00000) || +! ((intf == 0x7ff00000) && (pf[1] !=0)) ) return (*x-*x);/* return NaN if x=NaN*/ +! if( intf < 0x3e300000 ) /* avoid underflow for small arg */ +! { +! dummy = 1.0e37 + f; +! dummy = dummy; +! return (*x); +! } +! if( intf > 0x43600000 ) /* avoid underflow for big arg */ +! { +! index = 2; +! f = __vlibm_TBL_atan1[index] + __vlibm_TBL_atan1[index+1];/* pi/2 up + pi/2 low */ +! f = parray[soffset + sign] * f; /* put sign bit on ans */ +! return (f); +! } +! } +! +! index = 0; /* points to 0,0 in table */ +! if (intf > 0x40500000) /* if(|x| > 64 */ +! { f = -1.0/f; +! index = 2; /* point to pi/2 upper, lower */ +! } +! else if( intf >= 0x3f900000 ) /* if |x| >= (1/64)... */ +! { +! intz = (intf + 0x00008000) & 0x7fff0000;/* round arg, keep upper */ +! pz[0] = intz; /* store as a double (z) */ +! pz[1] = 0; /* ...lower */ +! f = (f - z)/(1.0 + f*z); /* get reduced argument */ +! index = (intz - 0x3f900000) >> 15; /* (index >> 16) << 1) */ +! index += 4; /* skip over 0,0,pi/2,pi/2 */ +! } +! conup = __vlibm_TBL_atan1[index]; /* upper table */ +! 
conlo = __vlibm_TBL_atan1[index+1]; /* lower table */ +! tmp = f*f; +! poly = (f*tmp)*((p3*tmp + p2)*tmp + p1); +! ansu = conup + f; /* compute atan(f) upper */ +! ansl = (((conup - ansu) + f) + poly) + conlo; +! ans = ansu + ansl; +! ans = parray[soffset + sign] * ans; +! return ans; +! } + +/* 8 bytes = 1 double f.p. word */ +#define WSIZE 8 + + .align 32 !align with full D-cache line +.COEFFS: + .double 0r-1.428029046844299722E-01 !p[3] + .double 0r1.999999917247000615E-01 !p[2] + .double 0r-3.333333333329292858E-01 !p[1] + .double 0r-1.0, !constant -1.0 + .word 0x00008000,0x0 !for fp rounding of reduced arg + .word 0x7fff0000,0x0 !for fp truncation + .word 0x47900000,0 !a number close to 1.0E37 + .word 0x80000000,0x0 !mask for fp sign bit + .word 0x3f800000,0x0 !1.0/128.0 dummy "safe" argument + .type .COEFFS,#object + + ENTRY(__vatan) + save %sp,-SA(MINFRAME)-16,%sp + PIC_SETUP(g5) + PIC_SET(g5,__vlibm_TBL_atan1,o4) + PIC_SET(g5,.COEFFS,o0) +/* + __vatan(int n, double *x, int stridex, double *y, int stridey) + computes y(i) = atan( x(i) ), for i=1,n. 
Stridex, stridey + are the distance between x and y elements + + %i0 n + %i1 address of x + %i2 stride x + %i3 address of y + %i4 stride y +*/ + cmp %i0,0 !if n <=0, + ble,pn %icc,.RETURN !....then do nothing + sll %i2,3,%i2 !convert stride to byte count + sll %i4,3,%i4 !convert stride to byte count + +/* pre-load constants before beginning main loop */ + + ldd [%o0],%f58 !load p[3] + mov 2,%i5 !argcount = 2 (NOTE(review): was commented as 3 -- confirm) + + ldd [%o0+WSIZE],%f60 !load p[2] + add %fp,STACK_BIAS-8,%l1 !yaddr1 = &dummy + fzero %f18 !ansu1 = 0 + + ldd [%o0+2*WSIZE],%f62 !load p[1] + add %fp,STACK_BIAS-8,%l2 !yaddr2 = &dummy + fzero %f12 !(poly1) = 0 + + ldd [%o0+3*WSIZE],%f56 !-1.0 + fzero %f14 !tmp1 = 0 + + ldd [%o0+4*WSIZE],%f52 !load rounding mask + fzero %f16 !conup1 = 0 + + ldd [%o0+5*WSIZE],%f54 !load truncation mask + fzero %f36 !f1 = 0 + + ldd [%o0+6*WSIZE],%f50 !1.0e37 + fzero %f38 !f2 = 0 + + ldd [%o0+7*WSIZE],%f32 !mask for sign bit + + ldd [%o4+2*WSIZE],%f46 !pi/2 upper + ldd [%o4+(2*WSIZE+8)],%f48 !pi/2 lower + sethi %hi(0x40500000),%l6 !64.0 + sethi %hi(0x3f900000),%l7 !1/64.0 + mov 0,%l4 !index1 = 0 + mov 0,%l5 !index2 = 0 + +.MAINLOOP: + + /*--------------------------------------------------------------------------*/ + /*--------------------------------------------------------------------------*/ + /*--------------------------------------------------------------------------*/ + +.LOOP0: + deccc %i0 !--n + bneg 1f + mov %i1,%o5 !xuse = x (delay slot) + + ba 2f + nop !delay slot +1: + PIC_SET(g5,.COEFFS+8*WSIZE,o5) + dec %i5 !argcount-- +2: + sethi %hi(0x80000000),%o7 !mask for sign bit +/*2 */ sethi %hi(0x43600000),%o1 !big = 0x43600000,0 + ld [%o5],%o0 !intf = pf[0] = f upper + ldd [%o4+%l5],%f26 !conup2 = __vlibm_TBL_atan1[index2] + + sethi %hi(0x3e300000),%o2 !small = 0x3e300000,0 +/*4 */ andn %o0,%o7,%o0 !intf = fabs(intf) + ldd [%o5],%f34 !f = *x into f34 + + sub %o1,%o0,%o1 !(-) if intf > big +/*6 */ sub %o0,%o2,%o2 !(-) if intf < small + 
fand %f34,%f32,%f40 !sign0 = sign bit + 
fmuld %f38,%f38,%f24 !tmp2= f2*f2 + +/*7 */ orcc %o1,%o2,%g0 !(-) if either true + bneg,pn %icc,.SPECIAL0 !if (-) goto special cases below + fabsd %f34,%f34 !abs(f) (delay slot) + !---------------------- + + + sethi %hi(0x8000),%o7 !rounding bit +/*8 */ fpadd32 %f34,%f52,%f0 !intf + 0x00008000 (again) + faddd %f26,%f38,%f28 !ansu2 = conup2 + f2 + + add %o0,%o7,%o0 !intf + 0x00008000 (delay slot) +/*9*/ fand %f0,%f54,%f0 !pz[0] = intz = (intf + 0x00008000) & 0x7fff0000 (again) + fmuld %f58,%f24,%f22 !p[3]*tmp2 + +/*10 */ sethi %hi(0x7fff0000),%o7 !mask for rounding argument + fmuld %f34,%f0,%f10 !f*z + fsubd %f34,%f0,%f20 !f - z + add %o4,%l4,%l4 !base addr + index1 + fmuld %f14,%f12,%f12 !poly1 = (f1*tmp1)*((p3*tmp1 + p2)*tmp1 + p1) + faddd %f16,%f36,%f16 !(conup1 - ansu1) + f1 + +/*12 */ and %o0,%o7,%o0 !intz = (intf + 0x00008000) & 0x7fff0000 + faddd %f22,%f60,%f22 !p[3]*tmp2 + p[2] + ldd [%l4+WSIZE],%f14 !conlo1 = __vlibm_TBL_atan1[index+1] + +/*13 */ sub %o0,%l7,%o2 !intz - 0x3f900000 + fsubd %f10,%f56,%f10 !(f*z - (-1.0)) + faddd %f16,%f12,%f12 !((conup1 - ansu1) + f1) + poly1 + + cmp %o0,%l6 !(|f| > 64) + ble .ELSE0 !if(|f| > 64) then +/*15 */ sra %o2,15,%o3 !index = (intz - 0x3f900000) >> 15 + mov 2,%o1 !index == 2, point to conup, conlo = pi/2 upper, lower + ba .ENDIF0 !continue +/*16 */ fdivd %f56,%f34,%f34 !f = -1.0/f (delay slot) + .ELSE0: !else if( |x| >= (1/64)) + cmp %o0,%l7 !if intf >= 1/64 + bl .ENDIF0 !if( |x| >= (1/64) ) then... 
+ mov 0,%o1 !index == 0 , point to conup,conlo = 0,0 + add %o3,4,%o1 !index = index + 4 +/*16 */ fdivd %f20,%f10,%f34 !f = (f - z)/(1.0 + f*z), reduced argument + .ENDIF0: + +/*17*/ sll %o1,3,%l3 !index0 = index + mov %i3,%l0 !yaddr0 = address of y + faddd %f12,%f14,%f12 !ansl1 = (((conup1 - ansu1) + f1) + poly1) + conlo1 + fmuld %f22,%f24,%f22 !(p3*tmp2 + p2)*tmp2 + fsubd %f26,%f28,%f26 !conup2 - ansu2 + +/*20*/ add %i1,%i2,%i1 !x += stridex + add %i3,%i4,%i3 !y += stridey + faddd %f18,%f12,%f36 !ans1 = ansu1 + ansl1 + fmuld %f38,%f24,%f24 !f*tmp2 + faddd %f22,%f62,%f22 !(p3*tmp2 + p2)*tmp2 + p1 + +/*23*/ for %f36,%f42,%f36 !sign(ans1) = sign of argument + std %f36,[%l1] !*yaddr1 = ans1 + add %o4,%l5,%l5 !base addr + index2 + fmuld %f24,%f22,%f22 !poly2 = (f2*tmp2)*((p3*tmp2 + p2)*tmp2 + p1) + faddd %f26,%f38,%f26 !(conup2 - ansu2) + f2 + cmp %i5,0 !if argcount =0, we are done + be .RETURN + nop + + /*--------------------------------------------------------------------------*/ + /*--------------------------------------------------------------------------*/ + /*--------------------------------------------------------------------------*/ + +.LOOP1: +/*25*/ deccc %i0 !--n + bneg 1f + mov %i1,%o5 !xuse = x (delay slot) + ba 2f + nop !delay slot +1: + PIC_SET(g5,.COEFFS+8*WSIZE,o5) + dec %i5 !argcount-- +2: + +/*26*/ sethi %hi(0x80000000),%o7 !mask for sign bit + sethi %hi(0x43600000),%o1 !big = 0x43600000,0 + ld [%o5],%o0 !intf = pf[0] = f upper + +/*28*/ sethi %hi(0x3e300000),%o2 !small = 0x3e300000,0 + andn %o0,%o7,%o0 !intf = fabs(intf) + ldd [%o5],%f36 !f = *x into f36 + +/*30*/ sub %o1,%o0,%o1 !(-) if intf > big + sub %o0,%o2,%o2 !(-) if intf < small + fand %f36,%f32,%f42 !sign1 = sign bit + +/*31*/ orcc %o1,%o2,%g0 !(-) if either true + bneg,pn %icc,.SPECIAL1 !if (-) goto special cases below + fabsd %f36,%f36 !abs(f) (delay slot) + !---------------------- + +/*32*/ fpadd32 %f36,%f52,%f0 !intf + 0x00008000 (again) + ldd [%l5+WSIZE],%f24 !conlo2 = 
__vlibm_TBL_atan1[index2+1] + +/*33*/ fand %f0,%f54,%f0 !pz[0] = intz = (intf + 0x00008000) & 0x7fff0000 (again) + sethi %hi(0x8000),%o7 !rounding bit + faddd %f26,%f22,%f22 !((conup2 - ansu2) + f2) + poly2 + +/*34*/ add %o0,%o7,%o0 !intf + 0x00008000 (delay slot) + sethi %hi(0x7fff0000),%o7 !mask for rounding argument + fmuld %f36,%f0,%f10 !f*z + fsubd %f36,%f0,%f20 !f - z + +/*35*/ and %o0,%o7,%o0 !intz = (intf + 0x00008000) & 0x7fff0000 + faddd %f22,%f24,%f22 !ansl2 = (((conup2 - ansu2) + f2) + poly2) + conlo2 + +/*37*/ sub %o0,%l7,%o2 !intz - 0x3f900000 + fsubd %f10,%f56,%f10 !(f*z - (-1.0)) + ldd [%o4+%l3],%f6 !conup0 = __vlibm_TBL_atan1[index0] + + cmp %o0,%l6 !(|f| > 64) + ble .ELSE1 !if(|f| > 64) then +/*38*/ sra %o2,15,%o3 !index = (intz - 0x3f900000) >> 15 + mov 2,%o1 !index == 2, point to conup, conlo = pi/2 upper, lower + ba .ENDIF1 !continue +/*40*/ fdivd %f56,%f36,%f36 !f = -1.0/f (delay slot) + .ELSE1: !else if( |x| >= (1/64)) + cmp %o0,%l7 !if intf >= 1/64 + bl .ENDIF1 !if( |x| >= (1/64) ) then... 
+ mov 0,%o1 !index == 0 , point to conup,conlo = 0,0 + add %o3,4,%o1 !index = index + 4 +/*40*/ fdivd %f20,%f10,%f36 !f = (f - z)/(1.0 + f*z), reduced argument + .ENDIF1: + +/*41*/sll %o1,3,%l4 !index1 = index + mov %i3,%l1 !yaddr1 = address of y + fmuld %f34,%f34,%f4 !tmp0= f0*f0 + faddd %f28,%f22,%f38 !ans2 = ansu2 + ansl2 + +/*44*/add %i1,%i2,%i1 !x += stridex + add %i3,%i4,%i3 !y += stridey + fmuld %f58,%f4,%f2 !p[3]*tmp0 + faddd %f6,%f34,%f8 !ansu0 = conup0 + f0 + for %f38,%f44,%f38 !sign(ans2) = sign of argument + std %f38,[%l2] !*yaddr2 = ans2 + cmp %i5,0 !if argcount =0, we are done + be .RETURN + nop + + /*--------------------------------------------------------------------------*/ + /*--------------------------------------------------------------------------*/ + /*--------------------------------------------------------------------------*/ + +.LOOP2: +/*46*/ deccc %i0 !--n + bneg 1f + mov %i1,%o5 !xuse = x (delay slot) + ba 2f + nop !delay slot +1: + PIC_SET(g5,.COEFFS+8*WSIZE,o5) + dec %i5 !argcount-- +2: + +/*47*/ sethi %hi(0x80000000),%o7 !mask for sign bit + sethi %hi(0x43600000),%o1 !big = 0x43600000,0 + ld [%o5],%o0 !intf = pf[0] = f upper + +/*49*/ sethi %hi(0x3e300000),%o2 !small = 0x3e300000,0 + andn %o0,%o7,%o0 !intf = fabs(intf) + ldd [%o5],%f38 !f = *x into f38 + +/*51*/ sub %o1,%o0,%o1 !(-) if intf > big + sub %o0,%o2,%o2 !(-) if intf < small + fand %f38,%f32,%f44 !sign2 = sign bit + +/*52*/ orcc %o1,%o2,%g0 !(-) if either true + bneg,pn %icc,.SPECIAL2 !if (-) goto special cases below + fabsd %f38,%f38 !abs(f) (delay slot) + !---------------------- + +/*53*/ fpadd32 %f38,%f52,%f0 !intf + 0x00008000 (again) + faddd %f2,%f60,%f2 !p[3]*tmp0 + p[2] + +/*54*/ sethi %hi(0x8000),%o7 !rounding bit + fand %f0,%f54,%f0 !pz[0] = intz = (intf + 0x00008000) & 0x7fff0000 (again) + +/*55*/ add %o0,%o7,%o0 !intf + 0x00008000 (delay slot) + sethi %hi(0x7fff0000),%o7 !mask for rounding argument + fmuld %f38,%f0,%f10 !f*z + fsubd %f38,%f0,%f20 !f - z + +/*56*/ 
and %o0,%o7,%o0 !intz = (intf + 0x00008000) & 0x7fff0000 + fmuld %f2,%f4,%f2 !(p3*tmp0 + p2)*tmp0 + fsubd %f6,%f8,%f6 !conup0 - ansu0 + +/*58*/ sub %o0,%l7,%o2 !intz - 0x3f900000 + fsubd %f10,%f56,%f10 !(f*z - (-1.0)) + ldd [%o4+%l4],%f16 !conup1 = __vlibm_TBL_atan1[index1] + + cmp %o0,%l6 !(|f| > 64) + ble .ELSE2 !if(|f| > 64) then +/*60*/ sra %o2,15,%o3 !index = (intz - 0x3f900000) >> 15 + mov 2,%o1 !index == 2, point to conup, conlo = pi/2 upper, lower + ba .ENDIF2 !continue +/*61*/ fdivd %f56,%f38,%f38 !f = -1.0/f (delay slot) + .ELSE2: !else if( |x| >= (1/64)) + cmp %o0,%l7 !if intf >= 1/64 + bl .ENDIF2 !if( |x| >= (1/64) ) then... + mov 0,%o1 !index == 0 , point to conup,conlo = 0,0 + add %o3,4,%o1 !index = index + 4 +/*61*/ fdivd %f20,%f10,%f38 !f = (f - z)/(1.0 + f*z), reduced argument + .ENDIF2: + + +/*62*/ sll %o1,3,%l5 !index2 = index + mov %i3,%l2 !yaddr2 = address of y + fmuld %f34,%f4,%f4 !f0*tmp0 + faddd %f2,%f62,%f2 !(p3*tmp0 + p2)*tmp0 + p1 + fmuld %f36,%f36,%f14 !tmp1= f1*f1 + +/*65*/add %o4,%l3,%l3 !base addr + index0 + fmuld %f4,%f2,%f2 !poly0 = (f0*tmp0)*((p3*tmp0 + p2)*tmp0 + p1) + faddd %f6,%f34,%f6 !(conup0 - ansu0) + f0 + fmuld %f58,%f14,%f12 !p[3]*tmp1 + faddd %f16,%f36,%f18 !ansu1 = conup1 + f1 + ldd [%l3+WSIZE],%f4 !conlo0 = __vlibm_TBL_atan1[index0+1] + +/*68*/ add %i1,%i2,%i1 !x += stridex + add %i3,%i4,%i3 !y += stridey + faddd %f6,%f2,%f2 !((conup0 - ansu0) + f0) + poly0 + faddd %f12,%f60,%f12 !p[3]*tmp1 + p[2] + +/*71*/faddd %f2,%f4,%f2 !ansl0 = (((conup0 - ansu0) + f0) + poly0) + conlo0 + fmuld %f12,%f14,%f12 !(p3*tmp1 + p2)*tmp1 + fsubd %f16,%f18,%f16 !conup1 - ansu1 + +/*74*/faddd %f8,%f2,%f34 !ans0 = ansu0 + ansl0 + fmuld %f36,%f14,%f14 !f1*tmp1 + faddd %f12,%f62,%f12 !(p3*tmp1 + p2)*tmp1 + p1 + +/*77*/ for %f34,%f40,%f34 !sign(ans0) = sign of argument + std %f34,[%l0] !*yaddr0 = ans, always gets stored (delay slot) + cmp %i5,0 !if argcount =0, we are done + bg .MAINLOOP + nop + + 
/*--------------------------------------------------------------------------*/ + /*--------------------------------------------------------------------------*/ + /*--------------------------------------------------------------------------*/ + +.RETURN: + ret + restore %g0,%g0,%g0 + + /*--------------------------------------------------------------------------*/ + /*------------SPECIAL CASE HANDLING FOR LOOP0 ------------------------------*/ + /*--------------------------------------------------------------------------*/ + +/* at this point + %i1 x address + %o0 intf + %o2 intf - 0x3e300000 + %f34,36,38 f0,f1,f2 + %f40,42,44 sign0,sign1,sign2 +*/ + + .align 32 !align on I-cache boundary +.SPECIAL0: + orcc %o2,%g0,%g0 !(-) if intf < 0x3e300000 + bpos 1f !if >=...continue + sethi %hi(0x7ff00000),%g1 !upper word of Inf (we use 64-bit wide int for this) + ba 3f + faddd %f34,%f50,%f30 !dummy op just to generate exception (delay slot) +1: + ld [%o5+4],%o5 !load x lower word + sllx %o0,32,%o0 !left justify intf + sllx %g1,32,%g1 !left justify Inf + or %o0,%o5,%o0 !merge in lower intf + cmp %o0,%g1 !if intf > 0x7ff00000 00000000 + ble,pt %xcc,2f !pass thru if NaN + nop + fmuld %f34,%f34,%f34 !...... 
(x*x) trigger invalid exception + ba 3f + nop +2: + faddd %f46,%f48,%f34 !ans = pi/2 upper + pi/2 lower +3: + add %i1,%i2,%i1 !x += stridex + for %f34,%f40,%f34 !sign(ans) = sign of argument + std %f34,[%i3] !*y = ans + ba .LOOP0 !keep looping + add %i3,%i4,%i3 !y += stridey (delay slot) + + /*--------------------------------------------------------------------------*/ + /*-----------SPECIAL CASE HANDLING FOR LOOP1 -------------------------------*/ + /*--------------------------------------------------------------------------*/ + + .align 32 !align on I-cache boundary +.SPECIAL1: + orcc %o2,%g0,%g0 !(-) if intf < 0x3e300000 + bpos 1f !if >=...continue + sethi %hi(0x7ff00000),%g1 !upper word of Inf (we use 64-bit wide int for this) + ba 3f + faddd %f36,%f50,%f30 !dummy op just to generate exception (delay slot) +1: + ld [%o5+4],%o5 !load x lower word + sllx %o0,32,%o0 !left justify intf + sllx %g1,32,%g1 !left justify Inf + or %o0,%o5,%o0 !merge in lower intf + cmp %o0,%g1 !if intf > 0x7ff00000 00000000 + ble,pt %xcc,2f !pass thru if NaN + nop + fmuld %f36,%f36,%f36 !...... 
(x*x) trigger invalid exception + ba 3f + nop +2: + faddd %f46,%f48,%f36 !ans = pi/2 upper + pi/2 lower +3: + add %i1,%i2,%i1 !x += stridex + for %f36,%f42,%f36 !sign(ans) = sign of argument + std %f36,[%i3] !*y = ans + ba .LOOP1 !keep looping + add %i3,%i4,%i3 !y += stridey (delay slot) + + /*--------------------------------------------------------------------------*/ + /*------------SPECIAL CASE HANDLING FOR LOOP2 ------------------------------*/ + /*--------------------------------------------------------------------------*/ + + .align 32 !align on I-cache boundary +.SPECIAL2: + orcc %o2,%g0,%g0 !(-) if intf < 0x3e300000 + bpos 1f !if >=...continue + sethi %hi(0x7ff00000),%g1 !upper word of Inf (we use 64-bit wide int for this) + ba 3f + faddd %f38,%f50,%f30 !dummy op just to generate exception (delay slot) +1: + ld [%o5+4],%o5 !load x lower word + sllx %o0,32,%o0 !left justify intf + sllx %g1,32,%g1 !left justify Inf + or %o0,%o5,%o0 !merge in lower intf + cmp %o0,%g1 !if intf > 0x7ff00000 00000000 + ble,pt %xcc,2f !pass thru if NaN + nop + fmuld %f38,%f38,%f38 !...... (x*x) trigger invalid exception + ba 3f + nop +2: + faddd %f46,%f48,%f38 !ans = pi/2 upper + pi/2 lower +3: + add %i1,%i2,%i1 !x += stridex + for %f38,%f44,%f38 !sign(ans) = sign of argument + std %f38,[%i3] !*y = ans + ba .LOOP2 !keep looping + add %i3,%i4,%i3 !y += stridey + + /*--------------------------------------------------------------------------*/ + /*--------------------------------------------------------------------------*/ + /*--------------------------------------------------------------------------*/ + + SET_SIZE(__vatan) + +! 
.ident "03-20-96 Sparc V9 3-way-unrolled version" diff --git a/usr/src/lib/libmvec/common/vis/__vatan2.S b/usr/src/lib/libmvec/common/vis/__vatan2.S new file mode 100644 index 0000000000..7df30825b3 --- /dev/null +++ b/usr/src/lib/libmvec/common/vis/__vatan2.S @@ -0,0 +1,1078 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .file "__vatan2.S" + +#include "libm.h" + + RO_DATA + .align 64 +constants: + .word 0x3ff921fb,0x54442d18 ! pio2 + .word 0x3c91a626,0x33145c07 ! pio2_lo + .word 0xbfd55555,0x555554ee ! p1 + .word 0x3fc99999,0x997a1559 ! p2 + .word 0xbfc24923,0x158dfe02 ! p3 + .word 0x3fbc639d,0x0ed1347b ! p4 + .word 0xffffffff,0x00000000 ! mask + .word 0x3fc00000,0x00000000 ! twom3 + .word 0x46d00000,0x00000000 ! two110 + .word 0x3fe921fb,0x54442d18 ! pio4 + +! local storage indices + +#define xscl STACK_BIAS-0x8 +#define yscl STACK_BIAS-0x10 +#define twom3 STACK_BIAS-0x18 +#define two110 STACK_BIAS-0x20 +#define pio4 STACK_BIAS-0x28 +#define junk STACK_BIAS-0x30 +! 
sizeof temp storage - must be a multiple of 16 for V9 +#define tmps 0x30 + +! register use + +! i0 n +! i1 y +! i2 stridey +! i3 x +! i4 stridex +! i5 z + +! l0 k0 +! l1 k1 +! l2 k2 +! l3 hx +! l4 pz0 +! l5 pz1 +! l6 pz2 +! l7 stridez + +! the following are 64-bit registers in both V8+ and V9 + +! g1 __vlibm_TBL_atan2 +! g5 + +! o0 hy +! o1 0x00004000 +! o2 0x1420 +! o3 0x7fe00000 +! o4 0x03600000 +! o5 0x00100000 +! o7 + +! f0 y0 +! f2 x0 +! f4 t0 +! f6 ah0 +! f8 al0 +! f10 y1 +! f12 x1 +! f14 t1 +! f16 ah1 +! f18 al1 +! f20 y2 +! f22 x2 +! f24 t2 +! f26 ah2 +! f28 al2 +! f30 +! f32 +! f34 +! f36 sx0 +! f38 sx1 +! f40 sx2 +! f42 sy0 +! f44 sy1 +! f46 sy2 + +#define mask %f48 +#define signbit %f50 +#define pio2 %f52 +#define pio2_lo %f54 +#define p1 %f56 +#define p2 %f58 +#define p3 %f60 +#define p4 %f62 + + ENTRY(__vatan2) + save %sp,-SA(MINFRAME)-tmps,%sp + PIC_SETUP(l7) + PIC_SET(l7,constants,o0) + PIC_SET(l7,__vlibm_TBL_atan2,o1) + wr %g0,0x82,%asi ! set %asi for non-faulting loads + mov %o1, %g1 +#ifdef __sparcv9 + ldx [%fp+STACK_BIAS+0xb0],%l7 +#else + ld [%fp+0x5c],%l7 +#endif + ldd [%o0+0x00],pio2 ! load/set up constants + ldd [%o0+0x08],pio2_lo + ldd [%o0+0x10],p1 + ldd [%o0+0x18],p2 + ldd [%o0+0x20],p3 + ldd [%o0+0x28],p4 + ldd [%o0+0x30],mask + fzero signbit + fnegd signbit,signbit + sethi %hi(0x00004000),%o1 + sethi %hi(0x1420),%o2 + or %o2,%lo(0x1420),%o2 + sethi %hi(0x7fe00000),%o3 + sethi %hi(0x03600000),%o4 + sethi %hi(0x00100000),%o5 + ldd [%o0+0x38],%f0 ! copy rarely used constants to stack + ldd [%o0+0x40],%f2 + ldd [%o0+0x48],%f4 + std %f0,[%fp+twom3] + std %f2,[%fp+two110] + std %f4,[%fp+pio4] + sll %i2,3,%i2 ! scale strides + sll %i4,3,%i4 + sll %l7,3,%l7 + fzero %f20 ! loop prologue + fzero %f22 + fzero %f24 + fzero %f26 + fzero %f46 + add %fp,junk,%l6 + ld [%i1],%f0 ! *y + ld [%i1+4],%f1 + ld [%i3],%f8 ! *x + ld [%i3+4],%f9 + ld [%i1],%o0 ! hy + ba .loop + ld [%i3],%l3 ! hx + +! 
16-byte aligned + .align 16 +.loop: + fabsd %f0,%f4 + mov %i5,%l4 + add %i1,%i2,%i1 ! y += stridey + + fabsd %f8,%f2 + add %i3,%i4,%i3 ! x += stridex + add %i5,%l7,%i5 ! z += stridez + + fand %f0,signbit,%f42 + sethi %hi(0x80000000),%g5 + + fand %f8,signbit,%f36 + andn %o0,%g5,%o0 + andn %l3,%g5,%l3 + + fcmpd %fcc0,%f4,%f2 + + fmovd %f4,%f0 + + fmovdg %fcc0,%f2,%f0 ! swap if |y| > |x| + + fmovdg %fcc0,%f4,%f2 + mov %o0,%o7 + lda [%i1]%asi,%f10 ! preload next argument + + faddd %f26,%f20,%f26 + lda [%i1+4]%asi,%f11 + + faddd %f22,%f24,%f22 + movg %fcc0,%l3,%o0 + + movg %fcc0,%o7,%l3 + + fbu,pn %fcc0,.nan0 ! if x or y is nan +! delay slot + lda [%i3]%asi,%f18 + + sub %l3,%o0,%l0 ! hx - hy + sub %l3,%o3,%g5 + fabsd %f10,%f14 + lda [%i3+4]%asi,%f19 + + sub %l0,%o4,%o7 + faddd %f22,%f26,%f26 + + andcc %g5,%o7,%g0 + bge,pn %icc,.big0 ! if |x| or |x/y| is big +! delay slot + nop + + fabsd %f18,%f12 + cmp %o0,%o5 + bl,pn %icc,.small0 ! if |y| is small +! delay slot + lda [%i1]%asi,%o0 + + add %l0,%o1,%l0 ! k + addcc %i0,-1,%i0 + ble,pn %icc,.last1 +! delay slot + lda [%i3]%asi,%l3 + +.cont1: + srl %l0,10,%l0 + mov %i5,%l5 + fxor %f26,%f46,%f26 + st %f26,[%l6] + + fand %f10,signbit,%f44 + andn %l0,0x1f,%l0 + add %i1,%i2,%i1 + st %f27,[%l6+4] + + fand %f18,signbit,%f38 + cmp %l0,%o2 + movg %icc,%o2,%l0 + + fcmpd %fcc1,%f14,%f12 + add %i3,%i4,%i3 + add %i5,%l7,%i5 + + fmovd %f14,%f10 + add %l0,%g1,%l0 + sethi %hi(0x80000000),%g5 + + ldd [%l0+0x10],%f4 + fand %f2,mask,%f6 + andn %o0,%g5,%o0 + andn %l3,%g5,%l3 + + fmovdg %fcc1,%f12,%f10 + + fmovdg %fcc1,%f14,%f12 + mov %o0,%o7 + lda [%i1]%asi,%f20 + + fsubd %f2,%f6,%f30 + fmuld %f6,%f4,%f6 + movg %fcc1,%l3,%o0 + + fmuld %f0,%f4,%f8 + movg %fcc1,%o7,%l3 + + lda [%i1+4]%asi,%f21 + fbu,pn %fcc1,.nan1 +! delay slot + nop + + lda [%i3]%asi,%f28 + sub %l3,%o0,%l1 + sub %l3,%o3,%g5 + + lda [%i3+4]%asi,%f29 + fmuld %f30,%f4,%f30 + fsubd %f0,%f6,%f4 + sub %l1,%o4,%o7 + + fabsd %f20,%f24 + andcc %g5,%o7,%g0 + bge,pn %icc,.big1 +! 
delay slot + nop + + faddd %f2,%f8,%f8 + cmp %o0,%o5 + bl,pn %icc,.small1 +! delay slot + lda [%i1]%asi,%o0 + + fabsd %f28,%f22 + add %l1,%o1,%l1 + addcc %i0,-1,%i0 + lda [%i3]%asi,%l3 + + fsubd %f4,%f30,%f4 + srl %l1,10,%l1 + ble,pn %icc,.last2 +! delay slot + mov %i5,%l6 + +.cont2: + fand %f20,signbit,%f46 + andn %l1,0x1f,%l1 + add %i1,%i2,%i1 + + fand %f28,signbit,%f40 + cmp %l1,%o2 + movg %icc,%o2,%l1 + + fcmpd %fcc2,%f24,%f22 + add %i3,%i4,%i3 + add %i5,%l7,%i5 + + fdivd %f4,%f8,%f4 + fmovd %f24,%f20 + add %l1,%g1,%l1 + sethi %hi(0x80000000),%g5 + + ldd [%l1+0x10],%f14 + fand %f12,mask,%f16 + andn %o0,%g5,%o0 + andn %l3,%g5,%l3 + + fmovdg %fcc2,%f22,%f20 + + fmovdg %fcc2,%f24,%f22 + mov %o0,%o7 + + fsubd %f12,%f16,%f32 + fmuld %f16,%f14,%f16 + movg %fcc2,%l3,%o0 + + fnegd pio2_lo,%f8 ! al + fmuld %f10,%f14,%f18 + movg %fcc2,%o7,%l3 + + fzero %f0 + fbu,pn %fcc2,.nan2 +! delay slot + nop + + fmovdg %fcc0,signbit,%f0 + sub %l3,%o0,%l2 + sub %l3,%o3,%g5 + + fmuld %f32,%f14,%f32 + fsubd %f10,%f16,%f14 + sub %l2,%o4,%o7 + + faddd %f12,%f18,%f18 + andcc %g5,%o7,%g0 + bge,pn %icc,.big2 +! delay slot + nop + + fxor %f36,%f0,%f36 + cmp %o0,%o5 + bl,pn %icc,.small2 +! delay slot + nop + +.cont3: + fmovdg %fcc0,signbit,%f8 + add %l2,%o1,%l2 + + fsubd %f14,%f32,%f14 + srl %l2,10,%l2 + + fxor %f36,pio2_lo,%f30 ! al + andn %l2,0x1f,%l2 + + fxor %f36,pio2,%f0 ! ah + cmp %l2,%o2 + movg %icc,%o2,%l2 + + fxor %f42,%f36,%f42 ! 
sy + + faddd %f8,%f30,%f8 + ldd [%l0+0x8],%f30 + add %l2,%g1,%l2 + + fdivd %f14,%f18,%f14 + fzero %f10 + + ldd [%l2+0x10],%f24 + fand %f22,mask,%f26 + + fmovdg %fcc1,signbit,%f10 + + fmuld %f4,%f4,%f36 + faddd %f8,%f30,%f8 + + fsubd %f22,%f26,%f34 + fmuld %f26,%f24,%f26 + + fmuld %f20,%f24,%f28 + fxor %f38,%f10,%f38 + + fmuld %f4,p3,%f6 + fnegd pio2_lo,%f18 + + fmuld %f36,p2,%f2 + fmovdg %fcc1,signbit,%f18 + + fmuld %f36,%f4,%f36 + fxor %f38,pio2,%f10 + + fmuld %f34,%f24,%f34 + fsubd %f20,%f26,%f24 + + faddd %f22,%f28,%f28 + + faddd %f2,p1,%f2 + + fmuld %f36,p4,%f30 + fxor %f38,pio2_lo,%f32 + + fsubd %f24,%f34,%f24 + + fxor %f44,%f38,%f44 + + fmuld %f36,%f2,%f2 + faddd %f18,%f32,%f18 + ldd [%l1+0x8],%f32 + + fmuld %f36,%f36,%f36 + faddd %f6,%f30,%f30 + + fdivd %f24,%f28,%f24 + fzero %f20 + + fmovdg %fcc2,signbit,%f20 + + faddd %f2,%f8,%f2 + + fmuld %f14,%f14,%f38 + faddd %f18,%f32,%f18 + + fmuld %f36,%f30,%f36 + fxor %f40,%f20,%f40 + + fnegd pio2,%f6 ! ah + fmuld %f14,p3,%f16 + + fmovdg %fcc0,signbit,%f6 + + fmuld %f38,p2,%f12 + fnegd pio2_lo,%f28 + + faddd %f2,%f36,%f2 + fmuld %f38,%f14,%f38 + + faddd %f6,%f0,%f6 + ldd [%l0],%f0 + + fmovdg %fcc2,signbit,%f28 + + faddd %f12,p1,%f12 + + fmuld %f38,p4,%f32 + fxor %f40,pio2_lo,%f34 + + fxor %f40,pio2,%f20 + + faddd %f2,%f4,%f2 + + fmuld %f38,%f12,%f12 + fxor %f46,%f40,%f46 + + fmuld %f38,%f38,%f38 + faddd %f16,%f32,%f32 + + faddd %f28,%f34,%f28 + ldd [%l2+0x8],%f34 + + faddd %f6,%f0,%f6 + lda [%i1]%asi,%f0 ! 
preload next argument + + faddd %f12,%f18,%f12 + lda [%i1+4]%asi,%f1 + + fmuld %f24,%f24,%f40 + lda [%i3]%asi,%f8 + + fmuld %f38,%f32,%f38 + faddd %f28,%f34,%f28 + lda [%i3+4]%asi,%f9 + + fnegd pio2,%f16 + fmuld %f24,p3,%f26 + lda [%i1]%asi,%o0 + + fmovdg %fcc1,signbit,%f16 + lda [%i3]%asi,%l3 + + fmuld %f40,p2,%f22 + + faddd %f12,%f38,%f12 + fmuld %f40,%f24,%f40 + + faddd %f2,%f6,%f6 + + faddd %f16,%f10,%f16 + ldd [%l1],%f10 + + faddd %f22,p1,%f22 + + faddd %f12,%f14,%f12 + fmuld %f40,p4,%f34 + + fxor %f6,%f42,%f6 + st %f6,[%l4] + + faddd %f16,%f10,%f16 + st %f7,[%l4+4] + + fmuld %f40,%f22,%f22 + + fmuld %f40,%f40,%f40 + faddd %f26,%f34,%f34 + + fnegd pio2,%f26 + + faddd %f12,%f16,%f16 + + faddd %f22,%f28,%f22 + + fmuld %f40,%f34,%f40 + fmovdg %fcc2,signbit,%f26 + +! - + + fxor %f16,%f44,%f16 + st %f16,[%l5] + + faddd %f26,%f20,%f26 + st %f17,[%l5+4] + addcc %i0,-1,%i0 + + faddd %f22,%f40,%f22 + bg,pt %icc,.loop +! delay slot + ldd [%l2],%f20 + + + faddd %f26,%f20,%f26 + faddd %f22,%f24,%f22 + faddd %f22,%f26,%f26 +.done_from_special0: + fxor %f26,%f46,%f26 + st %f26,[%l6] + st %f27,[%l6+4] + ret + restore + + + + .align 16 +.last1: + fmovd pio2,%f10 ! set up dummy arguments + fmovd pio2,%f18 + fabsd %f10,%f14 + fabsd %f18,%f12 + sethi %hi(0x3ff921fb),%o0 + or %o0,%lo(0x3ff921fb),%o0 + mov %o0,%l3 + ba,pt %icc,.cont1 +! delay slot + add %fp,junk,%i5 + + + + .align 16 +.last2: + fmovd pio2,%f20 + fmovd pio2,%f28 + fabsd %f20,%f24 + fabsd %f28,%f22 + sethi %hi(0x3ff921fb),%o0 + or %o0,%lo(0x3ff921fb),%o0 + mov %o0,%l3 + ba,pt %icc,.cont2 +! delay slot + add %fp,junk,%l6 + + + + .align 16 +.nan0: + faddd %f22,%f26,%f26 +.nan0_from_special0: + fabsd %f10,%f14 + lda [%i3+4]%asi,%f19 + fabsd %f18,%f12 + lda [%i1]%asi,%o0 + lda [%i3]%asi,%l3 + ba,pt %icc,.special0 +! delay slot + fmuld %f0,%f2,%f6 + + + .align 16 +.big0: + fabsd %f18,%f12 + lda [%i1]%asi,%o0 + lda [%i3]%asi,%l3 + cmp %g5,%o5 + bge,pn %icc,.return_ah0 ! if hx >= 0x7ff00000 +! 
delay slot + nop + cmp %l0,%o4 + bge,pn %icc,1f ! if hx - hy >= 0x03600000 +! delay slot + nop + ldd [%fp+twom3],%f6 + fmuld %f0,%f6,%f0 + fmuld %f2,%f6,%f2 + add %l0,%o1,%l0 + addcc %i0,-1,%i0 + ble,pn %icc,.last1 +! delay slot + nop + ba,pt %icc,.cont1 +! delay slot + nop +1: + fbg,pn %fcc0,.return_ah0 +! delay slot + nop + fcmpd %fcc3,%f8,signbit + fbl,pn %fcc3,.return_ah0 +! delay slot + nop + ba,pt %icc,.special0 +! delay slot + fdivd %f0,%f2,%f6 + + + .align 16 +.small0: + lda [%i3]%asi,%l3 + fcmpd %fcc3,%f0,signbit + fbe,pt %fcc3,.return_ah0 +! delay slot + nop + ldd [%fp+two110],%f6 + fmuld %f0,%f6,%f0 + fmuld %f2,%f6,%f2 + st %f0,[%fp+yscl] + ld [%fp+yscl],%o7 + st %f2,[%fp+xscl] + ld [%fp+xscl],%l0 + sub %l0,%o7,%l0 + add %l0,%o1,%l0 + addcc %i0,-1,%i0 + ble,pn %icc,.last1 +! delay slot + nop + ba,pt %icc,.cont1 +! delay slot + nop + + + .align 16 +.return_ah0: + fzero %f0 + fmovdg %fcc0,signbit,%f0 + fxor %f36,%f0,%f36 + fxor %f36,pio2,%f0 + fxor %f42,%f36,%f42 + fnegd pio2,%f6 + fmovdg %fcc0,signbit,%f6 + faddd %f6,%f0,%f6 + sub %g5,%l0,%o7 + cmp %o7,%o5 + bl,pt %icc,1f ! if hy < 0x7ff00000 +! delay slot + nop + ldd [%fp+pio4],%f0 + faddd %f6,%f0,%f6 +1: + fdtoi %f6,%f4 +.special0: + fxor %f6,%f42,%f6 + st %f6,[%l4] + st %f7,[%l4+4] + addcc %i0,-1,%i0 + ble,pn %icc,.done_from_special0 +! delay slot + nop + fmovd %f10,%f0 + fmovd %f18,%f8 + fmovd %f14,%f4 + fmovd %f12,%f2 + mov %i5,%l4 + add %i1,%i2,%i1 + add %i3,%i4,%i3 + add %i5,%l7,%i5 + fand %f0,signbit,%f42 + sethi %hi(0x80000000),%g5 + fand %f8,signbit,%f36 + andn %o0,%g5,%o0 + andn %l3,%g5,%l3 + fcmpd %fcc0,%f4,%f2 + fmovd %f4,%f0 + fmovdg %fcc0,%f2,%f0 + fmovdg %fcc0,%f4,%f2 + mov %o0,%o7 + movg %fcc0,%l3,%o0 + movg %fcc0,%o7,%l3 + lda [%i1]%asi,%f10 + lda [%i1+4]%asi,%f11 + fbu,pn %fcc0,.nan0_from_special0 +! delay slot + lda [%i3]%asi,%f18 + fabsd %f10,%f14 + lda [%i3+4]%asi,%f19 + sub %l3,%o0,%l0 + sub %l3,%o3,%g5 + sub %l0,%o4,%o7 + andcc %g5,%o7,%g0 + bge,pn %icc,.big0 +! 
delay slot + nop + fabsd %f18,%f12 + cmp %o0,%o5 + bl,pn %icc,.small0 +! delay slot + lda [%i1]%asi,%o0 + add %l0,%o1,%l0 + addcc %i0,-1,%i0 + ble,pn %icc,.last1 +! delay slot + lda [%i3]%asi,%l3 + ba,pt %icc,.cont1 +! delay slot + nop + + + + .align 16 +.nan1: + fmuld %f30,%f4,%f30 + fsubd %f0,%f6,%f4 + faddd %f2,%f8,%f8 + fsubd %f4,%f30,%f4 +.nan1_from_special1: + lda [%i3]%asi,%f28 + lda [%i3+4]%asi,%f29 + fabsd %f20,%f24 + lda [%i1]%asi,%o0 + fabsd %f28,%f22 + lda [%i3]%asi,%l3 + mov %i5,%l6 + ba,pt %icc,.special1 +! delay slot + fmuld %f10,%f12,%f16 + + + .align 16 +.big1: + faddd %f2,%f8,%f8 + fsubd %f4,%f30,%f4 +.big1_from_special1: + lda [%i1]%asi,%o0 + fabsd %f28,%f22 + lda [%i3]%asi,%l3 + mov %i5,%l6 + cmp %g5,%o5 + bge,pn %icc,.return_ah1 +! delay slot + nop + cmp %l1,%o4 + bge,pn %icc,1f +! delay slot + nop + ldd [%fp+twom3],%f16 + fmuld %f10,%f16,%f10 + fmuld %f12,%f16,%f12 + add %l1,%o1,%l1 + srl %l1,10,%l1 + addcc %i0,-1,%i0 + ble,pn %icc,.last2 +! delay slot + nop + ba,pt %icc,.cont2 +! delay slot + nop +1: + fbg,pn %fcc1,.return_ah1 +! delay slot + nop + fcmpd %fcc3,%f18,signbit + fbl,pn %fcc3,.return_ah1 +! delay slot + nop + ba,pt %icc,.special1 +! delay slot + fdivd %f10,%f12,%f16 + + + .align 16 +.small1: + fsubd %f4,%f30,%f4 +.small1_from_special1: + fabsd %f28,%f22 + lda [%i3]%asi,%l3 + mov %i5,%l6 + fcmpd %fcc3,%f10,signbit + fbe,pt %fcc3,.return_ah1 +! delay slot + nop + ldd [%fp+two110],%f16 + fmuld %f10,%f16,%f10 + fmuld %f12,%f16,%f12 + st %f10,[%fp+yscl] + ld [%fp+yscl],%o7 + st %f12,[%fp+xscl] + ld [%fp+xscl],%l1 + sub %l1,%o7,%l1 + add %l1,%o1,%l1 + srl %l1,10,%l1 + addcc %i0,-1,%i0 + ble,pn %icc,.last2 +! delay slot + nop + ba,pt %icc,.cont2 +! delay slot + nop + + + .align 16 +.return_ah1: + fzero %f10 + fmovdg %fcc1,signbit,%f10 + fxor %f38,%f10,%f38 + fxor %f38,pio2,%f10 + fxor %f44,%f38,%f44 + fnegd pio2,%f16 + fmovdg %fcc1,signbit,%f16 + faddd %f16,%f10,%f16 + sub %g5,%l1,%o7 + cmp %o7,%o5 + bl,pt %icc,1f +! 
delay slot + nop + ldd [%fp+pio4],%f10 + faddd %f16,%f10,%f16 +1: + fdtoi %f16,%f14 +.special1: + fxor %f16,%f44,%f16 + st %f16,[%l5] + st %f17,[%l5+4] + addcc %i0,-1,%i0 + bg,pn %icc,1f +! delay slot + nop + fmovd pio2,%f20 ! set up dummy argument + fmovd pio2,%f28 + fabsd %f20,%f24 + fabsd %f28,%f22 + sethi %hi(0x3ff921fb),%o0 + or %o0,%lo(0x3ff921fb),%o0 + mov %o0,%l3 + add %fp,junk,%i5 +1: + fmovd %f20,%f10 + fmovd %f28,%f18 + fmovd %f24,%f14 + fmovd %f22,%f12 + mov %i5,%l5 + add %i1,%i2,%i1 + add %i3,%i4,%i3 + add %i5,%l7,%i5 + fand %f10,signbit,%f44 + sethi %hi(0x80000000),%g5 + fand %f18,signbit,%f38 + andn %o0,%g5,%o0 + andn %l3,%g5,%l3 + fcmpd %fcc1,%f14,%f12 + fmovd %f14,%f10 + fmovdg %fcc1,%f12,%f10 + fmovdg %fcc1,%f14,%f12 + mov %o0,%o7 + movg %fcc1,%l3,%o0 + movg %fcc1,%o7,%l3 + lda [%i1]%asi,%f20 + lda [%i1+4]%asi,%f21 + fbu,pn %fcc1,.nan1_from_special1 +! delay slot + nop + lda [%i3]%asi,%f28 + lda [%i3+4]%asi,%f29 + fabsd %f20,%f24 + sub %l3,%o0,%l1 + sub %l3,%o3,%g5 + sub %l1,%o4,%o7 + andcc %g5,%o7,%g0 + bge,pn %icc,.big1_from_special1 +! delay slot + nop + cmp %o0,%o5 + bl,pn %icc,.small1_from_special1 +! delay slot + lda [%i1]%asi,%o0 + fabsd %f28,%f22 + lda [%i3]%asi,%l3 + add %l1,%o1,%l1 + srl %l1,10,%l1 + addcc %i0,-1,%i0 + ble,pn %icc,.last2 +! delay slot + mov %i5,%l6 + ba,pt %icc,.cont2 +! delay slot + nop + + + + .align 16 +.nan2: + fmovdg %fcc0,signbit,%f0 + fmuld %f32,%f14,%f32 + fsubd %f10,%f16,%f14 + faddd %f12,%f18,%f18 + fxor %f36,%f0,%f36 +.nan2_from_special2: + ba,pt %icc,.special2 +! delay slot + fmuld %f20,%f22,%f26 + + + .align 16 +.big2: + fxor %f36,%f0,%f36 +.big2_from_special2: + cmp %g5,%o5 + bge,pn %icc,.return_ah2 +! delay slot + nop + cmp %l2,%o4 + bge,pn %icc,1f +! delay slot + nop + ldd [%fp+twom3],%f26 + fmuld %f20,%f26,%f20 + fmuld %f22,%f26,%f22 + ba,pt %icc,.cont3 +! delay slot + nop +1: + fbg,pn %fcc2,.return_ah2 +! delay slot + nop + fcmpd %fcc3,%f28,signbit + fbl,pn %fcc3,.return_ah2 +! 
delay slot + nop + ba,pt %icc,.special2 +! delay slot + fdivd %f20,%f22,%f26 + + + .align 16 +.small2: + fcmpd %fcc3,%f20,signbit + fbe,pt %fcc3,.return_ah2 +! delay slot + nop + ldd [%fp+two110],%f26 + fmuld %f20,%f26,%f20 + fmuld %f22,%f26,%f22 + st %f20,[%fp+yscl] + ld [%fp+yscl],%o7 + st %f22,[%fp+xscl] + ld [%fp+xscl],%l2 + sub %l2,%o7,%l2 + ba,pt %icc,.cont3 +! delay slot + nop + + + .align 16 +.return_ah2: + fzero %f20 + fmovdg %fcc2,signbit,%f20 + fxor %f40,%f20,%f40 + fxor %f40,pio2,%f20 + fxor %f46,%f40,%f46 + fnegd pio2,%f26 + fmovdg %fcc2,signbit,%f26 + faddd %f26,%f20,%f26 + sub %g5,%l2,%o7 + cmp %o7,%o5 + bl,pt %icc,1f +! delay slot + nop + ldd [%fp+pio4],%f20 + faddd %f26,%f20,%f26 +1: + fdtoi %f26,%f24 +.special2: + fxor %f26,%f46,%f26 + st %f26,[%l6] + st %f27,[%l6+4] + addcc %i0,-1,%i0 + bg,pn %icc,1f +! delay slot + nop + fmovd pio2,%f20 ! set up dummy argument + fmovd pio2,%f22 + fzero %f40 + fzero %f46 + mov 0,%l2 + ba,pt %icc,.cont3 +! delay slot + add %fp,junk,%l6 +1: + lda [%i1]%asi,%f20 + lda [%i1+4]%asi,%f21 + lda [%i3]%asi,%f28 + lda [%i3+4]%asi,%f29 + fabsd %f20,%f24 + lda [%i1]%asi,%o0 + fabsd %f28,%f22 + lda [%i3]%asi,%l3 + mov %i5,%l6 + fand %f20,signbit,%f46 + add %i1,%i2,%i1 + fand %f28,signbit,%f40 + fcmpd %fcc2,%f24,%f22 + add %i3,%i4,%i3 + add %i5,%l7,%i5 + fmovd %f24,%f20 + sethi %hi(0x80000000),%g5 + andn %o0,%g5,%o0 + andn %l3,%g5,%l3 + fmovdg %fcc2,%f22,%f20 + fmovdg %fcc2,%f24,%f22 + mov %o0,%o7 + movg %fcc2,%l3,%o0 + movg %fcc2,%o7,%l3 + fbu,pn %fcc2,.nan2_from_special2 +! delay slot + nop + sub %l3,%o0,%l2 + sub %l3,%o3,%g5 + sub %l2,%o4,%o7 + andcc %g5,%o7,%g0 + bge,pn %icc,.big2_from_special2 +! delay slot + nop + cmp %o0,%o5 + bl,pn %icc,.small2 +! delay slot + nop + ba,pt %icc,.cont3 +! 
delay slot + nop + + SET_SIZE(__vatan2) + diff --git a/usr/src/lib/libmvec/common/vis/__vatan2f.S b/usr/src/lib/libmvec/common/vis/__vatan2f.S new file mode 100644 index 0000000000..2e6319eac6 --- /dev/null +++ b/usr/src/lib/libmvec/common/vis/__vatan2f.S @@ -0,0 +1,3379 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .file "__vatan2f.S" + +#include "libm.h" + + RO_DATA + .align 64 +.CONST_TBL: + .word 0xbff921fb, 0x54442d18 ! -M_PI_2 + .word 0x3ff921fb, 0x54442d18 ! M_PI_2 + .word 0xbff921fb, 0x54442d18 ! -M_PI_2 + .word 0x3ff921fb, 0x54442d18 ! M_PI_2 + .word 0xc00921fb, 0x54442d18 ! -M_PI + .word 0x400921fb, 0x54442d18 ! M_PI + .word 0x80000000, 0x00000000 ! -0.0 + .word 0x00000000, 0x00000000 ! 0.0 + + .word 0xbff00000, 0x00000000 ! -1.0 + .word 0x3ff00000, 0x00000000 ! 1.0 + + .word 0x3fefffff, 0xfe79bf93 ! K0 = 9.99999997160545464888e-01 + .word 0xbfd55552, 0xf0db4320 ! K1 = -3.33332762919825514315e-01 + .word 0x3fc998f8, 0x2493d066 ! K2 = 1.99980752811487135558e-01 + .word 0xbfc240b8, 0xd994abf9 ! 
K3 = -1.42600160828209047720e-01 + .word 0x3fbbfc9e, 0x8c2b0243 ! K4 = 1.09323415013030928421e-01 + .word 0xbfb56013, 0x64b1cac3 ! K5 = -8.34972496830160174704e-02 + .word 0x3fad3ad7, 0x9f53e142 ! K6 = 5.70895559303061900411e-02 + .word 0xbf9f148f, 0x2a829af1 ! K7 = -3.03518647857811706139e-02 + .word 0x3f857a8c, 0x747ed314 ! K8 = 1.04876492549493055747e-02 + .word 0xbf5bdf39, 0x729124b6 ! K9 = -1.70117006406859722727e-03 + + .word 0x3fe921fb, 0x54442d18 ! M_PI_4 + .word 0x36a00000, 0x00000000 ! 2^(-149) + +#define counter %o3 +#define stridex %i4 +#define stridey %i5 +#define stridez %l1 +#define cmul_arr %i0 +#define cadd_arr %i2 +#define _0x7fffffff %l0 +#define _0x7f800000 %l2 + +#define K0 %f42 +#define K1 %f44 +#define K2 %f46 +#define K3 %f48 +#define K4 %f50 +#define K5 %f52 +#define K6 %f54 +#define K7 %f56 +#define K8 %f58 +#define K9 %f60 + +#define tmp_counter STACK_BIAS-32 +#define tmp_py STACK_BIAS-24 +#define tmp_px STACK_BIAS-16 +#define tmp_pz STACK_BIAS-8 + +! sizeof temp storage - must be a multiple of 16 for V9 +#define tmps 0x20 + +!-------------------------------------------------------------------- +! !!!!! vatan2f algorithm !!!!! +! uy0 = *(int*)py; +! ux0 = *(int*)px; +! ay0 = uy0 & 0x7fffffff; +! ax0 = ux0 & 0x7fffffff; +! if ( ax0 >= 0x7f800000 || ay0 >= 0x7f800000 ) +! { +! /* |X| or |Y| = Nan */ +! if ( ax0 > 0x7f800000 || ay0 > 0x7f800000 ) +! { +! ftmp0 = *(float*)&ax0 * *(float*)&ay0; +! *pz = ftmp0; +! } +! signx0 = (unsigned)ux0 >> 30; +! signx0 &= 2; +! signy0 = uy0 >> 31; +! if (ay0 == 0x7f800000) +! signx0 = (ax0 == 0x7f800000) ? signx0 + 1 : 2; +! else +! signx0 += signx0; +! res = signx0 * M_PI_4; +! signy0 <<= 3; +! dtmp0 = *(double*)((char*)(cmul_arr + 1) + signy0); +! res *= dtmp0; +! ftmp0 = (float) res; +! *pz = ftmp0; +! goto next; +! } +! if ( ax0 == 0 && ay0 == 0 ) +! { +! signy0 = uy0 >> 28; +! signx0 = ux0 >> 27; +! ldiff0 = ax0 - ay0; +! ldiff0 >>= 31; +! signx0 &= -16; +! signy0 &= -8; +! ldiff0 <<= 5; +! 
signx0 += signy0; +! res = *(double*)((char*)(cadd_arr + 7) + ldiff0 + signx0 + signy0); +! ftmp0 = (float) res; +! *pz = ftmp0; +! goto next; +! } +! ldiff0 = ax0 - ay0; +! ldiff0 >>= 31; +! addrc0 = (char*)px - (char*)py; +! addrc0 &= ldiff0; +! fy0 = *(float*)((char*)py + addrc0); +! fx0 = *(float*)((char*)px - addrc0); +! itmp0 = *(int*)&fy0; +! if((itmp0 & 0x7fffffff) < 0x00800000) +! { +! itmp0 >>= 28; +! itmp0 &= -8; +! fy0 = fabsf(fy0); +! dtmp0 = (double) *(int*)&fy0; +! dtmp0 *= C2ONM149; +! dsign = *(double*)((char*)cmul_arr + itmp0); +! dtmp0 *= dsign; +! y0 = dtmp0; +! } +! else +! y0 = (double)fy0; +! itmp0 = *(int*)&fx0; +! if((itmp0 & 0x7fffffff) < 0x00800000) +! { +! itmp0 >>= 28; +! itmp0 &= -8; +! fx0 = fabsf(fx0); +! dtmp0 = (double) *(int*)&fx0; +! dtmp0 *= C2ONM149; +! dsign = *(double*)((char*)cmul_arr + itmp0); +! dtmp0 *= dsign; +! x0 = dtmp0; +! } +! else +! x0 = (double)fx0; +! px += stridex; +! py += stridey; +! x0 = y0 / x0; +! x20 = x0 * x0; +! dtmp0 = K9 * x20; +! dtmp0 += K8; +! dtmp0 *= x20; +! dtmp0 += K7; +! dtmp0 *= x20; +! dtmp0 += K6; +! dtmp0 *= x20; +! dtmp0 += K5; +! dtmp0 *= x20; +! dtmp0 += K4; +! dtmp0 *= x20; +! dtmp0 += K3; +! dtmp0 *= x20; +! dtmp0 += K2; +! dtmp0 *= x20; +! dtmp0 += K1; +! dtmp0 *= x20; +! dtmp0 += K0; +! x0 = dtmp0 * x0; +! signy0 = uy0 >> 28; +! signy0 &= -8; +! signx0 = ux0 >> 27; +! signx0 &= -16; +! ltmp0 = ldiff0 << 5; +! ltmp0 += (char*)cadd_arr; +! ltmp0 += signx0; +! cadd0 = *(double*)(ltmp0 + signy0); +! cmul0_ind = ldiff0 << 3; +! cmul0 = *(double*)((char*)cmul_arr + cmul0_ind); +! dtmp0 = cmul0 * x0; +! dtmp0 = cadd0 + dtmp0; +! ftmp0 = (float)dtmp0; +! *pz = ftmp0; +! pz += stridez; +! 
+!-------------------------------------------------------------------- + + ENTRY(__vatan2f) + save %sp,-SA(MINFRAME)-tmps,%sp + PIC_SETUP(l7) + PIC_SET(l7,.CONST_TBL,g5) + +#ifdef __sparcv9 + ldx [%fp+STACK_BIAS+176],%l7 +#else + ld [%fp+STACK_BIAS+92],%l7 +#endif + + st %i0,[%fp+tmp_counter] + sethi %hi(0x7ffffc00),_0x7fffffff + add _0x7fffffff,1023,_0x7fffffff + or %g0,%i2,%o2 + sll %l7,2,stridez + + sethi %hi(0x7f800000),_0x7f800000 + mov %g5,%g1 + + or %g0,stridey,%o4 + add %g1,56,cadd_arr + + sll %o2,2,stridey + add %g1,72,cmul_arr + + ldd [%g1+80],K0 + ldd [%g1+80+8],K1 + ldd [%g1+80+16],K2 + ldd [%g1+80+24],K3 + ldd [%g1+80+32],K4 + ldd [%g1+80+40],K5 + ldd [%g1+80+48],K6 + ldd [%g1+80+56],K7 + ldd [%g1+80+64],K8 + ldd [%g1+80+72],K9 + + sll stridex,2,stridex + + stx %i1,[%fp+tmp_py] + stx %i3,[%fp+tmp_px] +.begin: + ld [%fp+tmp_counter],counter + ldx [%fp+tmp_py],%i1 + ldx [%fp+tmp_px],%i3 + st %g0,[%fp+tmp_counter] +.begin1: + subcc counter,1,counter + bneg,pn %icc,.exit + nop + + lda [%i1]0x82,%l4 ! (0_0) uy0 = *(int*)py; + + lda [%i3]0x82,%l3 ! (0_0) ux0 = *(int*)px; + + and %l4,_0x7fffffff,%l7 ! (0_0) ay0 = uy0 & 0x7fffffff; + + cmp %l7,_0x7f800000 + bge,pn %icc,.spec0 + and %l3,_0x7fffffff,%l6 ! (0_0) ax0 = ux0 & 0x7fffffff; + + cmp %l6,_0x7f800000 + bge,pn %icc,.spec0 + sethi %hi(0x00800000),%o5 + + cmp %l6,%o5 + bl,pn %icc,.spec1 + sub %l6,%l7,%o2 ! (0_0) ldiff0 = ax0 - ay0; + + cmp %l7,%o5 + bl,pn %icc,.spec1 + nop + + stx %o4,[%fp+tmp_pz] + sra %o2,31,%l7 ! (0_0) ldiff0 >>= 31; + sub %i3,%i1,%l6 ! (0_0) addrc0 = (char*)px - (char*)py; + + and %l6,%l7,%o2 ! (0_0) addrc0 &= ldiff0; + + lda [%i1+%o2]0x82,%f0 ! (0_0) fy0 = *(float*)((char*)py + addrc0); + sub %i3,%o2,%o4 ! (0_0) (char*)px - addrc0 + + lda [%o4]0x82,%f2 ! (0_0) fx0 = *(float*)((char*)px - addrc0); + sll %l7,5,%l6 ! (0_0) ltmp0 = ldiff0 << 5; + + sra %l3,27,%o5 ! (0_0) signx0 = ux0 >> 27; + add %i1,stridey,%i1 ! py += stridey + + add %i3,stridex,%i3 ! 
px += stridex + + lda [%i1]0x82,%l3 ! (1_0) uy0 = *(int*)py; + sra %l4,28,%o4 ! (0_0) signy0 = uy0 >> 28; + + add %l6,cadd_arr,%l6 ! (0_0) ltmp0 += (char*)cadd_arr; + + fstod %f0,%f40 ! (0_0) y0 = (double)fy0; + + fstod %f2,%f2 ! (0_0) x0 = (double)fx0; + +.spec1_cont: + lda [%i3]0x82,%l4 ! (1_0) ux0 = *(int*)px; + and %o5,-16,%o5 ! (0_0) signx0 &= -16; + + and %o4,-8,%o4 ! (0_0) signy0 &= -8; + + fdivd %f40,%f2,%f12 ! (0_0) x0 = y0 / x0; + + add %l6,%o5,%o1 ! (0_0) ltmp0 += signx0; + + and %l4,_0x7fffffff,%l6 ! (1_0) ax0 = ux0 & 0x7fffffff; + sethi %hi(0x00800000),%o5 + + cmp %l6,%o5 + bl,pn %icc,.u0 + and %l3,_0x7fffffff,%g1 ! (1_0) ay0 = uy0 & 0x7fffffff; +.c0: + cmp %g1,%o5 + bl,pn %icc,.u1 + ldd [%o1+%o4],%f34 ! (0_0) cadd0 = *(double*)(ltmp0 + signy0); +.c1: + cmp %l6,_0x7f800000 + bge,pn %icc,.u2 + sub %l6,%g1,%o1 ! (1_0) ldiff0 = ax0 - ay0; +.c2: + cmp %g1,_0x7f800000 + bge,pn %icc,.u3 + nop +.c3: + sra %o1,31,%g1 ! (1_0) ldiff0 >>= 31; + sub %i3,%i1,%l6 ! (1_0) addrc0 = (char*)px - (char*)py; + + and %l6,%g1,%o1 ! (1_0) addrc0 &= ldiff0; + + lda [%i1+%o1]0x82,%f0 ! (1_0) fy0 = *(float*)((char*)py + addrc0); + sub %i3,%o1,%o4 ! (1_0) (char*)px - addrc0; + + lda [%o4]0x82,%f2 ! (1_0) fx0 = *(float*)((char*)px - addrc0); + sll %g1,5,%l6 ! (1_0) ltmp0 = ldiff0 << 5; + + cmp %o5,_0x7f800000 ! (1_0) b0 ? 0x7f800000 + bge,pn %icc,.update0 ! (1_0) if ( b0 > 0x7f800000 ) + nop +.cont0: + add %i1,stridey,%i1 ! py += stridey + fstod %f0,%f40 ! (1_0) y0 = (double)fy0; + + sra %l4,27,%o5 ! (1_0) signx0 = ux0 >> 27; + add %i3,stridex,%i3 ! px += stridex + + sra %l3,28,%o4 ! (1_0) signy0 = uy0 >> 28; + add %l6,cadd_arr,%l6 ! (1_0) ltmp0 += (char*)cadd_arr; + fstod %f2,%f2 ! (1_0) x0 = (double)fx0; +.d0: + and %o5,-16,%o5 ! (1_0) signx0 &= -16; + and %o4,-8,%o4 ! (1_0) signy0 &= -8; + + lda [%i1]0x82,%l4 ! (2_0) uy0 = *(int*)py; + + lda [%i3]0x82,%l3 ! (2_0) ux0 = *(int*)px; + fdivd %f40,%f2,%f10 ! (1_0) x0 = y0 / x0; + + fmuld %f12,%f12,%f20 ! 
(0_0) x20 = x0 * x0; + + add %l6,%o5,%o2 ! (1_0) ltmp0 += signx0; + + and %l3,_0x7fffffff,%l6 ! (2_0) ax0 = ux0 & 0x7fffffff; + sethi %hi(0x00800000),%o5 + + cmp %l6,%o5 + bl,pn %icc,.u4 + and %l4,_0x7fffffff,%g5 ! (2_0) ay0 = uy0 & 0x7fffffff; +.c4: + cmp %g5,%o5 + bl,pn %icc,.u5 + fmuld K9,%f20,%f40 ! (0_0) dtmp0 = K9 * x20; +.c5: + cmp %l6,_0x7f800000 + bge,pn %icc,.u6 + ldd [%o2+%o4],%f32 ! (1_0) cadd0 = *(double*)(ltmp0 + signy0); +.c6: + cmp %g5,_0x7f800000 + bge,pn %icc,.u7 + sub %l6,%g5,%o2 ! (2_0) ldiff0 = ax0 - ay0; +.c7: + sra %o2,31,%g5 ! (2_0) ldiff0 >>= 31; + sub %i3,%i1,%l6 ! (2_0) addrc0 = (char*)px - (char*)py; + + faddd %f40,K8,%f40 ! (0_0) dtmp0 += K8; + and %l6,%g5,%o2 ! (2_0) addrc0 &= ldiff0; + + lda [%i1+%o2]0x82,%f0 ! (2_0) fy0 = *(float*)((char*)py + addrc0); + sub %i3,%o2,%o4 ! (2_0) (char*)px - addrc0; + + lda [%o4]0x82,%f2 ! (2_0) fx0 = *(float*)((char*)px - addrc0); + + cmp %o5,_0x7f800000 ! (2_0) b0 ? 0x7f800000 + bge,pn %icc,.update1 ! (2_0) if ( b0 > 0x7f800000 ) + nop +.cont1: + fmuld %f40,%f20,%f30 ! (0_0) dtmp0 *= x20; + sll %g5,5,%l6 ! (2_0) ltmp0 = ldiff0 << 5; + add %i1,stridey,%i1 ! py += stridey + fstod %f0,%f40 ! (2_0) y0 = (double)fy0; + + sra %l3,27,%o5 ! (2_0) signx0 = ux0 >> 27; + add %i3,stridex,%i3 ! px += stridex + + fstod %f2,%f2 ! (2_0) x0 = (double)fx0; + sra %l4,28,%o4 ! (2_0) signy0 = uy0 >> 28; + add %l6,cadd_arr,%l6 ! (2_0) ltmp0 += (char*)cadd_arr; +.d1: + lda [%i1]0x82,%l3 ! (3_0) uy0 = *(int*)py; + and %o5,-16,%o5 ! (2_0) signx0 &= -16; + faddd %f30,K7,%f30 ! (0_0) dtmp0 += K7; + + lda [%i3]0x82,%l4 ! (3_0) ux0 = *(int*)px; + + fdivd %f40,%f2,%f8 ! (2_0) x0 = y0 / x0; + + fmuld %f10,%f10,%f18 ! (1_0) x20 = x0 * x0; + + add %l6,%o5,%o1 ! (2_0) ltmp0 += signx0; + and %o4,-8,%o4 ! (2_0) signy0 &= -8; + fmuld %f30,%f20,%f30 ! (0_0) dtmp0 *= x20; + + and %l4,_0x7fffffff,%l6 ! (3_0) ax0 = ux0 & 0x7fffffff; + sethi %hi(0x00800000),%o5 + + cmp %l6,%o5 + bl,pn %icc,.u8 + and %l3,_0x7fffffff,%o0 ! 
(3_0) ay0 = uy0 & 0x7fffffff; +.c8: + cmp %o0,%o5 + bl,pn %icc,.u9 + fmuld K9,%f18,%f40 ! (1_0) dtmp0 = K9 * x20; +.c9: + cmp %l6,_0x7f800000 + bge,pn %icc,.u10 + faddd %f30,K6,%f16 ! (0_0) dtmp0 += K6; +.c10: + cmp %o0,_0x7f800000 + bge,pn %icc,.u11 + ldd [%o1+%o4],%f30 ! (2_0) cadd0 = *(double*)(ltmp0 + signy0); +.c11: + sub %l6,%o0,%o1 ! (3_0) ldiff0 = ax0 - ay0; + + sra %o1,31,%o0 ! (3_0) ldiff0 >>= 31; + sub %i3,%i1,%l6 ! (3_0) addrc0 = (char*)px - (char*)py; + + faddd %f40,K8,%f40 ! (1_0) dtmp0 += K8; + and %l6,%o0,%o1 ! (3_0) addrc0 &= ldiff0; + fmuld %f16,%f20,%f16 ! (0_0) dtmp0 *= x20; + + lda [%i1+%o1]0x82,%f0 ! (3_0) fy0 = *(float*)((char*)py + addrc0); + sub %i3,%o1,%o4 ! (3_0) (char*)px - addrc0; + + lda [%o4]0x82,%f1 ! (3_0) fx0 = *(float*)((char*)px - addrc0); + + cmp %o5,_0x7f800000 ! (3_0) b0 ? 0x7f800000 + bge,pn %icc,.update2 ! (3_0) if ( b0 > 0x7f800000 ) + nop +.cont2: + fmuld %f40,%f18,%f28 ! (1_0) dtmp0 *= x20; + sll %o0,5,%l6 ! (3_0) ltmp0 = ldiff0 << 5; + add %i1,stridey,%i1 ! py += stridey + fstod %f0,%f40 ! (3_0) y0 = (double)fy0; + + faddd %f16,K5,%f2 ! (0_0) dtmp0 += K5; + sra %l4,27,%o5 ! (3_0) signx0 = ux0 >> 27; + add %i3,stridex,%i3 ! px += stridex + + sra %l3,28,%o4 ! (3_0) signy0 = uy0 >> 28; + fstod %f1,%f16 ! (3_0) x0 = (double)fx0; +.d2: + faddd %f28,K7,%f28 ! (1_0) dtmp0 += K7; + add %l6,cadd_arr,%l6 ! (3_0) ltmp0 += (char*)cadd_arr; + and %o5,-16,%o5 ! (3_0) signx0 &= -16; + + lda [%i1]0x82,%l4 ! (4_0) uy0 = *(int*)py; + fmuld %f2,%f20,%f2 ! (0_0) dtmp0 *= x20; + + lda [%i3]0x82,%l3 ! (4_0) ux0 = *(int*)px; + fdivd %f40,%f16,%f6 ! (3_0) x0 = y0 / x0; + + and %o4,-8,%o4 ! (3_0) signy0 &= -8; + fmuld %f8,%f8,%f16 ! (2_0) x20 = x0 * x0; + + add %l6,%o5,%o2 ! (3_0) ltmp0 += signx0; + fmuld %f28,%f18,%f28 ! (1_0) dtmp0 *= x20; + + and %l3,_0x7fffffff,%l6 ! (4_0) ax0 = ux0 & 0x7fffffff; + sethi %hi(0x00800000),%o5 + faddd %f2,K4,%f2 ! (0_0) dtmp0 += K4; + + cmp %l6,%o5 + bl,pn %icc,.u12 + and %l4,_0x7fffffff,%l5 ! 
(4_0) ay0 = uy0 & 0x7fffffff; +.c12: + cmp %l5,%o5 + bl,pn %icc,.u13 + fmuld K9,%f16,%f40 ! (2_0) dtmp0 = K9 * x20; +.c13: + cmp %l6,_0x7f800000 + bge,pn %icc,.u14 + faddd %f28,K6,%f4 ! (1_0) dtmp0 += K6; +.c14: + ldd [%o2+%o4],%f28 ! (3_0) cadd0 = *(double*)(ltmp0 + signy0); + cmp %l5,_0x7f800000 + bge,pn %icc,.u15 + fmuld %f2,%f20,%f24 ! (0_0) dtmp0 *= x20; +.c15: + sub %l6,%l5,%o2 ! (4_0) ldiff0 = ax0 - ay0; + + sra %o2,31,%l5 ! (4_0) ldiff0 >>= 31; + sub %i3,%i1,%l6 ! (4_0) addrc0 = (char*)px - (char*)py; + + faddd %f40,K8,%f40 ! (2_0) dtmp0 += K8; + and %l6,%l5,%o2 ! (4_0) addrc0 &= ldiff0; + fmuld %f4,%f18,%f4 ! (1_0) dtmp0 *= x20; + + lda [%i1+%o2]0x82,%f0 ! (4_0) fy0 = *(float*)((char*)py + addrc0); + sub %i3,%o2,%o4 ! (4_0) (char*)px - addrc0; + faddd %f24,K3,%f24 ! (0_0) dtmp0 += K3; + + lda [%o4]0x82,%f2 ! (4_0) fx0 = *(float*)((char*)px - addrc0); + + cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 + bge,pn %icc,.update3 ! (4_0) if ( b0 > 0x7f800000 ) + nop +.cont3: + fmuld %f40,%f16,%f26 ! (2_0) dtmp0 *= x20; + sll %l5,5,%l6 ! (4_0) ltmp0 = ldiff0 << 5; + add %i1,stridey,%i1 ! py += stridey + fstod %f0,%f40 ! (4_0) y0 = (double)fy0; + + faddd %f4,K5,%f62 ! (1_0) dtmp0 += K5; + add %i3,stridex,%i3 ! px += stridex + fmuld %f24,%f20,%f24 ! (0_0) dtmp0 *= x20; + + fstod %f2,%f2 ! (4_0) x0 = (double)fx0; + sra %l3,27,%o5 ! (4_0) signx0 = ux0 >> 27; + sra %l4,28,%o4 ! (4_0) signy0 = uy0 >> 28; +.d3: + lda [%i1]0x82,%l3 ! (5_0) uy0 = *(int*)py; + add %l6,cadd_arr,%l6 ! (4_0) ltmp0 += (char*)cadd_arr; + faddd %f26,K7,%f26 ! (2_0) dtmp0 += K7; + + fmuld %f62,%f18,%f4 ! (1_0) dtmp0 *= x20; + and %o5,-16,%o5 ! (4_0) signx0 &= -16; + + lda [%i3]0x82,%l4 ! (5_1) ux0 = *(int*)px; + fdivd %f40,%f2,%f62 ! (4_1) x0 = y0 / x0; + faddd %f24,K2,%f40 ! (0_1) dtmp0 += K2; + + and %o4,-8,%o4 ! (4_1) signy0 &= -8; + fmuld %f6,%f6,%f24 ! (3_1) x20 = x0 * x0; + + add %l6,%o5,%o1 ! (4_1) ltmp0 += signx0; + fmuld %f26,%f16,%f26 ! (2_1) dtmp0 *= x20; + + and %l4,_0x7fffffff,%l6 ! 
(5_1) ax0 = ux0 & 0x7fffffff; + sethi %hi(0x00800000),%o5 + faddd %f4,K4,%f4 ! (1_1) dtmp0 += K4; + + cmp %l6,%o5 + bl,pn %icc,.u16 + and %l3,_0x7fffffff,%o7 ! (5_1) ay0 = uy0 & 0x7fffffff; +.c16: + cmp %o7,%o5 + bl,pn %icc,.u17 + fmuld %f40,%f20,%f38 ! (0_1) dtmp0 *= x20; +.c17: + cmp %l6,_0x7f800000 + bge,pn %icc,.u18 + fmuld K9,%f24,%f40 ! (3_1) dtmp0 = K9 * x20; +.c18: + cmp %o7,_0x7f800000 + bge,pn %icc,.u19 + faddd %f26,K6,%f22 ! (2_1) dtmp0 += K6; +.c19: + ldd [%o1+%o4],%f26 ! (4_1) cadd0 = *(double*)(ltmp0 + signy0); + fmuld %f4,%f18,%f4 ! (1_1) dtmp0 *= x20; + + sub %l6,%o7,%o1 ! (5_1) ldiff0 = ax0 - ay0; + + sra %o1,31,%o7 ! (5_1) ldiff0 >>= 31; + sub %i3,%i1,%l6 ! (5_1) addrc0 = (char*)px - (char*)py; + faddd %f38,K1,%f38 ! (0_1) dtmp0 += K1; + + faddd %f40,K8,%f40 ! (3_1) dtmp0 += K8; + and %l6,%o7,%o1 ! (5_1) addrc0 &= ldiff0; + fmuld %f22,%f16,%f22 ! (2_1) dtmp0 *= x20; + + lda [%i1+%o1]0x82,%f0 ! (5_1) fy0 = *(float*)((char*)py + addrc0); + sll %o7,5,%l6 ! (5_1) ltmp0 = ldiff0 << 5; + sub %i3,%o1,%o4 ! (5_1) (char*)px - addrc0; + faddd %f4,K3,%f4 ! (1_1) dtmp0 += K3; + + lda [%o4]0x82,%f1 ! (5_1) fx0 = *(float*)((char*)px - addrc0); + + fmuld %f38,%f20,%f38 ! (0_1) dtmp0 *= x20; + cmp %o5,_0x7f800000 ! (5_1) b0 ? 0x7f800000 + bge,pn %icc,.update4 ! (5_1) if ( b0 > 0x7f800000 ) + nop +.cont4: + fmuld %f40,%f24,%f36 ! (3_1) dtmp0 *= x20; + fstod %f0,%f40 ! (5_1) y0 = (double)fy0; + + faddd %f22,K5,%f14 ! (2_1) dtmp0 += K5; + fmuld %f4,%f18,%f4 ! (1_1) dtmp0 *= x20; + + add %i3,stridex,%i3 ! px += stridex + sll %l7,3,%l7 ! (0_1) cmul0_ind = ldiff0 << 3; + fstod %f1,%f2 ! (5_1) x0 = (double)fx0; +.d4: + sra %l3,28,%o4 ! (5_1) signy0 = uy0 >> 28; + add %i1,stridey,%i1 ! py += stridey + + faddd %f36,K7,%f36 ! (3_1) dtmp0 += K7; + sra %l4,27,%o5 ! (5_1) signx0 = ux0 >> 27; + + lda [%i1]0x82,%l4 ! (0_0) uy0 = *(int*)py; + add %l6,cadd_arr,%l6 ! (5_1) ltmp0 += (char*)cadd_arr; + fmuld %f14,%f16,%f22 ! (2_1) dtmp0 *= x20; + faddd %f38,K0,%f38 ! 
(0_1) dtmp0 += K0; + + lda [%i3]0x82,%l3 ! (0_0) ux0 = *(int*)px; + and %o5,-16,%o5 ! (5_1) signx0 &= -16; + fdivd %f40,%f2,%f14 ! (5_1) x0 = y0 / x0; + faddd %f4,K2,%f40 ! (1_1) dtmp0 += K2; + + fmuld %f62,%f62,%f4 ! (4_1) x20 = x0 * x0; + + ldd [cmul_arr+%l7],%f0 ! (0_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind); + add %l6,%o5,%o2 ! (5_1) ltmp0 += signx0; + and %o4,-8,%o4 ! (5_1) signy0 &= -8; + fmuld %f36,%f24,%f36 ! (3_1) dtmp0 *= x20; + + fmuld %f38,%f12,%f12 ! (0_1) x0 = dtmp0 * x0; + and %l4,_0x7fffffff,%l7 ! (0_0) ay0 = uy0 & 0x7fffffff; + sethi %hi(0x00800000),%o5 + faddd %f22,K4,%f22 ! (2_1) dtmp0 += K4; + + and %l3,_0x7fffffff,%l6 ! (0_0) ax0 = ux0 & 0x7fffffff; + cmp %l7,%o5 + bl,pn %icc,.u20 + fmuld %f40,%f18,%f38 ! (1_1) dtmp0 *= x20; +.c20: + cmp %l6,%o5 + bl,pn %icc,.u21 + fmuld K9,%f4,%f40 ! (4_1) dtmp0 = K9 * x20; +.c21: + cmp %l7,_0x7f800000 + bge,pn %icc,.u22 + faddd %f36,K6,%f20 ! (3_1) dtmp0 += K6; +.c22: + ldd [%o2+%o4],%f36 ! (5_1) cadd0 = *(double*)(ltmp0 + signy0); + cmp %l6,_0x7f800000 + bge,pn %icc,.u23 + fmuld %f22,%f16,%f22 ! (2_1) dtmp0 *= x20; +.c23: + sub %l6,%l7,%o2 ! (0_0) ldiff0 = ax0 - ay0; + + fmuld %f0,%f12,%f12 ! (0_1) dtmp0 = cmul0 * x0; + sra %o2,31,%l7 ! (0_0) ldiff0 >>= 31; + sub %i3,%i1,%l6 ! (0_0) addrc0 = (char*)px - (char*)py; + faddd %f38,K1,%f38 ! (1_1) dtmp0 += K1; + + faddd %f40,K8,%f40 ! (4_1) dtmp0 += K8; + and %l6,%l7,%o2 ! (0_0) addrc0 &= ldiff0; + fmuld %f20,%f24,%f20 ! (3_1) dtmp0 *= x20; + + lda [%i1+%o2]0x82,%f0 ! (0_0) fy0 = *(float*)((char*)py + addrc0); + sll %g1,3,%g1 ! (1_1) cmul0_ind = ldiff0 << 3; + sub %i3,%o2,%o4 ! (0_0) (char*)px - addrc0 + faddd %f22,K3,%f22 ! (2_1) dtmp0 += K3; + + lda [%o4]0x82,%f2 ! (0_0) fx0 = *(float*)((char*)px - addrc0); + sll %l7,5,%l6 ! (0_0) ltmp0 = ldiff0 << 5; + + fmuld %f38,%f18,%f38 ! (1_1) dtmp0 *= x20; + cmp %o5,_0x7f800000 ! (0_0) b0 ? 0x7f800000 + bge,pn %icc,.update5 ! (0_0) if ( b0 > 0x7f800000 ) + faddd %f34,%f12,%f18 ! 
(0_1) dtmp0 = cadd0 + dtmp0; +.cont5: + fmuld %f40,%f4,%f34 ! (4_1) dtmp0 *= x20; + sra %l3,27,%o5 ! (0_0) signx0 = ux0 >> 27; + add %i3,stridex,%i3 ! px += stridex + fstod %f0,%f40 ! (0_0) y0 = (double)fy0; + + faddd %f20,K5,%f12 ! (3_1) dtmp0 += K5; + add %i1,stridey,%i1 ! py += stridey + fmuld %f22,%f16,%f22 ! (2_1) dtmp0 *= x20; + + lda [%i1]0x82,%l3 ! (1_0) uy0 = *(int*)py; + sra %l4,28,%o4 ! (0_0) signy0 = uy0 >> 28; + add %l6,cadd_arr,%l6 ! (0_0) ltmp0 += (char*)cadd_arr; + fstod %f2,%f2 ! (0_0) x0 = (double)fx0; +.d5: + lda [%i3]0x82,%l4 ! (1_0) ux0 = *(int*)px; + and %o5,-16,%o5 ! (0_0) signx0 &= -16; + faddd %f34,K7,%f34 ! (4_1) dtmp0 += K7; + + ldx [%fp+tmp_pz],%o1 + fmuld %f12,%f24,%f20 ! (3_1) dtmp0 *= x20; + and %o4,-8,%o4 ! (0_0) signy0 &= -8; + faddd %f38,K0,%f38 ! (1_1) dtmp0 += K0; + + fdivd %f40,%f2,%f12 ! (0_0) x0 = y0 / x0; + faddd %f22,K2,%f40 ! (2_1) dtmp0 += K2; + + fdtos %f18,%f2 ! (0_1) ftmp0 = (float)dtmp0; + st %f2,[%o1] ! (0_1) *pz = ftmp0 + add %o1,stridez,%o2 + fmuld %f14,%f14,%f22 ! (5_1) x20 = x0 * x0; + + subcc counter,1,counter + bneg,a,pn %icc,.begin + or %g0,%o2,%o4 + + ldd [cmul_arr+%g1],%f0 ! (1_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind); + add %l6,%o5,%o1 ! (0_0) ltmp0 += signx0; + fmuld %f34,%f4,%f34 ! (4_1) dtmp0 *= x20; + + fmuld %f38,%f10,%f10 ! (1_1) x0 = dtmp0 * x0; + and %l4,_0x7fffffff,%l6 ! (1_0) ax0 = ux0 & 0x7fffffff; + sethi %hi(0x00800000),%o5 + faddd %f20,K4,%f20 ! (3_1) dtmp0 += K4; + + and %l3,_0x7fffffff,%g1 ! (1_0) ay0 = uy0 & 0x7fffffff; + cmp %l6,%o5 + bl,pn %icc,.u24 + fmuld %f40,%f16,%f38 ! (2_1) dtmp0 *= x20; +.c24: + cmp %g1,%o5 + bl,pn %icc,.u25 + fmuld K9,%f22,%f40 ! (5_1) dtmp0 = K9 * x20; +.c25: + cmp %l6,_0x7f800000 + bge,pn %icc,.u26 + faddd %f34,K6,%f18 ! (4_1) dtmp0 += K6; +.c26: + ldd [%o1+%o4],%f34 ! (0_0) cadd0 = *(double*)(ltmp0 + signy0); + cmp %g1,_0x7f800000 + bge,pn %icc,.u27 + fmuld %f20,%f24,%f20 ! (3_1) dtmp0 *= x20; +.c27: + sub %l6,%g1,%o1 ! 
(1_0) ldiff0 = ax0 - ay0; + + fmuld %f0,%f10,%f10 ! (1_1) dtmp0 = cmul0 * x0; + sra %o1,31,%g1 ! (1_0) ldiff0 >>= 31; + sub %i3,%i1,%l6 ! (1_0) addrc0 = (char*)px - (char*)py; + faddd %f38,K1,%f38 ! (2_1) dtmp0 += K1; + + faddd %f40,K8,%f40 ! (5_1) dtmp0 += K8; + and %l6,%g1,%o1 ! (1_0) addrc0 &= ldiff0; + fmuld %f18,%f4,%f18 ! (4_1) dtmp0 *= x20; + + lda [%i1+%o1]0x82,%f0 ! (1_0) fy0 = *(float*)((char*)py + addrc0); + sll %g5,3,%g5 ! (2_1) cmul0_ind = ldiff0 << 3; + sub %i3,%o1,%o4 ! (1_0) (char*)px - addrc0; + faddd %f20,K3,%f20 ! (3_1) dtmp0 += K3; + + lda [%o4]0x82,%f2 ! (1_0) fx0 = *(float*)((char*)px - addrc0); + sll %g1,5,%l6 ! (1_0) ltmp0 = ldiff0 << 5; + add %o2,stridez,%o1 ! pz += stridez + + fmuld %f38,%f16,%f38 ! (2_1) dtmp0 *= x20; + cmp %o5,_0x7f800000 ! (1_0) b0 ? 0x7f800000 + bge,pn %icc,.update6 ! (1_0) if ( b0 > 0x7f800000 ) + faddd %f32,%f10,%f16 ! (1_1) dtmp0 = cadd0 + dtmp0; +.cont6: + fmuld %f40,%f22,%f32 ! (5_1) dtmp0 *= x20; + add %i1,stridey,%i1 ! py += stridey + fstod %f0,%f40 ! (1_0) y0 = (double)fy0; + + faddd %f18,K5,%f10 ! (4_1) dtmp0 += K5; + sra %l4,27,%o5 ! (1_0) signx0 = ux0 >> 27; + add %i3,stridex,%i3 ! px += stridex + fmuld %f20,%f24,%f20 ! (3_1) dtmp0 *= x20; + + sra %l3,28,%o4 ! (1_0) signy0 = uy0 >> 28; + add %l6,cadd_arr,%l6 ! (1_0) ltmp0 += (char*)cadd_arr; + fstod %f2,%f2 ! (1_0) x0 = (double)fx0; +.d6: + faddd %f32,K7,%f32 ! (5_1) dtmp0 += K7; + and %o5,-16,%o5 ! (1_0) signx0 &= -16; + and %o4,-8,%o4 ! (1_0) signy0 &= -8; + + lda [%i1]0x82,%l4 ! (2_0) uy0 = *(int*)py; + fmuld %f10,%f4,%f18 ! (4_1) dtmp0 *= x20; + faddd %f38,K0,%f38 ! (2_1) dtmp0 += K0; + + lda [%i3]0x82,%l3 ! (2_0) ux0 = *(int*)px; + fdivd %f40,%f2,%f10 ! (1_0) x0 = y0 / x0; + faddd %f20,K2,%f40 ! (3_1) dtmp0 += K2; + + fmuld %f12,%f12,%f20 ! (0_0) x20 = x0 * x0; + fdtos %f16,%f2 ! (1_1) ftmp0 = (float)dtmp0; + st %f2,[%o2] ! (1_1) *pz = ftmp0; + + subcc counter,1,counter + bneg,a,pn %icc,.begin + or %g0,%o1,%o4 + + ldd [cmul_arr+%g5],%f0 ! 
(2_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind); + add %l6,%o5,%o2 ! (1_0) ltmp0 += signx0; + fmuld %f32,%f22,%f32 ! (5_1) dtmp0 *= x20; + + fmuld %f38,%f8,%f8 ! (2_1) x0 = dtmp0 * x0; + and %l3,_0x7fffffff,%l6 ! (2_0) ax0 = ux0 & 0x7fffffff; + sethi %hi(0x00800000),%o5 + faddd %f18,K4,%f18 ! (4_1) dtmp0 += K4; + + and %l4,_0x7fffffff,%g5 ! (2_0) ay0 = uy0 & 0x7fffffff; + cmp %l6,%o5 + bl,pn %icc,.u28 + fmuld %f40,%f24,%f38 ! (3_1) dtmp0 *= x20; +.c28: + cmp %g5,%o5 + bl,pn %icc,.u29 + fmuld K9,%f20,%f40 ! (0_0) dtmp0 = K9 * x20; +.c29: + cmp %l6,_0x7f800000 + bge,pn %icc,.u30 + faddd %f32,K6,%f16 ! (5_1) dtmp0 += K6; +.c30: + ldd [%o2+%o4],%f32 ! (1_0) cadd0 = *(double*)(ltmp0 + signy0); + cmp %g5,_0x7f800000 + bge,pn %icc,.u31 + fmuld %f18,%f4,%f18 ! (4_1) dtmp0 *= x20; +.c31: + sub %l6,%g5,%o2 ! (2_0) ldiff0 = ax0 - ay0; + + fmuld %f0,%f8,%f8 ! (2_1) dtmp0 = cmul0 * x0; + sra %o2,31,%g5 ! (2_0) ldiff0 >>= 31; + sub %i3,%i1,%l6 ! (2_0) addrc0 = (char*)px - (char*)py; + faddd %f38,K1,%f38 ! (3_1) dtmp0 += K1; + + faddd %f40,K8,%f40 ! (0_0) dtmp0 += K8; + and %l6,%g5,%o2 ! (2_0) addrc0 &= ldiff0; + fmuld %f16,%f22,%f16 ! (5_1) dtmp0 *= x20; + + lda [%i1+%o2]0x82,%f0 ! (2_0) fy0 = *(float*)((char*)py + addrc0); + sub %i3,%o2,%o4 ! (2_0) (char*)px - addrc0; + add %o1,stridez,%o2 ! pz += stridez + faddd %f18,K3,%f18 ! (4_1) dtmp0 += K3; + + lda [%o4]0x82,%f2 ! (2_0) fx0 = *(float*)((char*)px - addrc0); + sll %o0,3,%o0 ! (3_1) cmul0_ind = ldiff0 << 3; + + fmuld %f38,%f24,%f38 ! (3_1) dtmp0 *= x20; + cmp %o5,_0x7f800000 ! (2_0) b0 ? 0x7f800000 + bge,pn %icc,.update7 ! (2_0) if ( b0 > 0x7f800000 ) + faddd %f30,%f8,%f24 ! (2_1) dtmp0 = cadd0 + dtmp0; +.cont7: + fmuld %f40,%f20,%f30 ! (0_0) dtmp0 *= x20; + sll %g5,5,%l6 ! (2_0) ltmp0 = ldiff0 << 5; + add %i1,stridey,%i1 ! py += stridey + fstod %f0,%f40 ! (2_0) y0 = (double)fy0; + + faddd %f16,K5,%f8 ! (5_1) dtmp0 += K5; + sra %l3,27,%o5 ! (2_0) signx0 = ux0 >> 27; + add %i3,stridex,%i3 ! 
px += stridex + fmuld %f18,%f4,%f18 ! (4_1) dtmp0 *= x20; + + fstod %f2,%f2 ! (2_0) x0 = (double)fx0; + sra %l4,28,%o4 ! (2_0) signy0 = uy0 >> 28; + add %l6,cadd_arr,%l6 ! (2_0) ltmp0 += (char*)cadd_arr; +.d7: + lda [%i1]0x82,%l3 ! (3_0) uy0 = *(int*)py; + and %o5,-16,%o5 ! (2_0) signx0 &= -16; + faddd %f30,K7,%f30 ! (0_0) dtmp0 += K7; + + lda [%i3]0x82,%l4 ! (3_0) ux0 = *(int*)px; + fmuld %f8,%f22,%f16 ! (5_1) dtmp0 *= x20; + faddd %f38,K0,%f38 ! (3_1) dtmp0 += K0; + + fdivd %f40,%f2,%f8 ! (2_0) x0 = y0 / x0; + faddd %f18,K2,%f40 ! (4_1) dtmp0 += K2; + + fmuld %f10,%f10,%f18 ! (1_0) x20 = x0 * x0; + fdtos %f24,%f1 ! (2_1) ftmp0 = (float)dtmp0; + st %f1,[%o1] ! (2_1) *pz = ftmp0; + + subcc counter,1,counter + bneg,a,pn %icc,.begin + or %g0,%o2,%o4 + + ldd [cmul_arr+%o0],%f2 ! (3_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind); + add %l6,%o5,%o1 ! (2_0) ltmp0 += signx0; + and %o4,-8,%o4 ! (2_0) signy0 &= -8; + fmuld %f30,%f20,%f30 ! (0_0) dtmp0 *= x20; + + fmuld %f38,%f6,%f6 ! (3_1) x0 = dtmp0 * x0; + and %l4,_0x7fffffff,%l6 ! (3_0) ax0 = ux0 & 0x7fffffff; + sethi %hi(0x00800000),%o5 + faddd %f16,K4,%f24 ! (5_1) dtmp0 += K4; + + and %l3,_0x7fffffff,%o0 ! (3_0) ay0 = uy0 & 0x7fffffff; + cmp %l6,%o5 + bl,pn %icc,.u32 + fmuld %f40,%f4,%f38 ! (4_1) dtmp0 *= x20; +.c32: + cmp %o0,%o5 + bl,pn %icc,.u33 + fmuld K9,%f18,%f40 ! (1_0) dtmp0 = K9 * x20; +.c33: + cmp %l6,_0x7f800000 + bge,pn %icc,.u34 + faddd %f30,K6,%f16 ! (0_0) dtmp0 += K6; +.c34: + ldd [%o1+%o4],%f30 ! (2_0) cadd0 = *(double*)(ltmp0 + signy0); + cmp %o0,_0x7f800000 + bge,pn %icc,.u35 + fmuld %f24,%f22,%f24 ! (5_1) dtmp0 *= x20; +.c35: + sub %l6,%o0,%o1 ! (3_0) ldiff0 = ax0 - ay0; + + fmuld %f2,%f6,%f6 ! (3_1) dtmp0 = cmul0 * x0; + sra %o1,31,%o0 ! (3_0) ldiff0 >>= 31; + sub %i3,%i1,%l6 ! (3_0) addrc0 = (char*)px - (char*)py; + faddd %f38,K1,%f38 ! (4_1) dtmp0 += K1; + + faddd %f40,K8,%f40 ! (1_0) dtmp0 += K8; + and %l6,%o0,%o1 ! (3_0) addrc0 &= ldiff0; + fmuld %f16,%f20,%f16 ! 
(0_0) dtmp0 *= x20; + + lda [%i1+%o1]0x82,%f0 ! (3_0) fy0 = *(float*)((char*)py + addrc0); + sub %i3,%o1,%o4 ! (3_0) (char*)px - addrc0; + add %o2,stridez,%o1 ! pz += stridez + faddd %f24,K3,%f24 ! (5_1) dtmp0 += K3; + + lda [%o4]0x82,%f1 ! (3_0) fx0 = *(float*)((char*)px - addrc0); + sll %l5,3,%l5 ! (4_1) cmul0_ind = ldiff0 << 3; + + fmuld %f38,%f4,%f38 ! (4_1) dtmp0 *= x20; + cmp %o5,_0x7f800000 ! (3_0) b0 ? 0x7f800000 + bge,pn %icc,.update8 ! (3_0) if ( b0 > 0x7f800000 ) + faddd %f28,%f6,%f4 ! (3_1) dtmp0 = cadd0 + dtmp0; +.cont8: + fmuld %f40,%f18,%f28 ! (1_0) dtmp0 *= x20; + sll %o0,5,%l6 ! (3_0) ltmp0 = ldiff0 << 5; + add %i1,stridey,%i1 ! py += stridey + fstod %f0,%f40 ! (3_0) y0 = (double)fy0; + + faddd %f16,K5,%f2 ! (0_0) dtmp0 += K5; + sra %l4,27,%o5 ! (3_0) signx0 = ux0 >> 27; + add %i3,stridex,%i3 ! px += stridex + fmuld %f24,%f22,%f24 ! (5_1) dtmp0 *= x20; + + sra %l3,28,%o4 ! (3_0) signy0 = uy0 >> 28; + fstod %f1,%f16 ! (3_0) x0 = (double)fx0; +.d8: + faddd %f28,K7,%f28 ! (1_0) dtmp0 += K7; + add %l6,cadd_arr,%l6 ! (3_0) ltmp0 += (char*)cadd_arr; + and %o5,-16,%o5 ! (3_0) signx0 &= -16; + + lda [%i1]0x82,%l4 ! (4_0) uy0 = *(int*)py; + fmuld %f2,%f20,%f2 ! (0_0) dtmp0 *= x20; + faddd %f38,K0,%f38 ! (4_1) dtmp0 += K0; + + lda [%i3]0x82,%l3 ! (4_0) ux0 = *(int*)px; + fdivd %f40,%f16,%f6 ! (3_0) x0 = y0 / x0; + faddd %f24,K2,%f24 ! (5_1) dtmp0 += K2; + + fdtos %f4,%f1 ! (3_1) ftmp0 = (float)dtmp0; + and %o4,-8,%o4 ! (3_0) signy0 &= -8; + st %f1,[%o2] ! (3_1) *pz = ftmp0; + fmuld %f8,%f8,%f16 ! (2_0) x20 = x0 * x0; + + subcc counter,1,counter + bneg,a,pn %icc,.begin + or %g0,%o1,%o4 + + ldd [cmul_arr+%l5],%f0 ! (4_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind); + add %l6,%o5,%o2 ! (3_0) ltmp0 += signx0; + fmuld %f28,%f18,%f28 ! (1_0) dtmp0 *= x20; + + fmuld %f38,%f62,%f62 ! (4_1) x0 = dtmp0 * x0; + and %l3,_0x7fffffff,%l6 ! (4_0) ax0 = ux0 & 0x7fffffff; + sethi %hi(0x00800000),%o5 + faddd %f2,K4,%f2 ! (0_0) dtmp0 += K4; + + and %l4,_0x7fffffff,%l5 ! 
(4_0) ay0 = uy0 & 0x7fffffff; + cmp %l6,%o5 + bl,pn %icc,.u36 + fmuld %f24,%f22,%f38 ! (5_1) dtmp0 *= x20; +.c36: + cmp %l5,%o5 + bl,pn %icc,.u37 + fmuld K9,%f16,%f40 ! (2_0) dtmp0 = K9 * x20; +.c37: + cmp %l6,_0x7f800000 + bge,pn %icc,.u38 + faddd %f28,K6,%f4 ! (1_0) dtmp0 += K6; +.c38: + ldd [%o2+%o4],%f28 ! (3_0) cadd0 = *(double*)(ltmp0 + signy0); + cmp %l5,_0x7f800000 + bge,pn %icc,.u39 + fmuld %f2,%f20,%f24 ! (0_0) dtmp0 *= x20; +.c39: + sub %l6,%l5,%o2 ! (4_0) ldiff0 = ax0 - ay0; + + fmuld %f0,%f62,%f62 ! (4_1) dtmp0 = cmul0 * x0; + sra %o2,31,%l5 ! (4_0) ldiff0 >>= 31; + sub %i3,%i1,%l6 ! (4_0) addrc0 = (char*)px - (char*)py; + faddd %f38,K1,%f38 ! (5_1) dtmp0 += K1; + + faddd %f40,K8,%f40 ! (2_0) dtmp0 += K8; + and %l6,%l5,%o2 ! (4_0) addrc0 &= ldiff0; + fmuld %f4,%f18,%f4 ! (1_0) dtmp0 *= x20; + + lda [%i1+%o2]0x82,%f0 ! (4_0) fy0 = *(float*)((char*)py + addrc0); + sub %i3,%o2,%o4 ! (4_0) (char*)px - addrc0; + add %o1,stridez,%o2 ! pz += stridez + faddd %f24,K3,%f24 ! (0_0) dtmp0 += K3; + + lda [%o4]0x82,%f2 ! (4_0) fx0 = *(float*)((char*)px - addrc0); + sll %o7,3,%o7 ! (5_1) cmul0_ind = ldiff0 << 3; + + fmuld %f38,%f22,%f38 ! (5_1) dtmp0 *= x20; + cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 + bge,pn %icc,.update9 ! (4_0) if ( b0 > 0x7f800000 ) + faddd %f26,%f62,%f22 ! (4_1) dtmp0 = cadd0 + dtmp0; +.cont9: + fmuld %f40,%f16,%f26 ! (2_0) dtmp0 *= x20; + sll %l5,5,%l6 ! (4_0) ltmp0 = ldiff0 << 5; + add %i1,stridey,%i1 ! py += stridey + fstod %f0,%f40 ! (4_0) y0 = (double)fy0; + + faddd %f4,K5,%f62 ! (1_0) dtmp0 += K5; + sra %l3,27,%o5 ! (4_0) signx0 = ux0 >> 27; + add %i3,stridex,%i3 ! px += stridex + fmuld %f24,%f20,%f24 ! (0_0) dtmp0 *= x20; + + fstod %f2,%f2 ! (4_0) x0 = (double)fx0; + sra %l4,28,%o4 ! (4_0) signy0 = uy0 >> 28; +.d9: + lda [%i1]0x82,%l3 ! (5_0) uy0 = *(int*)py; + add %l6,cadd_arr,%l6 ! (4_0) ltmp0 += (char*)cadd_arr; + faddd %f26,K7,%f26 ! (2_0) dtmp0 += K7; + + fmuld %f62,%f18,%f4 ! (1_0) dtmp0 *= x20; + and %o5,-16,%o5 ! 
(4_0) signx0 &= -16; + faddd %f38,K0,%f38 ! (5_1) dtmp0 += K0; + + subcc counter,5,counter + bneg,pn %icc,.tail + nop + + ba .main_loop + nop + + .align 16 +.main_loop: + lda [%i3]0x82,%l4 ! (5_1) ux0 = *(int*)px; + nop + fdivd %f40,%f2,%f62 ! (4_1) x0 = y0 / x0; + faddd %f24,K2,%f40 ! (0_1) dtmp0 += K2; + + fdtos %f22,%f22 ! (4_2) ftmp0 = (float)dtmp0; + and %o4,-8,%o4 ! (4_1) signy0 &= -8; + st %f22,[%o1] ! (4_2) *pz = ftmp0; + fmuld %f6,%f6,%f24 ! (3_1) x20 = x0 * x0; + + ldd [cmul_arr+%o7],%f0 ! (5_2) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind); + add %l6,%o5,%o1 ! (4_1) ltmp0 += signx0; + fmuld %f26,%f16,%f26 ! (2_1) dtmp0 *= x20; + + fmuld %f38,%f14,%f14 ! (5_2) x0 = dtmp0 * x0; + and %l4,_0x7fffffff,%l6 ! (5_1) ax0 = ux0 & 0x7fffffff; + sethi %hi(0x00800000),%o5 + faddd %f4,K4,%f4 ! (1_1) dtmp0 += K4; + + and %l3,_0x7fffffff,%o7 ! (5_1) ay0 = uy0 & 0x7fffffff; + fmuld %f40,%f20,%f38 ! (0_1) dtmp0 *= x20; + + cmp %l6,%o5 + bl,pn %icc,.up0 + fmuld K9,%f24,%f40 ! (3_1) dtmp0 = K9 * x20; +.co0: + nop + cmp %o7,%o5 + bl,pn %icc,.up1 + faddd %f26,K6,%f22 ! (2_1) dtmp0 += K6; +.co1: + ldd [%o1+%o4],%f26 ! (4_1) cadd0 = *(double*)(ltmp0 + signy0); + cmp %l6,_0x7f800000 + bge,pn %icc,.up2 + fmuld %f4,%f18,%f4 ! (1_1) dtmp0 *= x20; +.co2: + sub %l6,%o7,%o1 ! (5_1) ldiff0 = ax0 - ay0; + cmp %o7,_0x7f800000 + bge,pn %icc,.up3 + + fmuld %f0,%f14,%f14 ! (5_2) dtmp0 = cmul0 * x0; +.co3: + sra %o1,31,%o7 ! (5_1) ldiff0 >>= 31; + sub %i3,%i1,%l6 ! (5_1) addrc0 = (char*)px - (char*)py; + faddd %f38,K1,%f38 ! (0_1) dtmp0 += K1; + + faddd %f40,K8,%f40 ! (3_1) dtmp0 += K8; + and %l6,%o7,%o1 ! (5_1) addrc0 &= ldiff0; + fmuld %f22,%f16,%f22 ! (2_1) dtmp0 *= x20; + + lda [%i1+%o1]0x82,%f0 ! (5_1) fy0 = *(float*)((char*)py + addrc0); + sll %o7,5,%l6 ! (5_1) ltmp0 = ldiff0 << 5; + sub %i3,%o1,%o4 ! (5_1) (char*)px - addrc0; + faddd %f4,K3,%f4 ! (1_1) dtmp0 += K3; + + lda [%o4]0x82,%f2 ! (5_1) fx0 = *(float*)((char*)px - addrc0); + + fmuld %f38,%f20,%f38 ! 
(0_1) dtmp0 *= x20; + cmp %o5,_0x7f800000 ! (5_1) b0 ? 0x7f800000 + bge,pn %icc,.update10 ! (5_1) if ( b0 > 0x7f800000 ) + faddd %f36,%f14,%f20 ! (5_2) dtmp0 = cadd0 + dtmp0; +.cont10: + fmuld %f40,%f24,%f36 ! (3_1) dtmp0 *= x20; + nop + fstod %f0,%f40 ! (5_1) y0 = (double)fy0; + + faddd %f22,K5,%f14 ! (2_1) dtmp0 += K5; + add %o2,stridez,%o1 ! pz += stridez + fmuld %f4,%f18,%f4 ! (1_1) dtmp0 *= x20; + + sll %l7,3,%l7 ! (0_1) cmul0_ind = ldiff0 << 3; + add %i3,stridex,%i3 ! px += stridex + fstod %f2,%f2 ! (5_1) x0 = (double)fx0; +.den0: + sra %l3,28,%o4 ! (5_1) signy0 = uy0 >> 28; + add %i1,stridey,%i1 ! py += stridey + + faddd %f36,K7,%f36 ! (3_1) dtmp0 += K7; + sra %l4,27,%o5 ! (5_1) signx0 = ux0 >> 27; + + lda [%i1]0x82,%l4 ! (0_0) uy0 = *(int*)py; + add %l6,cadd_arr,%l6 ! (5_1) ltmp0 += (char*)cadd_arr; + fmuld %f14,%f16,%f22 ! (2_1) dtmp0 *= x20; + faddd %f38,K0,%f38 ! (0_1) dtmp0 += K0; + + lda [%i3]0x82,%l3 ! (0_0) ux0 = *(int*)px; + and %o5,-16,%o5 ! (5_1) signx0 &= -16; + fdivd %f40,%f2,%f14 ! (5_1) x0 = y0 / x0; + faddd %f4,K2,%f40 ! (1_1) dtmp0 += K2; + + fdtos %f20,%f2 ! (5_2) ftmp0 = (float)dtmp0; + st %f2,[%o2] ! (5_2) *pz = ftmp0; + fmuld %f62,%f62,%f4 ! (4_1) x20 = x0 * x0; + + ldd [cmul_arr+%l7],%f0 ! (0_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind); + add %l6,%o5,%o2 ! (5_1) ltmp0 += signx0; + and %o4,-8,%o4 ! (5_1) signy0 &= -8; + fmuld %f36,%f24,%f36 ! (3_1) dtmp0 *= x20; + + fmuld %f38,%f12,%f12 ! (0_1) x0 = dtmp0 * x0; + and %l4,_0x7fffffff,%l7 ! (0_0) ay0 = uy0 & 0x7fffffff; + sethi %hi(0x00800000),%o5 + faddd %f22,K4,%f22 ! (2_1) dtmp0 += K4; + + and %l3,_0x7fffffff,%l6 ! (0_0) ax0 = ux0 & 0x7fffffff; + fmuld %f40,%f18,%f38 ! (1_1) dtmp0 *= x20; + + cmp %l7,%o5 + bl,pn %icc,.up4 + fmuld K9,%f4,%f40 ! (4_1) dtmp0 = K9 * x20; +.co4: + nop + cmp %l6,%o5 + bl,pn %icc,.up5 + faddd %f36,K6,%f20 ! (3_1) dtmp0 += K6; +.co5: + ldd [%o2+%o4],%f36 ! 
(5_1) cadd0 = *(double*)(ltmp0 + signy0); + cmp %l7,_0x7f800000 + bge,pn %icc,.up6 + fmuld %f22,%f16,%f22 ! (2_1) dtmp0 *= x20; +.co6: + sub %l6,%l7,%o2 ! (0_0) ldiff0 = ax0 - ay0; + cmp %l6,_0x7f800000 + bge,pn %icc,.up7 + + fmuld %f0,%f12,%f12 ! (0_1) dtmp0 = cmul0 * x0; +.co7: + sra %o2,31,%l7 ! (0_0) ldiff0 >>= 31; + sub %i3,%i1,%l6 ! (0_0) addrc0 = (char*)px - (char*)py; + faddd %f38,K1,%f38 ! (1_1) dtmp0 += K1; + + faddd %f40,K8,%f40 ! (4_1) dtmp0 += K8; + and %l6,%l7,%o2 ! (0_0) addrc0 &= ldiff0; + fmuld %f20,%f24,%f20 ! (3_1) dtmp0 *= x20; + + lda [%i1+%o2]0x82,%f0 ! (0_0) fy0 = *(float*)((char*)py + addrc0); + sll %g1,3,%g1 ! (1_1) cmul0_ind = ldiff0 << 3; + sub %i3,%o2,%o4 ! (0_0) (char*)px - addrc0 + faddd %f22,K3,%f22 ! (2_1) dtmp0 += K3; + + lda [%o4]0x82,%f2 ! (0_0) fx0 = *(float*)((char*)px - addrc0); + sll %l7,5,%l6 ! (0_0) ltmp0 = ldiff0 << 5; + add %o1,stridez,%o2 ! pz += stridez + + fmuld %f38,%f18,%f38 ! (1_1) dtmp0 *= x20; + cmp %o5,_0x7f800000 ! (0_0) b0 ? 0x7f800000 + bge,pn %icc,.update11 ! (0_0) if ( b0 > 0x7f800000 ) + faddd %f34,%f12,%f18 ! (0_1) dtmp0 = cadd0 + dtmp0; +.cont11: + fmuld %f40,%f4,%f34 ! (4_1) dtmp0 *= x20; + sra %l3,27,%o5 ! (0_0) signx0 = ux0 >> 27; + add %i3,stridex,%i3 ! px += stridex + fstod %f0,%f40 ! (0_0) y0 = (double)fy0; + + faddd %f20,K5,%f12 ! (3_1) dtmp0 += K5; + add %i1,stridey,%i1 ! py += stridey + fmuld %f22,%f16,%f22 ! (2_1) dtmp0 *= x20; + + lda [%i1]0x82,%l3 ! (1_0) uy0 = *(int*)py; + sra %l4,28,%o4 ! (0_0) signy0 = uy0 >> 28; + add %l6,cadd_arr,%l6 ! (0_0) ltmp0 += (char*)cadd_arr; + fstod %f2,%f2 ! (0_0) x0 = (double)fx0; +.den1: + lda [%i3]0x82,%l4 ! (1_0) ux0 = *(int*)px; + and %o5,-16,%o5 ! (0_0) signx0 &= -16; + faddd %f34,K7,%f34 ! (4_1) dtmp0 += K7; + + fmuld %f12,%f24,%f20 ! (3_1) dtmp0 *= x20; + and %o4,-8,%o4 ! (0_0) signy0 &= -8; + faddd %f38,K0,%f38 ! (1_1) dtmp0 += K0; + + fdivd %f40,%f2,%f12 ! (0_0) x0 = y0 / x0; + faddd %f22,K2,%f40 ! (2_1) dtmp0 += K2; + + fdtos %f18,%f2 ! 
(0_1) ftmp0 = (float)dtmp0; + nop + st %f2,[%o1] ! (0_1) *pz = ftmp0 + fmuld %f14,%f14,%f22 ! (5_1) x20 = x0 * x0; + + ldd [cmul_arr+%g1],%f0 ! (1_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind); + add %l6,%o5,%o1 ! (0_0) ltmp0 += signx0; + fmuld %f34,%f4,%f34 ! (4_1) dtmp0 *= x20; + + fmuld %f38,%f10,%f10 ! (1_1) x0 = dtmp0 * x0; + and %l4,_0x7fffffff,%l6 ! (1_0) ax0 = ux0 & 0x7fffffff; + sethi %hi(0x00800000),%o5 + faddd %f20,K4,%f20 ! (3_1) dtmp0 += K4; + + and %l3,_0x7fffffff,%g1 ! (1_0) ay0 = uy0 & 0x7fffffff; + fmuld %f40,%f16,%f38 ! (2_1) dtmp0 *= x20; + + cmp %l6,%o5 + bl,pn %icc,.up8 + fmuld K9,%f22,%f40 ! (5_1) dtmp0 = K9 * x20; +.co8: + nop + cmp %g1,%o5 + bl,pn %icc,.up9 + faddd %f34,K6,%f18 ! (4_1) dtmp0 += K6; +.co9: + ldd [%o1+%o4],%f34 ! (0_0) cadd0 = *(double*)(ltmp0 + signy0); + cmp %l6,_0x7f800000 + bge,pn %icc,.up10 + fmuld %f20,%f24,%f20 ! (3_1) dtmp0 *= x20; +.co10: + sub %l6,%g1,%o1 ! (1_0) ldiff0 = ax0 - ay0; + cmp %g1,_0x7f800000 + bge,pn %icc,.up11 + + fmuld %f0,%f10,%f10 ! (1_1) dtmp0 = cmul0 * x0; +.co11: + sra %o1,31,%g1 ! (1_0) ldiff0 >>= 31; + sub %i3,%i1,%l6 ! (1_0) addrc0 = (char*)px - (char*)py; + faddd %f38,K1,%f38 ! (2_1) dtmp0 += K1; + + faddd %f40,K8,%f40 ! (5_1) dtmp0 += K8; + and %l6,%g1,%o1 ! (1_0) addrc0 &= ldiff0; + fmuld %f18,%f4,%f18 ! (4_1) dtmp0 *= x20; + + lda [%i1+%o1]0x82,%f0 ! (1_0) fy0 = *(float*)((char*)py + addrc0); + sll %g5,3,%g5 ! (2_1) cmul0_ind = ldiff0 << 3; + sub %i3,%o1,%o4 ! (1_0) (char*)px - addrc0; + faddd %f20,K3,%f20 ! (3_1) dtmp0 += K3; + + lda [%o4]0x82,%f2 ! (1_0) fx0 = *(float*)((char*)px - addrc0); + sll %g1,5,%l6 ! (1_0) ltmp0 = ldiff0 << 5; + add %o2,stridez,%o1 ! pz += stridez + + fmuld %f38,%f16,%f38 ! (2_1) dtmp0 *= x20; + cmp %o5,_0x7f800000 ! (1_0) b0 ? 0x7f800000 + bge,pn %icc,.update12 ! (1_0) if ( b0 > 0x7f800000 ) + faddd %f32,%f10,%f16 ! (1_1) dtmp0 = cadd0 + dtmp0; +.cont12: + fmuld %f40,%f22,%f32 ! (5_1) dtmp0 *= x20; + add %i1,stridey,%i1 ! 
py += stridey + nop + fstod %f0,%f40 ! (1_0) y0 = (double)fy0; + + faddd %f18,K5,%f10 ! (4_1) dtmp0 += K5; + sra %l4,27,%o5 ! (1_0) signx0 = ux0 >> 27; + add %i3,stridex,%i3 ! px += stridex + fmuld %f20,%f24,%f20 ! (3_1) dtmp0 *= x20; + + sra %l3,28,%o4 ! (1_0) signy0 = uy0 >> 28; + add %l6,cadd_arr,%l6 ! (1_0) ltmp0 += (char*)cadd_arr; + fstod %f2,%f2 ! (1_0) x0 = (double)fx0; +.den2: + faddd %f32,K7,%f32 ! (5_1) dtmp0 += K7; + and %o5,-16,%o5 ! (1_0) signx0 &= -16; + and %o4,-8,%o4 ! (1_0) signy0 &= -8; + + lda [%i1]0x82,%l4 ! (2_0) uy0 = *(int*)py; + fmuld %f10,%f4,%f18 ! (4_1) dtmp0 *= x20; + faddd %f38,K0,%f38 ! (2_1) dtmp0 += K0; + + lda [%i3]0x82,%l3 ! (2_0) ux0 = *(int*)px; + fdivd %f40,%f2,%f10 ! (1_0) x0 = y0 / x0; + faddd %f20,K2,%f40 ! (3_1) dtmp0 += K2; + + fdtos %f16,%f2 ! (1_1) ftmp0 = (float)dtmp0; + nop + st %f2,[%o2] ! (1_1) *pz = ftmp0; + fmuld %f12,%f12,%f20 ! (0_0) x20 = x0 * x0; + + ldd [cmul_arr+%g5],%f0 ! (2_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind); + add %l6,%o5,%o2 ! (1_0) ltmp0 += signx0; + fmuld %f32,%f22,%f32 ! (5_1) dtmp0 *= x20; + + fmuld %f38,%f8,%f8 ! (2_1) x0 = dtmp0 * x0; + and %l3,_0x7fffffff,%l6 ! (2_0) ax0 = ux0 & 0x7fffffff; + sethi %hi(0x00800000),%o5 + faddd %f18,K4,%f18 ! (4_1) dtmp0 += K4; + + and %l4,_0x7fffffff,%g5 ! (2_0) ay0 = uy0 & 0x7fffffff; + fmuld %f40,%f24,%f38 ! (3_1) dtmp0 *= x20; + + cmp %l6,%o5 + bl,pn %icc,.up12 + fmuld K9,%f20,%f40 ! (0_0) dtmp0 = K9 * x20; +.co12: + nop + cmp %g5,%o5 + bl,pn %icc,.up13 + faddd %f32,K6,%f16 ! (5_1) dtmp0 += K6; +.co13: + ldd [%o2+%o4],%f32 ! (1_0) cadd0 = *(double*)(ltmp0 + signy0); + cmp %l6,_0x7f800000 + bge,pn %icc,.up14 + fmuld %f18,%f4,%f18 ! (4_1) dtmp0 *= x20; +.co14: + sub %l6,%g5,%o2 ! (2_0) ldiff0 = ax0 - ay0; + cmp %g5,_0x7f800000 + bge,pn %icc,.up15 + + fmuld %f0,%f8,%f8 ! (2_1) dtmp0 = cmul0 * x0; +.co15: + sra %o2,31,%g5 ! (2_0) ldiff0 >>= 31; + sub %i3,%i1,%l6 ! (2_0) addrc0 = (char*)px - (char*)py; + faddd %f38,K1,%f38 ! 
(3_1) dtmp0 += K1; + + faddd %f40,K8,%f40 ! (0_0) dtmp0 += K8; + and %l6,%g5,%o2 ! (2_0) addrc0 &= ldiff0; + fmuld %f16,%f22,%f16 ! (5_1) dtmp0 *= x20; + + lda [%i1+%o2]0x82,%f0 ! (2_0) fy0 = *(float*)((char*)py + addrc0); + sub %i3,%o2,%o4 ! (2_0) (char*)px - addrc0; + add %o1,stridez,%o2 ! pz += stridez + faddd %f18,K3,%f18 ! (4_1) dtmp0 += K3; + + lda [%o4]0x82,%f2 ! (2_0) fx0 = *(float*)((char*)px - addrc0); + sll %o0,3,%o0 ! (3_1) cmul0_ind = ldiff0 << 3; + add %i3,stridex,%i3 ! px += stridex + + fmuld %f38,%f24,%f38 ! (3_1) dtmp0 *= x20; + cmp %o5,_0x7f800000 ! (2_0) b0 ? 0x7f800000 + bge,pn %icc,.update13 ! (2_0) if ( b0 > 0x7f800000 ) + faddd %f30,%f8,%f24 ! (2_1) dtmp0 = cadd0 + dtmp0; +.cont13: + fmuld %f40,%f20,%f30 ! (0_0) dtmp0 *= x20; + sll %g5,5,%l6 ! (2_0) ltmp0 = ldiff0 << 5; + add %i1,stridey,%i1 ! py += stridey + fstod %f0,%f40 ! (2_0) y0 = (double)fy0; + + faddd %f16,K5,%f8 ! (5_1) dtmp0 += K5; + sra %l3,27,%o5 ! (2_0) signx0 = ux0 >> 27; + fmuld %f18,%f4,%f18 ! (4_1) dtmp0 *= x20; + + fstod %f2,%f2 ! (2_0) x0 = (double)fx0; + sra %l4,28,%o4 ! (2_0) signy0 = uy0 >> 28; + add %l6,cadd_arr,%l6 ! (2_0) ltmp0 += (char*)cadd_arr; +.den3: + lda [%i1]0x82,%l3 ! (3_0) uy0 = *(int*)py; + and %o5,-16,%o5 ! (2_0) signx0 &= -16; + faddd %f30,K7,%f30 ! (0_0) dtmp0 += K7; + + lda [%i3]0x82,%l4 ! (3_0) ux0 = *(int*)px; + fmuld %f8,%f22,%f16 ! (5_1) dtmp0 *= x20; + faddd %f38,K0,%f38 ! (3_1) dtmp0 += K0; + + fdivd %f40,%f2,%f8 ! (2_0) x0 = y0 / x0; + faddd %f18,K2,%f40 ! (4_1) dtmp0 += K2; + + fdtos %f24,%f1 ! (2_1) ftmp0 = (float)dtmp0; + st %f1,[%o1] ! (2_1) *pz = ftmp0; + fmuld %f10,%f10,%f18 ! (1_0) x20 = x0 * x0; + + ldd [cmul_arr+%o0],%f2 ! (3_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind); + add %l6,%o5,%o1 ! (2_0) ltmp0 += signx0; + and %o4,-8,%o4 ! (2_0) signy0 &= -8; + fmuld %f30,%f20,%f30 ! (0_0) dtmp0 *= x20; + + fmuld %f38,%f6,%f6 ! (3_1) x0 = dtmp0 * x0; + and %l4,_0x7fffffff,%l6 ! 
(3_0) ax0 = ux0 & 0x7fffffff; + sethi %hi(0x00800000),%o5 + faddd %f16,K4,%f24 ! (5_1) dtmp0 += K4; + + and %l3,_0x7fffffff,%o0 ! (3_0) ay0 = uy0 & 0x7fffffff; + fmuld %f40,%f4,%f38 ! (4_1) dtmp0 *= x20; + + cmp %l6,%o5 + bl,pn %icc,.up16 + fmuld K9,%f18,%f40 ! (1_0) dtmp0 = K9 * x20; +.co16: + nop + cmp %o0,%o5 + bl,pn %icc,.up17 + faddd %f30,K6,%f16 ! (0_0) dtmp0 += K6; +.co17: + ldd [%o1+%o4],%f30 ! (2_0) cadd0 = *(double*)(ltmp0 + signy0); + cmp %l6,_0x7f800000 + bge,pn %icc,.up18 + fmuld %f24,%f22,%f24 ! (5_1) dtmp0 *= x20; +.co18: + sub %l6,%o0,%o1 ! (3_0) ldiff0 = ax0 - ay0; + cmp %o0,_0x7f800000 + bge,pn %icc,.up19 + + fmuld %f2,%f6,%f6 ! (3_1) dtmp0 = cmul0 * x0; +.co19: + sra %o1,31,%o0 ! (3_0) ldiff0 >>= 31; + sub %i3,%i1,%l6 ! (3_0) addrc0 = (char*)px - (char*)py; + faddd %f38,K1,%f38 ! (4_1) dtmp0 += K1; + + faddd %f40,K8,%f40 ! (1_0) dtmp0 += K8; + and %l6,%o0,%o1 ! (3_0) addrc0 &= ldiff0; + fmuld %f16,%f20,%f16 ! (0_0) dtmp0 *= x20; + + lda [%i1+%o1]0x82,%f0 ! (3_0) fy0 = *(float*)((char*)py + addrc0); + sub %i3,%o1,%o4 ! (3_0) (char*)px - addrc0; + add %o2,stridez,%o1 ! pz += stridez + faddd %f24,K3,%f24 ! (5_1) dtmp0 += K3; + + lda [%o4]0x82,%f1 ! (3_0) fx0 = *(float*)((char*)px - addrc0); + sll %l5,3,%l5 ! (4_1) cmul0_ind = ldiff0 << 3; + add %i3,stridex,%i3 ! px += stridex + + fmuld %f38,%f4,%f38 ! (4_1) dtmp0 *= x20; + cmp %o5,_0x7f800000 ! (3_0) b0 ? 0x7f800000 + bge,pn %icc,.update14 ! (3_0) if ( b0 > 0x7f800000 ) + faddd %f28,%f6,%f4 ! (3_1) dtmp0 = cadd0 + dtmp0; +.cont14: + fmuld %f40,%f18,%f28 ! (1_0) dtmp0 *= x20; + sll %o0,5,%l6 ! (3_0) ltmp0 = ldiff0 << 5; + add %i1,stridey,%i1 ! py += stridey + fstod %f0,%f40 ! (3_0) y0 = (double)fy0; + + faddd %f16,K5,%f2 ! (0_0) dtmp0 += K5; + sra %l4,27,%o5 ! (3_0) signx0 = ux0 >> 27; + fmuld %f24,%f22,%f24 ! (5_1) dtmp0 *= x20; + + sra %l3,28,%o4 ! (3_0) signy0 = uy0 >> 28; + fstod %f1,%f16 ! (3_0) x0 = (double)fx0; +.den4: + faddd %f28,K7,%f28 ! (1_0) dtmp0 += K7; + add %l6,cadd_arr,%l6 ! 
(3_0) ltmp0 += (char*)cadd_arr; + and %o5,-16,%o5 ! (3_0) signx0 &= -16; + + lda [%i1]0x82,%l4 ! (4_0) uy0 = *(int*)py; + fmuld %f2,%f20,%f2 ! (0_0) dtmp0 *= x20; + faddd %f38,K0,%f38 ! (4_1) dtmp0 += K0; + + lda [%i3]0x82,%l3 ! (4_0) ux0 = *(int*)px; + fdivd %f40,%f16,%f6 ! (3_0) x0 = y0 / x0; + faddd %f24,K2,%f24 ! (5_1) dtmp0 += K2; + + fdtos %f4,%f1 ! (3_1) ftmp0 = (float)dtmp0; + and %o4,-8,%o4 ! (3_0) signy0 &= -8; + st %f1,[%o2] ! (3_1) *pz = ftmp0; + fmuld %f8,%f8,%f16 ! (2_0) x20 = x0 * x0; + + ldd [cmul_arr+%l5],%f0 ! (4_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind); + add %l6,%o5,%o2 ! (3_0) ltmp0 += signx0; + fmuld %f28,%f18,%f28 ! (1_0) dtmp0 *= x20; + + fmuld %f38,%f62,%f62 ! (4_1) x0 = dtmp0 * x0; + and %l3,_0x7fffffff,%l6 ! (4_0) ax0 = ux0 & 0x7fffffff; + sethi %hi(0x00800000),%o5 + faddd %f2,K4,%f2 ! (0_0) dtmp0 += K4; + + and %l4,_0x7fffffff,%l5 ! (4_0) ay0 = uy0 & 0x7fffffff; + fmuld %f24,%f22,%f38 ! (5_1) dtmp0 *= x20; + + cmp %l6,%o5 + bl,pn %icc,.up20 + fmuld K9,%f16,%f40 ! (2_0) dtmp0 = K9 * x20; +.co20: + nop + cmp %l5,%o5 + bl,pn %icc,.up21 + faddd %f28,K6,%f4 ! (1_0) dtmp0 += K6; +.co21: + ldd [%o2+%o4],%f28 ! (3_0) cadd0 = *(double*)(ltmp0 + signy0); + cmp %l6,_0x7f800000 + bge,pn %icc,.up22 + fmuld %f2,%f20,%f24 ! (0_0) dtmp0 *= x20; +.co22: + sub %l6,%l5,%o2 ! (4_0) ldiff0 = ax0 - ay0; + cmp %l5,_0x7f800000 + bge,pn %icc,.up23 + + fmuld %f0,%f62,%f62 ! (4_1) dtmp0 = cmul0 * x0; +.co23: + sra %o2,31,%l5 ! (4_0) ldiff0 >>= 31; + sub %i3,%i1,%l6 ! (4_0) addrc0 = (char*)px - (char*)py; + faddd %f38,K1,%f38 ! (5_1) dtmp0 += K1; + + faddd %f40,K8,%f40 ! (2_0) dtmp0 += K8; + and %l6,%l5,%o2 ! (4_0) addrc0 &= ldiff0; + fmuld %f4,%f18,%f4 ! (1_0) dtmp0 *= x20; + + lda [%i1+%o2]0x82,%f0 ! (4_0) fy0 = *(float*)((char*)py + addrc0); + sub %i3,%o2,%o4 ! (4_0) (char*)px - addrc0; + add %o1,stridez,%o2 ! pz += stridez + faddd %f24,K3,%f24 ! (0_0) dtmp0 += K3; + + lda [%o4]0x82,%f2 ! (4_0) fx0 = *(float*)((char*)px - addrc0); + sll %o7,3,%o7 ! 
(5_1) cmul0_ind = ldiff0 << 3; + add %i3,stridex,%i3 ! px += stridex + + fmuld %f38,%f22,%f38 ! (5_1) dtmp0 *= x20; + cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 + bge,pn %icc,.update15 ! (4_0) if ( b0 > 0x7f800000 ) + faddd %f26,%f62,%f22 ! (4_1) dtmp0 = cadd0 + dtmp0; +.cont15: + fmuld %f40,%f16,%f26 ! (2_0) dtmp0 *= x20; + sll %l5,5,%l6 ! (4_0) ltmp0 = ldiff0 << 5; + add %i1,stridey,%i1 ! py += stridey + fstod %f0,%f40 ! (4_0) y0 = (double)fy0; + + faddd %f4,K5,%f62 ! (1_0) dtmp0 += K5; + sra %l3,27,%o5 ! (4_0) signx0 = ux0 >> 27; + fmuld %f24,%f20,%f24 ! (0_0) dtmp0 *= x20; + + fstod %f2,%f2 ! (4_0) x0 = (double)fx0; + sra %l4,28,%o4 ! (4_0) signy0 = uy0 >> 28; +.den5: + lda [%i1]0x82,%l3 ! (5_0) uy0 = *(int*)py; + subcc counter,6,counter ! counter? + add %l6,cadd_arr,%l6 ! (4_0) ltmp0 += (char*)cadd_arr; + faddd %f26,K7,%f26 ! (2_0) dtmp0 += K7; + + fmuld %f62,%f18,%f4 ! (1_0) dtmp0 *= x20; + and %o5,-16,%o5 ! (4_0) signx0 &= -16; + bpos,pt %icc,.main_loop + faddd %f38,K0,%f38 ! (5_1) dtmp0 += K0; + +.tail: + addcc counter,5,counter + bneg,a,pn %icc,.begin + or %g0,%o1,%o4 + + faddd %f24,K2,%f40 ! (0_1) dtmp0 += K2; + + fdtos %f22,%f22 ! (4_2) ftmp0 = (float)dtmp0; + st %f22,[%o1] ! (4_2) *pz = ftmp0; + + subcc counter,1,counter + bneg,a,pn %icc,.begin + or %g0,%o2,%o4 + + ldd [cmul_arr+%o7],%f0 ! (5_2) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind); + fmuld %f26,%f16,%f26 ! (2_1) dtmp0 *= x20; + + fmuld %f38,%f14,%f14 ! (5_2) x0 = dtmp0 * x0; + faddd %f4,K4,%f4 ! (1_1) dtmp0 += K4; + + fmuld %f40,%f20,%f38 ! (0_1) dtmp0 *= x20; + + + faddd %f26,K6,%f22 ! (2_1) dtmp0 += K6; + + fmuld %f4,%f18,%f4 ! (1_1) dtmp0 *= x20; + + fmuld %f0,%f14,%f14 ! (5_2) dtmp0 = cmul0 * x0; + faddd %f38,K1,%f38 ! (0_1) dtmp0 += K1; + + fmuld %f22,%f16,%f22 ! (2_1) dtmp0 *= x20; + + faddd %f4,K3,%f4 ! (1_1) dtmp0 += K3; + + fmuld %f38,%f20,%f38 ! (0_1) dtmp0 *= x20; + faddd %f36,%f14,%f20 ! (5_2) dtmp0 = cadd0 + dtmp0; + + faddd %f22,K5,%f14 ! 
(2_1) dtmp0 += K5; + add %o2,stridez,%o1 ! pz += stridez + fmuld %f4,%f18,%f4 ! (1_1) dtmp0 *= x20; + + sll %l7,3,%l7 ! (0_1) cmul0_ind = ldiff0 << 3; + + fmuld %f14,%f16,%f22 ! (2_1) dtmp0 *= x20; + faddd %f38,K0,%f38 ! (0_1) dtmp0 += K0; + + faddd %f4,K2,%f40 ! (1_1) dtmp0 += K2; + + fdtos %f20,%f2 ! (5_2) ftmp0 = (float)dtmp0; + st %f2,[%o2] ! (5_2) *pz = ftmp0; + + subcc counter,1,counter + bneg,a,pn %icc,.begin + or %g0,%o1,%o4 + + ldd [cmul_arr+%l7],%f0 ! (0_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind); + + fmuld %f38,%f12,%f12 ! (0_1) x0 = dtmp0 * x0; + faddd %f22,K4,%f22 ! (2_1) dtmp0 += K4; + + fmuld %f40,%f18,%f38 ! (1_1) dtmp0 *= x20; + + fmuld %f22,%f16,%f22 ! (2_1) dtmp0 *= x20; + + fmuld %f0,%f12,%f12 ! (0_1) dtmp0 = cmul0 * x0; + faddd %f38,K1,%f38 ! (1_1) dtmp0 += K1; + + sll %g1,3,%g1 ! (1_1) cmul0_ind = ldiff0 << 3; + faddd %f22,K3,%f22 ! (2_1) dtmp0 += K3; + + add %o1,stridez,%o2 ! pz += stridez + + fmuld %f38,%f18,%f38 ! (1_1) dtmp0 *= x20; + faddd %f34,%f12,%f18 ! (0_1) dtmp0 = cadd0 + dtmp0; + + fmuld %f22,%f16,%f22 ! (2_1) dtmp0 *= x20; + + faddd %f38,K0,%f38 ! (1_1) dtmp0 += K0; + + faddd %f22,K2,%f40 ! (2_1) dtmp0 += K2; + + fdtos %f18,%f2 ! (0_1) ftmp0 = (float)dtmp0; + st %f2,[%o1] ! (0_1) *pz = ftmp0 + + subcc counter,1,counter + bneg,a,pn %icc,.begin + or %g0,%o2,%o4 + + ldd [cmul_arr+%g1],%f0 ! (1_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind); + + fmuld %f38,%f10,%f10 ! (1_1) x0 = dtmp0 * x0; + + fmuld %f40,%f16,%f38 ! (2_1) dtmp0 *= x20; + + fmuld %f0,%f10,%f10 ! (1_1) dtmp0 = cmul0 * x0; + faddd %f38,K1,%f38 ! (2_1) dtmp0 += K1; + + sll %g5,3,%g5 ! (2_1) cmul0_ind = ldiff0 << 3; + + add %o2,stridez,%o1 ! pz += stridez + + fmuld %f38,%f16,%f38 ! (2_1) dtmp0 *= x20; + faddd %f32,%f10,%f16 ! (1_1) dtmp0 = cadd0 + dtmp0; + + faddd %f38,K0,%f38 ! (2_1) dtmp0 += K0; + + fdtos %f16,%f2 ! (1_1) ftmp0 = (float)dtmp0; + st %f2,[%o2] ! 
(1_1) *pz = ftmp0; + + subcc counter,1,counter + bneg,a,pn %icc,.begin + or %g0,%o1,%o4 + + ldd [cmul_arr+%g5],%f0 ! (2_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind); + + fmuld %f38,%f8,%f8 ! (2_1) x0 = dtmp0 * x0; + + fmuld %f0,%f8,%f8 ! (2_1) dtmp0 = cmul0 * x0; + + add %o1,stridez,%o2 ! pz += stridez + + faddd %f30,%f8,%f24 ! (2_1) dtmp0 = cadd0 + dtmp0; + + fdtos %f24,%f1 ! (2_1) ftmp0 = (float)dtmp0; + st %f1,[%o1] ! (2_1) *pz = ftmp0; + + ba .begin + or %g0,%o2,%o4 + + .align 16 +.spec0: + cmp %l6,_0x7f800000 ! ax0 ? 0x7f800000 + bg 2f ! if ( ax0 >= 0x7f800000 ) + srl %l3,30,%l3 ! signx0 = (unsigned)ux0 >> 30; + + cmp %l7,_0x7f800000 ! ay0 ? 0x7f800000 + bg 2f ! if ( ay0 >= 0x7f800000 ) + and %l3,2,%l3 ! signx0 &= 2; + + sra %l4,31,%l4 ! signy0 = uy0 >> 31; + bne,a 1f ! if (ay0 != 0x7f800000) + add %l3,%l3,%l3 ! signx0 += signx0; + + cmp %l6,_0x7f800000 ! ax0 ? 0x7f800000 + bne,a 1f ! if ( ax0 != 0x7f800000 ) + add %g0,2,%l3 ! signx0 = 2 + + add %l3,1,%l3 ! signx0 ++; +1: + sll %l4,3,%l4 ! signy0 <<= 3; + st %l3,[%fp+tmp_pz] ! STORE signx0 + + ldd [cmul_arr+88],%f0 ! LOAD M_PI_4 + + ld [%fp+tmp_pz],%f2 ! LOAD signx0 + + ldd [cmul_arr+%l4],%f4 ! dtmp0 = *(double*)((char*)(cmul_arr + 1) + signy0); + + add %i1,stridey,%i1 ! py += stridey; + fitod %f2,%f2 ! dtmp1 = (double)signx0; + + add %i3,stridex,%i3 ! px += stridex; + + fmuld %f2,%f0,%f0 ! res = signx0 * M_PI_4; + + fmuld %f0,%f4,%f0 ! res *= dtmp0; + fdtos %f0,%f0 ! ftmp0 = (float) res; + st %f0,[%o4] ! *pz = ftmp0; + + ba .begin1 + add %o4,stridez,%o4 ! pz += stridez; +2: + std %l6,[%fp+tmp_pz] ! *(float*)&ax0, *(float*)&ay0 + ldd [%fp+tmp_pz],%f0 ! *(float*)&ax0, *(float*)&ay0 + + add %i1,stridey,%i1 ! py += stridey; + + fmuls %f0,%f1,%f0 ! ftmp0 = *(float*)&ax0 * *(float*)&ay0; + add %i3,stridex,%i3 ! pz += stridex; + st %f0,[%o4] ! *pz = ftmp0; + + ba .begin1 + add %o4,stridez,%o4 ! 
pz += stridez; + + .align 16 +.spec1: + cmp %l6,0 + bne,pn %icc,1f + nop + + cmp %l7,0 + bne,pn %icc,1f + nop + + sra %l4,28,%l4 ! signy0 = uy0 >> 28; + + sra %l3,27,%l3 ! signx0 = ux0 >> 27; + and %l4,-8,%l4 ! signy0 &= -8; + + sra %o2,31,%o2 ! ldiff0 >>= 31; + and %l3,-16,%l3 ! signx0 &= -16; + + sll %o2,5,%o2 ! ldiff0 <<= 5; + add %l4,%l3,%l3 ! signx0 += signy0; + + add %o2,%l3,%l3 ! signx0 += ldiff0; + add %i1,stridey,%i1 ! py += stridey; + + ldd [cadd_arr+%l3],%f0 ! res = *(double*)((char*)(cadd_arr + 7) + signx0); + add %i3,stridex,%i3 ! px += stridex; + + fdtos %f0,%f0 ! ftmp0 = (float) res; + st %f0,[%o4] ! *pz = ftmp0; + + ba .begin1 + add %o4,stridez,%o4 ! pz += stridez; +1: + stx %o4,[%fp+tmp_pz] + sra %o2,31,%l7 ! (0_0) ldiff0 >>= 31; + sub %i3,%i1,%l6 ! (0_0) addrc0 = (char*)px - (char*)py; + + and %l6,%l7,%o2 ! (0_0) addrc0 &= ldiff0; + + lda [%i1+%o2]0x82,%f0 ! (0_0) fy0 = *(float*)((char*)py + addrc0); + sub %i3,%o2,%o4 ! (0_0) (char*)px - addrc0 + + lda [%i1+%o2]0x82,%l5 ! (0_0) fy0 = *(float*)((char*)py + addrc0); + + lda [%o4]0x82,%f2 ! (0_0) fx0 = *(float*)((char*)px - addrc0); + sll %l7,5,%l6 ! (0_0) ltmp0 = ldiff0 << 5; + + lda [%o4]0x82,%g5 ! (0_0) fx0 = *(float*)((char*)px - addrc0); + + sra %l3,27,%o5 ! (0_0) signx0 = ux0 >> 27; + add %i1,stridey,%i1 ! py += stridey + + add %i3,stridex,%i3 ! px += stridex + + lda [%i1]0x82,%l3 ! (1_0) uy0 = *(int*)py; + sra %l4,28,%o4 ! (0_0) signy0 = uy0 >> 28; + + add %l6,cadd_arr,%l6 ! (0_0) ltmp0 += (char*)cadd_arr; + + and %l5,_0x7fffffff,%l4 + sethi %hi(0x00800000),%g1 + + cmp %l4,%g1 + bge,a %icc,1f + fstod %f0,%f40 ! (0_0) y0 = (double)fy0; + + fabss %f0,%f0 ! fy0 = fabsf(fy0); + ldd [cmul_arr+96],%f40 + sra %l5,28,%l4 ! itmp0 >>= 28; + + and %l4,-8,%l4 + fitod %f0,%f0 ! dtmp0 = (double) *(int*)&fy0; + + fmuld %f40,%f0,%f40 ! dtmp0 *= C2ONM149; + ldd [cmul_arr+%l4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); + + fmuld %f40,%f0,%f40 ! 
dtmp0 *= dsign; +1: + and %g5,_0x7fffffff,%l4 + cmp %l4,%g1 + bge,a %icc,.spec1_cont + fstod %f2,%f2 ! (0_0) x0 = (double)fx0; + + fabss %f2,%f2 ! fx0 = fabsf(fx0); + ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 + sra %g5,28,%l4 ! itmp0 >>= 28; + + and %l4,-8,%l4 ! itmp0 = -8; + fitod %f2,%f2 ! dtmp0 = (double) *(int*)&fx0; + + fmuld %f2,%f0,%f2 ! dtmp0 *= C2ONM149; + ldd [cmul_arr+%l4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); + + ba .spec1_cont + fmuld %f2,%f0,%f2 ! dtmp0 *= dsign; + + .align 16 +.update0: + cmp counter,0 + bg,pn %icc,1f + nop + + ld [cmul_arr],%f2 + ba .cont0 + fzero %f0 +1: + cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 + bg,pt %icc,1f + nop +2: + sub counter,0,counter + st counter,[%fp+tmp_counter] + stx %i1,[%fp+tmp_py] + stx %i3,[%fp+tmp_px] + + ld [cmul_arr],%f2 + or %g0,0,counter + ba .cont0 + fzero %f0 +1: + andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff + bne,pn %icc,1f + sethi %hi(0x00800000),%o5 + + andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff + be,pn %icc,2b + nop +1: + st %f0,[%fp+tmp_px] + st %f2,[%fp+tmp_px+4] + ld [%fp+tmp_px],%o4 + + and %o4,_0x7fffffff,%l5 ! itmp0 & 0x7fffffff + cmp %l5,%o5 + bge,a 1f + fstod %f0,%f40 ! (0_0) y0 = (double)fy0; + + ldd [cmul_arr+96],%f40 ! LOAD C2ONM149 + sra %o4,28,%o4 ! itmp0 >>= 28; + fabss %f0,%f0 ! fy0 = fabsf(fy0); + + and %o4,-8,%o4 ! itmp0 = -8; + fitod %f0,%f0 ! dtmp0 = (double) *(int*)&fy0; + + fmuld %f0,%f40,%f40 ! dtmp0 *= C2ONM149; + ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); + + fmuld %f0,%f40,%f40 ! dtmp0 *= dsign; +1: + add %i3,stridex,%i3 ! px += stridex + add %i1,stridey,%i1 ! py += stridey + + ld [%fp+tmp_px+4],%o4 + and %o4,_0x7fffffff,%l5 ! itmp0 & 0x7fffffff + cmp %l5,%o5 + bge,a 1f + fstod %f2,%f2 ! (5_1) x0 = (double)fx0; + + ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 + sra %o4,28,%o4 ! itmp0 >>= 28; + fabss %f2,%f2 ! fx0 = fabsf(fx0); + + and %o4,-8,%o4 ! itmp0 = -8; + fitod %f2,%f2 ! dtmp0 = (double) *(int*)&fx0; + + fmuld %f2,%f0,%f2 ! 
dtmp0 *= C2ONM149; + ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); + + fmuld %f2,%f0,%f2 ! dtmp0 *= dsign; +1: + sra %l4,27,%o5 ! (1_0) signx0 = ux0 >> 27; + + sra %l3,28,%o4 ! (1_0) signy0 = uy0 >> 28; + ba .d0 + add %l6,cadd_arr,%l6 ! (1_0) ltmp0 += (char*)cadd_arr; + + .align 16 +.update1: + cmp counter,1 + bg,pn %icc,1f + nop + + fzero %f0 + ba .cont1 + ld [cmul_arr],%f2 +1: + cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 + bg,pt %icc,1f + nop +2: + sub counter,1,counter + st counter,[%fp+tmp_counter] + stx %i1,[%fp+tmp_py] + stx %i3,[%fp+tmp_px] + + ld [cmul_arr],%f2 + or %g0,1,counter + ba .cont1 + fzero %f0 +1: + andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff + bne,pn %icc,1f + sethi %hi(0x00800000),%o5 + + andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff + be,pn %icc,2b + nop +1: + st %f0,[%fp+tmp_px] + st %f2,[%fp+tmp_px+4] + ld [%fp+tmp_px],%o4 + fmuld %f40,%f20,%f30 ! (0_0) dtmp0 *= x20; + + and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff + cmp %l6,%o5 + bge,a 1f + fstod %f0,%f40 ! (0_0) y0 = (double)fy0; + + ldd [cmul_arr+96],%f40 ! LOAD C2ONM149 + sra %o4,28,%o4 ! itmp0 >>= 28; + fabss %f0,%f0 ! fy0 = fabsf(fy0); + + and %o4,-8,%o4 ! itmp0 = -8; + fitod %f0,%f0 ! dtmp0 = (double) *(int*)&fy0; + + fmuld %f0,%f40,%f40 ! dtmp0 *= C2ONM149; + ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); + + fmuld %f0,%f40,%f40 ! dtmp0 *= dsign; +1: + + add %i1,stridey,%i1 ! py += stridey + + ld [%fp+tmp_px+4],%o4 + and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff + cmp %l6,%o5 + bge,a 1f + fstod %f2,%f2 ! (5_1) x0 = (double)fx0; + + ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 + sra %o4,28,%o4 ! itmp0 >>= 28; + fabss %f2,%f2 ! fx0 = fabsf(fx0); + + and %o4,-8,%o4 ! itmp0 = -8; + fitod %f2,%f2 ! dtmp0 = (double) *(int*)&fx0; + + fmuld %f2,%f0,%f2 ! dtmp0 *= C2ONM149; + ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); + + fmuld %f2,%f0,%f2 ! dtmp0 *= dsign; +1: + sll %g5,5,%l6 ! (2_0) ltmp0 = ldiff0 << 5; + sra %l3,27,%o5 ! 
(2_0) signx0 = ux0 >> 27; + add %i3,stridex,%i3 ! px += stridex + + sra %l4,28,%o4 ! (2_0) signy0 = uy0 >> 28; + ba .d1 + add %l6,cadd_arr,%l6 ! (2_0) ltmp0 += (char*)cadd_arr; + + .align 16 +.update2: + cmp counter,2 + bg,pn %icc,1f + nop + + ld [cmul_arr],%f1 + ba .cont2 + fzeros %f0 +1: + cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 + bg,pt %icc,1f + nop +2: + sub counter,2,counter + st counter,[%fp+tmp_counter] + stx %i1,[%fp+tmp_py] + stx %i3,[%fp+tmp_px] + + ld [cmul_arr],%f1 + or %g0,2,counter + ba .cont2 + fzeros %f0 +1: + andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff + bne,pn %icc,1f + sethi %hi(0x00800000),%o5 + + andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff + be,pn %icc,2b + nop +1: + std %f0,[%fp+tmp_px] + ld [%fp+tmp_px],%o4 + fmuld %f40,%f18,%f28 ! (1_0) dtmp0 *= x20; + + faddd %f16,K5,%f2 ! (0_0) dtmp0 += K5; + + and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff + cmp %l6,%o5 + bge,a 1f + fstod %f0,%f40 ! (0_0) y0 = (double)fy0; + + ldd [cmul_arr+96],%f40 ! LOAD C2ONM149 + sra %o4,28,%o4 ! itmp0 >>= 28; + fabss %f0,%f0 ! fy0 = fabsf(fy0); + + and %o4,-8,%o4 ! itmp0 = -8; + fitod %f0,%f16 ! dtmp0 = (double) *(int*)&fy0; + + fmuld %f16,%f40,%f40 ! dtmp0 *= C2ONM149; + ldd [cmul_arr+%o4],%f16 ! dsign = *(double*)((char*)cmul_arr + itmp0); + + fmuld %f16,%f40,%f40 ! dtmp0 *= dsign; +1: + add %i1,stridey,%i1 ! py += stridey + + ld [%fp+tmp_px+4],%o4 + and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff + cmp %l6,%o5 + bge,a 1f + fstod %f1,%f16 ! (5_1) x0 = (double)fx0; + + fabss %f1,%f16 ! fx0 = fabsf(fx0); + ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 + sra %o4,28,%o4 ! itmp0 >>= 28; + + and %o4,-8,%o4 ! itmp0 = -8; + fitod %f16,%f16 ! dtmp0 = (double) *(int*)&fx0; + + fmuld %f16,%f0,%f16 ! dtmp0 *= C2ONM149; + ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); + + fmuld %f16,%f0,%f16 ! dtmp0 *= dsign; +1: + sll %o0,5,%l6 ! (3_0) ltmp0 = ldiff0 << 5; + sra %l4,27,%o5 ! (3_0) signx0 = ux0 >> 27; + + add %i3,stridex,%i3 ! 
px += stridex + ba .d2 + sra %l3,28,%o4 ! (3_0) signy0 = uy0 >> 28; + + .align 16 +.update3: + cmp counter,3 + bg,pn %icc,1f + nop + + fzero %f0 + ba .cont3 + ld [cmul_arr],%f2 +1: + cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 + bg,pt %icc,1f + nop +2: + sub counter,3,counter + st counter,[%fp+tmp_counter] + stx %i1,[%fp+tmp_py] + stx %i3,[%fp+tmp_px] + + ld [cmul_arr],%f2 + or %g0,3,counter + ba .cont3 + fzero %f0 +1: + andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff + bne,pn %icc,1f + sethi %hi(0x00800000),%o5 + + andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff + be,pn %icc,2b + nop +1: + st %f0,[%fp+tmp_px] + st %f2,[%fp+tmp_px+4] + ld [%fp+tmp_px],%o4 + fmuld %f40,%f16,%f26 ! (2_0) dtmp0 *= x20; + + and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff + cmp %l6,%o5 + bge,a 1f + fstod %f0,%f40 ! (0_0) y0 = (double)fy0; + + ldd [cmul_arr+96],%f40 ! LOAD C2ONM149 + sra %o4,28,%o4 ! itmp0 >>= 28; + fabss %f0,%f0 ! fy0 = fabsf(fy0); + + and %o4,-8,%o4 ! itmp0 = -8; + fitod %f0,%f0 ! dtmp0 = (double) *(int*)&fy0; + + fmuld %f0,%f40,%f40 ! dtmp0 *= C2ONM149; + ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); + + fmuld %f0,%f40,%f40 ! dtmp0 *= dsign; +1: + add %i1,stridey,%i1 ! py += stridey + faddd %f4,K5,%f62 ! (1_0) dtmp0 += K5; + fmuld %f24,%f20,%f24 ! (0_0) dtmp0 *= x20; + + ld [%fp+tmp_px+4],%o4 + and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff + cmp %l6,%o5 + bge,a 1f + fstod %f2,%f2 ! (5_1) x0 = (double)fx0; + + fabss %f2,%f2 ! fx0 = fabsf(fx0); + ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 + sra %o4,28,%o4 ! itmp0 >>= 28; + + and %o4,-8,%o4 ! itmp0 = -8; + fitod %f2,%f2 ! dtmp0 = (double) *(int*)&fx0; + + fmuld %f2,%f0,%f2 ! dtmp0 *= C2ONM149; + ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); + + fmuld %f2,%f0,%f2 ! dtmp0 *= dsign; +1: + sll %l5,5,%l6 ! (4_0) ltmp0 = ldiff0 << 5; + sra %l3,27,%o5 ! (4_0) signx0 = ux0 >> 27; + + add %i3,stridex,%i3 ! px += stridex + ba .d3 + sra %l4,28,%o4 ! 
(4_0) signy0 = uy0 >> 28; + + .align 16 +.update4: + cmp counter,4 + bg,pn %icc,1f + nop + + ld [cmul_arr],%f1 + ba .cont4 + fzeros %f0 +1: + cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 + bg,pt %icc,1f + nop +2: + sub counter,4,counter + st counter,[%fp+tmp_counter] + stx %i1,[%fp+tmp_py] + stx %i3,[%fp+tmp_px] + + ld [cmul_arr],%f1 + or %g0,4,counter + ba .cont4 + fzeros %f0 +1: + andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff + bne,pn %icc,1f + sethi %hi(0x00800000),%o5 + + andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff + be,pn %icc,2b + nop +1: + std %f0,[%fp+tmp_px] + ld [%fp+tmp_px],%o4 + fmuld %f40,%f24,%f36 ! (3_1) dtmp0 *= x20; + + and %o4,_0x7fffffff,%o1 ! itmp0 & 0x7fffffff + cmp %o1,%o5 + bge,a 1f + fstod %f0,%f40 ! (0_0) y0 = (double)fy0; + + ldd [cmul_arr+96],%f40 ! LOAD C2ONM149 + sra %o4,28,%o4 ! itmp0 >>= 28; + fabss %f0,%f0 ! fy0 = fabsf(fy0); + + and %o4,-8,%o4 ! itmp0 = -8; + fitod %f0,%f14 ! dtmp0 = (double) *(int*)&fy0; + + fmuld %f14,%f40,%f40 ! dtmp0 *= C2ONM149; + ldd [cmul_arr+%o4],%f14 ! dsign = *(double*)((char*)cmul_arr + itmp0); + + fmuld %f14,%f40,%f40 ! dtmp0 *= dsign; +1: + faddd %f22,K5,%f14 ! (2_1) dtmp0 += K5; + fmuld %f4,%f18,%f4 ! (1_1) dtmp0 *= x20; + + ld [%fp+tmp_px+4],%o4 + and %o4,_0x7fffffff,%o1 ! itmp0 & 0x7fffffff + cmp %o1,%o5 + bge,a 1f + fstod %f1,%f2 ! (5_1) x0 = (double)fx0; + + fabss %f1,%f22 ! fx0 = fabsf(fx0); + ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 + sra %o4,28,%o4 ! itmp0 >>= 28; + + and %o4,-8,%o4 ! itmp0 = -8; + fitod %f22,%f22 ! dtmp0 = (double) *(int*)&fx0; + + fmuld %f22,%f0,%f22 ! dtmp0 *= C2ONM149; + ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); + + fmuld %f22,%f0,%f2 ! dtmp0 *= dsign; +1: + sll %l7,3,%l7 ! (0_1) cmul0_ind = ldiff0 << 3; + ba .d4 + add %i3,stridex,%i3 ! px += stridex + + .align 16 +.update5: + cmp counter,5 + bg,pn %icc,1f + nop + + ld [cmul_arr],%f2 + ba .cont5 + fzero %f0 +1: + cmp %o5,_0x7f800000 ! (4_0) b0 ? 
0x7f800000 + bg,pt %icc,1f + nop +2: + sub counter,5,counter + st counter,[%fp+tmp_counter] + stx %i1,[%fp+tmp_py] + stx %i3,[%fp+tmp_px] + + ld [cmul_arr],%f2 + or %g0,5,counter + ba .cont5 + fzero %f0 +1: + andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff + bne,pn %icc,1f + sethi %hi(0x00800000),%o5 + + andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff + be,pn %icc,2b + nop +1: + st %f0,[%fp+tmp_px] + st %f2,[%fp+tmp_px+4] + ld [%fp+tmp_px],%o4 + fmuld %f40,%f4,%f34 ! (4_1) dtmp0 *= x20; + + stx %l5,[%fp+tmp_py] + and %o4,_0x7fffffff,%l5 ! itmp0 & 0x7fffffff + cmp %l5,%o5 + bge,a 1f + fstod %f0,%f40 ! (0_0) y0 = (double)fy0; + + ldd [cmul_arr+96],%f40 ! LOAD C2ONM149 + sra %o4,28,%o4 ! itmp0 >>= 28; + fabss %f0,%f0 ! fy0 = fabsf(fy0); + + and %o4,-8,%o4 ! itmp0 = -8; + fitod %f0,%f0 ! dtmp0 = (double) *(int*)&fy0; + + fmuld %f0,%f40,%f40 ! dtmp0 *= C2ONM149; + ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); + + fmuld %f0,%f40,%f40 ! dtmp0 *= dsign; +1: + faddd %f20,K5,%f12 ! (3_1) dtmp0 += K5; + add %i1,stridey,%i1 ! py += stridey + fmuld %f22,%f16,%f22 ! (2_1) dtmp0 *= x20; + + ld [%fp+tmp_px+4],%o4 + and %o4,_0x7fffffff,%l5 ! itmp0 & 0x7fffffff + cmp %l5,%o5 + bge,a 1f + fstod %f2,%f2 ! (5_1) x0 = (double)fx0; + + ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 + sra %o4,28,%o4 ! itmp0 >>= 28; + fabss %f2,%f2 ! fx0 = fabsf(fx0); + + and %o4,-8,%o4 ! itmp0 = -8; + fitod %f2,%f2 ! dtmp0 = (double) *(int*)&fx0; + + fmuld %f2,%f0,%f2 ! dtmp0 *= C2ONM149; + ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); + + fmuld %f2,%f0,%f2 ! dtmp0 *= dsign; +1: + ldx [%fp+tmp_py],%l5 + sra %l3,27,%o5 ! (0_0) signx0 = ux0 >> 27; + add %i3,stridex,%i3 ! px += stridex + + lda [%i1]0x82,%l3 ! (1_0) uy0 = *(int*)py; + sra %l4,28,%o4 ! (0_0) signy0 = uy0 >> 28; + ba .d5 + add %l6,cadd_arr,%l6 ! 
(0_0) ltmp0 += (char*)cadd_arr; + + .align 16 +.update6: + cmp counter,5 + bg,pn %icc,1f + nop + + ld [cmul_arr],%f2 + ba .cont6 + fzero %f0 +1: + cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 + bg,pt %icc,1f + nop +2: + sub counter,5,counter + st counter,[%fp+tmp_counter] + stx %i1,[%fp+tmp_py] + stx %i3,[%fp+tmp_px] + + ld [cmul_arr],%f2 + or %g0,5,counter + ba .cont6 + fzero %f0 +1: + andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff + bne,pn %icc,1f + sethi %hi(0x00800000),%o5 + + andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff + be,pn %icc,2b + nop +1: + st %f0,[%fp+tmp_pz] + st %f2,[%fp+tmp_pz+4] + ld [%fp+tmp_pz],%o4 + fmuld %f40,%f22,%f32 ! (5_1) dtmp0 *= x20; + + stx %l5,[%fp+tmp_px] + and %o4,_0x7fffffff,%l5 ! itmp0 & 0x7fffffff + cmp %l5,%o5 + bge,a 1f + fstod %f0,%f40 ! (0_0) y0 = (double)fy0; + + ldd [cmul_arr+96],%f40 ! LOAD C2ONM149 + sra %o4,28,%o4 ! itmp0 >>= 28; + fabss %f0,%f0 ! fy0 = fabsf(fy0); + + and %o4,-8,%o4 ! itmp0 = -8; + fitod %f0,%f0 ! dtmp0 = (double) *(int*)&fy0; + + fmuld %f0,%f40,%f40 ! dtmp0 *= C2ONM149; + ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); + + fmuld %f0,%f40,%f40 ! dtmp0 *= dsign; +1: + faddd %f18,K5,%f10 ! (4_1) dtmp0 += K5; + add %i3,stridex,%i3 ! px += stridex + add %i1,stridey,%i1 ! py += stridey + fmuld %f20,%f24,%f20 ! (3_1) dtmp0 *= x20; + + ld [%fp+tmp_pz+4],%o4 + and %o4,_0x7fffffff,%l5 ! itmp0 & 0x7fffffff + cmp %l5,%o5 + bge,a 1f + fstod %f2,%f2 ! (5_1) x0 = (double)fx0; + + ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 + sra %o4,28,%o4 ! itmp0 >>= 28; + fabss %f2,%f2 ! fx0 = fabsf(fx0); + + and %o4,-8,%o4 ! itmp0 = -8; + fitod %f2,%f2 ! dtmp0 = (double) *(int*)&fx0; + + fmuld %f2,%f0,%f2 ! dtmp0 *= C2ONM149; + ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); + + fmuld %f2,%f0,%f2 ! dtmp0 *= dsign; +1: + ldx [%fp+tmp_px],%l5 + + sra %l4,27,%o5 ! (1_0) signx0 = ux0 >> 27; + + sra %l3,28,%o4 ! (1_0) signy0 = uy0 >> 28; + ba .d6 + add %l6,cadd_arr,%l6 ! 
(1_0) ltmp0 += (char*)cadd_arr; + + .align 16 +.update7: + cmp counter,5 + bg,pn %icc,1f + nop + + ld [cmul_arr],%f2 + ba .cont7 + fzero %f0 +1: + cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 + bg,pt %icc,1f + nop +2: + sub counter,5,counter + st counter,[%fp+tmp_counter] + stx %i1,[%fp+tmp_py] + stx %i3,[%fp+tmp_px] + + ld [cmul_arr],%f2 + or %g0,5,counter + ba .cont7 + fzero %f0 +1: + andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff + bne,pn %icc,1f + sethi %hi(0x00800000),%o5 + + andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff + be,pn %icc,2b + nop +1: + st %f0,[%fp+tmp_pz] + st %f2,[%fp+tmp_pz+4] + ld [%fp+tmp_pz],%o4 + fmuld %f40,%f20,%f30 ! (0_0) dtmp0 *= x20; + + and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff + cmp %l6,%o5 + bge,a 1f + fstod %f0,%f40 ! (0_0) y0 = (double)fy0; + + ldd [cmul_arr+96],%f40 ! LOAD C2ONM149 + sra %o4,28,%o4 ! itmp0 >>= 28; + fabss %f0,%f0 ! fy0 = fabsf(fy0); + + and %o4,-8,%o4 ! itmp0 = -8; + fitod %f0,%f0 ! dtmp0 = (double) *(int*)&fy0; + + fmuld %f0,%f40,%f40 ! dtmp0 *= C2ONM149; + ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); + + fmuld %f0,%f40,%f40 ! dtmp0 *= dsign; +1: + faddd %f16,K5,%f8 ! (5_1) dtmp0 += K5; + add %i1,stridey,%i1 ! py += stridey + fmuld %f18,%f4,%f18 ! (4_1) dtmp0 *= x20; + + ld [%fp+tmp_pz+4],%o4 + and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff + cmp %l6,%o5 + bge,a 1f + fstod %f2,%f2 ! (5_1) x0 = (double)fx0; + + ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 + sra %o4,28,%o4 ! itmp0 >>= 28; + fabss %f2,%f2 ! fx0 = fabsf(fx0); + + and %o4,-8,%o4 ! itmp0 = -8; + fitod %f2,%f2 ! dtmp0 = (double) *(int*)&fx0; + + fmuld %f2,%f0,%f2 ! dtmp0 *= C2ONM149; + ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); + + fmuld %f2,%f0,%f2 ! dtmp0 *= dsign; +1: + sll %g5,5,%l6 ! (2_0) ltmp0 = ldiff0 << 5; + sra %l3,27,%o5 ! (2_0) signx0 = ux0 >> 27; + add %i3,stridex,%i3 ! px += stridex + + sra %l4,28,%o4 ! (2_0) signy0 = uy0 >> 28; + ba .d7 + add %l6,cadd_arr,%l6 ! 
(2_0) ltmp0 += (char*)cadd_arr; + + .align 16 +.update8: + cmp counter,5 + bg,pn %icc,1f + nop + + ld [cmul_arr],%f1 + ba .cont8 + fzeros %f0 +1: + cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 + bg,pt %icc,1f + nop +2: + sub counter,5,counter + st counter,[%fp+tmp_counter] + stx %i1,[%fp+tmp_py] + stx %i3,[%fp+tmp_px] + + ld [cmul_arr],%f1 + or %g0,5,counter + ba .cont8 + fzeros %f0 +1: + andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff + bne,pn %icc,1f + sethi %hi(0x00800000),%o5 + + andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff + be,pn %icc,2b + nop +1: + std %f0,[%fp+tmp_pz] + ld [%fp+tmp_pz],%o4 + fmuld %f40,%f18,%f28 ! (1_0) dtmp0 *= x20; + + faddd %f16,K5,%f2 ! (0_0) dtmp0 += K5; + + and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff + cmp %l6,%o5 + bge,a 1f + fstod %f0,%f40 ! (0_0) y0 = (double)fy0; + + ldd [cmul_arr+96],%f40 ! LOAD C2ONM149 + sra %o4,28,%o4 ! itmp0 >>= 28; + fabss %f0,%f0 ! fy0 = fabsf(fy0); + + and %o4,-8,%o4 ! itmp0 = -8; + fitod %f0,%f16 ! dtmp0 = (double) *(int*)&fy0; + + fmuld %f16,%f40,%f40 ! dtmp0 *= C2ONM149; + ldd [cmul_arr+%o4],%f16 ! dsign = *(double*)((char*)cmul_arr + itmp0); + + fmuld %f16,%f40,%f40 ! dtmp0 *= dsign; +1: + add %i1,stridey,%i1 ! py += stridey + fmuld %f24,%f22,%f24 ! (5_1) dtmp0 *= x20; + + ld [%fp+tmp_pz+4],%o4 + and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff + cmp %l6,%o5 + bge,a 1f + fstod %f1,%f16 ! (5_1) x0 = (double)fx0; + + fabss %f1,%f16 ! fx0 = fabsf(fx0); + ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 + sra %o4,28,%o4 ! itmp0 >>= 28; + + and %o4,-8,%o4 ! itmp0 = -8; + fitod %f16,%f16 ! dtmp0 = (double) *(int*)&fx0; + + fmuld %f16,%f0,%f16 ! dtmp0 *= C2ONM149; + ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); + + fmuld %f16,%f0,%f16 ! dtmp0 *= dsign; +1: + sll %o0,5,%l6 ! (3_0) ltmp0 = ldiff0 << 5; + sra %l4,27,%o5 ! (3_0) signx0 = ux0 >> 27; + + add %i3,stridex,%i3 ! px += stridex + ba .d8 + sra %l3,28,%o4 ! 
(3_0) signy0 = uy0 >> 28; + + .align 16 +.update9: + cmp counter,5 + bg,pn %icc,1f + nop + + ld [cmul_arr],%f2 + ba .cont9 + fzero %f0 +1: + cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 + bg,pt %icc,1f + nop +2: + sub counter,5,counter + st counter,[%fp+tmp_counter] + stx %i1,[%fp+tmp_py] + stx %i3,[%fp+tmp_px] + + ld [cmul_arr],%f2 + or %g0,5,counter + ba .cont9 + fzero %f0 +1: + andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff + bne,pn %icc,1f + sethi %hi(0x00800000),%o5 + + andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff + be,pn %icc,2b + nop +1: + st %f0,[%fp+tmp_pz] + st %f2,[%fp+tmp_pz+4] + ld [%fp+tmp_pz],%o4 + fmuld %f40,%f16,%f26 ! (2_0) dtmp0 *= x20; + + and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff + cmp %l6,%o5 + bge,a 1f + fstod %f0,%f40 ! (0_0) y0 = (double)fy0; + + ldd [cmul_arr+96],%f40 ! LOAD C2ONM149 + sra %o4,28,%o4 ! itmp0 >>= 28; + fabss %f0,%f0 ! fy0 = fabsf(fy0); + + and %o4,-8,%o4 ! itmp0 = -8; + fitod %f0,%f0 ! dtmp0 = (double) *(int*)&fy0; + + fmuld %f0,%f40,%f40 ! dtmp0 *= C2ONM149; + ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); + + fmuld %f0,%f40,%f40 ! dtmp0 *= dsign; +1: + add %i1,stridey,%i1 ! py += stridey + faddd %f4,K5,%f62 ! (1_0) dtmp0 += K5; + fmuld %f24,%f20,%f24 ! (0_0) dtmp0 *= x20; + + ld [%fp+tmp_pz+4],%o4 + and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff + cmp %l6,%o5 + bge,a 1f + fstod %f2,%f2 ! (5_1) x0 = (double)fx0; + + fabss %f2,%f2 ! fx0 = fabsf(fx0); + ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 + sra %o4,28,%o4 ! itmp0 >>= 28; + + and %o4,-8,%o4 ! itmp0 = -8; + fitod %f2,%f2 ! dtmp0 = (double) *(int*)&fx0; + + fmuld %f2,%f0,%f2 ! dtmp0 *= C2ONM149; + ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); + + fmuld %f2,%f0,%f2 ! dtmp0 *= dsign; +1: + sll %l5,5,%l6 ! (4_0) ltmp0 = ldiff0 << 5; + sra %l3,27,%o5 ! (4_0) signx0 = ux0 >> 27; + + add %i3,stridex,%i3 ! px += stridex + ba .d9 + sra %l4,28,%o4 ! 
(4_0) signy0 = uy0 >> 28; + + .align 16 +.update10: + cmp counter,1 + bg,pn %icc,1f + nop + + ld [cmul_arr],%f2 + ba .cont10 + fzero %f0 +1: + cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 + bg,pt %icc,1f + nop +2: + sub counter,1,counter + st counter,[%fp+tmp_counter] + stx %i1,[%fp+tmp_py] + stx %i3,[%fp+tmp_px] + + ld [cmul_arr],%f2 + or %g0,1,counter + ba .cont10 + fzero %f0 +1: + andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff + bne,pn %icc,1f + sethi %hi(0x00800000),%o5 + + andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff + be,pn %icc,2b + nop +1: + st %f0,[%fp+tmp_pz] + st %f2,[%fp+tmp_pz+4] + ld [%fp+tmp_pz],%o1 + fmuld %f40,%f24,%f36 ! (3_1) dtmp0 *= x20; + + and %o1,_0x7fffffff,%o4 ! itmp0 & 0x7fffffff + cmp %o4,%o5 + bge,a 1f + fstod %f0,%f40 ! (5_1) y0 = (double)fy0; + + ldd [cmul_arr+96],%f40 ! LOAD C2ONM149 + sra %o1,28,%o1 ! itmp0 >>= 28; + fabss %f0,%f0 ! fy0 = fabsf(fy0); + + and %o1,-8,%o1 ! itmp0 = -8; + fitod %f0,%f0 ! dtmp0 = (double) *(int*)&fy0; + + fmuld %f0,%f40,%f40 ! dtmp0 *= C2ONM149; + ldd [cmul_arr+%o1],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); + + fmuld %f0,%f40,%f40 ! dtmp0 *= dsign; +1: + faddd %f22,K5,%f14 ! (2_1) dtmp0 += K5; + fmuld %f4,%f18,%f4 ! (1_1) dtmp0 *= x20; + + sll %l7,3,%l7 ! (0_1) cmul0_ind = ldiff0 << 3; + add %i3,stridex,%i3 ! px += stridex + + ld [%fp+tmp_pz+4],%o1 + and %o1,_0x7fffffff,%o4 ! itmp0 & 0x7fffffff + cmp %o4,%o5 + bge,a 1f + fstod %f2,%f2 ! (5_1) x0 = (double)fx0; + + ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 + sra %o1,28,%o1 ! itmp0 >>= 28; + fabss %f2,%f2 ! fx0 = fabsf(fx0); + + and %o1,-8,%o1 ! itmp0 = -8; + fitod %f2,%f2 ! dtmp0 = (double) *(int*)&fx0; + + fmuld %f2,%f0,%f2 ! dtmp0 *= C2ONM149; + ldd [cmul_arr+%o1],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); + + fmuld %f2,%f0,%f2 ! dtmp0 *= dsign; +1: + ba .den0 + add %o2,stridez,%o1 ! pz += stridez + + .align 16 +.update11: + cmp counter,2 + bg,pn %icc,1f + nop + + ld [cmul_arr],%f2 + ba .cont11 + fzero %f0 +1: + cmp %o5,_0x7f800000 ! 
(4_0) b0 ? 0x7f800000 + bg,pt %icc,1f + nop +2: + sub counter,2,counter + st counter,[%fp+tmp_counter] + stx %i1,[%fp+tmp_py] + stx %i3,[%fp+tmp_px] + + ld [cmul_arr],%f2 + or %g0,2,counter + ba .cont11 + fzero %f0 +1: + andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff + bne,pn %icc,1f + sethi %hi(0x00800000),%o5 + + andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff + be,pn %icc,2b + nop +1: + st %f0,[%fp+tmp_pz] + st %f2,[%fp+tmp_pz+4] + ld [%fp+tmp_pz],%o4 + fmuld %f40,%f4,%f34 ! (4_1) dtmp0 *= x20; + + stx %l5,[%fp+tmp_px] + and %o4,_0x7fffffff,%l5 ! itmp0 & 0x7fffffff + cmp %l5,%o5 + bge,a 1f + fstod %f0,%f40 ! (0_0) y0 = (double)fy0; + + ldd [cmul_arr+96],%f40 ! LOAD C2ONM149 + sra %o4,28,%o4 ! itmp0 >>= 28; + fabss %f0,%f0 ! fy0 = fabsf(fy0); + + and %o4,-8,%o4 ! itmp0 = -8; + fitod %f0,%f0 ! dtmp0 = (double) *(int*)&fy0; + + fmuld %f0,%f40,%f40 ! dtmp0 *= C2ONM149; + ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); + + fmuld %f0,%f40,%f40 ! dtmp0 *= dsign; +1: + faddd %f20,K5,%f12 ! (3_1) dtmp0 += K5; + add %i1,stridey,%i1 ! py += stridey + fmuld %f22,%f16,%f22 ! (2_1) dtmp0 *= x20; + + ld [%fp+tmp_pz+4],%o4 + and %o4,_0x7fffffff,%l5 ! itmp0 & 0x7fffffff + cmp %l5,%o5 + bge,a 1f + fstod %f2,%f2 ! (5_1) x0 = (double)fx0; + + ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 + sra %o4,28,%o4 ! itmp0 >>= 28; + fabss %f2,%f2 ! fx0 = fabsf(fx0); + + and %o4,-8,%o4 ! itmp0 = -8; + fitod %f2,%f2 ! dtmp0 = (double) *(int*)&fx0; + + fmuld %f2,%f0,%f2 ! dtmp0 *= C2ONM149; + ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); + + fmuld %f2,%f0,%f2 ! dtmp0 *= dsign; +1: + ldx [%fp+tmp_px],%l5 + sra %l3,27,%o5 ! (0_0) signx0 = ux0 >> 27; + add %i3,stridex,%i3 ! px += stridex + + lda [%i1]0x82,%l3 ! (1_0) uy0 = *(int*)py; + sra %l4,28,%o4 ! (0_0) signy0 = uy0 >> 28; + ba .den1 + add %l6,cadd_arr,%l6 ! 
(0_0) ltmp0 += (char*)cadd_arr; + + .align 16 +.update12: + cmp counter,3 + bg,pn %icc,1f + nop + + ld [cmul_arr],%f2 + ba .cont12 + fzero %f0 +1: + cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 + bg,pt %icc,1f + nop +2: + sub counter,3,counter + st counter,[%fp+tmp_counter] + stx %i1,[%fp+tmp_py] + stx %i3,[%fp+tmp_px] + + ld [cmul_arr],%f2 + or %g0,3,counter + ba .cont12 + fzero %f0 +1: + andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff + bne,pn %icc,1f + sethi %hi(0x00800000),%o5 + + andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff + be,pn %icc,2b + nop +1: + st %f0,[%fp+tmp_pz] + st %f2,[%fp+tmp_pz+4] + ld [%fp+tmp_pz],%o4 + fmuld %f40,%f22,%f32 ! (5_1) dtmp0 *= x20; + + stx %l5,[%fp+tmp_px] + and %o4,_0x7fffffff,%l5 ! itmp0 & 0x7fffffff + cmp %l5,%o5 + bge,a 1f + fstod %f0,%f40 ! (0_0) y0 = (double)fy0; + + ldd [cmul_arr+96],%f40 ! LOAD C2ONM149 + sra %o4,28,%o4 ! itmp0 >>= 28; + fabss %f0,%f0 ! fy0 = fabsf(fy0); + + and %o4,-8,%o4 ! itmp0 = -8; + fitod %f0,%f0 ! dtmp0 = (double) *(int*)&fy0; + + fmuld %f0,%f40,%f40 ! dtmp0 *= C2ONM149; + ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); + + fmuld %f0,%f40,%f40 ! dtmp0 *= dsign; +1: + faddd %f18,K5,%f10 ! (4_1) dtmp0 += K5; + add %i3,stridex,%i3 ! px += stridex + add %i1,stridey,%i1 ! py += stridey + fmuld %f20,%f24,%f20 ! (3_1) dtmp0 *= x20; + + ld [%fp+tmp_pz+4],%o4 + and %o4,_0x7fffffff,%l5 ! itmp0 & 0x7fffffff + cmp %l5,%o5 + bge,a 1f + fstod %f2,%f2 ! (5_1) x0 = (double)fx0; + + ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 + sra %o4,28,%o4 ! itmp0 >>= 28; + fabss %f2,%f2 ! fx0 = fabsf(fx0); + + and %o4,-8,%o4 ! itmp0 = -8; + fitod %f2,%f2 ! dtmp0 = (double) *(int*)&fx0; + + fmuld %f2,%f0,%f2 ! dtmp0 *= C2ONM149; + ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); + + fmuld %f2,%f0,%f2 ! dtmp0 *= dsign; +1: + ldx [%fp+tmp_px],%l5 + + sra %l4,27,%o5 ! (1_0) signx0 = ux0 >> 27; + + sra %l3,28,%o4 ! (1_0) signy0 = uy0 >> 28; + ba .den2 + add %l6,cadd_arr,%l6 ! 
(1_0) ltmp0 += (char*)cadd_arr; + + .align 16 +.update13: + cmp counter,4 + bg,pn %icc,1f + nop + + ld [cmul_arr],%f2 + ba .cont13 + fzero %f0 +1: + cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 + bg,pt %icc,1f + nop +2: + sub counter,4,counter + st counter,[%fp+tmp_counter] + stx %i1,[%fp+tmp_py] + sub %i3,stridex,%o5 + stx %o5,[%fp+tmp_px] + + ld [cmul_arr],%f2 + or %g0,4,counter + ba .cont13 + fzero %f0 +1: + andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff + bne,pn %icc,1f + sethi %hi(0x00800000),%o5 + + andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff + be,pn %icc,2b + nop +1: + st %f0,[%fp+tmp_pz] + st %f2,[%fp+tmp_pz+4] + ld [%fp+tmp_pz],%o4 + fmuld %f40,%f20,%f30 ! (0_0) dtmp0 *= x20; + + and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff + cmp %l6,%o5 + bge,a 1f + fstod %f0,%f40 ! (0_0) y0 = (double)fy0; + + ldd [cmul_arr+96],%f40 ! LOAD C2ONM149 + sra %o4,28,%o4 ! itmp0 >>= 28; + fabss %f0,%f0 ! fy0 = fabsf(fy0); + + and %o4,-8,%o4 ! itmp0 = -8; + fitod %f0,%f0 ! dtmp0 = (double) *(int*)&fy0; + + fmuld %f0,%f40,%f40 ! dtmp0 *= C2ONM149; + ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); + + fmuld %f0,%f40,%f40 ! dtmp0 *= dsign; +1: + faddd %f16,K5,%f8 ! (5_1) dtmp0 += K5; + add %i1,stridey,%i1 ! py += stridey + fmuld %f18,%f4,%f18 ! (4_1) dtmp0 *= x20; + + ld [%fp+tmp_pz+4],%o4 + and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff + cmp %l6,%o5 + bge,a 1f + fstod %f2,%f2 ! (5_1) x0 = (double)fx0; + + ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 + sra %o4,28,%o4 ! itmp0 >>= 28; + fabss %f2,%f2 ! fx0 = fabsf(fx0); + + and %o4,-8,%o4 ! itmp0 = -8; + fitod %f2,%f2 ! dtmp0 = (double) *(int*)&fx0; + + fmuld %f2,%f0,%f2 ! dtmp0 *= C2ONM149; + ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); + + fmuld %f2,%f0,%f2 ! dtmp0 *= dsign; +1: + sll %g5,5,%l6 ! (2_0) ltmp0 = ldiff0 << 5; + sra %l3,27,%o5 ! (2_0) signx0 = ux0 >> 27; + + sra %l4,28,%o4 ! (2_0) signy0 = uy0 >> 28; + ba .den3 + add %l6,cadd_arr,%l6 ! 
(2_0) ltmp0 += (char*)cadd_arr; + + .align 16 +.update14: + cmp counter,5 + bg,pn %icc,1f + nop + + ld [cmul_arr],%f1 + ba .cont14 + fzeros %f0 +1: + cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 + bg,pt %icc,1f + nop +2: + sub counter,5,counter + st counter,[%fp+tmp_counter] + stx %i1,[%fp+tmp_py] + sub %i3,stridex,%o5 + stx %o5,[%fp+tmp_px] + + ld [cmul_arr],%f1 + or %g0,5,counter + ba .cont14 + fzeros %f0 +1: + andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff + bne,pn %icc,1f + sethi %hi(0x00800000),%o5 + + andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff + be,pn %icc,2b + nop +1: + std %f0,[%fp+tmp_pz] + ld [%fp+tmp_pz],%o4 + fmuld %f40,%f18,%f28 ! (1_0) dtmp0 *= x20; + + faddd %f16,K5,%f2 ! (0_0) dtmp0 += K5; + + and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff + cmp %l6,%o5 + bge,a 1f + fstod %f0,%f40 ! (0_0) y0 = (double)fy0; + + ldd [cmul_arr+96],%f40 ! LOAD C2ONM149 + sra %o4,28,%o4 ! itmp0 >>= 28; + fabss %f0,%f0 ! fy0 = fabsf(fy0); + + and %o4,-8,%o4 ! itmp0 = -8; + fitod %f0,%f16 ! dtmp0 = (double) *(int*)&fy0; + + fmuld %f16,%f40,%f40 ! dtmp0 *= C2ONM149; + ldd [cmul_arr+%o4],%f16 ! dsign = *(double*)((char*)cmul_arr + itmp0); + + fmuld %f16,%f40,%f40 ! dtmp0 *= dsign; +1: + add %i1,stridey,%i1 ! py += stridey + fmuld %f24,%f22,%f24 ! (5_1) dtmp0 *= x20; + + ld [%fp+tmp_pz+4],%o4 + and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff + cmp %l6,%o5 + bge,a 1f + fstod %f1,%f16 ! (5_1) x0 = (double)fx0; + + fabss %f1,%f16 ! fx0 = fabsf(fx0); + ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 + sra %o4,28,%o4 ! itmp0 >>= 28; + + and %o4,-8,%o4 ! itmp0 = -8; + fitod %f16,%f16 ! dtmp0 = (double) *(int*)&fx0; + + fmuld %f16,%f0,%f16 ! dtmp0 *= C2ONM149; + ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); + + fmuld %f16,%f0,%f16 ! dtmp0 *= dsign; +1: + sll %o0,5,%l6 ! (3_0) ltmp0 = ldiff0 << 5; + sra %l4,27,%o5 ! (3_0) signx0 = ux0 >> 27; + + ba .den4 + sra %l3,28,%o4 ! 
(3_0) signy0 = uy0 >> 28; + + .align 16 +.update15: + cmp counter,6 + bg,pn %icc,1f + nop + + ld [cmul_arr],%f2 + ba .cont15 + fzero %f0 +1: + cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 + bg,pt %icc,1f + nop +2: + sub counter,6,counter + st counter,[%fp+tmp_counter] + stx %i1,[%fp+tmp_py] + sub %i3,stridex,%o5 + stx %o5,[%fp+tmp_px] + + ld [cmul_arr],%f2 + or %g0,6,counter + ba .cont15 + fzero %f0 +1: + andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff + bne,pn %icc,1f + sethi %hi(0x00800000),%o5 + + andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff + be,pn %icc,2b + nop +1: + st %f0,[%fp+tmp_pz] + st %f2,[%fp+tmp_pz+4] + ld [%fp+tmp_pz],%o4 + fmuld %f40,%f16,%f26 ! (2_0) dtmp0 *= x20; + + and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff + cmp %l6,%o5 + bge,a 1f + fstod %f0,%f40 ! (0_0) y0 = (double)fy0; + + ldd [cmul_arr+96],%f40 ! LOAD C2ONM149 + sra %o4,28,%o4 ! itmp0 >>= 28; + fabss %f0,%f0 ! fy0 = fabsf(fy0); + + and %o4,-8,%o4 ! itmp0 = -8; + fitod %f0,%f0 ! dtmp0 = (double) *(int*)&fy0; + + fmuld %f0,%f40,%f40 ! dtmp0 *= C2ONM149; + ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); + + fmuld %f0,%f40,%f40 ! dtmp0 *= dsign; +1: + add %i1,stridey,%i1 ! py += stridey + faddd %f4,K5,%f62 ! (1_0) dtmp0 += K5; + fmuld %f24,%f20,%f24 ! (0_0) dtmp0 *= x20; + + ld [%fp+tmp_pz+4],%o4 + and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff + cmp %l6,%o5 + bge,a 1f + fstod %f2,%f2 ! (5_1) x0 = (double)fx0; + + fabss %f2,%f2 ! fx0 = fabsf(fx0); + ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 + sra %o4,28,%o4 ! itmp0 >>= 28; + + and %o4,-8,%o4 ! itmp0 = -8; + fitod %f2,%f2 ! dtmp0 = (double) *(int*)&fx0; + + fmuld %f2,%f0,%f2 ! dtmp0 *= C2ONM149; + ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); + + fmuld %f2,%f0,%f2 ! dtmp0 *= dsign; +1: + sll %l5,5,%l6 ! (4_0) ltmp0 = ldiff0 << 5; + sra %l3,27,%o5 ! (4_0) signx0 = ux0 >> 27; + + ba .den5 + sra %l4,28,%o4 ! 
(4_0) signy0 = uy0 >> 28; + + .align 16 +.u0: + ba .c0 + or %g0,_0x7fffffff,%o5 +.u1: + ba .c1 + or %g0,_0x7fffffff,%o5 +.u2: + ba .c2 + or %g0,_0x7f800000,%o5 +.u3: + ba .c3 + or %g0,_0x7f800000,%o5 +.u4: + ba .c4 + or %g0,_0x7fffffff,%o5 +.u5: + ba .c5 + or %g0,_0x7fffffff,%o5 +.u6: + ba .c6 + or %g0,_0x7f800000,%o5 +.u7: + ba .c7 + or %g0,_0x7f800000,%o5 +.u8: + ba .c8 + or %g0,_0x7fffffff,%o5 +.u9: + ba .c9 + or %g0,_0x7fffffff,%o5 +.u10: + ba .c10 + or %g0,_0x7f800000,%o5 +.u11: + ba .c11 + or %g0,_0x7f800000,%o5 +.u12: + ba .c12 + or %g0,_0x7fffffff,%o5 +.u13: + ba .c13 + or %g0,_0x7fffffff,%o5 +.u14: + ba .c14 + or %g0,_0x7f800000,%o5 +.u15: + ba .c15 + or %g0,_0x7f800000,%o5 +.u16: + ba .c16 + or %g0,_0x7fffffff,%o5 +.u17: + ba .c17 + or %g0,_0x7fffffff,%o5 +.u18: + ba .c18 + or %g0,_0x7f800000,%o5 +.u19: + ba .c19 + or %g0,_0x7f800000,%o5 +.u20: + ba .c20 + or %g0,_0x7fffffff,%o5 +.u21: + ba .c21 + or %g0,_0x7fffffff,%o5 +.u22: + ba .c22 + or %g0,_0x7f800000,%o5 +.u23: + ba .c23 + or %g0,_0x7f800000,%o5 +.u24: + ba .c24 + or %g0,_0x7fffffff,%o5 +.u25: + ba .c25 + or %g0,_0x7fffffff,%o5 +.u26: + ba .c26 + or %g0,_0x7f800000,%o5 +.u27: + ba .c27 + or %g0,_0x7f800000,%o5 +.u28: + ba .c28 + or %g0,_0x7fffffff,%o5 +.u29: + ba .c29 + or %g0,_0x7fffffff,%o5 +.u30: + ba .c30 + or %g0,_0x7f800000,%o5 +.u31: + ba .c31 + or %g0,_0x7f800000,%o5 +.u32: + ba .c32 + or %g0,_0x7fffffff,%o5 +.u33: + ba .c33 + or %g0,_0x7fffffff,%o5 +.u34: + ba .c34 + or %g0,_0x7f800000,%o5 +.u35: + ba .c35 + or %g0,_0x7f800000,%o5 +.u36: + ba .c36 + or %g0,_0x7fffffff,%o5 +.u37: + ba .c37 + or %g0,_0x7fffffff,%o5 +.u38: + ba .c38 + or %g0,_0x7f800000,%o5 +.u39: + ba .c39 + or %g0,_0x7f800000,%o5 +.up0: + ba .co0 + or %g0,_0x7fffffff,%o5 +.up1: + ba .co1 + or %g0,_0x7fffffff,%o5 +.up2: + ba .co2 + or %g0,_0x7f800000,%o5 +.up3: + ba .co3 + or %g0,_0x7f800000,%o5 +.up4: + ba .co4 + or %g0,_0x7fffffff,%o5 +.up5: + ba .co5 + or %g0,_0x7fffffff,%o5 +.up6: + ba .co6 + or %g0,_0x7f800000,%o5 
+.up7: + ba .co7 + or %g0,_0x7f800000,%o5 +.up8: + ba .co8 + or %g0,_0x7fffffff,%o5 +.up9: + ba .co9 + or %g0,_0x7fffffff,%o5 +.up10: + ba .co10 + or %g0,_0x7f800000,%o5 +.up11: + ba .co11 + or %g0,_0x7f800000,%o5 +.up12: + ba .co12 + or %g0,_0x7fffffff,%o5 +.up13: + ba .co13 + or %g0,_0x7fffffff,%o5 +.up14: + ba .co14 + or %g0,_0x7f800000,%o5 +.up15: + ba .co15 + or %g0,_0x7f800000,%o5 +.up16: + ba .co16 + or %g0,_0x7fffffff,%o5 +.up17: + ba .co17 + or %g0,_0x7fffffff,%o5 +.up18: + ba .co18 + or %g0,_0x7f800000,%o5 +.up19: + ba .co19 + or %g0,_0x7f800000,%o5 +.up20: + ba .co20 + or %g0,_0x7fffffff,%o5 +.up21: + ba .co21 + or %g0,_0x7fffffff,%o5 +.up22: + ba .co22 + or %g0,_0x7f800000,%o5 +.up23: + ba .co23 + or %g0,_0x7f800000,%o5 +.exit: + ret + restore + SET_SIZE(__vatan2f) + diff --git a/usr/src/lib/libmvec/common/vis/__vatanf.S b/usr/src/lib/libmvec/common/vis/__vatanf.S new file mode 100644 index 0000000000..8bd44bc1ba --- /dev/null +++ b/usr/src/lib/libmvec/common/vis/__vatanf.S @@ -0,0 +1,1892 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 
+ * Use is subject to license terms. + */ + + .file "__vatanf.S" + +#include "libm.h" + + RO_DATA + .align 64 + +.CONST_TBL: + .word 0x3fefffff, 0xfffccbbc ! K0 = 9.99999999976686608841e-01 + .word 0xbfd55554, 0x51c6b90f ! K1 = -3.33333091601972730504e-01 + .word 0x3fc98d6d, 0x926596cc ! K2 = 1.99628540499523379702e-01 + .word 0x00020000, 0x00000000 ! DC1 + .word 0xfffc0000, 0x00000000 ! DC2 + .word 0x7ff00000, 0x00000000 ! DC3 + .word 0x3ff00000, 0x00000000 ! DONE = 1.0 + .word 0x40000000, 0x00000000 ! DTWO = 2.0 + +! parr0 = *(int*)&(1.0 / *(double*)&(((long long)i << 45) | 0x3ff0100000000000ULL)) + 0x3ff00000, i = [0, 127] + + .word 0x7fdfe01f, 0x7fdfa11c, 0x7fdf6310, 0x7fdf25f6 + .word 0x7fdee9c7, 0x7fdeae80, 0x7fde741a, 0x7fde3a91 + .word 0x7fde01e0, 0x7fddca01, 0x7fdd92f2, 0x7fdd5cac + .word 0x7fdd272c, 0x7fdcf26e, 0x7fdcbe6d, 0x7fdc8b26 + .word 0x7fdc5894, 0x7fdc26b5, 0x7fdbf583, 0x7fdbc4fd + .word 0x7fdb951e, 0x7fdb65e2, 0x7fdb3748, 0x7fdb094b + .word 0x7fdadbe8, 0x7fdaaf1d, 0x7fda82e6, 0x7fda5741 + .word 0x7fda2c2a, 0x7fda01a0, 0x7fd9d79f, 0x7fd9ae24 + .word 0x7fd9852f, 0x7fd95cbb, 0x7fd934c6, 0x7fd90d4f + .word 0x7fd8e652, 0x7fd8bfce, 0x7fd899c0, 0x7fd87427 + .word 0x7fd84f00, 0x7fd82a4a, 0x7fd80601, 0x7fd7e225 + .word 0x7fd7beb3, 0x7fd79baa, 0x7fd77908, 0x7fd756ca + .word 0x7fd734f0, 0x7fd71378, 0x7fd6f260, 0x7fd6d1a6 + .word 0x7fd6b149, 0x7fd69147, 0x7fd6719f, 0x7fd6524f + .word 0x7fd63356, 0x7fd614b3, 0x7fd5f664, 0x7fd5d867 + .word 0x7fd5babc, 0x7fd59d61, 0x7fd58056, 0x7fd56397 + .word 0x7fd54725, 0x7fd52aff, 0x7fd50f22, 0x7fd4f38f + .word 0x7fd4d843, 0x7fd4bd3e, 0x7fd4a27f, 0x7fd48805 + .word 0x7fd46dce, 0x7fd453d9, 0x7fd43a27, 0x7fd420b5 + .word 0x7fd40782, 0x7fd3ee8f, 0x7fd3d5d9, 0x7fd3bd60 + .word 0x7fd3a524, 0x7fd38d22, 0x7fd3755b, 0x7fd35dce + .word 0x7fd34679, 0x7fd32f5c, 0x7fd31877, 0x7fd301c8 + .word 0x7fd2eb4e, 0x7fd2d50a, 0x7fd2bef9, 0x7fd2a91c + .word 0x7fd29372, 0x7fd27dfa, 0x7fd268b3, 0x7fd2539d + .word 0x7fd23eb7, 0x7fd22a01, 0x7fd21579, 
0x7fd20120 + .word 0x7fd1ecf4, 0x7fd1d8f5, 0x7fd1c522, 0x7fd1b17c + .word 0x7fd19e01, 0x7fd18ab0, 0x7fd1778a, 0x7fd1648d + .word 0x7fd151b9, 0x7fd13f0e, 0x7fd12c8b, 0x7fd11a30 + .word 0x7fd107fb, 0x7fd0f5ed, 0x7fd0e406, 0x7fd0d244 + .word 0x7fd0c0a7, 0x7fd0af2f, 0x7fd09ddb, 0x7fd08cab + .word 0x7fd07b9f, 0x7fd06ab5, 0x7fd059ee, 0x7fd04949 + .word 0x7fd038c6, 0x7fd02864, 0x7fd01824, 0x7fd00804 + + .word 0x3ff00000, 0x00000000 ! 1.0 + .word 0xbff00000, 0x00000000 ! -1.0 + +! parr1[i] = atan((double)*(float*)&((i + 460) << 21)), i = [0, 155] + + .word 0x3f2fffff, 0xf555555c, 0x3f33ffff, 0xf595555f + .word 0x3f37ffff, 0xee000018, 0x3f3bffff, 0xe36aaadf + .word 0x3f3fffff, 0xd55555bc, 0x3f43ffff, 0xd65555f2 + .word 0x3f47ffff, 0xb8000185, 0x3f4bffff, 0x8daaadf3 + .word 0x3f4fffff, 0x55555bbc, 0x3f53ffff, 0x59555f19 + .word 0x3f57fffe, 0xe000184d, 0x3f5bfffe, 0x36aadf30 + .word 0x3f5ffffd, 0x5555bbbc, 0x3f63fffd, 0x6555f195 + .word 0x3f67fffb, 0x800184cc, 0x3f6bfff8, 0xdaadf302 + .word 0x3f6ffff5, 0x555bbbb7, 0x3f73fff5, 0x955f194a + .word 0x3f77ffee, 0x00184ca6, 0x3f7bffe3, 0x6adf2fd1 + .word 0x3f7fffd5, 0x55bbba97, 0x3f83ffd6, 0x55f1929c + .word 0x3f87ffb8, 0x0184c30a, 0x3f8bff8d, 0xadf2e78c + .word 0x3f8fff55, 0x5bbb729b, 0x3f93ff59, 0x5f18a700 + .word 0x3f97fee0, 0x184a5c36, 0x3f9bfe36, 0xdf291712 + .word 0x3f9ffd55, 0xbba97625, 0x3fa3fd65, 0xf169c9d9 + .word 0x3fa7fb81, 0x8430da2a, 0x3fabf8dd, 0xf139c444 + .word 0x3faff55b, 0xb72cfdea, 0x3fb3f59f, 0x0e7c559d + .word 0x3fb7ee18, 0x2602f10f, 0x3fbbe39e, 0xbe6f07c4 + .word 0x3fbfd5ba, 0x9aac2f6e, 0x3fc3d6ee, 0xe8c6626c + .word 0x3fc7b97b, 0x4bce5b02, 0x3fcb90d7, 0x529260a2 + .word 0x3fcf5b75, 0xf92c80dd, 0x3fd36277, 0x3707ebcc + .word 0x3fd6f619, 0x41e4def1, 0x3fda64ee, 0xc3cc23fd + .word 0x3fddac67, 0x0561bb4f, 0x3fe1e00b, 0xabdefeb4 + .word 0x3fe4978f, 0xa3269ee1, 0x3fe700a7, 0xc5784634 + .word 0x3fe921fb, 0x54442d18, 0x3fecac7c, 0x57846f9e + .word 0x3fef730b, 0xd281f69b, 0x3ff0d38f, 0x2c5ba09f + .word 0x3ff1b6e1, 
0x92ebbe44, 0x3ff30b6d, 0x796a4da8 + .word 0x3ff3fc17, 0x6b7a8560, 0x3ff4ae10, 0xfc6589a5 + .word 0x3ff5368c, 0x951e9cfd, 0x3ff5f973, 0x15254857 + .word 0x3ff67d88, 0x63bc99bd, 0x3ff6dcc5, 0x7bb565fd + .word 0x3ff7249f, 0xaa996a21, 0x3ff789bd, 0x2c160054 + .word 0x3ff7cd6f, 0x6dc59db4, 0x3ff7fde8, 0x0870c2a0 + .word 0x3ff82250, 0x768ac529, 0x3ff8555a, 0x2787981f + .word 0x3ff87769, 0xeb8e956b, 0x3ff88fc2, 0x18ace9dc + .word 0x3ff8a205, 0xfd558740, 0x3ff8bb9a, 0x63718f45 + .word 0x3ff8cca9, 0x27cf0b3d, 0x3ff8d8d8, 0xbf65316f + .word 0x3ff8e1fc, 0xa98cb633, 0x3ff8eec8, 0xcfd00665 + .word 0x3ff8f751, 0x0eba96e6, 0x3ff8fd69, 0x4acf36b0 + .word 0x3ff901fb, 0x7eee715e, 0x3ff90861, 0xd082d9b5 + .word 0x3ff90ca6, 0x0b9322c5, 0x3ff90fb2, 0x37a7ea27 + .word 0x3ff911fb, 0x59997f3a, 0x3ff9152e, 0x8a326c38 + .word 0x3ff91750, 0xab2e0d12, 0x3ff918d6, 0xc2f9c9e2 + .word 0x3ff919fb, 0x54eed7a9, 0x3ff91b94, 0xee352849 + .word 0x3ff91ca5, 0xff216922, 0x3ff91d69, 0x0b3f72ff + .word 0x3ff91dfb, 0x5459826d, 0x3ff91ec8, 0x211be619 + .word 0x3ff91f50, 0xa99fd49a, 0x3ff91fb2, 0x2fb5defa + .word 0x3ff91ffb, 0x5446d7c3, 0x3ff92061, 0xbaabf105 + .word 0x3ff920a5, 0xfeefa208, 0x3ff920d6, 0xc1fb87e7 + .word 0x3ff920fb, 0x5444826e, 0x3ff9212e, 0x87778bfc + .word 0x3ff92150, 0xa9999bb6, 0x3ff92169, 0x0b1faabb + .word 0x3ff9217b, 0x544437c3, 0x3ff92194, 0xedddcc28 + .word 0x3ff921a5, 0xfeeedaec, 0x3ff921b2, 0x2fb1e5f1 + .word 0x3ff921bb, 0x54442e6e, 0x3ff921c8, 0x2110fa94 + .word 0x3ff921d0, 0xa99982d3, 0x3ff921d6, 0xc1fb08c6 + .word 0x3ff921db, 0x54442d43, 0x3ff921e1, 0xbaaa9395 + .word 0x3ff921e5, 0xfeeed7d0, 0x3ff921e9, 0x0b1f9ad7 + .word 0x3ff921eb, 0x54442d1e, 0x3ff921ee, 0x8777604e + .word 0x3ff921f0, 0xa999826f, 0x3ff921f2, 0x2fb1e3f5 + .word 0x3ff921f3, 0x54442d19, 0x3ff921f4, 0xedddc6b2 + .word 0x3ff921f5, 0xfeeed7c3, 0x3ff921f6, 0xc1fb0886 + .word 0x3ff921f7, 0x54442d18, 0x3ff921f8, 0x2110f9e5 + .word 0x3ff921f8, 0xa999826e, 0x3ff921f9, 0x0b1f9acf + .word 0x3ff921f9, 0x54442d18, 
0x3ff921f9, 0xbaaa937f + .word 0x3ff921f9, 0xfeeed7c3, 0x3ff921fa, 0x2fb1e3f4 + .word 0x3ff921fa, 0x54442d18, 0x3ff921fa, 0x8777604b + .word 0x3ff921fa, 0xa999826e, 0x3ff921fa, 0xc1fb0886 + .word 0x3ff921fa, 0xd4442d18, 0x3ff921fa, 0xedddc6b2 + .word 0x3ff921fa, 0xfeeed7c3, 0x3ff921fb, 0x0b1f9acf + .word 0x3ff921fb, 0x14442d18, 0x3ff921fb, 0x2110f9e5 + .word 0x3ff921fb, 0x2999826e, 0x3ff921fb, 0x2fb1e3f4 + .word 0x3ff921fb, 0x34442d18, 0x3ff921fb, 0x3aaa937f + .word 0x3ff921fb, 0x3eeed7c3, 0x3ff921fb, 0x41fb0886 + .word 0x3ff921fb, 0x44442d18, 0x3ff921fb, 0x4777604b + .word 0x3ff921fb, 0x4999826e, 0x3ff921fb, 0x4b1f9acf + .word 0x3ff921fb, 0x4c442d18, 0x3ff921fb, 0x4dddc6b2 + .word 0x3ff921fb, 0x4eeed7c3, 0x3ff921fb, 0x4fb1e3f4 + .word 0x3ff921fb, 0x50442d18, 0x3ff921fb, 0x5110f9e5 + .word 0x3ff921fb, 0x5199826e, 0x3ff921fb, 0x51fb0886 + +#define DC2 %f2 +#define DTWO %f6 +#define DONE %f52 +#define K0 %f54 +#define K1 %f56 +#define K2 %f58 +#define DC1 %f60 +#define DC3 %f62 + +#define stridex %o2 +#define stridey %o3 +#define MASK_0x7fffffff %i1 +#define MASK_0x100000 %i5 + +#define tmp_px STACK_BIAS-32 +#define tmp_counter STACK_BIAS-24 +#define tmp0 STACK_BIAS-16 +#define tmp1 STACK_BIAS-8 + +#define counter %l1 + +! sizeof temp storage - must be a multiple of 16 for V9 +#define tmps 0x20 + +!-------------------------------------------------------------------- +! !!!!! vatanf algorithm !!!!! +! ux = ((int*)px)[0]; +! ax = ux & 0x7fffffff; +! +! if ( ax < 0x39b89c55 ) +! { +! *(int*)py = ux; +! goto next; +! } +! +! if ( ax > 0x4c700518 ) +! { +! if ( ax > 0x7f800000 ) +! { +! float fpx = fabsf(*px); +! fpx *= fpx; +! *py = fpx; +! goto next; +! } +! +! sign = ux & 0x80000000; +! sign |= pi_2; +! *(int*)py = sign; +! goto next; +! } +! +! ftmp0 = *px; +! x = (double)ftmp0; +! px += stridex; +! y = vis_fpadd32(x,DC1); +! y = vis_fand(y,DC2); +! div = x * y; +! xx = x - y; +! div += DONE; +! i = ((unsigned long long*)&div)[0]; +! y0 = vis_fand(div,DC3); +! 
i >>= 43; +! i &= 508; +! *(float*)&dtmp0 = *(float*)((char*)parr0 + i); +! y0 = vis_fpsub32(dtmp0, y0); +! dtmp0 = div0 * y0; +! dtmp0 = DTWO - dtmp0; +! y0 *= dtmp0; +! dtmp1 = div0 * y0; +! dtmp1 = DTWO - dtmp1; +! y0 *= dtmp1; +! ax = ux & 0x7fffffff; +! ax += 0x00100000; +! ax >>= 18; +! ax &= -8; +! res = *(double*)((char*)parr1 + ax); +! ux >>= 28; +! ux &= -8; +! dtmp0 = *(double*)((char*)sign_arr + ux); +! res *= dtmp0; +! xx *= y0; +! x2 = xx * xx; +! dtmp0 = K2 * x2; +! dtmp0 += K1; +! dtmp0 *= x2; +! dtmp0 += K0; +! dtmp0 *= xx; +! res += dtmp0; +! ftmp0 = (float)res; +! py[0] = ftmp0; +! py += stridey; +!-------------------------------------------------------------------- + + ENTRY(__vatanf) + save %sp,-SA(MINFRAME)-tmps,%sp + PIC_SETUP(l7) + PIC_SET(l7,.CONST_TBL,l2) + + st %i0,[%fp+tmp_counter] + + sllx %i2,2,stridex + sllx %i4,2,stridey + + or %g0,%i3,%o1 + stx %i1,[%fp+tmp_px] + + ldd [%l2],K0 + ldd [%l2+8],K1 + ldd [%l2+16],K2 + ldd [%l2+24],DC1 + ldd [%l2+32],DC2 + ldd [%l2+40],DC3 + ldd [%l2+48],DONE + ldd [%l2+56],DTWO + + add %l2,64,%i4 + add %l2,64+512,%l0 + add %l2,64+512+16-0x1cc*8,%l7 + + sethi %hi(0x100000),MASK_0x100000 + sethi %hi(0x7ffffc00),MASK_0x7fffffff + add MASK_0x7fffffff,1023,MASK_0x7fffffff + + sethi %hi(0x39b89c00),%o4 + add %o4,0x55,%o4 + sethi %hi(0x4c700400),%o5 + add %o5,0x118,%o5 + +.begin: + ld [%fp+tmp_counter],counter + ldx [%fp+tmp_px],%i3 + st %g0,[%fp+tmp_counter] +.begin1: + cmp counter,0 + ble,pn %icc,.exit + nop + + lda [%i3]0x82,%l6 ! (0_0) ux = ((int*)px)[0]; + + and %l6,MASK_0x7fffffff,%l5 ! (0_0) ax = ux & 0x7fffffff; + lda [%i3]0x82,%f0 ! (0_0) ftmp0 = *px; + + cmp %l5,%o4 ! (0_0) ax ? 0x39b89c55 + bl,pn %icc,.spec0 ! (0_0) if ( ax < 0x39b89c55 ) + nop + + cmp %l5,%o5 ! (0_0) ax ? 0x4c700518 + bg,pn %icc,.spec1 ! (0_0) if ( ax > 0x4c700518 ) + nop + + add %i3,stridex,%l5 ! px += stridex; + fstod %f0,%f22 ! (0_0) ftmp0 = *px; + mov %l6,%i3 + + lda [%l5]0x82,%l6 ! 
(1_0) ux = ((int*)px)[0]; + + and %l6,MASK_0x7fffffff,%o7 ! (1_0) ax = ux & 0x7fffffff; + lda [%l5]0x82,%f0 ! (1_0) ftmp0 = *px; + add %l5,stridex,%l4 ! px += stridex; + fpadd32 %f22,DC1,%f24 ! (0_0) y = vis_fpadd32(x,dconst1); + + cmp %o7,%o4 ! (1_0) ax ? 0x39b89c55 + bl,pn %icc,.update0 ! (1_0) if ( ax < 0x39b89c55 ) + nop +.cont0: + cmp %o7,%o5 ! (1_0) ax ? 0x4c700518 + bg,pn %icc,.update1 ! (1_0) if ( ax > 0x4c700518 ) + nop +.cont1: + fstod %f0,%f20 ! (1_0) x = (double)ftmp0; + mov %l6,%l5 + + fand %f24,DC2,%f26 ! (0_0) y = vis_fand(y,dconst2); + + fmuld %f22,%f26,%f32 ! (0_0) div = x * y; + + lda [%l4]0x82,%l6 ! (2_0) ux = ((int*)px)[0]; + fsubd %f22,%f26,%f22 ! (0_0) xx = x - y; + + and %l6,MASK_0x7fffffff,%o7 ! (2_0) ax = ux & 0x7fffffff; + lda [%l4]0x82,%f0 ! (2_0) ftmp0 = *px; + add %l4,stridex,%l3 ! px += stridex; + fpadd32 %f20,DC1,%f24 ! (1_0) y = vis_fpadd32(x,dconst1); + + cmp %o7,%o4 ! (2_0) ax ? 0x39b89c55 + bl,pn %icc,.update2 ! (2_0) if ( ax < 0x39b89c55 ) + faddd DONE,%f32,%f32 ! (0_0) div += done; +.cont2: + cmp %o7,%o5 ! (2_0) ax ? 0x4c700518 + bg,pn %icc,.update3 ! (2_0) if ( ax > 0x4c700518 ) + nop +.cont3: + std %f32,[%fp+tmp0] ! (0_0) i = ((unsigned long long*)&div)[0]; + mov %l6,%l4 + fstod %f0,%f18 ! (2_0) x = (double)ftmp0; + + fand %f24,DC2,%f26 ! (1_0) y = vis_fand(y,dconst2); + + fmuld %f20,%f26,%f30 ! (1_0) div = x * y; + + lda [%l3]0x82,%l6 ! (3_0) ux = ((int*)px)[0]; + fsubd %f20,%f26,%f20 ! (1_0) xx = x - y; + + and %l6,MASK_0x7fffffff,%o7 ! (3_0) ax = ux & 0x7fffffff; + lda [%l3]0x82,%f0 ! (3_0) ftmp0 = *px; + add %l3,stridex,%i0 ! px += stridex; + fpadd32 %f18,DC1,%f24 ! (2_0) y = vis_fpadd32(x,dconst1); + + cmp %o7,%o4 ! (3_0) ax ? 0x39b89c55 + bl,pn %icc,.update4 ! (3_0) if ( ax < 0x39b89c55 ) + faddd DONE,%f30,%f30 ! (1_0) div += done; +.cont4: + cmp %o7,%o5 ! (3_0) ax ? 0x4c700518 + bg,pn %icc,.update5 ! (3_0) if ( ax > 0x4c700518 ) + nop +.cont5: + std %f30,[%fp+tmp1] ! 
(1_0) i = ((unsigned long long*)&div)[0]; + mov %l6,%l3 + fstod %f0,%f16 ! (3_0) x = (double)ftmp0; + + ldx [%fp+tmp0],%o0 ! (0_0) i = ((unsigned long long*)&div)[0]; + fand %f24,DC2,%f26 ! (2_0) y = vis_fand(y,dconst2); + + fand %f32,DC3,%f24 ! (0_0) y0 = vis_fand(div,dconst3); + + srlx %o0,43,%o0 ! (0_0) i >>= 43; + + and %o0,508,%l6 ! (0_0) i &= 508; + + ld [%i4+%l6],%f0 ! (0_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); + + fmuld %f18,%f26,%f28 ! (2_0) div = x * y; + + lda [%i0]0x82,%l6 ! (4_0) ux = ((int*)px)[0]; + fsubd %f18,%f26,%f18 ! (2_0) xx = x - y; + + fpsub32 %f0,%f24,%f40 ! (0_0) y0 = vis_fpsub32(dtmp0, y0); + + and %l6,MASK_0x7fffffff,%o7 ! (4_0) ax = ux & 0x7fffffff; + lda [%i0]0x82,%f0 ! (4_0) ftmp0 = *px; + add %i0,stridex,%i2 ! px += stridex; + fpadd32 %f16,DC1,%f24 ! (3_0) y = vis_fpadd32(x,dconst1); + + cmp %o7,%o4 ! (4_0) ax ? 0x39b89c55 + bl,pn %icc,.update6 ! (4_0) if ( ax < 0x39b89c55 ) + faddd DONE,%f28,%f28 ! (2_0) div += done; +.cont6: + fmuld %f32,%f40,%f42 ! (0_0) dtmp0 = div0 * y0; + cmp %o7,%o5 ! (4_0) ax ? 0x4c700518 + bg,pn %icc,.update7 ! (4_0) if ( ax > 0x4c700518 ) + nop +.cont7: + std %f28,[%fp+tmp0] ! (2_0) i = ((unsigned long long*)&div)[0]; + mov %l6,%i0 + fstod %f0,%f14 ! (4_0) x = (double)ftmp0; + + ldx [%fp+tmp1],%g1 ! (1_0) i = ((unsigned long long*)&div)[0]; + fand %f24,DC2,%f26 ! (3_0) y = vis_fand(y,dconst2); + + fand %f30,DC3,%f24 ! (1_0) y0 = vis_fand(div,dconst3); + + fsubd DTWO,%f42,%f44 ! (0_0) dtmp0 = dtwo - dtmp0; + srlx %g1,43,%g1 ! (1_0) i >>= 43; + + and %g1,508,%l6 ! (1_0) i &= 508; + + ld [%i4+%l6],%f0 ! (1_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); + + fmuld %f16,%f26,%f34 ! (3_0) div = x * y; + + lda [%i2]0x82,%l6 ! (5_0) ux = ((int*)px)[0]; + fsubd %f16,%f26,%f16 ! (3_0) xx = x - y; + + fpsub32 %f0,%f24,%f38 ! (1_0) y0 = vis_fpsub32(dtmp0, y0); + add %i2,stridex,%l2 ! px += stridex; + + fmuld %f40,%f44,%f40 ! (0_0) y0 *= dtmp0; + and %l6,MASK_0x7fffffff,%o7 ! 
(5_0) ax = ux & 0x7fffffff; + lda [%i2]0x82,%f0 ! (5_0) ftmp0 = *px; + fpadd32 %f14,DC1,%f24 ! (4_0) y = vis_fpadd32(x,dconst1); + + cmp %o7,%o4 ! (5_0) ax ? 0x39b89c55 + bl,pn %icc,.update8 ! (5_0) if ( ax < 0x39b89c55 ) + faddd DONE,%f34,%f34 ! (3_0) div += done; +.cont8: + fmuld %f30,%f38,%f42 ! (1_0) dtmp0 = div0 * y0; + cmp %o7,%o5 ! (5_0) ax ? 0x4c700518 + bg,pn %icc,.update9 ! (5_0) if ( ax > 0x4c700518 ) + nop +.cont9: + std %f34,[%fp+tmp1] ! (3_0) i = ((unsigned long long*)&div)[0]; + mov %l6,%i2 + fstod %f0,%f36 ! (5_0) x = (double)ftmp0; + + fmuld %f32,%f40,%f32 ! (0_0) dtmp1 = div0 * y0; + ldx [%fp+tmp0],%o0 ! (2_0) i = ((unsigned long long*)&div)[0]; + fand %f24,DC2,%f26 ! (4_0) y = vis_fand(y,dconst2); + + fand %f28,DC3,%f24 ! (2_0) y0 = vis_fand(div,dconst3); + + fsubd DTWO,%f42,%f44 ! (1_0) dtmp0 = dtwo - dtmp0; + srlx %o0,43,%o0 ! (2_0) i >>= 43; + + and %o0,508,%l6 ! (2_0) i &= 508; + fsubd DTWO,%f32,%f46 ! (0_0) dtmp1 = dtwo - dtmp1; + + ld [%i4+%l6],%f0 ! (2_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); + + fmuld %f14,%f26,%f32 ! (4_0) div = x * y; + + lda [%l2]0x82,%l6 ! (6_0) ux = ((int*)px)[0]; + fsubd %f14,%f26,%f14 ! (4_0) xx = x - y; + + fmuld %f40,%f46,%f26 ! (0_0) y0 *= dtmp1; + add %l2,stridex,%g5 ! px += stridex; + fpsub32 %f0,%f24,%f40 ! (2_0) y0 = vis_fpsub32(dtmp0, y0); + + fmuld %f38,%f44,%f38 ! (1_0) y0 *= dtmp0; + and %l6,MASK_0x7fffffff,%o7 ! (6_0) ax = ux & 0x7fffffff; + lda [%l2]0x82,%f0 ! (6_0) ftmp0 = *px; + fpadd32 %f36,DC1,%f24 ! (5_0) y = vis_fpadd32(x,dconst1); + + cmp %o7,%o4 ! (6_0) ax ? 0x39b89c55 + bl,pn %icc,.update10 ! (6_0) if ( ax < 0x39b89c55 ) + faddd DONE,%f32,%f32 ! (4_0) div += done; +.cont10: + fmuld %f28,%f40,%f42 ! (2_0) dtmp0 = div0 * y0; + cmp %o7,%o5 ! (6_0) ax ? 0x4c700518 + bg,pn %icc,.update11 ! (6_0) if ( ax > 0x4c700518 ) + nop +.cont11: + fmuld %f22,%f26,%f22 ! (0_0) xx *= y0; + mov %l6,%l2 + std %f32,[%fp+tmp0] ! (4_0) i = ((unsigned long long*)&div)[0]; + fstod %f0,%f10 ! 
(6_0) x = (double)ftmp0; + + fmuld %f30,%f38,%f30 ! (1_0) dtmp1 = div0 * y0; + ldx [%fp+tmp1],%g1 ! (3_0) i = ((unsigned long long*)&div)[0]; + fand %f24,DC2,%f26 ! (5_0) y = vis_fand(y,dconst2); + + fand %f34,DC3,%f24 ! (3_0) y0 = vis_fand(div,dconst3); + + fmuld %f22,%f22,%f50 ! (0_0) x2 = xx * xx; + srlx %g1,43,%g1 ! (3_0) i >>= 43; + fsubd DTWO,%f42,%f44 ! (2_0) dtmp0 = dtwo - dtmp0; + + and %g1,508,%l6 ! (3_0) i &= 508; + mov %i3,%o7 + fsubd DTWO,%f30,%f46 ! (1_0) dtmp1 = dtwo - dtmp1; + + ld [%i4+%l6],%f0 ! (3_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); + + fmuld %f36,%f26,%f30 ! (5_0) div = x * y; + srl %o7,28,%g1 ! (0_0) ux >>= 28; + add %g5,stridex,%i3 ! px += stridex; + + fmuld K2,%f50,%f4 ! (0_0) dtmp0 = K2 * x2; + and %o7,MASK_0x7fffffff,%o0 ! (0_0) ax = ux & 0x7fffffff; + lda [%g5]0x82,%l6 ! (7_0) ux = ((int*)px)[0]; + fsubd %f36,%f26,%f36 ! (5_0) xx = x - y; + + fmuld %f38,%f46,%f26 ! (1_0) y0 *= dtmp1; + add %o0,MASK_0x100000,%o0 ! (0_0) ax += 0x00100000; + and %g1,-8,%g1 ! (0_0) ux &= -8; + fpsub32 %f0,%f24,%f38 ! (3_0) y0 = vis_fpsub32(dtmp0, y0); + + fmuld %f40,%f44,%f40 ! (2_0) y0 *= dtmp0; + and %l6,MASK_0x7fffffff,%o7 ! (7_0) ax = ux & 0x7fffffff; + lda [%g5]0x82,%f0 ! (7_0) ftmp0 = *px; + fpadd32 %f10,DC1,%f24 ! (6_0) y = vis_fpadd32(x,dconst1); + + cmp %o7,%o4 ! (7_0) ax ? 0x39b89c55 + bl,pn %icc,.update12 ! (7_0) if ( ax < 0x39b89c55 ) + faddd DONE,%f30,%f30 ! (5_0) div += done; +.cont12: + fmuld %f34,%f38,%f42 ! (3_0) dtmp0 = div0 * y0; + cmp %o7,%o5 ! (7_0) ax ? 0x4c700518 + bg,pn %icc,.update13 ! (7_0) if ( ax > 0x4c700518 ) + faddd %f4,K1,%f4 ! (0_0) dtmp0 += K1; +.cont13: + fmuld %f20,%f26,%f20 ! (1_0) xx *= y0; + srl %o0,18,%o7 ! (0_0) ax >>= 18; + std %f30,[%fp+tmp1] ! (5_0) i = ((unsigned long long*)&div)[0]; + fstod %f0,%f8 ! (7_0) x = (double)ftmp0; + + fmuld %f28,%f40,%f28 ! (2_0) dtmp1 = div0 * y0; + and %o7,-8,%o7 ! (0_0) ux &= -8; + ldx [%fp+tmp0],%o0 ! (4_0) i = ((unsigned long long*)&div)[0]; + fand %f24,DC2,%f26 ! 
(6_0) y = vis_fand(y,dconst2); + + add %o7,%l7,%o7 ! (0_0) (char*)parr1 + ax; + mov %l6,%g5 + ldd [%l0+%g1],%f48 ! (0_0) dtmp0 = *(double*)((char*)sign_arr + ux); + + fmuld %f4,%f50,%f4 ! (0_0) dtmp0 *= x2; + srlx %o0,43,%o0 ! (4_0) i >>= 43; + ldd [%o7],%f0 ! (0_0) res = *(double*)((char*)parr1 + ax); + fand %f32,DC3,%f24 ! (4_0) y0 = vis_fand(div,dconst3); + + fmuld %f20,%f20,%f50 ! (1_0) x2 = xx * xx; + and %o0,508,%l6 ! (4_0) i &= 508; + mov %l5,%o7 + fsubd DTWO,%f42,%f44 ! (3_0) dtmp0 = dtwo - dtmp0; + + fsubd DTWO,%f28,%f46 ! (2_0) dtmp1 = dtwo - dtmp1; + + fmuld %f0,%f48,%f48 ! (0_0) res *= dtmp0; + srl %o7,28,%l5 ! (1_0) ux >>= 28; + ld [%i4+%l6],%f0 ! (4_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); + + fmuld %f10,%f26,%f28 ! (6_0) div = x * y; + faddd %f4,K0,%f42 ! (0_0) dtmp0 += K0; + + subcc counter,8,counter + bneg,pn %icc,.tail + or %g0,%o1,%o0 + + add %fp,tmp0,%g1 + lda [%i3]0x82,%l6 ! (0_0) ux = ((int*)px)[0]; + + ba .main_loop + add %i3,stridex,%l5 ! px += stridex; + + .align 16 +.main_loop: + fsubd %f10,%f26,%f10 ! (6_1) xx = x - y; + and %o7,MASK_0x7fffffff,%o1 ! (1_1) ax = ux & 0x7fffffff; + st %f12,[%g1] ! (7_1) py[0] = ftmp0; + fmuld K2,%f50,%f4 ! (1_1) dtmp0 = K2 * x2; + + fmuld %f40,%f46,%f26 ! (2_1) y0 *= dtmp1; + srl %o7,28,%o7 ! (1_0) ux >>= 28; + add %o1,MASK_0x100000,%g1 ! (1_1) ax += 0x00100000; + fpsub32 %f0,%f24,%f40 ! (4_1) y0 = vis_fpsub32(dtmp0, y0); + + fmuld %f38,%f44,%f38 ! (3_1) y0 *= dtmp0; + and %l6,MASK_0x7fffffff,%o1 ! (0_0) ax = ux & 0x7fffffff; + lda [%i3]0x82,%f0 ! (0_0) ftmp0 = *px; + fpadd32 %f8,DC1,%f24 ! (7_1) y = vis_fpadd32(x,dconst1); + + fmuld %f42,%f22,%f44 ! (0_1) dtmp0 *= xx; + cmp %o1,%o4 ! (0_0) ax ? 0x39b89c55 + bl,pn %icc,.update14 ! (0_0) if ( ax < 0x39b89c55 ) + faddd DONE,%f28,%f28 ! (6_1) div += done; +.cont14: + fmuld %f32,%f40,%f42 ! (4_1) dtmp0 = div0 * y0; + cmp %o1,%o5 ! (0_0) ax ? 0x4c700518 + bg,pn %icc,.update15 ! (0_0) if ( ax > 0x4c700518 ) + faddd %f4,K1,%f4 ! 
(1_1) dtmp0 += K1; +.cont15: + fmuld %f18,%f26,%f18 ! (2_1) xx *= y0; + srl %g1,18,%o1 ! (1_1) ax >>= 18; + std %f28,[%fp+tmp0] ! (6_1) i = ((unsigned long long*)&div)[0]; + fstod %f0,%f22 ! (0_0) ftmp0 = *px; + + fmuld %f34,%f38,%f34 ! (3_1) dtmp1 = div0 * y0; + and %o1,-8,%o1 ! (1_1) ax &= -8; + ldx [%fp+tmp1],%g1 ! (5_1) i = ((unsigned long long*)&div)[0]; + fand %f24,DC2,%f26 ! (7_1) y = vis_fand(y,dconst2); + + ldd [%o1+%l7],%f0 ! (1_1) res = *(double*)((char*)parr1 + ax); + and %o7,-8,%o7 ! (1_1) ux &= -8; + mov %l6,%i3 + faddd %f48,%f44,%f12 ! (0_1) res += dtmp0; + + fmuld %f4,%f50,%f4 ! (1_1) dtmp0 *= x2; + nop + ldd [%l0+%o7],%f48 ! (1_1) dtmp0 = *(double*)((char*)sign_arr + ux); + fand %f30,DC3,%f24 ! (5_1) y0 = vis_fand(div,dconst3); + + fmuld %f18,%f18,%f50 ! (2_1) x2 = xx * xx; + srlx %g1,43,%g1 ! (5_1) i >>= 43; + mov %l4,%o7 + fsubd DTWO,%f42,%f44 ! (4_1) dtmp0 = dtwo - dtmp0; + + and %g1,508,%l6 ! (5_1) i &= 508; + nop + bn,pn %icc,.exit + fsubd DTWO,%f34,%f46 ! (3_1) dtmp1 = dtwo - dtmp1; + + fmuld %f0,%f48,%f48 ! (1_1) res *= dtmp0; + add %o0,stridey,%g1 ! py += stridey; + ld [%i4+%l6],%f0 ! (5_1) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); + fdtos %f12,%f12 ! (0_1) ftmp0 = (float)res; + + fmuld %f8,%f26,%f34 ! (7_1) div = x * y; + srl %o7,28,%o1 ! (2_1) ux >>= 28; + lda [%l5]0x82,%l6 ! (1_0) ux = ((int*)px)[0]; + faddd %f4,K0,%f42 ! (1_1) dtmp0 += K0; + + fmuld K2,%f50,%f4 ! (2_1) dtmp0 = K2 * x2; + and %o7,MASK_0x7fffffff,%o7 ! (2_1) ax = ux & 0x7fffffff; + st %f12,[%o0] ! (0_1) py[0] = ftmp0; + fsubd %f8,%f26,%f8 ! (7_1) xx = x - y; + + fmuld %f38,%f46,%f26 ! (3_1) y0 *= dtmp1; + add %l5,stridex,%l4 ! px += stridex; + add %o7,MASK_0x100000,%o0 ! (2_1) ax += 0x00100000; + fpsub32 %f0,%f24,%f38 ! (5_1) y0 = vis_fpsub32(dtmp0, y0); + + fmuld %f40,%f44,%f40 ! (4_1) y0 *= dtmp0; + and %l6,MASK_0x7fffffff,%o7 ! (1_0) ax = ux & 0x7fffffff; + lda [%l5]0x82,%f0 ! (1_0) ftmp0 = *px; + fpadd32 %f22,DC1,%f24 ! 
(0_0) y = vis_fpadd32(x,dconst1); + + fmuld %f42,%f20,%f44 ! (1_1) dtmp0 *= xx; + cmp %o7,%o4 ! (1_0) ax ? 0x39b89c55 + bl,pn %icc,.update16 ! (1_0) if ( ax < 0x39b89c55 ) + faddd DONE,%f34,%f34 ! (7_1) div += done; +.cont16: + fmuld %f30,%f38,%f42 ! (5_1) dtmp0 = div0 * y0; + cmp %o7,%o5 ! (1_0) ax ? 0x4c700518 + bg,pn %icc,.update17 ! (1_0) if ( ax > 0x4c700518 ) + faddd %f4,K1,%f4 ! (2_1) dtmp0 += K1; +.cont17: + fmuld %f16,%f26,%f16 ! (3_1) xx *= y0; + srl %o0,18,%o7 ! (2_1) ax >>= 18; + std %f34,[%fp+tmp1] ! (7_1) i = ((unsigned long long*)&div)[0]; + fstod %f0,%f20 ! (1_0) x = (double)ftmp0; + + fmuld %f32,%f40,%f32 ! (4_1) dtmp1 = div0 * y0; + ldx [%fp+tmp0],%o0 ! (6_1) i = ((unsigned long long*)&div)[0]; + and %o1,-8,%o1 ! (2_1) ux &= -8; + fand %f24,DC2,%f26 ! (0_0) y = vis_fand(y,dconst2); + + faddd %f48,%f44,%f12 ! (1_1) res += dtmp0; + and %o7,-8,%o7 ! (2_1) ax &= -8; + ldd [%l0+%o1],%f48 ! (2_1) dtmp0 = *(double*)((char*)sign_arr + ux); + bn,pn %icc,.exit + + ldd [%o7+%l7],%f0 ! (2_1) res = *(double*)((char*)parr1 + ax); + mov %l6,%l5 + fmuld %f4,%f50,%f4 ! (2_1) dtmp0 *= x2; + fand %f28,DC3,%f24 ! (6_1) y0 = vis_fand(div,dconst3); + + fmuld %f16,%f16,%f50 ! (3_1) x2 = xx * xx; + srlx %o0,43,%o0 ! (6_1) i >>= 43; + mov %l3,%o7 + fsubd DTWO,%f42,%f44 ! (5_1) dtmp0 = dtwo - dtmp0; + + and %o0,508,%l6 ! (6_1) i &= 508; + add %l4,stridex,%l3 ! px += stridex; + bn,pn %icc,.exit + fsubd DTWO,%f32,%f46 ! (4_1) dtmp1 = dtwo - dtmp1; + + fmuld %f0,%f48,%f48 ! (2_1) res *= dtmp0; + add %g1,stridey,%o0 ! py += stridey; + ld [%i4+%l6],%f0 ! (6_1) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); + fdtos %f12,%f12 ! (1_1) ftmp0 = (float)res; + + fmuld %f22,%f26,%f32 ! (0_0) div = x * y; + srl %o7,28,%o1 ! (3_1) ux >>= 28; + lda [%l4]0x82,%l6 ! (2_0) ux = ((int*)px)[0]; + faddd %f4,K0,%f42 ! (2_1) dtmp0 += K0; + + fmuld K2,%f50,%f4 ! (3_1) dtmp0 = K2 * x2; + and %o7,MASK_0x7fffffff,%o7 ! (3_1) ax = ux & 0x7fffffff; + st %f12,[%g1] ! 
(1_1) py[0] = ftmp0; + fsubd %f22,%f26,%f22 ! (0_0) xx = x - y; + + fmuld %f40,%f46,%f26 ! (4_1) y0 *= dtmp1; + add %o7,MASK_0x100000,%g1 ! (3_1) ax += 0x00100000; + and %o1,-8,%o1 ! (3_1) ux &= -8; + fpsub32 %f0,%f24,%f40 ! (6_1) y0 = vis_fpsub32(dtmp0, y0); + + fmuld %f38,%f44,%f38 ! (5_1) y0 *= dtmp0; + and %l6,MASK_0x7fffffff,%o7 ! (2_0) ax = ux & 0x7fffffff; + lda [%l4]0x82,%f0 ! (2_0) ftmp0 = *px; + fpadd32 %f20,DC1,%f24 ! (1_0) y = vis_fpadd32(x,dconst1); + + fmuld %f42,%f18,%f44 ! (2_1) dtmp0 *= xx; + cmp %o7,%o4 ! (2_0) ax ? 0x39b89c55 + bl,pn %icc,.update18 ! (2_0) if ( ax < 0x39b89c55 ) + faddd DONE,%f32,%f32 ! (0_0) div += done; +.cont18: + fmuld %f28,%f40,%f42 ! (6_1) dtmp0 = div0 * y0; + cmp %o7,%o5 ! (2_0) ax ? 0x4c700518 + bg,pn %icc,.update19 ! (2_0) if ( ax > 0x4c700518 ) + faddd %f4,K1,%f4 ! (3_1) dtmp0 += K1; +.cont19: + fmuld %f14,%f26,%f14 ! (4_1) xx *= y0; + srl %g1,18,%o7 ! (3_1) ax >>= 18; + std %f32,[%fp+tmp0] ! (0_0) i = ((unsigned long long*)&div)[0]; + fstod %f0,%f18 ! (2_0) x = (double)ftmp0; + + fmuld %f30,%f38,%f30 ! (5_1) dtmp1 = div0 * y0; + and %o7,-8,%o7 ! (3_1) ax &= -8; + ldx [%fp+tmp1],%g1 ! (7_1) i = ((unsigned long long*)&div)[0]; + fand %f24,DC2,%f26 ! (1_0) y = vis_fand(y,dconst2); + + faddd %f48,%f44,%f12 ! (2_1) res += dtmp0; + mov %l6,%l4 + ldd [%l0+%o1],%f48 ! (3_1) dtmp0 = *(double*)((char*)sign_arr + ux); + bn,pn %icc,.exit + + fmuld %f4,%f50,%f4 ! (3_1) dtmp0 *= x2; + ldd [%o7+%l7],%f0 ! (3_1) res = *(double*)((char*)parr1 + ax) + nop + fand %f34,DC3,%f24 ! (7_1) y0 = vis_fand(div,dconst3); + + fmuld %f14,%f14,%f50 ! (4_1) x2 = xx * xx; + srlx %g1,43,%g1 ! (7_1) i >>= 43; + mov %i0,%o7 + fsubd DTWO,%f42,%f44 ! (6_1) dtmp0 = dtwo - dtmp0; + + and %g1,508,%l6 ! (7_1) i &= 508; + add %l3,stridex,%i0 ! px += stridex; + bn,pn %icc,.exit + fsubd DTWO,%f30,%f46 ! (5_1) dtmp1 = dtwo - dtmp1; + + fmuld %f0,%f48,%f48 ! (3_1) res *= dtmp0; + add %o0,stridey,%g1 ! py += stridey; + ld [%i4+%l6],%f0 ! 
(7_1) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); + fdtos %f12,%f12 ! (2_1) ftmp0 = (float)res; + + fmuld %f20,%f26,%f30 ! (1_0) div = x * y; + srl %o7,28,%o1 ! (4_1) ux >>= 28; + lda [%l3]0x82,%l6 ! (3_0) ux = ((int*)px)[0]; + faddd %f4,K0,%f42 ! (3_1) dtmp0 += K0; + + fmuld K2,%f50,%f4 ! (4_1) dtmp0 = K2 * x2; + and %o7,MASK_0x7fffffff,%o7 ! (4_1) ax = ux & 0x7fffffff; + st %f12,[%o0] ! (2_1) py[0] = ftmp0; + fsubd %f20,%f26,%f20 ! (1_0) xx = x - y; + + fmuld %f38,%f46,%f26 ! (5_1) y0 *= dtmp1; + add %o7,MASK_0x100000,%o0 ! (4_1) ax += 0x00100000; + and %o1,-8,%o1 ! (4_1) ux &= -8; + fpsub32 %f0,%f24,%f38 ! (7_1) y0 = vis_fpsub32(dtmp0, y0); + + fmuld %f40,%f44,%f40 ! (6_1) y0 *= dtmp0; + and %l6,MASK_0x7fffffff,%o7 ! (3_0) ax = ux & 0x7fffffff; + lda [%l3]0x82,%f0 ! (3_0) ftmp0 = *px; + fpadd32 %f18,DC1,%f24 ! (2_0) y = vis_fpadd32(x,dconst1); + + fmuld %f42,%f16,%f44 ! (3_1) dtmp0 *= xx; + cmp %o7,%o4 ! (3_0) ax ? 0x39b89c55 + bl,pn %icc,.update20 ! (3_0) if ( ax < 0x39b89c55 ) + faddd DONE,%f30,%f30 ! (1_0) div += done; +.cont20: + fmuld %f34,%f38,%f42 ! (7_1) dtmp0 = div0 * y0; + cmp %o7,%o5 ! (3_0) ax ? 0x4c700518 + bg,pn %icc,.update21 ! (3_0) if ( ax > 0x4c700518 ) + faddd %f4,K1,%f4 ! (4_1) dtmp0 += K1; +.cont21: + fmuld %f36,%f26,%f36 ! (5_1) xx *= y0; + srl %o0,18,%o7 ! (4_1) ax >>= 18; + std %f30,[%fp+tmp1] ! (1_0) i = ((unsigned long long*)&div)[0]; + fstod %f0,%f16 ! (3_0) x = (double)ftmp0; + + fmuld %f28,%f40,%f28 ! (6_1) dtmp1 = div0 * y0; + and %o7,-8,%o7 ! (4_1) ax &= -8; + ldx [%fp+tmp0],%o0 ! (0_0) i = ((unsigned long long*)&div)[0]; + fand %f24,DC2,%f26 ! (2_0) y = vis_fand(y,dconst2); + + faddd %f48,%f44,%f12 ! (3_1) res += dtmp0; + nop + ldd [%l0+%o1],%f48 ! (4_1) dtmp0 = *(double*)((char*)sign_arr + ux); + bn,pn %icc,.exit + + ldd [%o7+%l7],%f0 ! (4_1) res = *(double*)((char*)parr1 + ax); + mov %l6,%l3 + fmuld %f4,%f50,%f4 ! (4_1) dtmp0 *= x2; + fand %f32,DC3,%f24 ! (0_0) y0 = vis_fand(div,dconst3); + + fmuld %f36,%f36,%f50 ! 
(5_1) x2 = xx * xx; + srlx %o0,43,%o0 ! (0_0) i >>= 43; + mov %i2,%o7 + fsubd DTWO,%f42,%f44 ! (7_1) dtmp0 = dtwo - dtmp0; + + and %o0,508,%l6 ! (0_0) i &= 508; + add %i0,stridex,%i2 ! px += stridex; + bn,pn %icc,.exit + fsubd DTWO,%f28,%f46 ! (6_1) dtmp1 = dtwo - dtmp1; + + fmuld %f0,%f48,%f48 ! (4_1) res *= dtmp0; + add %g1,stridey,%o0 ! py += stridey; + ld [%i4+%l6],%f0 ! (0_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); + fdtos %f12,%f12 ! (3_1) ftmp0 = (float)res; + + fmuld %f18,%f26,%f28 ! (2_0) div = x * y; + srl %o7,28,%o1 ! (5_1) ux >>= 28; + lda [%i0]0x82,%l6 ! (4_0) ux = ((int*)px)[0]; + faddd %f4,K0,%f42 ! (4_1) dtmp0 += K0; + + fmuld K2,%f50,%f4 ! (5_1) dtmp0 = K2 * x2; + and %o7,MASK_0x7fffffff,%o7 ! (5_1) ax = ux & 0x7fffffff; + st %f12,[%g1] ! (3_1) py[0] = ftmp0; + fsubd %f18,%f26,%f18 ! (2_0) xx = x - y; + + fmuld %f40,%f46,%f26 ! (6_1) y0 *= dtmp1; + add %o7,MASK_0x100000,%g1 ! (5_1) ax += 0x00100000; + and %o1,-8,%o1 ! (5_1) ux &= -8; + fpsub32 %f0,%f24,%f40 ! (0_0) y0 = vis_fpsub32(dtmp0, y0); + + fmuld %f38,%f44,%f38 ! (7_1) y0 *= dtmp0; + and %l6,MASK_0x7fffffff,%o7 ! (4_0) ax = ux & 0x7fffffff; + lda [%i0]0x82,%f0 ! (4_0) ftmp0 = *px; + fpadd32 %f16,DC1,%f24 ! (3_0) y = vis_fpadd32(x,dconst1); + + fmuld %f42,%f14,%f44 ! (4_1) dtmp0 *= xx; + cmp %o7,%o4 ! (4_0) ax ? 0x39b89c55 + bl,pn %icc,.update22 ! (4_0) if ( ax < 0x39b89c55 ) + faddd DONE,%f28,%f28 ! (2_0) div += done; +.cont22: + fmuld %f32,%f40,%f42 ! (0_0) dtmp0 = div0 * y0; + cmp %o7,%o5 ! (4_0) ax ? 0x4c700518 + bg,pn %icc,.update23 ! (4_0) if ( ax > 0x4c700518 ) + faddd %f4,K1,%f4 ! (5_1) dtmp0 += K1; +.cont23: + fmuld %f10,%f26,%f10 ! (6_1) xx *= y0; + srl %g1,18,%o7 ! (5_1) ax >>= 18; + std %f28,[%fp+tmp0] ! (2_0) i = ((unsigned long long*)&div)[0]; + fstod %f0,%f14 ! (4_0) x = (double)ftmp0; + + fmuld %f34,%f38,%f34 ! (7_1) dtmp1 = div0 * y0; + and %o7,-8,%o7 ! (5_1) ax &= -8; + ldx [%fp+tmp1],%g1 ! (1_0) i = ((unsigned long long*)&div)[0]; + fand %f24,DC2,%f26 ! 
(3_0) y = vis_fand(y,dconst2); + + faddd %f48,%f44,%f12 ! (4_1) res += dtmp0; + mov %l6,%i0 + ldd [%l0+%o1],%f48 ! (5_1) dtmp0 = *(double*)((char*)sign_arr + ux); + bn,pn %icc,.exit + + ldd [%o7+%l7],%f0 ! (5_1) res = *(double*)((char*)parr1 + ax); + nop + fmuld %f4,%f50,%f4 ! (5_1) dtmp0 *= x2; + fand %f30,DC3,%f24 ! (1_0) y0 = vis_fand(div,dconst3); + + fmuld %f10,%f10,%f50 ! (6_1) x2 = xx * xx; + srlx %g1,43,%g1 ! (1_0) i >>= 43; + mov %l2,%o7 + fsubd DTWO,%f42,%f44 ! (0_0) dtmp0 = dtwo - dtmp0; + + and %g1,508,%l6 ! (1_0) i &= 508; + add %i2,stridex,%l2 ! px += stridex; + bn,pn %icc,.exit + fsubd DTWO,%f34,%f46 ! (7_1) dtmp1 = dtwo - dtmp1; + + fmuld %f0,%f48,%f48 ! (5_1) res *= dtmp0; + add %o0,stridey,%g1 ! py += stridey; + ld [%i4+%l6],%f0 ! (1_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); + fdtos %f12,%f12 ! (4_1) ftmp0 = (float)res; + + fmuld %f16,%f26,%f34 ! (3_0) div = x * y; + srl %o7,28,%o1 ! (6_1) ux >>= 28; + lda [%i2]0x82,%l6 ! (5_0) ux = ((int*)px)[0]; + faddd %f4,K0,%f42 ! (5_1) dtmp0 += K0; + + fmuld K2,%f50,%f4 ! (6_1) dtmp0 = K2 * x2; + and %o7,MASK_0x7fffffff,%o7 ! (6_1) ax = ux & 0x7fffffff; + st %f12,[%o0] ! (4_1) py[0] = ftmp0; + fsubd %f16,%f26,%f16 ! (3_0) xx = x - y; + + fmuld %f38,%f46,%f26 ! (7_1) y0 *= dtmp1; + add %o7,MASK_0x100000,%o0 ! (6_1) ax += 0x00100000; + and %o1,-8,%o1 ! (6_1) ux &= -8; + fpsub32 %f0,%f24,%f38 ! (1_0) y0 = vis_fpsub32(dtmp0, y0); + + fmuld %f40,%f44,%f40 ! (0_0) y0 *= dtmp0; + and %l6,MASK_0x7fffffff,%o7 ! (5_0) ax = ux & 0x7fffffff; + lda [%i2]0x82,%f0 ! (5_0) ftmp0 = *px; + fpadd32 %f14,DC1,%f24 ! (4_0) y = vis_fpadd32(x,dconst1); + + fmuld %f42,%f36,%f44 ! (5_1) dtmp0 *= xx; + cmp %o7,%o4 ! (5_0) ax ? 0x39b89c55 + bl,pn %icc,.update24 ! (5_0) if ( ax < 0x39b89c55 ) + faddd DONE,%f34,%f34 ! (3_0) div += done; +.cont24: + fmuld %f30,%f38,%f42 ! (1_0) dtmp0 = div0 * y0; + cmp %o7,%o5 ! (5_0) ax ? 0x4c700518 + bg,pn %icc,.update25 ! (5_0) if ( ax > 0x4c700518 ) + faddd %f4,K1,%f4 ! 
(6_1) dtmp0 += K1; +.cont25: + fmuld %f8,%f26,%f8 ! (7_1) xx *= y0; + srl %o0,18,%o7 ! (6_1) ax >>= 18; + std %f34,[%fp+tmp1] ! (3_0) i = ((unsigned long long*)&div)[0]; + fstod %f0,%f36 ! (5_0) x = (double)ftmp0; + + fmuld %f32,%f40,%f32 ! (0_0) dtmp1 = div0 * y0; + and %o7,-8,%o7 ! (6_1) ax &= -8; + ldx [%fp+tmp0],%o0 ! (2_0) i = ((unsigned long long*)&div)[0]; + fand %f24,DC2,%f26 ! (4_0) y = vis_fand(y,dconst2); + + faddd %f48,%f44,%f12 ! (5_1) res += dtmp0; + mov %l6,%i2 + ldd [%l0+%o1],%f48 ! (6_1) dtmp0 = *(double*)((char*)sign_arr + ux); + bn,pn %icc,.exit + + ldd [%o7+%l7],%f0 ! (6_1) res = *(double*)((char*)parr1 + ax); + nop + fmuld %f4,%f50,%f4 ! (6_1) dtmp0 *= x2; + fand %f28,DC3,%f24 ! (2_0) y0 = vis_fand(div,dconst3); + + fmuld %f8,%f8,%f50 ! (7_1) x2 = xx * xx; + srlx %o0,43,%o0 ! (2_0) i >>= 43; + mov %g5,%o7 + fsubd DTWO,%f42,%f44 ! (1_0) dtmp0 = dtwo - dtmp0; + + and %o0,508,%l6 ! (2_0) i &= 508; + add %l2,stridex,%g5 ! px += stridex; + bn,pn %icc,.exit + fsubd DTWO,%f32,%f46 ! (0_0) dtmp1 = dtwo - dtmp1; + + fmuld %f0,%f48,%f48 ! (6_1) res *= dtmp0; + add %g1,stridey,%o0 ! py += stridey; + ld [%i4+%l6],%f0 ! (2_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); + fdtos %f12,%f12 ! (5_1) ftmp0 = (float)res; + + fmuld %f14,%f26,%f32 ! (4_0) div = x * y; + srl %o7,28,%o1 ! (7_1) ux >>= 28; + lda [%l2]0x82,%l6 ! (6_0) ux = ((int*)px)[0]; + faddd %f4,K0,%f42 ! (6_1) dtmp0 += K0; + + fmuld K2,%f50,%f4 ! (7_1) dtmp0 = K2 * x2; + and %o7,MASK_0x7fffffff,%o7 ! (7_1) ax = ux & 0x7fffffff; + st %f12,[%g1] ! (5_1) py[0] = ftmp0; + fsubd %f14,%f26,%f14 ! (4_0) xx = x - y; + + fmuld %f40,%f46,%f26 ! (0_0) y0 *= dtmp1; + add %o7,MASK_0x100000,%g1 ! (7_1) ax += 0x00100000; + and %o1,-8,%o1 ! (7_1) ux &= -8; + fpsub32 %f0,%f24,%f40 ! (2_0) y0 = vis_fpsub32(dtmp0, y0); + + fmuld %f38,%f44,%f38 ! (1_0) y0 *= dtmp0; + and %l6,MASK_0x7fffffff,%o7 ! (6_0) ax = ux & 0x7fffffff; + lda [%l2]0x82,%f0 ! (6_0) ftmp0 = *px; + fpadd32 %f36,DC1,%f24 ! 
(5_0) y = vis_fpadd32(x,dconst1); + + fmuld %f42,%f10,%f44 ! (6_1) dtmp0 *= xx; + cmp %o7,%o4 ! (6_0) ax ? 0x39b89c55 + bl,pn %icc,.update26 ! (6_0) if ( ax < 0x39b89c55 ) + faddd DONE,%f32,%f32 ! (4_0) div += done; +.cont26: + fmuld %f28,%f40,%f42 ! (2_0) dtmp0 = div0 * y0; + cmp %o7,%o5 ! (6_0) ax ? 0x4c700518 + bg,pn %icc,.update27 ! (6_0) if ( ax > 0x4c700518 ) + faddd %f4,K1,%f4 ! (7_1) dtmp0 += K1; +.cont27: + fmuld %f22,%f26,%f22 ! (0_0) xx *= y0; + srl %g1,18,%o7 ! (7_1) ax >>= 18; + std %f32,[%fp+tmp0] ! (4_0) i = ((unsigned long long*)&div)[0]; + fstod %f0,%f10 ! (6_0) x = (double)ftmp0; + + fmuld %f30,%f38,%f30 ! (1_0) dtmp1 = div0 * y0; + and %o7,-8,%o7 ! (7_1) ax &= -8; + ldx [%fp+tmp1],%g1 ! (3_0) i = ((unsigned long long*)&div)[0]; + fand %f24,DC2,%f26 ! (5_0) y = vis_fand(y,dconst2); + + faddd %f48,%f44,%f12 ! (6_1) res += dtmp0; + mov %l6,%l2 + ldd [%l0+%o1],%f48 ! (7_1) dtmp0 = *(double*)((char*)sign_arr + ux); + bn,pn %icc,.exit + + ldd [%o7+%l7],%f0 ! (7_1) res = *(double*)((char*)parr1 + ax); + nop + fmuld %f4,%f50,%f4 ! (7_1) dtmp0 *= x2; + fand %f34,DC3,%f24 ! (3_0) y0 = vis_fand(div,dconst3); + + fmuld %f22,%f22,%f50 ! (0_0) x2 = xx * xx; + srlx %g1,43,%g1 ! (3_0) i >>= 43; + mov %i3,%o7 + fsubd DTWO,%f42,%f44 ! (2_0) dtmp0 = dtwo - dtmp0; + + and %g1,508,%l6 ! (3_0) i &= 508; + add %g5,stridex,%i3 ! px += stridex; + bn,pn %icc,.exit + fsubd DTWO,%f30,%f46 ! (1_0) dtmp1 = dtwo - dtmp1; + + fmuld %f0,%f48,%f48 ! (7_1) res *= dtmp0; + add %o0,stridey,%g1 ! py += stridey; + ld [%i4+%l6],%f0 ! (3_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); + fdtos %f12,%f12 ! (6_1) ftmp0 = (float)res; + + fmuld %f36,%f26,%f30 ! (5_0) div = x * y; + srl %o7,28,%o1 ! (0_0) ux >>= 28; + lda [%g5]0x82,%l6 ! (7_0) ux = ((int*)px)[0]; + faddd %f4,K0,%f42 ! (7_1) dtmp0 += K0; + + fmuld K2,%f50,%f4 ! (0_0) dtmp0 = K2 * x2; + and %o7,MASK_0x7fffffff,%o7 ! (0_0) ax = ux & 0x7fffffff; + st %f12,[%o0] ! (6_1) py[0] = ftmp0; + fsubd %f36,%f26,%f36 ! 
(5_0) xx = x - y; + + fmuld %f38,%f46,%f26 ! (1_0) y0 *= dtmp1; + add %o7,MASK_0x100000,%o0 ! (0_0) ax += 0x00100000; + and %o1,-8,%o1 ! (0_0) ux &= -8; + fpsub32 %f0,%f24,%f38 ! (3_0) y0 = vis_fpsub32(dtmp0, y0); + + fmuld %f40,%f44,%f40 ! (2_0) y0 *= dtmp0; + and %l6,MASK_0x7fffffff,%o7 ! (7_0) ax = ux & 0x7fffffff; + lda [%g5]0x82,%f0 ! (7_0) ftmp0 = *px; + fpadd32 %f10,DC1,%f24 ! (6_0) y = vis_fpadd32(x,dconst1); + + fmuld %f42,%f8,%f44 ! (7_1) dtmp0 *= xx; + cmp %o7,%o4 ! (7_0) ax ? 0x39b89c55 + bl,pn %icc,.update28 ! (7_0) if ( ax < 0x39b89c55 ) + faddd DONE,%f30,%f30 ! (5_0) div += done; +.cont28: + fmuld %f34,%f38,%f42 ! (3_0) dtmp0 = div0 * y0; + cmp %o7,%o5 ! (7_0) ax ? 0x4c700518 + bg,pn %icc,.update29 ! (7_0) if ( ax > 0x4c700518 ) + faddd %f4,K1,%f4 ! (0_0) dtmp0 += K1; +.cont29: + fmuld %f20,%f26,%f20 ! (1_0) xx *= y0; + srl %o0,18,%o7 ! (0_0) ax >>= 18; + std %f30,[%fp+tmp1] ! (5_0) i = ((unsigned long long*)&div)[0]; + fstod %f0,%f8 ! (7_0) x = (double)ftmp0; + + fmuld %f28,%f40,%f28 ! (2_0) dtmp1 = div0 * y0; + and %o7,-8,%o7 ! (0_0) ux &= -8; + ldx [%fp+tmp0],%o0 ! (4_0) i = ((unsigned long long*)&div)[0]; + fand %f24,DC2,%f26 ! (6_0) y = vis_fand(y,dconst2); + + faddd %f48,%f44,%f12 ! (7_1) res += dtmp0; + subcc counter,8,counter + ldd [%l0+%o1],%f48 ! (0_0) dtmp0 = *(double*)((char*)sign_arr + ux); + bn,pn %icc,.exit + + fmuld %f4,%f50,%f4 ! (0_0) dtmp0 *= x2; + mov %l6,%g5 + ldd [%o7+%l7],%f0 ! (0_0) res = *(double*)((char*)parr1 + ax); + fand %f32,DC3,%f24 ! (4_0) y0 = vis_fand(div,dconst3); + + fmuld %f20,%f20,%f50 ! (1_0) x2 = xx * xx; + srlx %o0,43,%l6 ! (4_0) i >>= 43; + mov %l5,%o7 + fsubd DTWO,%f42,%f44 ! (3_0) dtmp0 = dtwo - dtmp0; + + add %g1,stridey,%o0 ! py += stridey; + and %l6,508,%l6 ! (4_0) i &= 508; + bn,pn %icc,.exit + fsubd DTWO,%f28,%f46 ! (2_0) dtmp1 = dtwo - dtmp1; + + fmuld %f0,%f48,%f48 ! (0_0) res *= dtmp0; + ld [%i4+%l6],%f0 ! (4_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); + add %i3,stridex,%l5 ! 
px += stridex; + fdtos %f12,%f12 ! (7_1) ftmp0 = (float)res; + + lda [%i3]0x82,%l6 ! (0_0) ux = ((int*)px)[0]; + fmuld %f10,%f26,%f28 ! (6_0) div = x * y; + bpos,pt %icc,.main_loop + faddd %f4,K0,%f42 ! (0_0) dtmp0 += K0; + + srl %o7,28,%l5 ! (1_0) ux >>= 28; + st %f12,[%g1] ! (7_1) py[0] = ftmp0; + +.tail: + addcc counter,7,counter + bneg,pn %icc,.begin + or %g0,%o0,%o1 + + fsubd %f10,%f26,%f10 ! (6_1) xx = x - y; + and %o7,MASK_0x7fffffff,%g1 ! (1_1) ax = ux & 0x7fffffff; + fmuld K2,%f50,%f4 ! (1_1) dtmp0 = K2 * x2; + + fmuld %f40,%f46,%f26 ! (2_1) y0 *= dtmp1; + add %g1,MASK_0x100000,%g1 ! (1_1) ax += 0x00100000; + and %l5,-8,%l5 ! (1_1) ux &= -8; + fpsub32 %f0,%f24,%f40 ! (4_1) y0 = vis_fpsub32(dtmp0, y0); + + fmuld %f38,%f44,%f38 ! (3_1) y0 *= dtmp0; + + fmuld %f42,%f22,%f44 ! (0_1) dtmp0 *= xx; + faddd DONE,%f28,%f28 ! (6_1) div += done; + + fmuld %f32,%f40,%f42 ! (4_1) dtmp0 = div0 * y0; + faddd %f4,K1,%f4 ! (1_1) dtmp0 += K1; + + fmuld %f18,%f26,%f18 ! (2_1) xx *= y0; + srl %g1,18,%o7 ! (1_1) ax >>= 18; + std %f28,[%fp+tmp0] ! (6_1) i = ((unsigned long long*)&div)[0]; + + fmuld %f34,%f38,%f34 ! (3_1) dtmp1 = div0 * y0; + and %o7,-8,%o7 ! (1_1) ax &= -8; + ldx [%fp+tmp1],%g1 ! (5_1) i = ((unsigned long long*)&div)[0]; + + faddd %f48,%f44,%f12 ! (0_1) res += dtmp0; + add %o7,%l7,%o7 ! (1_1) (char*)parr1 + ax; + ldd [%l0+%l5],%f48 ! (1_1) dtmp0 = *(double*)((char*)sign_arr + ux); + + fmuld %f4,%f50,%f4 ! (1_1) dtmp0 *= x2; + fand %f30,DC3,%f24 ! (5_1) y0 = vis_fand(div,dconst3); + ldd [%o7],%f0 ! (1_1) res = *(double*)((char*)parr1 + ax); + + fmuld %f18,%f18,%f50 ! (2_1) x2 = xx * xx; + fsubd DTWO,%f42,%f44 ! (4_1) dtmp0 = dtwo - dtmp0; + srlx %g1,43,%g1 ! (5_1) i >>= 43; + + and %g1,508,%l6 ! (5_1) i &= 508; + mov %l4,%o7 + fsubd DTWO,%f34,%f46 ! (3_1) dtmp1 = dtwo - dtmp1; + + fmuld %f0,%f48,%f48 ! (1_1) res *= dtmp0; + add %o0,stridey,%g1 ! py += stridey; + ld [%i4+%l6],%f0 ! (5_1) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); + fdtos %f12,%f12 ! 
(0_1) ftmp0 = (float)res; + + srl %o7,28,%l4 ! (2_1) ux >>= 28; + st %f12,[%o0] ! (0_1) py[0] = ftmp0; + faddd %f4,K0,%f42 ! (1_1) dtmp0 += K0; + + subcc counter,1,counter + bneg,pn %icc,.begin + or %g0,%g1,%o1 + + fmuld K2,%f50,%f4 ! (2_1) dtmp0 = K2 * x2; + and %o7,MASK_0x7fffffff,%o0 ! (2_1) ax = ux & 0x7fffffff; + + fmuld %f38,%f46,%f26 ! (3_1) y0 *= dtmp1; + add %o0,MASK_0x100000,%o0 ! (2_1) ax += 0x00100000; + and %l4,-8,%l4 ! (2_1) ux &= -8; + fpsub32 %f0,%f24,%f38 ! (5_1) y0 = vis_fpsub32(dtmp0, y0); + + fmuld %f40,%f44,%f40 ! (4_1) y0 *= dtmp0; + + fmuld %f42,%f20,%f44 ! (1_1) dtmp0 *= xx; + + fmuld %f30,%f38,%f42 ! (5_1) dtmp0 = div0 * y0; + faddd %f4,K1,%f4 ! (2_1) dtmp0 += K1; + + fmuld %f16,%f26,%f16 ! (3_1) xx *= y0; + srl %o0,18,%o7 ! (2_1) ax >>= 18; + + fmuld %f32,%f40,%f32 ! (4_1) dtmp1 = div0 * y0; + and %o7,-8,%o7 ! (2_1) ax &= -8; + ldx [%fp+tmp0],%o0 ! (6_1) i = ((unsigned long long*)&div)[0]; + + faddd %f48,%f44,%f12 ! (1_1) res += dtmp0; + add %o7,%l7,%o7 ! (2_1) (char*)parr1 + ax; + ldd [%l0+%l4],%f48 ! (2_1) dtmp0 = *(double*)((char*)sign_arr + ux); + + fmuld %f4,%f50,%f4 ! (2_1) dtmp0 *= x2; + fand %f28,DC3,%f24 ! (6_1) y0 = vis_fand(div,dconst3); + ldd [%o7],%f0 ! (2_1) res = *(double*)((char*)parr1 + ax); + + fmuld %f16,%f16,%f50 ! (3_1) x2 = xx * xx; + fsubd DTWO,%f42,%f44 ! (5_1) dtmp0 = dtwo - dtmp0; + srlx %o0,43,%o0 ! (6_1) i >>= 43; + + and %o0,508,%l6 ! (6_1) i &= 508; + mov %l3,%o7 + fsubd DTWO,%f32,%f46 ! (4_1) dtmp1 = dtwo - dtmp1; + + fmuld %f0,%f48,%f48 ! (2_1) res *= dtmp0; + add %g1,stridey,%o0 ! py += stridey; + ld [%i4+%l6],%f0 ! (6_1) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); + fdtos %f12,%f12 ! (1_1) ftmp0 = (float)res; + + srl %o7,28,%l3 ! (3_1) ux >>= 28; + st %f12,[%g1] ! (1_1) py[0] = ftmp0; + faddd %f4,K0,%f42 ! (2_1) dtmp0 += K0; + + subcc counter,1,counter + bneg,pn %icc,.begin + or %g0,%o0,%o1 + + fmuld K2,%f50,%f4 ! (3_1) dtmp0 = K2 * x2; + and %o7,MASK_0x7fffffff,%g1 ! 
(3_1) ax = ux & 0x7fffffff; + + fmuld %f40,%f46,%f26 ! (4_1) y0 *= dtmp1; + add %g1,MASK_0x100000,%g1 ! (3_1) ax += 0x00100000; + and %l3,-8,%l3 ! (3_1) ux &= -8; + fpsub32 %f0,%f24,%f40 ! (6_1) y0 = vis_fpsub32(dtmp0, y0); + + fmuld %f38,%f44,%f38 ! (5_1) y0 *= dtmp0; + + fmuld %f42,%f18,%f44 ! (2_1) dtmp0 *= xx; + + fmuld %f28,%f40,%f42 ! (6_1) dtmp0 = div0 * y0; + faddd %f4,K1,%f4 ! (3_1) dtmp0 += K1; + + fmuld %f14,%f26,%f14 ! (4_1) xx *= y0; + srl %g1,18,%o7 ! (3_1) ax >>= 18; + + fmuld %f30,%f38,%f30 ! (5_1) dtmp1 = div0 * y0; + and %o7,-8,%o7 ! (3_1) ax &= -8; + + faddd %f48,%f44,%f12 ! (2_1) res += dtmp0; + add %o7,%l7,%o7 ! (3_1) (char*)parr1 + ax; + ldd [%l0+%l3],%f48 ! (3_1) dtmp0 = *(double*)((char*)sign_arr + ux); + + fmuld %f4,%f50,%f4 ! (3_1) dtmp0 *= x2; + ldd [%o7],%f0 ! (3_1) res = *(double*)((char*)parr1 + ax) + + fmuld %f14,%f14,%f50 ! (4_1) x2 = xx * xx; + fsubd DTWO,%f42,%f44 ! (6_1) dtmp0 = dtwo - dtmp0; + + mov %i0,%o7 + fsubd DTWO,%f30,%f46 ! (5_1) dtmp1 = dtwo - dtmp1; + + fmuld %f0,%f48,%f48 ! (3_1) res *= dtmp0; + add %o0,stridey,%g1 ! py += stridey; + fdtos %f12,%f12 ! (2_1) ftmp0 = (float)res; + + srl %o7,28,%i0 ! (4_1) ux >>= 28; + st %f12,[%o0] ! (2_1) py[0] = ftmp0; + faddd %f4,K0,%f42 ! (3_1) dtmp0 += K0; + + subcc counter,1,counter + bneg,pn %icc,.begin + or %g0,%g1,%o1 + + fmuld K2,%f50,%f4 ! (4_1) dtmp0 = K2 * x2; + and %o7,MASK_0x7fffffff,%o0 ! (4_1) ax = ux & 0x7fffffff; + + fmuld %f38,%f46,%f26 ! (5_1) y0 *= dtmp1; + add %o0,MASK_0x100000,%o0 ! (4_1) ax += 0x00100000; + and %i0,-8,%i0 ! (4_1) ux &= -8; + + fmuld %f40,%f44,%f40 ! (6_1) y0 *= dtmp0; + + fmuld %f42,%f16,%f44 ! (3_1) dtmp0 *= xx; + + faddd %f4,K1,%f4 ! (4_1) dtmp0 += K1; + + fmuld %f36,%f26,%f36 ! (5_1) xx *= y0; + srl %o0,18,%o7 ! (4_1) ax >>= 18; + + fmuld %f28,%f40,%f28 ! (6_1) dtmp1 = div0 * y0; + and %o7,-8,%o7 ! (4_1) ax &= -8; + + faddd %f48,%f44,%f12 ! (3_1) res += dtmp0; + add %o7,%l7,%o7 ! (4_1) (char*)parr1 + ax; + ldd [%l0+%i0],%f48 ! 
(4_1) dtmp0 = *(double*)((char*)sign_arr + ux); + + fmuld %f4,%f50,%f4 ! (4_1) dtmp0 *= x2; + ldd [%o7],%f0 ! (4_1) res = *(double*)((char*)parr1 + ax); + + fmuld %f36,%f36,%f50 ! (5_1) x2 = xx * xx; + + mov %i2,%o7 + fsubd DTWO,%f28,%f46 ! (6_1) dtmp1 = dtwo - dtmp1; + + fmuld %f0,%f48,%f48 ! (4_1) res *= dtmp0; + add %g1,stridey,%o0 ! py += stridey; + fdtos %f12,%f12 ! (3_1) ftmp0 = (float)res; + + srl %o7,28,%i2 ! (5_1) ux >>= 28; + st %f12,[%g1] ! (3_1) py[0] = ftmp0; + faddd %f4,K0,%f42 ! (4_1) dtmp0 += K0; + + subcc counter,1,counter + bneg,pn %icc,.begin + or %g0,%o0,%o1 + + fmuld K2,%f50,%f4 ! (5_1) dtmp0 = K2 * x2; + and %o7,MASK_0x7fffffff,%g1 ! (5_1) ax = ux & 0x7fffffff; + + fmuld %f40,%f46,%f26 ! (6_1) y0 *= dtmp1; + add %g1,MASK_0x100000,%g1 ! (5_1) ax += 0x00100000; + and %i2,-8,%i2 ! (5_1) ux &= -8; + + fmuld %f42,%f14,%f44 ! (4_1) dtmp0 *= xx; + + faddd %f4,K1,%f4 ! (5_1) dtmp0 += K1; + + fmuld %f10,%f26,%f10 ! (6_1) xx *= y0; + srl %g1,18,%o7 ! (5_1) ax >>= 18; + + and %o7,-8,%o7 ! (5_1) ax &= -8; + + faddd %f48,%f44,%f12 ! (4_1) res += dtmp0; + add %o7,%l7,%o7 ! (5_1) (char*)parr1 + ax; + ldd [%l0+%i2],%f48 ! (5_1) dtmp0 = *(double*)((char*)sign_arr + ux); + + fmuld %f4,%f50,%f4 ! (5_1) dtmp0 *= x2; + ldd [%o7],%f0 ! (5_1) res = *(double*)((char*)parr1 + ax); + + fmuld %f10,%f10,%f50 ! (6_1) x2 = xx * xx; + + mov %l2,%o7 + + fmuld %f0,%f48,%f48 ! (5_1) res *= dtmp0; + add %o0,stridey,%g1 ! py += stridey; + fdtos %f12,%f12 ! (4_1) ftmp0 = (float)res; + + srl %o7,28,%l2 ! (6_1) ux >>= 28; + st %f12,[%o0] ! (4_1) py[0] = ftmp0; + faddd %f4,K0,%f42 ! (5_1) dtmp0 += K0; + + subcc counter,1,counter + bneg,pn %icc,.begin + or %g0,%g1,%o1 + + fmuld K2,%f50,%f4 ! (6_1) dtmp0 = K2 * x2; + and %o7,MASK_0x7fffffff,%o0 ! (6_1) ax = ux & 0x7fffffff; + + add %o0,MASK_0x100000,%o0 ! (6_1) ax += 0x00100000; + and %l2,-8,%l2 ! (6_1) ux &= -8; + + fmuld %f42,%f36,%f44 ! (5_1) dtmp0 *= xx; + + faddd %f4,K1,%f4 ! (6_1) dtmp0 += K1; + + srl %o0,18,%o7 ! 
(6_1) ax >>= 18; + + and %o7,-8,%o7 ! (6_1) ax &= -8; + + faddd %f48,%f44,%f12 ! (5_1) res += dtmp0; + add %o7,%l7,%o7 ! (6_1) (char*)parr1 + ax; + ldd [%l0+%l2],%f48 ! (6_1) dtmp0 = *(double*)((char*)sign_arr + ux); + + fmuld %f4,%f50,%f4 ! (6_1) dtmp0 *= x2; + ldd [%o7],%f0 ! (6_1) res = *(double*)((char*)parr1 + ax); + + fmuld %f0,%f48,%f48 ! (6_1) res *= dtmp0; + add %g1,stridey,%o0 ! py += stridey; + fdtos %f12,%f12 ! (5_1) ftmp0 = (float)res; + + st %f12,[%g1] ! (5_1) py[0] = ftmp0; + faddd %f4,K0,%f42 ! (6_1) dtmp0 += K0; + + subcc counter,1,counter + bneg,pn %icc,.begin + or %g0,%o0,%o1 + + fmuld %f42,%f10,%f44 ! (6_1) dtmp0 *= xx; + + faddd %f48,%f44,%f12 ! (6_1) res += dtmp0; + + add %o0,stridey,%g1 ! py += stridey; + fdtos %f12,%f12 ! (6_1) ftmp0 = (float)res; + + st %f12,[%o0] ! (6_1) py[0] = ftmp0; + + ba .begin + or %g0,%g1,%o1 ! py += stridey; + +.exit: + ret + restore %g0,%g0,%g0 + + .align 16 +.spec0: + add %i3,stridex,%i3 ! px += stridex; + sub counter,1,counter + st %l6,[%o1] ! *(int*)py = ux; + + ba .begin1 + add %o1,stridey,%o1 ! py += stridey; + + .align 16 +.spec1: + sethi %hi(0x7f800000),%l3 + sethi %hi(0x3fc90c00),%l4 ! pi_2 + + sethi %hi(0x80000000),%o0 + add %l4,0x3db,%l4 ! pi_2 + + cmp %l5,%l3 ! if ( ax > 0x7f800000 ) + bg,a,pn %icc,1f + fabss %f0,%f0 ! fpx = fabsf(*px); + + and %l6,%o0,%l6 ! sign = ux & 0x80000000; + + or %l6,%l4,%l6 ! sign |= pi_2; + + add %i3,stridex,%i3 ! px += stridex; + sub counter,1,counter + st %l6,[%o1] ! *(int*)py = sign; + + ba .begin1 + add %o1,stridey,%o1 ! py += stridey; + +1: + fmuls %f0,%f0,%f0 ! fpx *= fpx; + + add %i3,stridex,%i3 ! px += stridex + sub counter,1,counter + st %f0,[%o1] ! *py = fpx; + + ba .begin1 + add %o1,stridey,%o1 ! 
py += stridey; + + .align 16 +.update0: + cmp counter,1 + fzeros %f0 + ble,a .cont0 + sethi %hi(0x3fffffff),%l6 + + sub counter,1,counter + st counter,[%fp+tmp_counter] + + stx %l5,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont0 + or %g0,1,counter + + .align 16 +.update1: + cmp counter,1 + fzeros %f0 + ble,a .cont1 + sethi %hi(0x3fffffff),%l6 + + sub counter,1,counter + st counter,[%fp+tmp_counter] + + stx %l5,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont1 + or %g0,1,counter + + .align 16 +.update2: + cmp counter,2 + fzeros %f0 + ble,a .cont2 + sethi %hi(0x3fffffff),%l6 + + sub counter,2,counter + st counter,[%fp+tmp_counter] + + stx %l4,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont2 + or %g0,2,counter + + .align 16 +.update3: + cmp counter,2 + fzeros %f0 + ble,a .cont3 + sethi %hi(0x3fffffff),%l6 + + sub counter,2,counter + st counter,[%fp+tmp_counter] + + stx %l4,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont3 + or %g0,2,counter + + .align 16 +.update4: + cmp counter,3 + fzeros %f0 + ble,a .cont4 + sethi %hi(0x3fffffff),%l6 + + sub counter,3,counter + st counter,[%fp+tmp_counter] + + stx %l3,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont4 + or %g0,3,counter + + .align 16 +.update5: + cmp counter,3 + fzeros %f0 + ble,a .cont5 + sethi %hi(0x3fffffff),%l6 + + sub counter,3,counter + st counter,[%fp+tmp_counter] + + stx %l3,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont5 + or %g0,3,counter + + .align 16 +.update6: + cmp counter,4 + fzeros %f0 + ble,a .cont6 + sethi %hi(0x3fffffff),%l6 + + sub counter,4,counter + st counter,[%fp+tmp_counter] + + stx %i0,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont6 + or %g0,4,counter + + .align 16 +.update7: + cmp counter,4 + fzeros %f0 + ble,a .cont7 + sethi %hi(0x3fffffff),%l6 + + sub counter,4,counter + st counter,[%fp+tmp_counter] + + stx %i0,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont7 + or %g0,4,counter + + .align 16 +.update8: + cmp counter,5 + fzeros %f0 + ble,a .cont8 + sethi 
%hi(0x3fffffff),%l6 + + sub counter,5,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont8 + or %g0,5,counter + + .align 16 +.update9: + cmp counter,5 + fzeros %f0 + ble,a .cont9 + sethi %hi(0x3fffffff),%l6 + + sub counter,5,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont9 + or %g0,5,counter + + .align 16 +.update10: + cmp counter,6 + fzeros %f0 + ble,a .cont10 + sethi %hi(0x3fffffff),%l6 + + sub counter,6,counter + st counter,[%fp+tmp_counter] + + stx %l2,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont10 + or %g0,6,counter + + .align 16 +.update11: + cmp counter,6 + fzeros %f0 + ble,a .cont11 + sethi %hi(0x3fffffff),%l6 + + sub counter,6,counter + st counter,[%fp+tmp_counter] + + stx %l2,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont11 + or %g0,6,counter + + .align 16 +.update12: + cmp counter,7 + fzeros %f0 + ble,a .cont12 + sethi %hi(0x3fffffff),%l6 + + sub counter,7,counter + st counter,[%fp+tmp_counter] + + stx %g5,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont12 + or %g0,7,counter + + .align 16 +.update13: + cmp counter,7 + fzeros %f0 + ble,a .cont13 + sethi %hi(0x3fffffff),%l6 + + sub counter,7,counter + st counter,[%fp+tmp_counter] + + stx %g5,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont13 + or %g0,7,counter + + .align 16 +.update14: + cmp counter,0 + fzeros %f0 + ble,a .cont14 + sethi %hi(0x3fffffff),%l6 + + sub counter,0,counter + st counter,[%fp+tmp_counter] + + stx %i3,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont14 + or %g0,0,counter + + .align 16 +.update15: + cmp counter,0 + fzeros %f0 + ble,a .cont15 + sethi %hi(0x3fffffff),%l6 + + sub counter,0,counter + st counter,[%fp+tmp_counter] + + stx %i3,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont15 + or %g0,0,counter + + .align 16 +.update16: + cmp counter,1 + fzeros %f0 + ble,a .cont16 + sethi %hi(0x3fffffff),%l6 + + sub counter,1,counter + st 
counter,[%fp+tmp_counter] + + stx %l5,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont16 + or %g0,1,counter + + .align 16 +.update17: + cmp counter,1 + fzeros %f0 + ble,a .cont17 + sethi %hi(0x3fffffff),%l6 + + sub counter,1,counter + st counter,[%fp+tmp_counter] + + stx %l5,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont17 + or %g0,1,counter + + .align 16 +.update18: + cmp counter,2 + fzeros %f0 + ble,a .cont18 + sethi %hi(0x3fffffff),%l6 + + sub counter,2,counter + st counter,[%fp+tmp_counter] + + stx %l4,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont18 + or %g0,2,counter + + .align 16 +.update19: + cmp counter,2 + fzeros %f0 + ble,a .cont19 + sethi %hi(0x3fffffff),%l6 + + sub counter,2,counter + st counter,[%fp+tmp_counter] + + stx %l4,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont19 + or %g0,2,counter + + .align 16 +.update20: + cmp counter,3 + fzeros %f0 + ble,a .cont20 + sethi %hi(0x3fffffff),%l6 + + sub counter,3,counter + st counter,[%fp+tmp_counter] + + stx %l3,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont20 + or %g0,3,counter + + .align 16 +.update21: + cmp counter,3 + fzeros %f0 + ble,a .cont21 + sethi %hi(0x3fffffff),%l6 + + sub counter,3,counter + st counter,[%fp+tmp_counter] + + stx %l3,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont21 + or %g0,3,counter + + .align 16 +.update22: + cmp counter,4 + fzeros %f0 + ble,a .cont22 + sethi %hi(0x3fffffff),%l6 + + sub counter,4,counter + st counter,[%fp+tmp_counter] + + stx %i0,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont22 + or %g0,4,counter + + .align 16 +.update23: + cmp counter,4 + fzeros %f0 + ble,a .cont23 + sethi %hi(0x3fffffff),%l6 + + sub counter,4,counter + st counter,[%fp+tmp_counter] + + stx %i0,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont23 + or %g0,4,counter + + .align 16 +.update24: + cmp counter,5 + fzeros %f0 + ble,a .cont24 + sethi %hi(0x3fffffff),%l6 + + sub counter,5,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + sethi 
%hi(0x3fffffff),%l6 + ba .cont24 + or %g0,5,counter + + .align 16 +.update25: + cmp counter,5 + fzeros %f0 + ble,a .cont25 + sethi %hi(0x3fffffff),%l6 + + sub counter,5,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont25 + or %g0,5,counter + + .align 16 +.update26: + cmp counter,6 + fzeros %f0 + ble,a .cont26 + sethi %hi(0x3fffffff),%l6 + + sub counter,6,counter + st counter,[%fp+tmp_counter] + + stx %l2,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont26 + or %g0,6,counter + + .align 16 +.update27: + cmp counter,6 + fzeros %f0 + ble,a .cont27 + sethi %hi(0x3fffffff),%l6 + + sub counter,6,counter + st counter,[%fp+tmp_counter] + + stx %l2,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont27 + or %g0,6,counter + + .align 16 +.update28: + cmp counter,7 + fzeros %f0 + ble,a .cont28 + sethi %hi(0x3fffffff),%l6 + + sub counter,7,counter + st counter,[%fp+tmp_counter] + + stx %g5,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont28 + or %g0,7,counter + + .align 16 +.update29: + cmp counter,7 + fzeros %f0 + ble,a .cont29 + sethi %hi(0x3fffffff),%l6 + + sub counter,7,counter + st counter,[%fp+tmp_counter] + + stx %g5,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont29 + or %g0,7,counter + + SET_SIZE(__vatanf) + diff --git a/usr/src/lib/libmvec/common/vis/__vcos.S b/usr/src/lib/libmvec/common/vis/__vcos.S new file mode 100644 index 0000000000..0d3ffa8ffe --- /dev/null +++ b/usr/src/lib/libmvec/common/vis/__vcos.S @@ -0,0 +1,3079 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+ .file "__vcos.S"
+
+#include "libm.h"
+/*
+ * Read-only constant table for __vcos. Each .word pair is one 8-byte
+ * entry (the first word is the high-order half). Entries are read as
+ * [table base + offset] using the byte offsets #defined right after
+ * the table; the trailing comment on each entry gives its offset and
+ * the name of the matching offset symbol.
+ */
+ RO_DATA
+ .align 64
+constants:
+ .word 0x3ec718e3,0xa6972785 /* 0x00 p4 */
+ .word 0x3ef9fd39,0x94293940 /* 0x08 q4 */
+ .word 0xbf2a019f,0x75ee4be1 /* 0x10 p3 */
+ .word 0xbf56c16b,0xba552569 /* 0x18 q3 */
+ .word 0x3f811111,0x1108c703 /* 0x20 p2 */
+ .word 0x3fa55555,0x554f5b35 /* 0x28 q2 */
+ .word 0xbfc55555,0x555554d0 /* 0x30 p1 */
+ .word 0xbfdfffff,0xffffff85 /* 0x38 q1 */
+ .word 0x3ff00000,0x00000000 /* 0x40 one = 1.0 */
+ .word 0xbfc55555,0x5551fc28 /* 0x48 pp1 */
+ .word 0x3f811107,0x62eacc9d /* 0x50 pp2 */
+ .word 0xbfdfffff,0xffff6328 /* 0x58 qq1 */
+ .word 0x3fa55551,0x5f7acf0c /* 0x60 qq2 */
+ .word 0x3fe45f30,0x6dc9c883 /* 0x68 invpio2, about 2/pi */
+ .word 0x43380000,0x00000000 /* 0x70 round = 1.5 * 2^52 */
+ .word 0x3ff921fb,0x54400000 /* 0x78 pio2_1 */
+ .word 0x3dd0b461,0x1a600000 /* 0x80 pio2_2 */
+ .word 0x3ba3198a,0x2e000000 /* 0x88 pio2_3 */
+ .word 0x397b839a,0x252049c1 /* 0x90 pio2_3t */
+ .word 0x80000000,0x00004000 /* 0x98 f30val */
+ .word 0xffff8000,0x00000000 /* 0xa0 mask */ ! N.B.: low-order words used
+ .word 0x3fc90000,0x80000000 /* 0xa8 thresh */ ! for sign bit hacking; see
+ .word 0x3fc40000,0x00000000 /* 0xb0, no offset symbol defined in this section */ ! references to "thresh" below
+/*
+ * Byte offsets of the table entries, relative to the constants label.
+ * Consecutive symbols are 8 bytes apart, one per entry above.
+ */
+#define p4 0x0
+#define q4 0x08
+#define p3 0x10
+#define q3 0x18
+#define p2 0x20
+#define q2 0x28
+#define p1 0x30
+#define q1 0x38
+#define one 0x40
+#define pp1 0x48
+#define pp2 0x50
+#define qq1 0x58
+#define qq2 0x60
+#define invpio2 0x68
+#define round 0x70
+#define pio2_1 0x78
+#define pio2_2 0x80
+#define pio2_3 0x88
+#define pio2_3t 0x90
+#define f30val 0x98
+#define mask 0xa0
+#define thresh 0xa8
+
+! local storage indices
+/*
+ * Offsets from %fp of the temporaries that the __vcos prologue
+ * reserves (tmps bytes, in addition to the minimum frame). The
+ * prologue stores its incoming arguments here: the x and y pointers
+ * in xsave/ysave, n in nsave, and the two strides in sxsave/sysave.
+ * NOTE(review): the roles of biguns, n0-n2 and the x?_1 / y?_0 slots
+ * are not visible from these definitions; confirm against the
+ * medium-range code before relying on them.
+ */
+#define xsave STACK_BIAS-0x8
+#define ysave STACK_BIAS-0x10
+#define nsave STACK_BIAS-0x14
+#define sxsave STACK_BIAS-0x18
+#define sysave STACK_BIAS-0x1c
+#define biguns STACK_BIAS-0x20
+#define n2 STACK_BIAS-0x24
+#define n1 STACK_BIAS-0x28
+#define n0 STACK_BIAS-0x2c
+#define x2_1 STACK_BIAS-0x40
+#define x1_1 STACK_BIAS-0x50
+#define x0_1 STACK_BIAS-0x60
+#define y2_0 STACK_BIAS-0x70
+#define y1_0 STACK_BIAS-0x80
+#define y0_0 STACK_BIAS-0x90
+! sizeof temp storage - must be a multiple of 16 for V9
+#define tmps 0x90
+/* tmps equals the deepest slot offset above (y0_0 = STACK_BIAS-0x90). */
+!--------------------------------------------------------------------
+! define pipes for easier reading
+/*
+ * Three banks of ten FP registers (%f0-%f9, %f10-%f19, %f20-%f29).
+ * The bank prefix (P0/P1/P2) identifies which of the three
+ * interleaved element computations a register belongs to; the suffix
+ * repeats the physical register number.
+ */
+#define P0_f0 %f0
+#define P0_f1 %f1
+#define P0_f2 %f2
+#define P0_f3 %f3
+#define P0_f4 %f4
+#define P0_f5 %f5
+#define P0_f6 %f6
+#define P0_f7 %f7
+#define P0_f8 %f8
+#define P0_f9 %f9
+
+#define P1_f10 %f10
+#define P1_f11 %f11
+#define P1_f12 %f12
+#define P1_f13 %f13
+#define P1_f14 %f14
+#define P1_f15 %f15
+#define P1_f16 %f16
+#define P1_f17 %f17
+#define P1_f18 %f18
+#define P1_f19 %f19
+
+#define P2_f20 %f20
+#define P2_f21 %f21
+#define P2_f22 %f22
+#define P2_f23 %f23
+#define P2_f24 %f24
+#define P2_f25 %f25
+#define P2_f26 %f26
+#define P2_f27 %f27
+#define P2_f28 %f28
+#define P2_f29 %f29
+
+! define __vlibm_TBL_sincos_hi & lo for easy reading
+/*
+ * Integer registers that the __vcos prologue fills (via PIC_SET) with
+ * the addresses of __vlibm_TBL_sincos_hi and __vlibm_TBL_sincos_lo.
+ */
+#define SC_HI %l3
+#define SC_LO %l4
+
+! define constants for easy reading
+/*
+ * Double-precision registers that the __vcos prologue loads from the
+ * q1..q4 entries of the constant table.
+ */
+#define C_q1 %f46
+#define C_q2 %f48
+#define C_q3 %f50
+#define C_q4 %f52
+/*
+ * C_ONE is loaded from the "one" table entry; C_ONE_LO names the
+ * second (odd) single-precision register of that same pair.
+ */
+! one ( 1 ) uno eins echi un
+#define C_ONE %f54
+#define C_ONE_LO %f55
+/*
+ * MSK_SIGN is an integer register set to 0x80000000 by the prologue.
+ * MSK_BIT31 and MSK_BIT13 are the two halves of the register pair
+ * loaded from the f30val entry (0x80000000 and 0x00004000).
+ * MSK_BITSHI17 is loaded from the mask entry (upper 17 bits set).
+ */
+! masks
+#define MSK_SIGN %i5
+#define MSK_BIT31 %f30
+#define MSK_BIT13 %f31
+#define MSK_BITSHI17 %f44
+
+/* Registers loaded from the pp1, pp2, qq1 and qq2 table entries. */
+! constants for pp and qq
+#define C_pp1 %f56
+#define C_pp2 %f58
+#define C_qq1 %f60
+#define C_qq2 %f62
+/* C_signM names the same register (%i5) as MSK_SIGN above. */
+! sign mask
+#define C_signM %i5
+/*
+ * Range-limit registers; the __vcos prologue sets LIM_l5 to
+ * 0x3fc40000 and LIM_l6 to 0x3e400000 (high words of |x| thresholds).
+ */
+#define LIM_l5 %l5
+#define LIM_l6 %l6
+! when in pri range, using value as transition from poly to table.
+! for Medium range,change use of %l6 and use to keep track of biguns.
+#define LIM_l7 %l7 + +!-------------------------------------------------------------------- + + + ENTRY(__vcos) + save %sp,-SA(MINFRAME)-tmps,%sp + PIC_SETUP(g5) + PIC_SET(g5,__vlibm_TBL_sincos_hi,l3) + PIC_SET(g5,__vlibm_TBL_sincos_lo,l4) + PIC_SET(g5,constants,o0) + mov %o0,%g1 + wr %g0,0x82,%asi ! set %asi for non-faulting loads + +! ========== primary range ========== + +! register use + +! i0 n +! i1 x +! i2 stridex +! i3 y +! i4 stridey +! i5 0x80000000 + +! l0 hx0 +! l1 hx1 +! l2 hx2 +! l3 __vlibm_TBL_sincos_hi +! l4 __vlibm_TBL_sincos_lo +! l5 0x3fc40000 +! l6 0x3e400000 +! l7 0x3fe921fb + +! the following are 64-bit registers in both V8+ and V9 + +! g1 scratch +! g5 + +! o0 py0 +! o1 py1 +! o2 py2 +! o3 oy0 +! o4 oy1 +! o5 oy2 +! o7 scratch + +! f0 x0 +! f2 +! f4 +! f6 +! f8 scratch for table base +! f9 signbit0 +! f10 x1 +! f12 +! f14 +! f16 +! f18 scratch for table base +! f19 signbit1 +! f20 x2 +! f22 +! f24 +! f26 +! f28 scratch for table base +! f29 signbit2 +! f30 0x80000000 +! f31 0x4000 +! f32 +! f34 +! f36 +! f38 +! f40 +! f42 +! f44 0xffff800000000000 +! f46 p1 +! f48 p2 +! f50 p3 +! f52 p4 +! f54 one +! f56 pp1 +! f58 pp2 +! f60 qq1 +! f62 qq2 + +#ifdef __sparcv9 + stx %i1,[%fp+xsave] ! save arguments + stx %i3,[%fp+ysave] +#else + st %i1,[%fp+xsave] ! save arguments + st %i3,[%fp+ysave] +#endif + + st %i0,[%fp+nsave] + st %i2,[%fp+sxsave] + st %i4,[%fp+sysave] + sethi %hi(0x80000000),MSK_SIGN ! load/set up constants + sethi %hi(0x3fc40000),LIM_l5 + sethi %hi(0x3e400000),LIM_l6 + sethi %hi(0x3fe921fb),LIM_l7 + or LIM_l7,%lo(0x3fe921fb),LIM_l7 + ldd [%g1+f30val],MSK_BIT31 + ldd [%g1+mask],MSK_BITSHI17 + ldd [%g1+q1],C_q1 + ldd [%g1+q2],C_q2 + ldd [%g1+q3],C_q3 + ldd [%g1+q4],C_q4 + ldd [%g1+one],C_ONE + ldd [%g1+pp1],C_pp1 + ldd [%g1+pp2],C_pp2 + ldd [%g1+qq1],C_qq1 + ldd [%g1+qq2],C_qq2 + sll %i2,3,%i2 ! scale strides + sll %i4,3,%i4 + add %fp,x0_1,%o3 ! precondition loop + add %fp,x0_1,%o4 + add %fp,x0_1,%o5 + ld [%i1],%l0 ! 
hx = *x + ld [%i1],P0_f0 + ld [%i1+4],P0_f1 + andn %l0,MSK_SIGN,%l0 ! hx &= ~0x80000000 + add %i1,%i2,%i1 ! x += stridex + + ba,pt %icc,.loop0 +!delay slot + nop + + .align 32 +.loop0: + lda [%i1]%asi,%l1 ! preload next argument + sub %l0,LIM_l6,%g1 + sub LIM_l7,%l0,%o7 + fands P0_f0,MSK_BIT31,P0_f9 ! save signbit + + lda [%i1]%asi,P1_f10 + orcc %o7,%g1,%g0 + mov %i3,%o0 ! py0 = y + bl,pn %icc,.range0 ! if hx < 0x3e400000 or > 0x3fe921fb + +! delay slot + lda [%i1+4]%asi,P1_f11 + addcc %i0,-1,%i0 + add %i3,%i4,%i3 ! y += stridey + ble,pn %icc,.endloop1 + +! delay slot + andn %l1,MSK_SIGN,%l1 + add %i1,%i2,%i1 ! x += stridex + fabsd P0_f0,P0_f0 + fmuld C_ONE,C_ONE,C_ONE ! one*one; a nop for alignment only + +.loop1: + lda [%i1]%asi,%l2 ! preload next argument + sub %l1,LIM_l6,%g1 + sub LIM_l7,%l1,%o7 + fands P1_f10,MSK_BIT31,P1_f19 ! save signbit + + lda [%i1]%asi,P2_f20 + orcc %o7,%g1,%g0 + mov %i3,%o1 ! py1 = y + bl,pn %icc,.range1 ! if hx < 0x3e400000 or > 0x3fe921fb + +! delay slot + lda [%i1+4]%asi,P2_f21 + addcc %i0,-1,%i0 + add %i3,%i4,%i3 ! y += stridey + ble,pn %icc,.endloop2 + +! delay slot + andn %l2,MSK_SIGN,%l2 + add %i1,%i2,%i1 ! x += stridex + fabsd P1_f10,P1_f10 + fmuld C_ONE,C_ONE,C_ONE ! one*one; a nop for alignment only + +.loop2: + st P0_f6,[%o3] + sub %l2,LIM_l6,%g1 + sub LIM_l7,%l2,%o7 + fands P2_f20,MSK_BIT31,P2_f29 ! save signbit + + st P0_f7,[%o3+4] + orcc %g1,%o7,%g0 + mov %i3,%o2 ! py2 = y + bl,pn %icc,.range2 ! if hx < 0x3e400000 or > 0x3fe921fb + +! delay slot + add %i3,%i4,%i3 ! y += stridey + cmp %l0,LIM_l5 + fabsd P2_f20,P2_f20 + bl,pn %icc,.case4 + +! delay slot + st P1_f16,[%o4] + cmp %l1,LIM_l5 + fpadd32s P0_f0,MSK_BIT13,P0_f8 + bl,pn %icc,.case2 + +! delay slot + st P1_f17,[%o4+4] + cmp %l2,LIM_l5 + fpadd32s P1_f10,MSK_BIT13,P1_f18 + bl,pn %icc,.case1 + +! 
delay slot + st P2_f26,[%o5] + mov %o0,%o3 + sethi %hi(0x3fc3c000),%o7 + fpadd32s P2_f20,MSK_BIT13,P2_f28 + + st P2_f27,[%o5+4] + fand P0_f8,MSK_BITSHI17,P0_f2 + mov %o1,%o4 + + fand P1_f18,MSK_BITSHI17,P1_f12 + mov %o2,%o5 + sub %l0,%o7,%l0 + + fand P2_f28,MSK_BITSHI17,P2_f22 + sub %l1,%o7,%l1 + sub %l2,%o7,%l2 + + fsubd P0_f0,P0_f2,P0_f0 + srl %l0,10,%l0 + add SC_HI,8,%g1;add SC_LO,8,%o7 + + fsubd P1_f10,P1_f12,P1_f10 + srl %l1,10,%l1 + + fsubd P2_f20,P2_f22,P2_f20 + srl %l2,10,%l2 + + fmuld P0_f0,P0_f0,P0_f2 + andn %l0,0x1f,%l0 + + fmuld P1_f10,P1_f10,P1_f12 + andn %l1,0x1f,%l1 + + fmuld P2_f20,P2_f20,P2_f22 + andn %l2,0x1f,%l2 + + fmuld P0_f2,C_pp2,P0_f6 + ldd [%g1+%l0],%f32 + + fmuld P1_f12,C_pp2,P1_f16 + ldd [%g1+%l1],%f36 + + fmuld P2_f22,C_pp2,P2_f26 + ldd [%g1+%l2],%f40 + + faddd P0_f6,C_pp1,P0_f6 + fmuld P0_f2,C_qq2,P0_f4 + ldd [SC_HI+%l0],%f34 + + faddd P1_f16,C_pp1,P1_f16 + fmuld P1_f12,C_qq2,P1_f14 + ldd [SC_HI+%l1],%f38 + + faddd P2_f26,C_pp1,P2_f26 + fmuld P2_f22,C_qq2,P2_f24 + ldd [SC_HI+%l2],%f42 + + fmuld P0_f2,P0_f6,P0_f6 + faddd P0_f4,C_qq1,P0_f4 + + fmuld P1_f12,P1_f16,P1_f16 + faddd P1_f14,C_qq1,P1_f14 + + fmuld P2_f22,P2_f26,P2_f26 + faddd P2_f24,C_qq1,P2_f24 + + faddd P0_f6,C_ONE,P0_f6 + fmuld P0_f2,P0_f4,P0_f4 + + faddd P1_f16,C_ONE,P1_f16 + fmuld P1_f12,P1_f14,P1_f14 + + faddd P2_f26,C_ONE,P2_f26 + fmuld P2_f22,P2_f24,P2_f24 + + fmuld P0_f0,P0_f6,P0_f6 + ldd [%o7+%l0],P0_f2 + + fmuld P1_f10,P1_f16,P1_f16 + ldd [%o7+%l1],P1_f12 + + fmuld P2_f20,P2_f26,P2_f26 + ldd [%o7+%l2],P2_f22 + + fmuld P0_f4,%f32,P0_f4 + lda [%i1]%asi,%l0 ! preload next argument + + fmuld P1_f14,%f36,P1_f14 + lda [%i1]%asi,P0_f0 + + fmuld P2_f24,%f40,P2_f24 + lda [%i1+4]%asi,P0_f1 + + fmuld P0_f6,%f34,P0_f6 + add %i1,%i2,%i1 ! 
x += stridex + + fmuld P1_f16,%f38,P1_f16 + + fmuld P2_f26,%f42,P2_f26 + + fsubd P0_f6,P0_f4,P0_f6 + + fsubd P1_f16,P1_f14,P1_f16 + + fsubd P2_f26,P2_f24,P2_f26 + + fsubd P0_f2,P0_f6,P0_f6 + + fsubd P1_f12,P1_f16,P1_f16 + + fsubd P2_f22,P2_f26,P2_f26 + + faddd P0_f6,%f32,P0_f6 + + faddd P1_f16,%f36,P1_f16 + + faddd P2_f26,%f40,P2_f26 + andn %l0,MSK_SIGN,%l0 ! hx &= ~0x80000000 + + nop !!(vsin) fors P0_f6,P0_f9,P0_f6 + addcc %i0,-1,%i0 + + nop !!(vsin) fors P1_f16,P1_f19,P1_f16 + bg,pt %icc,.loop0 + +! delay slot + nop !!(vsin) fors P2_f26,P2_f29,P2_f26 + + ba,pt %icc,.endloop0 +! delay slot + nop + + .align 32 +.case1: + st P2_f27,[%o5+4] + sethi %hi(0x3fc3c000),%o7 + fand P0_f8,MSK_BITSHI17,P0_f2 + + sub %l0,%o7,%l0 + sub %l1,%o7,%l1 + add SC_HI,8,%g1;add SC_LO,8,%o7 + fand P1_f18,MSK_BITSHI17,P1_f12 + fmuld P2_f20,P2_f20,P2_f22 + + fsubd P0_f0,P0_f2,P0_f0 + srl %l0,10,%l0 + mov %o0,%o3 + + fsubd P1_f10,P1_f12,P1_f10 + srl %l1,10,%l1 + mov %o1,%o4 + + fmuld P2_f22,C_q4,P2_f24 + mov %o2,%o5 + + fmuld P0_f0,P0_f0,P0_f2 + andn %l0,0x1f,%l0 + + fmuld P1_f10,P1_f10,P1_f12 + andn %l1,0x1f,%l1 + + faddd P2_f24,C_q3,P2_f24 + + fmuld P0_f2,C_pp2,P0_f6 + ldd [%g1+%l0],%f32 + + fmuld P1_f12,C_pp2,P1_f16 + ldd [%g1+%l1],%f36 + + fmuld P2_f22,P2_f24,P2_f24 + + faddd P0_f6,C_pp1,P0_f6 + fmuld P0_f2,C_qq2,P0_f4 + ldd [SC_HI+%l0],%f34 + + faddd P1_f16,C_pp1,P1_f16 + fmuld P1_f12,C_qq2,P1_f14 + ldd [SC_HI+%l1],%f38 + + faddd P2_f24,C_q2,P2_f24 + + fmuld P0_f2,P0_f6,P0_f6 + faddd P0_f4,C_qq1,P0_f4 + + fmuld P1_f12,P1_f16,P1_f16 + faddd P1_f14,C_qq1,P1_f14 + + fmuld P2_f22,P2_f24,P2_f24 + + faddd P0_f6,C_ONE,P0_f6 + fmuld P0_f2,P0_f4,P0_f4 + + faddd P1_f16,C_ONE,P1_f16 + fmuld P1_f12,P1_f14,P1_f14 + + faddd P2_f24,C_q1,P2_f24 + + fmuld P0_f0,P0_f6,P0_f6 + ldd [%o7+%l0],P0_f2 + + fmuld P1_f10,P1_f16,P1_f16 + ldd [%o7+%l1],P1_f12 + + fmuld P0_f4,%f32,P0_f4 + lda [%i1]%asi,%l0 ! 
preload next argument + + fmuld P1_f14,%f36,P1_f14 + lda [%i1]%asi,P0_f0 + + fmuld P0_f6,%f34,P0_f6 + lda [%i1+4]%asi,P0_f1 + + fmuld P1_f16,%f38,P1_f16 + add %i1,%i2,%i1 ! x += stridex + + fmuld P2_f22,P2_f24,P2_f24 + + fsubd P0_f6,P0_f4,P0_f6 + + fsubd P1_f16,P1_f14,P1_f16 + + !!(vsin)fmuld P2_f20,P2_f24,P2_f24 + + fsubd P0_f2,P0_f6,P0_f6 + + fsubd P1_f12,P1_f16,P1_f16 + + faddd C_ONE,P2_f24,P2_f26 !!(vsin)faddd P2_f20,P2_f24,P2_f26 + + faddd P0_f6,%f32,P0_f6 + + faddd P1_f16,%f36,P1_f16 + andn %l0,MSK_SIGN,%l0 ! hx &= ~0x80000000 + + nop !!(vsin) fors P2_f26,P2_f29,P2_f26 + addcc %i0,-1,%i0 + + nop !!(vsin) fors P0_f6,P0_f9,P0_f6 + bg,pt %icc,.loop0 + +! delay slot + nop !!(vsin) fors P1_f16,P1_f19,P1_f16 + + ba,pt %icc,.endloop0 +! delay slot + nop + + .align 32 +.case2: + st P2_f26,[%o5] + cmp %l2,LIM_l5 + fpadd32s P2_f20,MSK_BIT13,P2_f28 + bl,pn %icc,.case3 + +! delay slot + st P2_f27,[%o5+4] + sethi %hi(0x3fc3c000),%o7 + fand P0_f8,MSK_BITSHI17,P0_f2 + + sub %l0,%o7,%l0 + sub %l2,%o7,%l2 + add SC_HI,8,%g1;add SC_LO,8,%o7 + fand P2_f28,MSK_BITSHI17,P2_f22 + fmuld P1_f10,P1_f10,P1_f12 + + fsubd P0_f0,P0_f2,P0_f0 + srl %l0,10,%l0 + mov %o0,%o3 + + fsubd P2_f20,P2_f22,P2_f20 + srl %l2,10,%l2 + mov %o2,%o5 + + fmuld P1_f12,C_q4,P1_f14 + mov %o1,%o4 + + fmuld P0_f0,P0_f0,P0_f2 + andn %l0,0x1f,%l0 + + fmuld P2_f20,P2_f20,P2_f22 + andn %l2,0x1f,%l2 + + faddd P1_f14,C_q3,P1_f14 + + fmuld P0_f2,C_pp2,P0_f6 + ldd [%g1+%l0],%f32 + + fmuld P2_f22,C_pp2,P2_f26 + ldd [%g1+%l2],%f40 + + fmuld P1_f12,P1_f14,P1_f14 + + faddd P0_f6,C_pp1,P0_f6 + fmuld P0_f2,C_qq2,P0_f4 + ldd [SC_HI+%l0],%f34 + + faddd P2_f26,C_pp1,P2_f26 + fmuld P2_f22,C_qq2,P2_f24 + ldd [SC_HI+%l2],%f42 + + faddd P1_f14,C_q2,P1_f14 + + fmuld P0_f2,P0_f6,P0_f6 + faddd P0_f4,C_qq1,P0_f4 + + fmuld P2_f22,P2_f26,P2_f26 + faddd P2_f24,C_qq1,P2_f24 + + fmuld P1_f12,P1_f14,P1_f14 + + faddd P0_f6,C_ONE,P0_f6 + fmuld P0_f2,P0_f4,P0_f4 + + faddd P2_f26,C_ONE,P2_f26 + fmuld P2_f22,P2_f24,P2_f24 + + faddd 
P1_f14,C_q1,P1_f14 + + fmuld P0_f0,P0_f6,P0_f6 + ldd [%o7+%l0],P0_f2 + + fmuld P2_f20,P2_f26,P2_f26 + ldd [%o7+%l2],P2_f22 + + fmuld P0_f4,%f32,P0_f4 + lda [%i1]%asi,%l0 ! preload next argument + + fmuld P2_f24,%f40,P2_f24 + lda [%i1]%asi,P0_f0 + + fmuld P0_f6,%f34,P0_f6 + lda [%i1+4]%asi,P0_f1 + + fmuld P2_f26,%f42,P2_f26 + add %i1,%i2,%i1 ! x += stridex + + fmuld P1_f12,P1_f14,P1_f14 + + fsubd P0_f6,P0_f4,P0_f6 + + fsubd P2_f26,P2_f24,P2_f26 + + !!(vsin)fmuld P1_f10,P1_f14,P1_f14 + + fsubd P0_f2,P0_f6,P0_f6 + + fsubd P2_f22,P2_f26,P2_f26 + + faddd C_ONE,P1_f14,P1_f16 !!(vsin)faddd P1_f10,P1_f14,P1_f16 + + faddd P0_f6,%f32,P0_f6 + + faddd P2_f26,%f40,P2_f26 + andn %l0,MSK_SIGN,%l0 ! hx &= ~0x80000000 + + nop !!(vsin) fors P1_f16,P1_f19,P1_f16 + addcc %i0,-1,%i0 + + nop !!(vsin) fors P0_f6,P0_f9,P0_f6 + bg,pt %icc,.loop0 + +! delay slot + nop !!(vsin) fors P2_f26,P2_f29,P2_f26 + + ba,pt %icc,.endloop0 +! delay slot + nop + + .align 32 +.case3: + sethi %hi(0x3fc3c000),%o7 + fand P0_f8,MSK_BITSHI17,P0_f2 + fmuld P1_f10,P1_f10,P1_f12 + + sub %l0,%o7,%l0 + add SC_HI,8,%g1;add SC_LO,8,%o7 + fmuld P2_f20,P2_f20,P2_f22 + + fsubd P0_f0,P0_f2,P0_f0 + srl %l0,10,%l0 + mov %o0,%o3 + + fmuld P1_f12,C_q4,P1_f14 + mov %o1,%o4 + + fmuld P2_f22,C_q4,P2_f24 + mov %o2,%o5 + + fmuld P0_f0,P0_f0,P0_f2 + andn %l0,0x1f,%l0 + + faddd P1_f14,C_q3,P1_f14 + + faddd P2_f24,C_q3,P2_f24 + + fmuld P0_f2,C_pp2,P0_f6 + ldd [%g1+%l0],%f32 + + fmuld P1_f12,P1_f14,P1_f14 + + fmuld P2_f22,P2_f24,P2_f24 + + faddd P0_f6,C_pp1,P0_f6 + fmuld P0_f2,C_qq2,P0_f4 + ldd [SC_HI+%l0],%f34 + + faddd P1_f14,C_q2,P1_f14 + + faddd P2_f24,C_q2,P2_f24 + + fmuld P0_f2,P0_f6,P0_f6 + faddd P0_f4,C_qq1,P0_f4 + + fmuld P1_f12,P1_f14,P1_f14 + + fmuld P2_f22,P2_f24,P2_f24 + + faddd P0_f6,C_ONE,P0_f6 + fmuld P0_f2,P0_f4,P0_f4 + + faddd P1_f14,C_q1,P1_f14 + + faddd P2_f24,C_q1,P2_f24 + + fmuld P0_f0,P0_f6,P0_f6 + ldd [%o7+%l0],P0_f2 + + fmuld P0_f4,%f32,P0_f4 + lda [%i1]%asi,%l0 ! 
preload next argument + + fmuld P1_f12,P1_f14,P1_f14 + lda [%i1]%asi,P0_f0 + + fmuld P0_f6,%f34,P0_f6 + lda [%i1+4]%asi,P0_f1 + + fmuld P2_f22,P2_f24,P2_f24 + add %i1,%i2,%i1 ! x += stridex + + !!(vsin)fmuld P1_f10,P1_f14,P1_f14 + + fsubd P0_f6,P0_f4,P0_f6 + + !!(vsin)fmuld P2_f20,P2_f24,P2_f24 + + faddd C_ONE,P1_f14,P1_f16 !!(vsin)faddd P1_f10,P1_f14,P1_f16 + + fsubd P0_f2,P0_f6,P0_f6 + + faddd C_ONE,P2_f24,P2_f26 !!(vsin)faddd P2_f20,P2_f24,P2_f26 + + nop !!(vsin) fors P1_f16,P1_f19,P1_f16 + andn %l0,MSK_SIGN,%l0 ! hx &= ~0x80000000 + + faddd P0_f6,%f32,P0_f6 + addcc %i0,-1,%i0 + + nop !!(vsin) fors P2_f26,P2_f29,P2_f26 + bg,pt %icc,.loop0 + +! delay slot + nop !!(vsin) fors P0_f6,P0_f9,P0_f6 + + ba,pt %icc,.endloop0 +! delay slot + nop + + .align 32 +.case4: + st P1_f17,[%o4+4] + cmp %l1,LIM_l5 + fpadd32s P1_f10,MSK_BIT13,P1_f18 + bl,pn %icc,.case6 + +! delay slot + st P2_f26,[%o5] + cmp %l2,LIM_l5 + fpadd32s P2_f20,MSK_BIT13,P2_f28 + bl,pn %icc,.case5 + +! delay slot + st P2_f27,[%o5+4] + sethi %hi(0x3fc3c000),%o7 + fand P1_f18,MSK_BITSHI17,P1_f12 + + sub %l1,%o7,%l1 + sub %l2,%o7,%l2 + add SC_HI,8,%g1;add SC_LO,8,%o7 + fand P2_f28,MSK_BITSHI17,P2_f22 + fmuld P0_f0,P0_f0,P0_f2 + + fsubd P1_f10,P1_f12,P1_f10 + srl %l1,10,%l1 + mov %o1,%o4 + + fsubd P2_f20,P2_f22,P2_f20 + srl %l2,10,%l2 + mov %o2,%o5 + + fmovd P0_f0,P0_f6 !ID for processing + fmuld P0_f2,C_q4,P0_f4 + mov %o0,%o3 + + fmuld P1_f10,P1_f10,P1_f12 + andn %l1,0x1f,%l1 + + fmuld P2_f20,P2_f20,P2_f22 + andn %l2,0x1f,%l2 + + faddd P0_f4,C_q3,P0_f4 + + fmuld P1_f12,C_pp2,P1_f16 + ldd [%g1+%l1],%f36 + + fmuld P2_f22,C_pp2,P2_f26 + ldd [%g1+%l2],%f40 + + fmuld P0_f2,P0_f4,P0_f4 + + faddd P1_f16,C_pp1,P1_f16 + fmuld P1_f12,C_qq2,P1_f14 + ldd [SC_HI+%l1],%f38 + + faddd P2_f26,C_pp1,P2_f26 + fmuld P2_f22,C_qq2,P2_f24 + ldd [SC_HI+%l2],%f42 + + faddd P0_f4,C_q2,P0_f4 + + fmuld P1_f12,P1_f16,P1_f16 + faddd P1_f14,C_qq1,P1_f14 + + fmuld P2_f22,P2_f26,P2_f26 + faddd P2_f24,C_qq1,P2_f24 + + fmuld P0_f2,P0_f4,P0_f4 + 
+ faddd P1_f16,C_ONE,P1_f16 + fmuld P1_f12,P1_f14,P1_f14 + + faddd P2_f26,C_ONE,P2_f26 + fmuld P2_f22,P2_f24,P2_f24 + + faddd P0_f4,C_q1,P0_f4 + + fmuld P1_f10,P1_f16,P1_f16 + ldd [%o7+%l1],P1_f12 + + fmuld P2_f20,P2_f26,P2_f26 + ldd [%o7+%l2],P2_f22 + + fmuld P1_f14,%f36,P1_f14 + lda [%i1]%asi,%l0 ! preload next argument + + fmuld P2_f24,%f40,P2_f24 + lda [%i1]%asi,P0_f0 + + fmuld P1_f16,%f38,P1_f16 + lda [%i1+4]%asi,P0_f1 + + fmuld P2_f26,%f42,P2_f26 + add %i1,%i2,%i1 ! x += stridex + + fmuld P0_f2,P0_f4,P0_f4 + + fsubd P1_f16,P1_f14,P1_f16 + + fsubd P2_f26,P2_f24,P2_f26 + + !!(vsin)fmuld P0_f6,P0_f4,P0_f4 + + fsubd P1_f12,P1_f16,P1_f16 + + fsubd P2_f22,P2_f26,P2_f26 + + faddd C_ONE,P0_f4,P0_f6 !!(vsin)faddd P0_f6,P0_f4,P0_f6 ! faddd then spaces for processing + + faddd P1_f16,%f36,P1_f16 + + faddd P2_f26,%f40,P2_f26 + andn %l0,MSK_SIGN,%l0 ! hx &= ~0x80000000 + + nop !!(vsin) fors P0_f6,P0_f9,P0_f6 + addcc %i0,-1,%i0 + + nop !!(vsin) fors P1_f16,P1_f19,P1_f16 + bg,pt %icc,.loop0 + +! delay slot + nop !!(vsin) fors P2_f26,P2_f29,P2_f26 + + ba,pt %icc,.endloop0 +! 
delay slot + nop + + .align 32 +.case5: + sethi %hi(0x3fc3c000),%o7 + fand P1_f18,MSK_BITSHI17,P1_f12 + fmuld P0_f0,P0_f0,P0_f2 + + sub %l1,%o7,%l1 + add SC_HI,8,%g1;add SC_LO,8,%o7 + fmuld P2_f20,P2_f20,P2_f22 + + fsubd P1_f10,P1_f12,P1_f10 + srl %l1,10,%l1 + mov %o1,%o4 + + fmovd P0_f0,P0_f6 !ID for processing + fmuld P0_f2,C_q4,P0_f4 + mov %o0,%o3 + + fmuld P2_f22,C_q4,P2_f24 + mov %o2,%o5 + + fmuld P1_f10,P1_f10,P1_f12 + andn %l1,0x1f,%l1 + + faddd P0_f4,C_q3,P0_f4 + + faddd P2_f24,C_q3,P2_f24 + + fmuld P1_f12,C_pp2,P1_f16 + ldd [%g1+%l1],%f36 + + fmuld P0_f2,P0_f4,P0_f4 + + fmuld P2_f22,P2_f24,P2_f24 + + faddd P1_f16,C_pp1,P1_f16 + fmuld P1_f12,C_qq2,P1_f14 + ldd [SC_HI+%l1],%f38 + + faddd P0_f4,C_q2,P0_f4 + + faddd P2_f24,C_q2,P2_f24 + + fmuld P1_f12,P1_f16,P1_f16 + faddd P1_f14,C_qq1,P1_f14 + + fmuld P0_f2,P0_f4,P0_f4 + + fmuld P2_f22,P2_f24,P2_f24 + + faddd P1_f16,C_ONE,P1_f16 + fmuld P1_f12,P1_f14,P1_f14 + + faddd P0_f4,C_q1,P0_f4 + + faddd P2_f24,C_q1,P2_f24 + + fmuld P1_f10,P1_f16,P1_f16 + ldd [%o7+%l1],P1_f12 + + fmuld P1_f14,%f36,P1_f14 + lda [%i1]%asi,%l0 ! preload next argument + + fmuld P0_f2,P0_f4,P0_f4 + lda [%i1]%asi,P0_f0 + + fmuld P1_f16,%f38,P1_f16 + lda [%i1+4]%asi,P0_f1 + + fmuld P2_f22,P2_f24,P2_f24 + add %i1,%i2,%i1 ! x += stridex + + !!(vsin)fmuld P0_f6,P0_f4,P0_f4 + + fsubd P1_f16,P1_f14,P1_f16 + + !!(vsin)fmuld P2_f20,P2_f24,P2_f24 + + faddd C_ONE,P0_f4,P0_f6 !!(vsin)faddd P0_f6,P0_f4,P0_f6 ! faddd then spaces for processing + + fsubd P1_f12,P1_f16,P1_f16 + + faddd C_ONE,P2_f24,P2_f26 !!(vsin)faddd P2_f20,P2_f24,P2_f26 + + nop !!(vsin) fors P0_f6,P0_f9,P0_f6 + andn %l0,MSK_SIGN,%l0 ! hx &= ~0x80000000 + + faddd P1_f16,%f36,P1_f16 + addcc %i0,-1,%i0 + + nop !!(vsin) fors P2_f26,P2_f29,P2_f26 + bg,pt %icc,.loop0 + +! delay slot + nop !!(vsin) fors P1_f16,P1_f19,P1_f16 + + ba,pt %icc,.endloop0 +! delay slot + nop + + .align 32 +.case6: + st P2_f27,[%o5+4] + cmp %l2,LIM_l5 + fpadd32s P2_f20,MSK_BIT13,P2_f28 + bl,pn %icc,.case7 + +! 
delay slot + sethi %hi(0x3fc3c000),%o7 + fand P2_f28,MSK_BITSHI17,P2_f22 + fmuld P0_f0,P0_f0,P0_f2 + + sub %l2,%o7,%l2 + add SC_HI,8,%g1;add SC_LO,8,%o7 + fmuld P1_f10,P1_f10,P1_f12 + + fsubd P2_f20,P2_f22,P2_f20 + srl %l2,10,%l2 + mov %o2,%o5 + + fmovd P0_f0,P0_f6 !ID for processing + fmuld P0_f2,C_q4,P0_f4 + mov %o0,%o3 + + fmuld P1_f12,C_q4,P1_f14 + mov %o1,%o4 + + fmuld P2_f20,P2_f20,P2_f22 + andn %l2,0x1f,%l2 + + faddd P0_f4,C_q3,P0_f4 + + faddd P1_f14,C_q3,P1_f14 + + fmuld P2_f22,C_pp2,P2_f26 + ldd [%g1+%l2],%f40 + + fmuld P0_f2,P0_f4,P0_f4 + + fmuld P1_f12,P1_f14,P1_f14 + + faddd P2_f26,C_pp1,P2_f26 + fmuld P2_f22,C_qq2,P2_f24 + ldd [SC_HI+%l2],%f42 + + faddd P0_f4,C_q2,P0_f4 + + faddd P1_f14,C_q2,P1_f14 + + fmuld P2_f22,P2_f26,P2_f26 + faddd P2_f24,C_qq1,P2_f24 + + fmuld P0_f2,P0_f4,P0_f4 + + fmuld P1_f12,P1_f14,P1_f14 + + faddd P2_f26,C_ONE,P2_f26 + fmuld P2_f22,P2_f24,P2_f24 + + faddd P0_f4,C_q1,P0_f4 + + faddd P1_f14,C_q1,P1_f14 + + fmuld P2_f20,P2_f26,P2_f26 + ldd [%o7+%l2],P2_f22 + + fmuld P2_f24,%f40,P2_f24 + lda [%i1]%asi,%l0 ! preload next argument + + fmuld P0_f2,P0_f4,P0_f4 + lda [%i1]%asi,P0_f0 + + fmuld P2_f26,%f42,P2_f26 + lda [%i1+4]%asi,P0_f1 + + fmuld P1_f12,P1_f14,P1_f14 + add %i1,%i2,%i1 ! x += stridex + + !!(vsin)fmuld P0_f6,P0_f4,P0_f4 + + fsubd P2_f26,P2_f24,P2_f26 + + !!(vsin)fmuld P1_f10,P1_f14,P1_f14 + + faddd C_ONE,P0_f4,P0_f6 !!(vsin)faddd P0_f6,P0_f4,P0_f6 ! faddd then spaces for processing + + fsubd P2_f22,P2_f26,P2_f26 + + faddd C_ONE,P1_f14,P1_f16 !!(vsin)faddd P1_f10,P1_f14,P1_f16 + + nop !!(vsin) fors P0_f6,P0_f9,P0_f6 + andn %l0,MSK_SIGN,%l0 ! hx &= ~0x80000000 + + faddd P2_f26,%f40,P2_f26 + addcc %i0,-1,%i0 + + nop !!(vsin) fors P1_f16,P1_f19,P1_f16 + bg,pt %icc,.loop0 + +! delay slot + nop !!(vsin) fors P2_f26,P2_f29,P2_f26 + + ba,pt %icc,.endloop0 +! 
delay slot + nop + + .align 32 +.case7: + fmuld P0_f0,P0_f0,P0_f2 + fmovd P0_f0,P0_f6 !ID for processing + mov %o0,%o3 + + fmuld P1_f10,P1_f10,P1_f12 + mov %o1,%o4 + + fmuld P2_f20,P2_f20,P2_f22 + mov %o2,%o5 + + fmuld P0_f2,C_q4,P0_f4 + lda [%i1]%asi,%l0 ! preload next argument + + fmuld P1_f12,C_q4,P1_f14 + lda [%i1]%asi,P0_f0 + + fmuld P2_f22,C_q4,P2_f24 + lda [%i1+4]%asi,P0_f1 + + faddd P0_f4,C_q3,P0_f4 + add %i1,%i2,%i1 ! x += stridex + + faddd P1_f14,C_q3,P1_f14 + + faddd P2_f24,C_q3,P2_f24 + + fmuld P0_f2,P0_f4,P0_f4 + + fmuld P1_f12,P1_f14,P1_f14 + + fmuld P2_f22,P2_f24,P2_f24 + + faddd P0_f4,C_q2,P0_f4 + + faddd P1_f14,C_q2,P1_f14 + + faddd P2_f24,C_q2,P2_f24 + + fmuld P0_f2,P0_f4,P0_f4 + + fmuld P1_f12,P1_f14,P1_f14 + + fmuld P2_f22,P2_f24,P2_f24 + + faddd P0_f4,C_q1,P0_f4 + + faddd P1_f14,C_q1,P1_f14 + + faddd P2_f24,C_q1,P2_f24 + + fmuld P0_f2,P0_f4,P0_f4 + + fmuld P1_f12,P1_f14,P1_f14 + + fmuld P2_f22,P2_f24,P2_f24 + + !!(vsin)fmuld P0_f6,P0_f4,P0_f4 + + !!(vsin)fmuld P1_f10,P1_f14,P1_f14 + + !!(vsin)fmuld P2_f20,P2_f24,P2_f24 + + faddd C_ONE,P0_f4,P0_f6 !!(vsin)faddd P0_f6,P0_f4,P0_f6 ! faddd then spaces for processing + + faddd C_ONE,P1_f14,P1_f16 !!(vsin)faddd P1_f10,P1_f14,P1_f16 + + faddd C_ONE,P2_f24,P2_f26 !!(vsin)faddd P2_f20,P2_f24,P2_f26 + andn %l0,MSK_SIGN,%l0 ! hx &= ~0x80000000 + + nop !!(vsin) fors P0_f6,P0_f9,P0_f6 + addcc %i0,-1,%i0 + + nop !!(vsin) fors P1_f16,P1_f19,P1_f16 + bg,pt %icc,.loop0 + +! delay slot + nop !!(vsin) fors P2_f26,P2_f29,P2_f26 + + ba,pt %icc,.endloop0 +! delay slot + nop + + + .align 32 +.endloop2: + cmp %l1,LIM_l5 + bl,pn %icc,1f +! 
delay slot + fabsd P1_f10,P1_f10 + sethi %hi(0x3fc3c000),%o7 + fpadd32s P1_f10,MSK_BIT13,P1_f18 + fand P1_f18,MSK_BITSHI17,P1_f12 + sub %l1,%o7,%l1 + add SC_HI,8,%g1;add SC_LO,8,%o7 + fsubd P1_f10,P1_f12,P1_f10 + srl %l1,10,%l1 + fmuld P1_f10,P1_f10,P1_f12 + andn %l1,0x1f,%l1 + fmuld P1_f12,C_pp2,P2_f20 + ldd [%g1+%l1],%f36 + faddd P2_f20,C_pp1,P2_f20 + fmuld P1_f12,C_qq2,P1_f14 + ldd [SC_HI+%l1],%f38 + fmuld P1_f12,P2_f20,P2_f20 + faddd P1_f14,C_qq1,P1_f14 + faddd P2_f20,C_ONE,P2_f20 + fmuld P1_f12,P1_f14,P1_f14 + fmuld P1_f10,P2_f20,P2_f20 + ldd [%o7+%l1],P1_f12 + fmuld P1_f14,%f36,P1_f14 + fmuld P2_f20,%f38,P2_f20 + fsubd P2_f20,P1_f14,P2_f20 + fsubd P1_f12,P2_f20,P2_f20 + ba,pt %icc,2f +! delay slot + faddd P2_f20,%f36,P2_f20 +1: + fmuld P1_f10,P1_f10,P1_f12 + fmuld P1_f12,C_q4,P1_f14 + faddd P1_f14,C_q3,P1_f14 + fmuld P1_f12,P1_f14,P1_f14 + faddd P1_f14,C_q2,P1_f14 + fmuld P1_f12,P1_f14,P1_f14 + faddd P1_f14,C_q1,P1_f14 + fmuld P1_f12,P1_f14,P1_f14 + !!(vsin)fmuld P1_f10,P1_f14,P1_f14 + faddd C_ONE,P1_f14,P2_f20 !!(vsin)faddd P1_f10,P1_f14,P2_f20 +2: + nop !!(vsin) fors P2_f20,P1_f19,P2_f20 + st P2_f20,[%o1] + st P2_f21,[%o1+4] + +.endloop1: + cmp %l0,LIM_l5 + bl,pn %icc,1f +! delay slot + fabsd P0_f0,P0_f0 + sethi %hi(0x3fc3c000),%o7 + fpadd32s P0_f0,MSK_BIT13,P0_f8 + fand P0_f8,MSK_BITSHI17,P0_f2 + sub %l0,%o7,%l0 + add SC_HI,8,%g1;add SC_LO,8,%o7 + fsubd P0_f0,P0_f2,P0_f0 + srl %l0,10,%l0 + fmuld P0_f0,P0_f0,P0_f2 + andn %l0,0x1f,%l0 + fmuld P0_f2,C_pp2,P2_f20 + ldd [%g1+%l0],%f32 + faddd P2_f20,C_pp1,P2_f20 + fmuld P0_f2,C_qq2,P0_f4 + ldd [SC_HI+%l0],%f34 + fmuld P0_f2,P2_f20,P2_f20 + faddd P0_f4,C_qq1,P0_f4 + faddd P2_f20,C_ONE,P2_f20 + fmuld P0_f2,P0_f4,P0_f4 + fmuld P0_f0,P2_f20,P2_f20 + ldd [%o7+%l0],P0_f2 + fmuld P0_f4,%f32,P0_f4 + fmuld P2_f20,%f34,P2_f20 + fsubd P2_f20,P0_f4,P2_f20 + fsubd P0_f2,P2_f20,P2_f20 + ba,pt %icc,2f +! 
delay slot + faddd P2_f20,%f32,P2_f20 +1: + fmuld P0_f0,P0_f0,P0_f2 + fmuld P0_f2,C_q4,P0_f4 + faddd P0_f4,C_q3,P0_f4 + fmuld P0_f2,P0_f4,P0_f4 + faddd P0_f4,C_q2,P0_f4 + fmuld P0_f2,P0_f4,P0_f4 + faddd P0_f4,C_q1,P0_f4 + fmuld P0_f2,P0_f4,P0_f4 + !!(vsin)fmuld P0_f0,P0_f4,P0_f4 + faddd C_ONE,P0_f4,P2_f20 !!(vsin)faddd P0_f0,P0_f4,P2_f20 +2: + nop !!(vsin) fors P2_f20,P0_f9,P2_f20 + st P2_f20,[%o0] + st P2_f21,[%o0+4] + +.endloop0: + st P0_f6,[%o3] + st P0_f7,[%o3+4] + st P1_f16,[%o4] + st P1_f17,[%o4+4] + st P2_f26,[%o5] + st P2_f27,[%o5+4] + +! return. finished off with only primary range arguments + + ret + restore + + + .align 32 +.range0: + cmp %l0,LIM_l6 + bg,a,pt %icc,.MEDIUM ! branch to Medium range on big arg. +! delay slot, annulled if branch not taken + mov 0x1,LIM_l6 ! set biguns flag or + fdtoi P0_f0,P0_f2; fmovd C_ONE,P0_f0 ; st P0_f0,[%o0] ! *y = C_ONE with inexact if x nonzero + st P0_f1,[%o0+4] + !nop ! (vsin) fdtoi P0_f0,P0_f2 + addcc %i0,-1,%i0 + ble,pn %icc,.endloop0 +! delay slot, harmless if branch taken + add %i3,%i4,%i3 ! y += stridey + andn %l1,MSK_SIGN,%l0 ! hx &= ~0x80000000 + fmovd P1_f10,P0_f0 + ba,pt %icc,.loop0 +! delay slot + add %i1,%i2,%i1 ! x += stridex + + + .align 32 +.range1: + cmp %l1,LIM_l6 + bg,a,pt %icc,.MEDIUM ! branch to Medium range on big arg. +! delay slot, annulled if branch not taken + mov 0x2,LIM_l6 ! set biguns flag or + fdtoi P1_f10,P1_f12; fmovd C_ONE,P1_f10 ; st P1_f10,[%o1] ! *y = C_ONE with inexact if x nonzero + st P1_f11,[%o1+4] + !nop ! (vsin) fdtoi P1_f10,P1_f12 + addcc %i0,-1,%i0 + ble,pn %icc,.endloop1 +! delay slot, harmless if branch taken + add %i3,%i4,%i3 ! y += stridey + andn %l2,MSK_SIGN,%l1 ! hx &= ~0x80000000 + fmovd P2_f20,P1_f10 + ba,pt %icc,.loop1 +! delay slot + add %i1,%i2,%i1 ! x += stridex + + + .align 32 +.range2: + cmp %l2,LIM_l6 + bg,a,pt %icc,.MEDIUM ! branch to Medium range on big arg. +! delay slot, annulled if branch not taken + mov 0x3,LIM_l6 ! 
set biguns flag or + fdtoi P2_f20,P2_f22; fmovd C_ONE,P2_f20 ; st P2_f20,[%o2] ! *y = C_ONE with inexact if x nonzero + st P2_f21,[%o2+4] + nop ! (vsin) fdtoi P2_f20,P2_f22 +1: + addcc %i0,-1,%i0 + ble,pn %icc,.endloop2 +! delay slot + nop + ld [%i1],%l2 + ld [%i1],P2_f20 + ld [%i1+4],P2_f21 + andn %l2,MSK_SIGN,%l2 ! hx &= ~0x80000000 + ba,pt %icc,.loop2 +! delay slot + add %i1,%i2,%i1 ! x += stridex + + + .align 32 +.MEDIUM: + +! ========== medium range ========== + +! register use + +! i0 n +! i1 x +! i2 stridex +! i3 y +! i4 stridey +! i5 0x80000000 + +! l0 hx0 +! l1 hx1 +! l2 hx2 +! l3 __vlibm_TBL_sincos_hi +! l4 __vlibm_TBL_sincos_lo +! l5 constants +! l6 biguns stored here : still called LIM_l6 +! l7 0x413921fb + +! the following are 64-bit registers in both V8+ and V9 + +! g1 scratch +! g5 + +! o0 py0 +! o1 py1 +! o2 py2 +! o3 n0 +! o4 n1 +! o5 n2 +! o7 scratch + +! f0 x0 +! f2 n0,y0 +! f4 +! f6 +! f8 scratch for table base +! f9 signbit0 +! f10 x1 +! f12 n1,y1 +! f14 +! f16 +! f18 scratch for table base +! f19 signbit1 +! f20 x2 +! f22 n2,y2 +! f24 +! f26 +! f28 scratch for table base +! f29 signbit2 +! f30 0x80000000 +! f31 0x4000 +! f32 +! f34 +! f36 +! f38 +! f40 invpio2 +! f42 round +! f44 0xffff800000000000 +! f46 pio2_1 +! f48 pio2_2 +! f50 pio2_3 +! f52 pio2_3t +! f54 one +! f56 pp1 +! f58 pp2 +! f60 qq1 +! f62 qq2 + + + PIC_SET(g5,constants,l5) + + ! %o3,%o4,%o5 need to be stored + st P0_f6,[%o3] + sethi %hi(0x413921fb),%l7 + st P0_f7,[%o3+4] + or %l7,%lo(0x413921fb),%l7 + st P1_f16,[%o4] + st P1_f17,[%o4+4] + st P2_f26,[%o5] + st P2_f27,[%o5+4] + ldd [%l5+invpio2],%f40 + ldd [%l5+round],%f42 + ldd [%l5+pio2_1],%f46 + ldd [%l5+pio2_2],%f48 + ldd [%l5+pio2_3],%f50 + ldd [%l5+pio2_3t],%f52 + std %f54,[%fp+x0_1+8] ! set up stack data + std %f54,[%fp+x1_1+8] + std %f54,[%fp+x2_1+8] + stx %g0,[%fp+y0_0+8] + stx %g0,[%fp+y1_0+8] + stx %g0,[%fp+y2_0+8] + +! branched here in the middle of the array. Need to adjust +! 
for the members of the triple that were selected in the primary +! loop. + +! no adjustment since all three selected here + subcc LIM_l6,0x1,%g0 ! continue in LOOP0? + bz,a %icc,.LOOP0 + mov 0x0,LIM_l6 ! delay slot set biguns=0 + +! adjust 1st triple since 2d and 3d done here + subcc LIM_l6,0x2,%g0 ! continue in LOOP1? + fmuld %f0,%f40,%f2 ! adj LOOP0 + bz,a %icc,.LOOP1 + mov 0x0,LIM_l6 ! delay slot set biguns=0 + +! adjust 1st and 2d triple since 3d done here + subcc LIM_l6,0x3,%g0 ! continue in LOOP2? + !done fmuld %f0,%f40,%f2 ! adj LOOP0 + sub %i3,%i4,%i3 ! adjust to not double increment + fmuld %f10,%f40,%f12 ! adj LOOP1 + faddd %f2,%f42,%f2 ! adj LOOP1 + bz,a %icc,.LOOP2 + mov 0x0,LIM_l6 ! delay slot set biguns=0 + + ba .LOOP0 + nop + +! -- 16 byte aligned + + .align 32 +.LOOP0: + lda [%i1]%asi,%l1 ! preload next argument + mov %i3,%o0 ! py0 = y + + lda [%i1]%asi,%f10 + cmp %l0,%l7 + add %i3,%i4,%i3 ! y += stridey + bg,pn %icc,.BIG0 ! if hx > 0x413921fb + +! delay slot + lda [%i1+4]%asi,%f11 + addcc %i0,-1,%i0 + add %i1,%i2,%i1 ! x += stridex + ble,pn %icc,.ENDLOOP1 + +! delay slot + andn %l1,%i5,%l1 + nop + fmuld %f0,%f40,%f2 + fabsd %f54,%f54 ! a nop for alignment only + +.LOOP1: + lda [%i1]%asi,%l2 ! preload next argument + mov %i3,%o1 ! py1 = y + + lda [%i1]%asi,%f20 + cmp %l1,%l7 + add %i3,%i4,%i3 ! y += stridey + bg,pn %icc,.BIG1 ! if hx > 0x413921fb + +! delay slot + lda [%i1+4]%asi,%f21 + addcc %i0,-1,%i0 + add %i1,%i2,%i1 ! x += stridex + ble,pn %icc,.ENDLOOP2 + +! delay slot + andn %l2,%i5,%l2 + nop + fmuld %f10,%f40,%f12 + faddd %f2,%f42,%f2 + +.LOOP2: + st %f3,[%fp+n0] + mov %i3,%o2 ! py2 = y + + cmp %l2,%l7 + add %i3,%i4,%i3 ! y += stridey + fmuld %f20,%f40,%f22 + bg,pn %icc,.BIG2 ! if hx > 0x413921fb + +! delay slot + add %l5,thresh+4,%o7 + faddd %f12,%f42,%f12 + st %f13,[%fp+n1] + +! - + + add %l5,thresh,%g1 + faddd %f22,%f42,%f22 + st %f23,[%fp+n2] + + fsubd %f2,%f42,%f2 ! n + + fsubd %f12,%f42,%f12 ! n + + fsubd %f22,%f42,%f22 ! 
n + + fmuld %f2,%f46,%f4 + + fmuld %f12,%f46,%f14 + + fmuld %f22,%f46,%f24 + + fsubd %f0,%f4,%f4 + fmuld %f2,%f48,%f6 + + fsubd %f10,%f14,%f14 + fmuld %f12,%f48,%f16 + + fsubd %f20,%f24,%f24 + fmuld %f22,%f48,%f26 + + fsubd %f4,%f6,%f0 + ld [%fp+n0],%o3 ; add %o3,1,%o3 + + fsubd %f14,%f16,%f10 + ld [%fp+n1],%o4 ; add %o4,1,%o4 + + fsubd %f24,%f26,%f20 + ld [%fp+n2],%o5 ; add %o5,1,%o5 + + fsubd %f4,%f0,%f32 + and %o3,1,%o3 + + fsubd %f14,%f10,%f34 + and %o4,1,%o4 + + fsubd %f24,%f20,%f36 + and %o5,1,%o5 + + fsubd %f32,%f6,%f32 + fmuld %f2,%f50,%f8 + sll %o3,3,%o3 + + fsubd %f34,%f16,%f34 + fmuld %f12,%f50,%f18 + sll %o4,3,%o4 + + fsubd %f36,%f26,%f36 + fmuld %f22,%f50,%f28 + sll %o5,3,%o5 + + fsubd %f8,%f32,%f8 + ld [%g1+%o3],%f6 + + fsubd %f18,%f34,%f18 + ld [%g1+%o4],%f16 + + fsubd %f28,%f36,%f28 + ld [%g1+%o5],%f26 + + fsubd %f0,%f8,%f4 + + fsubd %f10,%f18,%f14 + + fsubd %f20,%f28,%f24 + + fsubd %f0,%f4,%f32 + + fsubd %f10,%f14,%f34 + + fsubd %f20,%f24,%f36 + + fsubd %f32,%f8,%f32 + fmuld %f2,%f52,%f2 + + fsubd %f34,%f18,%f34 + fmuld %f12,%f52,%f12 + + fsubd %f36,%f28,%f36 + fmuld %f22,%f52,%f22 + + fsubd %f2,%f32,%f2 + ld [%o7+%o3],%f8 + + fsubd %f12,%f34,%f12 + ld [%o7+%o4],%f18 + + fsubd %f22,%f36,%f22 + ld [%o7+%o5],%f28 + + fsubd %f4,%f2,%f0 ! x + + fsubd %f14,%f12,%f10 ! x + + fsubd %f24,%f22,%f20 ! x + + fsubd %f4,%f0,%f4 + + fsubd %f14,%f10,%f14 + + fsubd %f24,%f20,%f24 + + fands %f0,%f30,%f9 ! save signbit + + fands %f10,%f30,%f19 ! save signbit + + fands %f20,%f30,%f29 ! save signbit + + fabsd %f0,%f0 + std %f0,[%fp+x0_1] + + fabsd %f10,%f10 + std %f10,[%fp+x1_1] + + fabsd %f20,%f20 + std %f20,[%fp+x2_1] + + fsubd %f4,%f2,%f2 ! y + + fsubd %f14,%f12,%f12 ! y + + fsubd %f24,%f22,%f22 ! y + + fcmpgt32 %f6,%f0,%l0 + + fcmpgt32 %f16,%f10,%l1 + + fcmpgt32 %f26,%f20,%l2 + +! -- 16 byte aligned + fxors %f2,%f9,%f2 + + fxors %f12,%f19,%f12 + + fxors %f22,%f29,%f22 + + fands %f9,%f8,%f9 ! if (n & 1) clear sign bit + andcc %l0,2,%g0 + bne,pn %icc,.CASE4 + +! 
delay slot + fands %f19,%f18,%f19 ! if (n & 1) clear sign bit + andcc %l1,2,%g0 + bne,pn %icc,.CASE2 + +! delay slot + fands %f29,%f28,%f29 ! if (n & 1) clear sign bit + andcc %l2,2,%g0 + bne,pn %icc,.CASE1 + +! delay slot + fpadd32s %f0,%f31,%f8 + sethi %hi(0x3fc3c000),%o7 + ld [%fp+x0_1],%l0 + + fpadd32s %f10,%f31,%f18 + add %l3,8,%g1 + ld [%fp+x1_1],%l1 + + fpadd32s %f20,%f31,%f28 + ld [%fp+x2_1],%l2 + + fand %f8,%f44,%f4 + sub %l0,%o7,%l0 + + fand %f18,%f44,%f14 + sub %l1,%o7,%l1 + + fand %f28,%f44,%f24 + sub %l2,%o7,%l2 + + fsubd %f0,%f4,%f0 + srl %l0,10,%l0 + + fsubd %f10,%f14,%f10 + srl %l1,10,%l1 + + fsubd %f20,%f24,%f20 + srl %l2,10,%l2 + + faddd %f0,%f2,%f0 + andn %l0,0x1f,%l0 + + faddd %f10,%f12,%f10 + andn %l1,0x1f,%l1 + + faddd %f20,%f22,%f20 + andn %l2,0x1f,%l2 + + fmuld %f0,%f0,%f2 + add %l0,%o3,%l0 + + fmuld %f10,%f10,%f12 + add %l1,%o4,%l1 + + fmuld %f20,%f20,%f22 + add %l2,%o5,%l2 + + fmuld %f2,%f58,%f6 + ldd [%l3+%l0],%f32 + + fmuld %f12,%f58,%f16 + ldd [%l3+%l1],%f34 + + fmuld %f22,%f58,%f26 + ldd [%l3+%l2],%f36 + + faddd %f6,%f56,%f6 + fmuld %f2,%f62,%f4 + + faddd %f16,%f56,%f16 + fmuld %f12,%f62,%f14 + + faddd %f26,%f56,%f26 + fmuld %f22,%f62,%f24 + + fmuld %f2,%f6,%f6 + faddd %f4,%f60,%f4 + + fmuld %f12,%f16,%f16 + faddd %f14,%f60,%f14 + + fmuld %f22,%f26,%f26 + faddd %f24,%f60,%f24 + + faddd %f6,%f54,%f6 + fmuld %f2,%f4,%f4 + + faddd %f16,%f54,%f16 + fmuld %f12,%f14,%f14 + + faddd %f26,%f54,%f26 + fmuld %f22,%f24,%f24 + + fmuld %f0,%f6,%f6 + ldd [%g1+%l0],%f2 + + fmuld %f10,%f16,%f16 + ldd [%g1+%l1],%f12 + + fmuld %f20,%f26,%f26 + ldd [%g1+%l2],%f22 + + fmuld %f4,%f32,%f4 + ldd [%l4+%l0],%f0 + + fmuld %f14,%f34,%f14 + ldd [%l4+%l1],%f10 + + fmuld %f24,%f36,%f24 + ldd [%l4+%l2],%f20 + + fmuld %f6,%f2,%f6 + + fmuld %f16,%f12,%f16 + + fmuld %f26,%f22,%f26 + + faddd %f6,%f4,%f6 + + faddd %f16,%f14,%f16 + + faddd %f26,%f24,%f26 + + faddd %f6,%f0,%f6 + + faddd %f16,%f10,%f16 + + faddd %f26,%f20,%f26 + + faddd %f6,%f32,%f6 + + faddd %f16,%f34,%f16 
+ + faddd %f26,%f36,%f26 + +.FIXSIGN: + ld [%fp+n0],%o3 ; add %o3,1,%o3 + add %l5,thresh-4,%g1 + + ld [%fp+n1],%o4 ; add %o4,1,%o4 + + ld [%fp+n2],%o5 ; add %o5,1,%o5 + and %o3,2,%o3 + + sll %o3,2,%o3 + and %o4,2,%o4 + lda [%i1]%asi,%l0 ! preload next argument + + sll %o4,2,%o4 + and %o5,2,%o5 + ld [%g1+%o3],%f8 + + sll %o5,2,%o5 + ld [%g1+%o4],%f18 + + ld [%g1+%o5],%f28 + fxors %f9,%f8,%f9 + + lda [%i1]%asi,%f0 + fxors %f29,%f28,%f29 + + lda [%i1+4]%asi,%f1 + fxors %f19,%f18,%f19 + + fors %f6,%f9,%f6 ! tack on sign + add %i1,%i2,%i1 ! x += stridex + st %f6,[%o0] + + fors %f26,%f29,%f26 ! tack on sign + st %f7,[%o0+4] + + fors %f16,%f19,%f16 ! tack on sign + st %f26,[%o2] + + st %f27,[%o2+4] + addcc %i0,-1,%i0 + + st %f16,[%o1] + andn %l0,%i5,%l0 ! hx &= ~0x80000000 + bg,pt %icc,.LOOP0 + +! delay slot + st %f17,[%o1+4] + + ba,pt %icc,.ENDLOOP0 +! delay slot + nop + + .align 32 +.CASE1: + fpadd32s %f10,%f31,%f18 + sethi %hi(0x3fc3c000),%o7 + ld [%fp+x0_1],%l0 + + fand %f8,%f44,%f4 + add %l3,8,%g1 + ld [%fp+x1_1],%l1 + + fand %f18,%f44,%f14 + sub %l0,%o7,%l0 + + fsubd %f0,%f4,%f0 + srl %l0,10,%l0 + sub %l1,%o7,%l1 + + fsubd %f10,%f14,%f10 + srl %l1,10,%l1 + + fmuld %f20,%f20,%f20 + ldd [%l5+%o5],%f36 + add %l5,%o5,%l2 + + faddd %f0,%f2,%f0 + andn %l0,0x1f,%l0 + + faddd %f10,%f12,%f10 + andn %l1,0x1f,%l1 + + fmuld %f20,%f36,%f24 + ldd [%l2+0x10],%f26 + add %fp,%o5,%o5 + + fmuld %f0,%f0,%f2 + add %l0,%o3,%l0 + + fmuld %f10,%f10,%f12 + add %l1,%o4,%l1 + + faddd %f24,%f26,%f24 + ldd [%l2+0x20],%f36 + + fmuld %f2,%f58,%f6 + ldd [%l3+%l0],%f32 + + fmuld %f12,%f58,%f16 + ldd [%l3+%l1],%f34 + + fmuld %f20,%f24,%f24 + ldd [%l2+0x30],%f26 + + faddd %f6,%f56,%f6 + fmuld %f2,%f62,%f4 + + faddd %f16,%f56,%f16 + fmuld %f12,%f62,%f14 + + faddd %f24,%f36,%f24 + ldd [%o5+x2_1],%f36 + + fmuld %f2,%f6,%f6 + faddd %f4,%f60,%f4 + + fmuld %f12,%f16,%f16 + faddd %f14,%f60,%f14 + + fmuld %f20,%f24,%f24 + + faddd %f6,%f54,%f6 + fmuld %f2,%f4,%f4 + ldd [%g1+%l0],%f2 + + faddd %f16,%f54,%f16 + 
fmuld %f12,%f14,%f14 + ldd [%g1+%l1],%f12 + + faddd %f24,%f26,%f24 + + fmuld %f0,%f6,%f6 + ldd [%l4+%l0],%f0 + + fmuld %f10,%f16,%f16 + ldd [%l4+%l1],%f10 + + fmuld %f4,%f32,%f4 + std %f22,[%fp+y2_0] + + fmuld %f14,%f34,%f14 + + fmuld %f6,%f2,%f6 + + fmuld %f16,%f12,%f16 + + fmuld %f20,%f24,%f24 + + faddd %f6,%f4,%f6 + + faddd %f16,%f14,%f16 + + fmuld %f36,%f24,%f24 + ldd [%o5+y2_0],%f22 + + faddd %f6,%f0,%f6 + + faddd %f16,%f10,%f16 + + faddd %f24,%f22,%f24 + + faddd %f6,%f32,%f6 + + faddd %f16,%f34,%f16 + ba,pt %icc,.FIXSIGN + +! delay slot + faddd %f36,%f24,%f26 + + .align 32 +.CASE2: + fpadd32s %f0,%f31,%f8 + ld [%fp+x0_1],%l0 + andcc %l2,2,%g0 + bne,pn %icc,.CASE3 + +! delay slot + sethi %hi(0x3fc3c000),%o7 + fpadd32s %f20,%f31,%f28 + ld [%fp+x2_1],%l2 + + fand %f8,%f44,%f4 + sub %l0,%o7,%l0 + add %l3,8,%g1 + + fand %f28,%f44,%f24 + sub %l2,%o7,%l2 + + fsubd %f0,%f4,%f0 + srl %l0,10,%l0 + + fsubd %f20,%f24,%f20 + srl %l2,10,%l2 + + fmuld %f10,%f10,%f10 + ldd [%l5+%o4],%f34 + add %l5,%o4,%l1 + + faddd %f0,%f2,%f0 + andn %l0,0x1f,%l0 + + faddd %f20,%f22,%f20 + andn %l2,0x1f,%l2 + + fmuld %f10,%f34,%f14 + ldd [%l1+0x10],%f16 + add %fp,%o4,%o4 + + fmuld %f0,%f0,%f2 + add %l0,%o3,%l0 + + fmuld %f20,%f20,%f22 + add %l2,%o5,%l2 + + faddd %f14,%f16,%f14 + ldd [%l1+0x20],%f34 + + fmuld %f2,%f58,%f6 + ldd [%l3+%l0],%f32 + + fmuld %f22,%f58,%f26 + ldd [%l3+%l2],%f36 + + fmuld %f10,%f14,%f14 + ldd [%l1+0x30],%f16 + + faddd %f6,%f56,%f6 + fmuld %f2,%f62,%f4 + + faddd %f26,%f56,%f26 + fmuld %f22,%f62,%f24 + + faddd %f14,%f34,%f14 + ldd [%o4+x1_1],%f34 + + fmuld %f2,%f6,%f6 + faddd %f4,%f60,%f4 + + fmuld %f22,%f26,%f26 + faddd %f24,%f60,%f24 + + fmuld %f10,%f14,%f14 + + faddd %f6,%f54,%f6 + fmuld %f2,%f4,%f4 + ldd [%g1+%l0],%f2 + + faddd %f26,%f54,%f26 + fmuld %f22,%f24,%f24 + ldd [%g1+%l2],%f22 + + faddd %f14,%f16,%f14 + + fmuld %f0,%f6,%f6 + ldd [%l4+%l0],%f0 + + fmuld %f20,%f26,%f26 + ldd [%l4+%l2],%f20 + + fmuld %f4,%f32,%f4 + std %f12,[%fp+y1_0] + + fmuld %f24,%f36,%f24 
+ + fmuld %f6,%f2,%f6 + + fmuld %f26,%f22,%f26 + + fmuld %f10,%f14,%f14 + + faddd %f6,%f4,%f6 + + faddd %f26,%f24,%f26 + + fmuld %f34,%f14,%f14 + ldd [%o4+y1_0],%f12 + + faddd %f6,%f0,%f6 + + faddd %f26,%f20,%f26 + + faddd %f14,%f12,%f14 + + faddd %f6,%f32,%f6 + + faddd %f26,%f36,%f26 + ba,pt %icc,.FIXSIGN + +! delay slot + faddd %f34,%f14,%f16 + + .align 32 +.CASE3: + fand %f8,%f44,%f4 + add %l3,8,%g1 + sub %l0,%o7,%l0 + + fmuld %f10,%f10,%f10 + ldd [%l5+%o4],%f34 + add %l5,%o4,%l1 + + fsubd %f0,%f4,%f0 + srl %l0,10,%l0 + + fmuld %f20,%f20,%f20 + ldd [%l5+%o5],%f36 + add %l5,%o5,%l2 + + fmuld %f10,%f34,%f14 + ldd [%l1+0x10],%f16 + add %fp,%o4,%o4 + + faddd %f0,%f2,%f0 + andn %l0,0x1f,%l0 + + fmuld %f20,%f36,%f24 + ldd [%l2+0x10],%f26 + add %fp,%o5,%o5 + + faddd %f14,%f16,%f14 + ldd [%l1+0x20],%f34 + + fmuld %f0,%f0,%f2 + add %l0,%o3,%l0 + + faddd %f24,%f26,%f24 + ldd [%l2+0x20],%f36 + + fmuld %f10,%f14,%f14 + ldd [%l1+0x30],%f16 + + fmuld %f2,%f58,%f6 + ldd [%l3+%l0],%f32 + + fmuld %f20,%f24,%f24 + ldd [%l2+0x30],%f26 + + faddd %f14,%f34,%f14 + ldd [%o4+x1_1],%f34 + + faddd %f6,%f56,%f6 + fmuld %f2,%f62,%f4 + + faddd %f24,%f36,%f24 + ldd [%o5+x2_1],%f36 + + fmuld %f10,%f14,%f14 + std %f12,[%fp+y1_0] + + fmuld %f2,%f6,%f6 + faddd %f4,%f60,%f4 + + fmuld %f20,%f24,%f24 + std %f22,[%fp+y2_0] + + faddd %f14,%f16,%f14 + + faddd %f6,%f54,%f6 + fmuld %f2,%f4,%f4 + ldd [%g1+%l0],%f2 + + faddd %f24,%f26,%f24 + + fmuld %f10,%f14,%f14 + + fmuld %f0,%f6,%f6 + ldd [%l4+%l0],%f0 + + fmuld %f4,%f32,%f4 + + fmuld %f20,%f24,%f24 + + fmuld %f6,%f2,%f6 + + fmuld %f34,%f14,%f14 + ldd [%o4+y1_0],%f12 + + fmuld %f36,%f24,%f24 + ldd [%o5+y2_0],%f22 + + faddd %f6,%f4,%f6 + + faddd %f14,%f12,%f14 + + faddd %f24,%f22,%f24 + + faddd %f6,%f0,%f6 + + faddd %f34,%f14,%f16 + + faddd %f36,%f24,%f26 + ba,pt %icc,.FIXSIGN + +! delay slot + faddd %f6,%f32,%f6 + + .align 32 +.CASE4: + fands %f29,%f28,%f29 ! 
if (n & 1) clear sign bit + sethi %hi(0x3fc3c000),%o7 + andcc %l1,2,%g0 + bne,pn %icc,.CASE6 + +! delay slot + andcc %l2,2,%g0 + fpadd32s %f10,%f31,%f18 + ld [%fp+x1_1],%l1 + bne,pn %icc,.CASE5 + +! delay slot + add %l3,8,%g1 + ld [%fp+x2_1],%l2 + fpadd32s %f20,%f31,%f28 + + fand %f18,%f44,%f14 + sub %l1,%o7,%l1 + + fand %f28,%f44,%f24 + sub %l2,%o7,%l2 + + fsubd %f10,%f14,%f10 + srl %l1,10,%l1 + + fsubd %f20,%f24,%f20 + srl %l2,10,%l2 + + fmuld %f0,%f0,%f0 + ldd [%l5+%o3],%f32 + add %l5,%o3,%l0 + + faddd %f10,%f12,%f10 + andn %l1,0x1f,%l1 + + faddd %f20,%f22,%f20 + andn %l2,0x1f,%l2 + + fmuld %f0,%f32,%f4 + ldd [%l0+0x10],%f6 + add %fp,%o3,%o3 + + fmuld %f10,%f10,%f12 + add %l1,%o4,%l1 + + fmuld %f20,%f20,%f22 + add %l2,%o5,%l2 + + faddd %f4,%f6,%f4 + ldd [%l0+0x20],%f32 + + fmuld %f12,%f58,%f16 + ldd [%l3+%l1],%f34 + + fmuld %f22,%f58,%f26 + ldd [%l3+%l2],%f36 + + fmuld %f0,%f4,%f4 + ldd [%l0+0x30],%f6 + + faddd %f16,%f56,%f16 + fmuld %f12,%f62,%f14 + + faddd %f26,%f56,%f26 + fmuld %f22,%f62,%f24 + + faddd %f4,%f32,%f4 + ldd [%o3+x0_1],%f32 + + fmuld %f12,%f16,%f16 + faddd %f14,%f60,%f14 + + fmuld %f22,%f26,%f26 + faddd %f24,%f60,%f24 + + fmuld %f0,%f4,%f4 + + faddd %f16,%f54,%f16 + fmuld %f12,%f14,%f14 + ldd [%g1+%l1],%f12 + + faddd %f26,%f54,%f26 + fmuld %f22,%f24,%f24 + ldd [%g1+%l2],%f22 + + faddd %f4,%f6,%f4 + + fmuld %f10,%f16,%f16 + ldd [%l4+%l1],%f10 + + fmuld %f20,%f26,%f26 + ldd [%l4+%l2],%f20 + + fmuld %f14,%f34,%f14 + std %f2,[%fp+y0_0] + + fmuld %f24,%f36,%f24 + + fmuld %f0,%f4,%f4 + + fmuld %f16,%f12,%f16 + + fmuld %f26,%f22,%f26 + + fmuld %f32,%f4,%f4 + ldd [%o3+y0_0],%f2 + + faddd %f16,%f14,%f16 + + faddd %f26,%f24,%f26 + + faddd %f4,%f2,%f4 + + faddd %f16,%f10,%f16 + + faddd %f26,%f20,%f26 + + faddd %f32,%f4,%f6 + + faddd %f16,%f34,%f16 + ba,pt %icc,.FIXSIGN + +! 
delay slot + faddd %f26,%f36,%f26 + + .align 32 +.CASE5: + fand %f18,%f44,%f14 + sub %l1,%o7,%l1 + + fmuld %f0,%f0,%f0 + ldd [%l5+%o3],%f32 + add %l5,%o3,%l0 + + fsubd %f10,%f14,%f10 + srl %l1,10,%l1 + + fmuld %f20,%f20,%f20 + ldd [%l5+%o5],%f36 + add %l5,%o5,%l2 + + fmuld %f0,%f32,%f4 + ldd [%l0+0x10],%f6 + add %fp,%o3,%o3 + + faddd %f10,%f12,%f10 + andn %l1,0x1f,%l1 + + fmuld %f20,%f36,%f24 + ldd [%l2+0x10],%f26 + add %fp,%o5,%o5 + + faddd %f4,%f6,%f4 + ldd [%l0+0x20],%f32 + + fmuld %f10,%f10,%f12 + add %l1,%o4,%l1 + + faddd %f24,%f26,%f24 + ldd [%l2+0x20],%f36 + + fmuld %f0,%f4,%f4 + ldd [%l0+0x30],%f6 + + fmuld %f12,%f58,%f16 + ldd [%l3+%l1],%f34 + + fmuld %f20,%f24,%f24 + ldd [%l2+0x30],%f26 + + faddd %f4,%f32,%f4 + ldd [%o3+x0_1],%f32 + + faddd %f16,%f56,%f16 + fmuld %f12,%f62,%f14 + + faddd %f24,%f36,%f24 + ldd [%o5+x2_1],%f36 + + fmuld %f0,%f4,%f4 + std %f2,[%fp+y0_0] + + fmuld %f12,%f16,%f16 + faddd %f14,%f60,%f14 + + fmuld %f20,%f24,%f24 + std %f22,[%fp+y2_0] + + faddd %f4,%f6,%f4 + + faddd %f16,%f54,%f16 + fmuld %f12,%f14,%f14 + ldd [%g1+%l1],%f12 + + faddd %f24,%f26,%f24 + + fmuld %f0,%f4,%f4 + + fmuld %f10,%f16,%f16 + ldd [%l4+%l1],%f10 + + fmuld %f14,%f34,%f14 + + fmuld %f20,%f24,%f24 + + fmuld %f16,%f12,%f16 + + fmuld %f32,%f4,%f4 + ldd [%o3+y0_0],%f2 + + fmuld %f36,%f24,%f24 + ldd [%o5+y2_0],%f22 + + faddd %f16,%f14,%f16 + + faddd %f4,%f2,%f4 + + faddd %f24,%f22,%f24 + + faddd %f16,%f10,%f16 + + faddd %f32,%f4,%f6 + + faddd %f36,%f24,%f26 + ba,pt %icc,.FIXSIGN + +! delay slot + faddd %f16,%f34,%f16 + + .align 32 +.CASE6: + ld [%fp+x2_1],%l2 + add %l3,8,%g1 + bne,pn %icc,.CASE7 +! 
delay slot + fpadd32s %f20,%f31,%f28 + + fand %f28,%f44,%f24 + ldd [%l5+%o3],%f32 + add %l5,%o3,%l0 + + fmuld %f0,%f0,%f0 + sub %l2,%o7,%l2 + + fsubd %f20,%f24,%f20 + srl %l2,10,%l2 + + fmuld %f10,%f10,%f10 + ldd [%l5+%o4],%f34 + add %l5,%o4,%l1 + + fmuld %f0,%f32,%f4 + ldd [%l0+0x10],%f6 + add %fp,%o3,%o3 + + faddd %f20,%f22,%f20 + andn %l2,0x1f,%l2 + + fmuld %f10,%f34,%f14 + ldd [%l1+0x10],%f16 + add %fp,%o4,%o4 + + faddd %f4,%f6,%f4 + ldd [%l0+0x20],%f32 + + fmuld %f20,%f20,%f22 + add %l2,%o5,%l2 + + faddd %f14,%f16,%f14 + ldd [%l1+0x20],%f34 + + fmuld %f0,%f4,%f4 + ldd [%l0+0x30],%f6 + + fmuld %f22,%f58,%f26 + ldd [%l3+%l2],%f36 + + fmuld %f10,%f14,%f14 + ldd [%l1+0x30],%f16 + + faddd %f4,%f32,%f4 + ldd [%o3+x0_1],%f32 + + faddd %f26,%f56,%f26 + fmuld %f22,%f62,%f24 + + faddd %f14,%f34,%f14 + ldd [%o4+x1_1],%f34 + + fmuld %f0,%f4,%f4 + std %f2,[%fp+y0_0] + + fmuld %f22,%f26,%f26 + faddd %f24,%f60,%f24 + + fmuld %f10,%f14,%f14 + std %f12,[%fp+y1_0] + + faddd %f4,%f6,%f4 + + faddd %f26,%f54,%f26 + fmuld %f22,%f24,%f24 + ldd [%g1+%l2],%f22 + + faddd %f14,%f16,%f14 + + fmuld %f0,%f4,%f4 + + fmuld %f20,%f26,%f26 + ldd [%l4+%l2],%f20 + + fmuld %f24,%f36,%f24 + + fmuld %f10,%f14,%f14 + + fmuld %f26,%f22,%f26 + + fmuld %f32,%f4,%f4 + ldd [%o3+y0_0],%f2 + + fmuld %f34,%f14,%f14 + ldd [%o4+y1_0],%f12 + + faddd %f26,%f24,%f26 + + faddd %f4,%f2,%f4 + + faddd %f14,%f12,%f14 + + faddd %f26,%f20,%f26 + + faddd %f32,%f4,%f6 + + faddd %f34,%f14,%f16 + ba,pt %icc,.FIXSIGN + +! 
delay slot + faddd %f26,%f36,%f26 + + .align 32 +.CASE7: + fmuld %f0,%f0,%f0 + ldd [%l5+%o3],%f32 + add %l5,%o3,%l0 + + fmuld %f10,%f10,%f10 + ldd [%l5+%o4],%f34 + add %l5,%o4,%l1 + + fmuld %f20,%f20,%f20 + ldd [%l5+%o5],%f36 + add %l5,%o5,%l2 + + fmuld %f0,%f32,%f4 + ldd [%l0+0x10],%f6 + add %fp,%o3,%o3 + + fmuld %f10,%f34,%f14 + ldd [%l1+0x10],%f16 + add %fp,%o4,%o4 + + fmuld %f20,%f36,%f24 + ldd [%l2+0x10],%f26 + add %fp,%o5,%o5 + + faddd %f4,%f6,%f4 + ldd [%l0+0x20],%f32 + + faddd %f14,%f16,%f14 + ldd [%l1+0x20],%f34 + + faddd %f24,%f26,%f24 + ldd [%l2+0x20],%f36 + + fmuld %f0,%f4,%f4 + ldd [%l0+0x30],%f6 + + fmuld %f10,%f14,%f14 + ldd [%l1+0x30],%f16 + + fmuld %f20,%f24,%f24 + ldd [%l2+0x30],%f26 + + faddd %f4,%f32,%f4 + ldd [%o3+x0_1],%f32 + + faddd %f14,%f34,%f14 + ldd [%o4+x1_1],%f34 + + faddd %f24,%f36,%f24 + ldd [%o5+x2_1],%f36 + + fmuld %f0,%f4,%f4 + std %f2,[%fp+y0_0] + + fmuld %f10,%f14,%f14 + std %f12,[%fp+y1_0] + + fmuld %f20,%f24,%f24 + std %f22,[%fp+y2_0] + + faddd %f4,%f6,%f4 + + faddd %f14,%f16,%f14 + + faddd %f24,%f26,%f24 + + fmuld %f0,%f4,%f4 + + fmuld %f10,%f14,%f14 + + fmuld %f20,%f24,%f24 + + fmuld %f32,%f4,%f4 + ldd [%o3+y0_0],%f2 + + fmuld %f34,%f14,%f14 + ldd [%o4+y1_0],%f12 + + fmuld %f36,%f24,%f24 + ldd [%o5+y2_0],%f22 + + faddd %f4,%f2,%f4 + + faddd %f14,%f12,%f14 + + faddd %f24,%f22,%f24 + + faddd %f32,%f4,%f6 + + faddd %f34,%f14,%f16 + ba,pt %icc,.FIXSIGN + +! delay slot + faddd %f36,%f24,%f26 + + + .align 32 +.ENDLOOP2: + fmuld %f10,%f40,%f12 + add %l5,thresh,%g1 + faddd %f12,%f42,%f12 + st %f13,[%fp+n1] + fsubd %f12,%f42,%f12 ! 
n + fmuld %f12,%f46,%f14 + fsubd %f10,%f14,%f14 + fmuld %f12,%f48,%f16 + fsubd %f14,%f16,%f10 + ld [%fp+n1],%o4 ; add %o4,1,%o4 + fsubd %f14,%f10,%f34 + and %o4,1,%o4 + fsubd %f34,%f16,%f34 + fmuld %f12,%f50,%f18 + sll %o4,3,%o4 + fsubd %f18,%f34,%f18 + ld [%g1+%o4],%f16 + fsubd %f10,%f18,%f14 + fsubd %f10,%f14,%f34 + add %l5,thresh+4,%o7 + fsubd %f34,%f18,%f34 + fmuld %f12,%f52,%f12 + fsubd %f12,%f34,%f12 + ld [%o7+%o4],%f18 + fsubd %f14,%f12,%f10 ! x + fsubd %f14,%f10,%f14 + fands %f10,%f30,%f19 ! save signbit + fabsd %f10,%f10 + std %f10,[%fp+x1_1] + fsubd %f14,%f12,%f12 ! y + fcmpgt32 %f16,%f10,%l1 + fxors %f12,%f19,%f12 + fands %f19,%f18,%f19 ! if (n & 1) clear sign bit + andcc %l1,2,%g0 + bne,pn %icc,1f +! delay slot + nop + fpadd32s %f10,%f31,%f18 + ld [%fp+x1_1],%l1 + fand %f18,%f44,%f14 + sethi %hi(0x3fc3c000),%o7 + add %l3,8,%g1 + fsubd %f10,%f14,%f10 + sub %l1,%o7,%l1 + srl %l1,10,%l1 + faddd %f10,%f12,%f10 + andn %l1,0x1f,%l1 + fmuld %f10,%f10,%f12 + add %l1,%o4,%l1 + fmuld %f12,%f58,%f16 + ldd [%l3+%l1],%f34 + faddd %f16,%f56,%f16 + fmuld %f12,%f62,%f14 + fmuld %f12,%f16,%f16 + faddd %f14,%f60,%f14 + faddd %f16,%f54,%f16 + fmuld %f12,%f14,%f14 + ldd [%g1+%l1],%f12 + fmuld %f10,%f16,%f16 + ldd [%l4+%l1],%f10 + fmuld %f14,%f34,%f14 + fmuld %f16,%f12,%f16 + faddd %f16,%f14,%f16 + faddd %f16,%f10,%f16 + ba,pt %icc,2f + faddd %f16,%f34,%f16 +1: + fmuld %f10,%f10,%f10 + ldd [%l5+%o4],%f34 + add %l5,%o4,%l1 + fmuld %f10,%f34,%f14 + ldd [%l1+0x10],%f16 + add %fp,%o4,%o4 + faddd %f14,%f16,%f14 + ldd [%l1+0x20],%f34 + fmuld %f10,%f14,%f14 + ldd [%l1+0x30],%f16 + faddd %f14,%f34,%f14 + ldd [%o4+x1_1],%f34 + fmuld %f10,%f14,%f14 + std %f12,[%fp+y1_0] + faddd %f14,%f16,%f14 + fmuld %f10,%f14,%f14 + fmuld %f34,%f14,%f14 + ldd [%o4+y1_0],%f12 + faddd %f14,%f12,%f14 + faddd %f34,%f14,%f16 +2: + add %l5,thresh-4,%g1 + ld [%fp+n1],%o4 ; add %o4,1,%o4 + and %o4,2,%o4 + sll %o4,2,%o4 + ld [%g1+%o4],%f18 + fxors %f19,%f18,%f19 + fors %f16,%f19,%f16 ! 
tack on sign + st %f16,[%o1] + st %f17,[%o1+4] + +.ENDLOOP1: + fmuld %f0,%f40,%f2 + add %l5,thresh,%g1 + faddd %f2,%f42,%f2 + st %f3,[%fp+n0] + fsubd %f2,%f42,%f2 ! n + fmuld %f2,%f46,%f4 + fsubd %f0,%f4,%f4 + fmuld %f2,%f48,%f6 + fsubd %f4,%f6,%f0 + ld [%fp+n0],%o3 ; add %o3,1,%o3 + fsubd %f4,%f0,%f32 + and %o3,1,%o3 + fsubd %f32,%f6,%f32 + fmuld %f2,%f50,%f8 + sll %o3,3,%o3 + fsubd %f8,%f32,%f8 + ld [%g1+%o3],%f6 + fsubd %f0,%f8,%f4 + fsubd %f0,%f4,%f32 + add %l5,thresh+4,%o7 + fsubd %f32,%f8,%f32 + fmuld %f2,%f52,%f2 + fsubd %f2,%f32,%f2 + ld [%o7+%o3],%f8 + fsubd %f4,%f2,%f0 ! x + fsubd %f4,%f0,%f4 + fands %f0,%f30,%f9 ! save signbit + fabsd %f0,%f0 + std %f0,[%fp+x0_1] + fsubd %f4,%f2,%f2 ! y + fcmpgt32 %f6,%f0,%l0 + fxors %f2,%f9,%f2 + fands %f9,%f8,%f9 ! if (n & 1) clear sign bit + andcc %l0,2,%g0 + bne,pn %icc,1f +! delay slot + nop + fpadd32s %f0,%f31,%f8 + ld [%fp+x0_1],%l0 + fand %f8,%f44,%f4 + sethi %hi(0x3fc3c000),%o7 + add %l3,8,%g1 + fsubd %f0,%f4,%f0 + sub %l0,%o7,%l0 + srl %l0,10,%l0 + faddd %f0,%f2,%f0 + andn %l0,0x1f,%l0 + fmuld %f0,%f0,%f2 + add %l0,%o3,%l0 + fmuld %f2,%f58,%f6 + ldd [%l3+%l0],%f32 + faddd %f6,%f56,%f6 + fmuld %f2,%f62,%f4 + fmuld %f2,%f6,%f6 + faddd %f4,%f60,%f4 + faddd %f6,%f54,%f6 + fmuld %f2,%f4,%f4 + ldd [%g1+%l0],%f2 + fmuld %f0,%f6,%f6 + ldd [%l4+%l0],%f0 + fmuld %f4,%f32,%f4 + fmuld %f6,%f2,%f6 + faddd %f6,%f4,%f6 + faddd %f6,%f0,%f6 + ba,pt %icc,2f + faddd %f6,%f32,%f6 +1: + fmuld %f0,%f0,%f0 + ldd [%l5+%o3],%f32 + add %l5,%o3,%l0 + fmuld %f0,%f32,%f4 + ldd [%l0+0x10],%f6 + add %fp,%o3,%o3 + faddd %f4,%f6,%f4 + ldd [%l0+0x20],%f32 + fmuld %f0,%f4,%f4 + ldd [%l0+0x30],%f6 + faddd %f4,%f32,%f4 + ldd [%o3+x0_1],%f32 + fmuld %f0,%f4,%f4 + std %f2,[%fp+y0_0] + faddd %f4,%f6,%f4 + fmuld %f0,%f4,%f4 + fmuld %f32,%f4,%f4 + ldd [%o3+y0_0],%f2 + faddd %f4,%f2,%f4 + faddd %f32,%f4,%f6 +2: + add %l5,thresh-4,%g1 + ld [%fp+n0],%o3 ; add %o3,1,%o3 + and %o3,2,%o3 + sll %o3,2,%o3 + ld [%g1+%o3],%f8 + fxors %f9,%f8,%f9 + fors 
%f6,%f9,%f6 ! tack on sign + st %f6,[%o0] + st %f7,[%o0+4] + +.ENDLOOP0: + +! check for huge arguments remaining + + tst LIM_l6 + be,pt %icc,.exit +! delay slot + nop + +! ========== huge range (use C code) ========== + +#ifdef __sparcv9 + ldx [%fp+xsave],%o1 + ldx [%fp+ysave],%o3 +#else + ld [%fp+xsave],%o1 + ld [%fp+ysave],%o3 +#endif + ld [%fp+nsave],%o0 + ld [%fp+sxsave],%o2 + ld [%fp+sysave],%o4 + sra %o2,0,%o2 ! sign-extend for V9 + sra %o4,0,%o4 + call __vlibm_vcos_big + mov %l7,%o5 ! delay slot + +.exit: + ret + restore + + + .align 32 +.SKIP0: + addcc %i0,-1,%i0 + ble,pn %icc,.ENDLOOP0 +! delay slot, harmless if branch taken + add %i3,%i4,%i3 ! y += stridey + andn %l1,%i5,%l0 ! hx &= ~0x80000000 + fmovs %f10,%f0 + ld [%i1+4],%f1 + ba,pt %icc,.LOOP0 +! delay slot + add %i1,%i2,%i1 ! x += stridex + + + .align 32 +.SKIP1: + addcc %i0,-1,%i0 + ble,pn %icc,.ENDLOOP1 +! delay slot, harmless if branch taken + add %i3,%i4,%i3 ! y += stridey + andn %l2,%i5,%l1 ! hx &= ~0x80000000 + fmovs %f20,%f10 + ld [%i1+4],%f11 + ba,pt %icc,.LOOP1 +! delay slot + add %i1,%i2,%i1 ! x += stridex + + + .align 32 +.SKIP2: + addcc %i0,-1,%i0 + ble,pn %icc,.ENDLOOP2 +! delay slot, harmless if branch taken + add %i3,%i4,%i3 ! y += stridey + ld [%i1],%l2 + ld [%i1],%f20 + ld [%i1+4],%f21 + andn %l2,%i5,%l2 ! hx &= ~0x80000000 + ba,pt %icc,.LOOP2 +! delay slot + add %i1,%i2,%i1 ! x += stridex + + + .align 32 +.BIG0: + sethi %hi(0x7ff00000),%o7 + cmp %l0,%o7 + bl,a,pt %icc,1f ! if hx < 0x7ff00000 +! delay slot, annulled if branch not taken + mov %l7,LIM_l6 ! set biguns flag or + fsubd %f0,%f0,%f0 ! y = x - x + st %f0,[%o0] + st %f1,[%o0+4] +1: + addcc %i0,-1,%i0 + ble,pn %icc,.ENDLOOP0 +! delay slot, harmless if branch taken + andn %l1,%i5,%l0 ! hx &= ~0x80000000 + fmovd %f10,%f0 + ba,pt %icc,.LOOP0 +! delay slot + add %i1,%i2,%i1 ! x += stridex + + + .align 32 +.BIG1: + sethi %hi(0x7ff00000),%o7 + cmp %l1,%o7 + bl,a,pt %icc,1f ! if hx < 0x7ff00000 +! 
delay slot, annulled if branch not taken + mov %l7,LIM_l6 ! set biguns flag or + fsubd %f10,%f10,%f10 ! y = x - x + st %f10,[%o1] + st %f11,[%o1+4] +1: + addcc %i0,-1,%i0 + ble,pn %icc,.ENDLOOP1 +! delay slot, harmless if branch taken + andn %l2,%i5,%l1 ! hx &= ~0x80000000 + fmovd %f20,%f10 + ba,pt %icc,.LOOP1 +! delay slot + add %i1,%i2,%i1 ! x += stridex + + + .align 32 +.BIG2: + sethi %hi(0x7ff00000),%o7 + cmp %l2,%o7 + bl,a,pt %icc,1f ! if hx < 0x7ff00000 +! delay slot, annulled if branch not taken + mov %l7,LIM_l6 ! set biguns flag or + fsubd %f20,%f20,%f20 ! y = x - x + st %f20,[%o2] + st %f21,[%o2+4] +1: + addcc %i0,-1,%i0 + ble,pn %icc,.ENDLOOP2 +! delay slot + nop + ld [%i1],%l2 + ld [%i1],%f20 + ld [%i1+4],%f21 + andn %l2,%i5,%l2 ! hx &= ~0x80000000 + ba,pt %icc,.LOOP2 +! delay slot + add %i1,%i2,%i1 ! x += stridex + + SET_SIZE(__vcos) + diff --git a/usr/src/lib/libmvec/common/vis/__vcos_ultra3.S b/usr/src/lib/libmvec/common/vis/__vcos_ultra3.S new file mode 100644 index 0000000000..394ee795e7 --- /dev/null +++ b/usr/src/lib/libmvec/common/vis/__vcos_ultra3.S @@ -0,0 +1,3425 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. 
All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .file "__vcos_ultra3.S" + +#include "libm.h" +#if defined(LIBMVEC_SO_BUILD) + .weak __vcos + .type __vcos,#function + __vcos = __vcos_ultra3 +#endif + + RO_DATA + .align 64 +/* Table of double constants, each written as two 32-bit words (high word first). The entry code loads them with ldd from offsets 0x00 through 0x58 of this label, in the order listed. */ +constants: + .word 0x42c80000,0x00000000 ! 3 * 2^44 + .word 0x43380000,0x00000000 ! 3 * 2^51 + .word 0x3fe45f30,0x6dc9c883 ! invpio2 + .word 0x3ff921fb,0x54442c00 ! pio2_1 + .word 0x3d318469,0x898cc400 ! pio2_2 + .word 0x3a71701b,0x839a2520 ! pio2_3 + .word 0xbfc55555,0x55555533 ! pp1 + .word 0x3f811111,0x10e7d53b ! pp2 + .word 0xbf2a0167,0xe6b3cf9b ! pp3 + .word 0xbfdfffff,0xffffff65 ! qq1 + .word 0x3fa55555,0x54f88ed0 ! qq2 + .word 0xbf56c12c,0xdd185f60 ! qq3 + +! local storage indices + +/* Offsets from %fp of the temporaries. xsave, ysave, nsave, sxsave and sysave receive the incoming x, y, n, stridex and stridey; biguns is cleared on entry; nk0..nk3 are written from float registers and read back into integer registers; junk is the initial target of py1..py3 (NOTE(review): presumably a dummy slot for stores issued before real outputs exist - confirm). */ +#define xsave STACK_BIAS-0x8 +#define ysave STACK_BIAS-0x10 +#define nsave STACK_BIAS-0x14 +#define sxsave STACK_BIAS-0x18 +#define sysave STACK_BIAS-0x1c +#define biguns STACK_BIAS-0x20 +#define nk3 STACK_BIAS-0x24 +#define nk2 STACK_BIAS-0x28 +#define nk1 STACK_BIAS-0x2c +#define nk0 STACK_BIAS-0x30 +#define junk STACK_BIAS-0x38 +! sizeof temp storage - must be a multiple of 16 for V9 +#define tmps 0x40 + +! register use + +! i0 n +! i1 x +! i2 stridex +! i3 y +! i4 stridey +! i5 0x80000000 + +! l0 hx0 +! l1 hx1 +! l2 hx2 +! l3 hx3 +! l4 k0 +! l5 k1 +! l6 k2 +! l7 k3 + +! the following are 64-bit registers in both V8+ and V9 + +! g1 __vlibm_TBL_sincos2 +! g5 scratch + +! o0 py0 +! o1 py1 +! o2 py2 +! o3 py3 +! o4 0x3e400000 +! o5 0x3fe921fb,0x4099251e +! o7 scratch + +! f0 hx0 +! f2 +! f4 +! f6 +! f8 hx1 +! f10 +! f12 +! f14 +! f16 hx2 +! f18 +! f20 +! f22 +! f24 hx3 +! f26 +! f28 +! f30 +! f32 +! f34 +! f36 +! 
f38 + +#define c3two44 %f40 +#define c3two51 %f42 +#define invpio2 %f44 +#define pio2_1 %f46 +#define pio2_2 %f48 +#define pio2_3 %f50 +#define pp1 %f52 +#define pp2 %f54 +#define pp3 %f56 +#define qq1 %f58 +#define qq2 %f60 +#define qq3 %f62 + + ENTRY(__vcos_ultra3) + save %sp,-SA(MINFRAME)-tmps,%sp + PIC_SETUP(l7) + PIC_SET(l7,constants,o0) + PIC_SET(l7,__vlibm_TBL_sincos2,o1) + mov %o1,%g1 + wr %g0,0x82,%asi ! set %asi for non-faulting loads +#ifdef __sparcv9 + stx %i1,[%fp+xsave] ! save arguments + stx %i3,[%fp+ysave] +#else + st %i1,[%fp+xsave] ! save arguments + st %i3,[%fp+ysave] +#endif + st %i0,[%fp+nsave] + st %i2,[%fp+sxsave] + st %i4,[%fp+sysave] + st %g0,[%fp+biguns] ! biguns = 0 + ldd [%o0+0x00],c3two44 ! load/set up constants + ldd [%o0+0x08],c3two51 + ldd [%o0+0x10],invpio2 + ldd [%o0+0x18],pio2_1 + ldd [%o0+0x20],pio2_2 + ldd [%o0+0x28],pio2_3 + ldd [%o0+0x30],pp1 + ldd [%o0+0x38],pp2 + ldd [%o0+0x40],pp3 + ldd [%o0+0x48],qq1 + ldd [%o0+0x50],qq2 + ldd [%o0+0x58],qq3 + sethi %hi(0x80000000),%i5 + sethi %hi(0x3e400000),%o4 + sethi %hi(0x3fe921fb),%o5 + or %o5,%lo(0x3fe921fb),%o5 + sllx %o5,32,%o5 + sethi %hi(0x4099251e),%o7 + or %o7,%lo(0x4099251e),%o7 + or %o5,%o7,%o5 + sll %i2,3,%i2 ! scale strides + sll %i4,3,%i4 + add %fp,junk,%o1 ! loop prologue + add %fp,junk,%o2 + add %fp,junk,%o3 + ld [%i1],%l0 ! *x + ld [%i1],%f0 + ld [%i1+4],%f3 + andn %l0,%i5,%l0 ! mask off sign + add %i1,%i2,%i1 ! x += stridex + ba .loop0 + nop + +! 16-byte aligned + .align 16 +.loop0: + lda [%i1]%asi,%l1 ! preload next argument + sub %l0,%o4,%g5 + sub %o5,%l0,%o7 + fabss %f0,%f2 + + lda [%i1]%asi,%f8 + orcc %o7,%g5,%g0 + mov %i3,%o0 ! py0 = y + bl,pn %icc,.range0 ! hx < 0x3e400000 or hx > 0x4099251e + +! delay slot + lda [%i1+4]%asi,%f11 + addcc %i0,-1,%i0 + add %i3,%i4,%i3 ! y += stridey + ble,pn %icc,.last1 + +! delay slot + andn %l1,%i5,%l1 + add %i1,%i2,%i1 ! x += stridex + faddd %f2,c3two44,%f4 + st %f15,[%o1+4] + +.loop1: + lda [%i1]%asi,%l2 ! 
preload next argument + sub %l1,%o4,%g5 + sub %o5,%l1,%o7 + fabss %f8,%f10 + + lda [%i1]%asi,%f16 + orcc %o7,%g5,%g0 + mov %i3,%o1 ! py1 = y + bl,pn %icc,.range1 ! hx < 0x3e400000 or hx > 0x4099251e + +! delay slot + lda [%i1+4]%asi,%f19 + addcc %i0,-1,%i0 + add %i3,%i4,%i3 ! y += stridey + ble,pn %icc,.last2 + +! delay slot + andn %l2,%i5,%l2 + add %i1,%i2,%i1 ! x += stridex + faddd %f10,c3two44,%f12 + st %f23,[%o2+4] + +.loop2: + lda [%i1]%asi,%l3 ! preload next argument + sub %l2,%o4,%g5 + sub %o5,%l2,%o7 + fabss %f16,%f18 + + lda [%i1]%asi,%f24 + orcc %o7,%g5,%g0 + mov %i3,%o2 ! py2 = y + bl,pn %icc,.range2 ! hx < 0x3e400000 or hx > 0x4099251e + +! delay slot + lda [%i1+4]%asi,%f27 + addcc %i0,-1,%i0 + add %i3,%i4,%i3 ! y += stridey + ble,pn %icc,.last3 + +! delay slot + andn %l3,%i5,%l3 + add %i1,%i2,%i1 ! x += stridex + faddd %f18,c3two44,%f20 + st %f31,[%o3+4] + +.loop3: + sub %l3,%o4,%g5 + sub %o5,%l3,%o7 + fabss %f24,%f26 + st %f5,[%fp+nk0] + + orcc %o7,%g5,%g0 + mov %i3,%o3 ! py3 = y + bl,pn %icc,.range3 ! hx < 0x3e400000 or > hx 0x4099251e +! delay slot + st %f13,[%fp+nk1] + +!!! DONE? +.cont: + srlx %o5,32,%o7 + add %i3,%i4,%i3 ! y += stridey + fmovs %f3,%f1 + st %f21,[%fp+nk2] + + sub %o7,%l0,%l0 + sub %o7,%l1,%l1 + faddd %f26,c3two44,%f28 + st %f29,[%fp+nk3] + + sub %o7,%l2,%l2 + sub %o7,%l3,%l3 + fmovs %f11,%f9 + + or %l0,%l1,%l0 + or %l2,%l3,%l2 + fmovs %f19,%f17 + + fmovs %f27,%f25 + fmuld %f0,invpio2,%f6 ! x * invpio2, for medium range + + fmuld %f8,invpio2,%f14 + ld [%fp+nk0],%l4 + + fmuld %f16,invpio2,%f22 + ld [%fp+nk1],%l5 + + orcc %l0,%l2,%g0 + bl,pn %icc,.medium +! delay slot + fmuld %f24,invpio2,%f30 + ld [%fp+nk2],%l6 + + ld [%fp+nk3],%l7 + sll %l4,5,%l4 ! k + fcmpd %fcc0,%f0,pio2_3 ! x < pio2_3 iff x < 0 + + sll %l5,5,%l5 + ldd [%l4+%g1],%f4 + fcmpd %fcc1,%f8,pio2_3 + + sll %l6,5,%l6 + ldd [%l5+%g1],%f12 + fcmpd %fcc2,%f16,pio2_3 + + sll %l7,5,%l7 + ldd [%l6+%g1],%f20 + fcmpd %fcc3,%f24,pio2_3 + + ldd [%l7+%g1],%f28 + fsubd %f2,%f4,%f2 ! 
x -= __vlibm_TBL_sincos2[k] + + fsubd %f10,%f12,%f10 + + fsubd %f18,%f20,%f18 + + fsubd %f26,%f28,%f26 + + fmuld %f2,%f2,%f0 ! z = x * x + + fmuld %f10,%f10,%f8 + + fmuld %f18,%f18,%f16 + + fmuld %f26,%f26,%f24 + + fmuld %f0,qq3,%f6 + + fmuld %f8,qq3,%f14 + + fmuld %f16,qq3,%f22 + + fmuld %f24,qq3,%f30 + + faddd %f6,qq2,%f6 + fmuld %f0,pp2,%f4 + + faddd %f14,qq2,%f14 + fmuld %f8,pp2,%f12 + + faddd %f22,qq2,%f22 + fmuld %f16,pp2,%f20 + + faddd %f30,qq2,%f30 + fmuld %f24,pp2,%f28 + + fmuld %f0,%f6,%f6 + faddd %f4,pp1,%f4 + + fmuld %f8,%f14,%f14 + faddd %f12,pp1,%f12 + + fmuld %f16,%f22,%f22 + faddd %f20,pp1,%f20 + + fmuld %f24,%f30,%f30 + faddd %f28,pp1,%f28 + + faddd %f6,qq1,%f6 + fmuld %f0,%f4,%f4 + add %l4,%g1,%l4 + + faddd %f14,qq1,%f14 + fmuld %f8,%f12,%f12 + add %l5,%g1,%l5 + + faddd %f22,qq1,%f22 + fmuld %f16,%f20,%f20 + add %l6,%g1,%l6 + + faddd %f30,qq1,%f30 + fmuld %f24,%f28,%f28 + add %l7,%g1,%l7 + + fmuld %f2,%f4,%f4 + + fmuld %f10,%f12,%f12 + + fmuld %f18,%f20,%f20 + + fmuld %f26,%f28,%f28 + + fmuld %f0,%f6,%f6 + faddd %f4,%f2,%f4 + ldd [%l4+16],%f32 + + fmuld %f8,%f14,%f14 + faddd %f12,%f10,%f12 + ldd [%l5+16],%f34 + + fmuld %f16,%f22,%f22 + faddd %f20,%f18,%f20 + ldd [%l6+16],%f36 + + fmuld %f24,%f30,%f30 + faddd %f28,%f26,%f28 + ldd [%l7+16],%f38 + + fmuld %f32,%f6,%f6 + ldd [%l4+8],%f2 + + fmuld %f34,%f14,%f14 + ldd [%l5+8],%f10 + + fmuld %f36,%f22,%f22 + ldd [%l6+8],%f18 + + fmuld %f38,%f30,%f30 + ldd [%l7+8],%f26 + + fmuld %f2,%f4,%f4 + + fmuld %f10,%f12,%f12 + + fmuld %f18,%f20,%f20 + + fmuld %f26,%f28,%f28 + + fsubd %f6,%f4,%f6 + lda [%i1]%asi,%l0 ! preload next argument + + fsubd %f14,%f12,%f14 + lda [%i1]%asi,%f0 + + fsubd %f22,%f20,%f22 + lda [%i1+4]%asi,%f3 + + fsubd %f30,%f28,%f30 + andn %l0,%i5,%l0 + add %i1,%i2,%i1 + + faddd %f6,%f32,%f6 + st %f6,[%o0] + + faddd %f14,%f34,%f14 + st %f14,[%o1] + + faddd %f22,%f36,%f22 + st %f22,[%o2] + + faddd %f30,%f38,%f30 + st %f30,[%o3] + addcc %i0,-1,%i0 + + bg,pt %icc,.loop0 +! 
delay slot + st %f7,[%o0+4] + + ba,pt %icc,.end +! delay slot + nop + + + .align 16 +.medium: + faddd %f6,c3two51,%f4 + st %f5,[%fp+nk0] + + faddd %f14,c3two51,%f12 + st %f13,[%fp+nk1] + + faddd %f22,c3two51,%f20 + st %f21,[%fp+nk2] + + faddd %f30,c3two51,%f28 + st %f29,[%fp+nk3] + + fsubd %f4,c3two51,%f6 + + fsubd %f12,c3two51,%f14 + + fsubd %f20,c3two51,%f22 + + fsubd %f28,c3two51,%f30 + + fmuld %f6,pio2_1,%f2 + ld [%fp+nk0],%l0 ! n + + fmuld %f14,pio2_1,%f10 + ld [%fp+nk1],%l1 + + fmuld %f22,pio2_1,%f18 + ld [%fp+nk2],%l2 + + fmuld %f30,pio2_1,%f26 + ld [%fp+nk3],%l3 + + fsubd %f0,%f2,%f0 + fmuld %f6,pio2_2,%f4 + add %l0,1,%l0 + + fsubd %f8,%f10,%f8 + fmuld %f14,pio2_2,%f12 + add %l1,1,%l1 + + fsubd %f16,%f18,%f16 + fmuld %f22,pio2_2,%f20 + add %l2,1,%l2 + + fsubd %f24,%f26,%f24 + fmuld %f30,pio2_2,%f28 + add %l3,1,%l3 + + fsubd %f0,%f4,%f32 + + fsubd %f8,%f12,%f34 + + fsubd %f16,%f20,%f36 + + fsubd %f24,%f28,%f38 + + fsubd %f0,%f32,%f0 + fcmple32 %f32,pio2_3,%l4 ! x <= pio2_3 iff x < 0 + + fsubd %f8,%f34,%f8 + fcmple32 %f34,pio2_3,%l5 + + fsubd %f16,%f36,%f16 + fcmple32 %f36,pio2_3,%l6 + + fsubd %f24,%f38,%f24 + fcmple32 %f38,pio2_3,%l7 + + fsubd %f0,%f4,%f0 + fmuld %f6,pio2_3,%f6 + sll %l4,30,%l4 ! if (x < 0) n = -n ^ 2 + + fsubd %f8,%f12,%f8 + fmuld %f14,pio2_3,%f14 + sll %l5,30,%l5 + + fsubd %f16,%f20,%f16 + fmuld %f22,pio2_3,%f22 + sll %l6,30,%l6 + + fsubd %f24,%f28,%f24 + fmuld %f30,pio2_3,%f30 + sll %l7,30,%l7 + + fsubd %f6,%f0,%f6 + sra %l4,31,%l4 + + fsubd %f14,%f8,%f14 + sra %l5,31,%l5 + + fsubd %f22,%f16,%f22 + sra %l6,31,%l6 + + fsubd %f30,%f24,%f30 + sra %l7,31,%l7 + + fsubd %f32,%f6,%f0 ! 
reduced x + xor %l0,%l4,%l0 + + fsubd %f34,%f14,%f8 + xor %l1,%l5,%l1 + + fsubd %f36,%f22,%f16 + xor %l2,%l6,%l2 + + fsubd %f38,%f30,%f24 + xor %l3,%l7,%l3 + + fabsd %f0,%f2 + sub %l0,%l4,%l0 + + fabsd %f8,%f10 + sub %l1,%l5,%l1 + + fabsd %f16,%f18 + sub %l2,%l6,%l2 + + fabsd %f24,%f26 + sub %l3,%l7,%l3 + + faddd %f2,c3two44,%f4 + st %f5,[%fp+nk0] + and %l4,2,%l4 + + faddd %f10,c3two44,%f12 + st %f13,[%fp+nk1] + and %l5,2,%l5 + + faddd %f18,c3two44,%f20 + st %f21,[%fp+nk2] + and %l6,2,%l6 + + faddd %f26,c3two44,%f28 + st %f29,[%fp+nk3] + and %l7,2,%l7 + + fsubd %f32,%f0,%f4 + xor %l0,%l4,%l0 + + fsubd %f34,%f8,%f12 + xor %l1,%l5,%l1 + + fsubd %f36,%f16,%f20 + xor %l2,%l6,%l2 + + fsubd %f38,%f24,%f28 + xor %l3,%l7,%l3 + + fzero %f38 + ld [%fp+nk0],%l4 + + fsubd %f4,%f6,%f6 ! w + ld [%fp+nk1],%l5 + + fsubd %f12,%f14,%f14 + ld [%fp+nk2],%l6 + + fnegd %f38,%f38 + ld [%fp+nk3],%l7 + sll %l4,5,%l4 ! k + + fsubd %f20,%f22,%f22 + sll %l5,5,%l5 + + fsubd %f28,%f30,%f30 + sll %l6,5,%l6 + + fand %f0,%f38,%f32 ! sign bit of x + ldd [%l4+%g1],%f4 + sll %l7,5,%l7 + + fand %f8,%f38,%f34 + ldd [%l5+%g1],%f12 + + fand %f16,%f38,%f36 + ldd [%l6+%g1],%f20 + + fand %f24,%f38,%f38 + ldd [%l7+%g1],%f28 + + fsubd %f2,%f4,%f2 ! x -= __vlibm_TBL_sincos2[k] + + fsubd %f10,%f12,%f10 + + fsubd %f18,%f20,%f18 + nop + + fsubd %f26,%f28,%f26 + nop + +! 16-byte aligned + fmuld %f2,%f2,%f0 ! z = x * x + andcc %l0,1,%g0 + bz,pn %icc,.case8 +! delay slot + fxor %f6,%f32,%f32 + + fmuld %f10,%f10,%f8 + andcc %l1,1,%g0 + bz,pn %icc,.case4 +! delay slot + fxor %f14,%f34,%f34 + + fmuld %f18,%f18,%f16 + andcc %l2,1,%g0 + bz,pn %icc,.case2 +! delay slot + fxor %f22,%f36,%f36 + + fmuld %f26,%f26,%f24 + andcc %l3,1,%g0 + bz,pn %icc,.case1 +! delay slot + fxor %f30,%f38,%f38 + +!.case0: + fmuld %f0,qq3,%f6 ! cos(x0) + + fmuld %f8,qq3,%f14 ! cos(x1) + + fmuld %f16,qq3,%f22 ! cos(x2) + + fmuld %f24,qq3,%f30 ! 
cos(x3) + + faddd %f6,qq2,%f6 + fmuld %f0,pp2,%f4 + + faddd %f14,qq2,%f14 + fmuld %f8,pp2,%f12 + + faddd %f22,qq2,%f22 + fmuld %f16,pp2,%f20 + + faddd %f30,qq2,%f30 + fmuld %f24,pp2,%f28 + + fmuld %f0,%f6,%f6 + faddd %f4,pp1,%f4 + + fmuld %f8,%f14,%f14 + faddd %f12,pp1,%f12 + + fmuld %f16,%f22,%f22 + faddd %f20,pp1,%f20 + + fmuld %f24,%f30,%f30 + faddd %f28,pp1,%f28 + + faddd %f6,qq1,%f6 + fmuld %f0,%f4,%f4 + add %l4,%g1,%l4 + + faddd %f14,qq1,%f14 + fmuld %f8,%f12,%f12 + add %l5,%g1,%l5 + + faddd %f22,qq1,%f22 + fmuld %f16,%f20,%f20 + add %l6,%g1,%l6 + + faddd %f30,qq1,%f30 + fmuld %f24,%f28,%f28 + add %l7,%g1,%l7 + + fmuld %f2,%f4,%f4 + + fmuld %f10,%f12,%f12 + + fmuld %f18,%f20,%f20 + + fmuld %f26,%f28,%f28 + + fmuld %f0,%f6,%f6 + faddd %f4,%f32,%f4 + ldd [%l4+16],%f0 + + fmuld %f8,%f14,%f14 + faddd %f12,%f34,%f12 + ldd [%l5+16],%f8 + + fmuld %f16,%f22,%f22 + faddd %f20,%f36,%f20 + ldd [%l6+16],%f16 + + fmuld %f24,%f30,%f30 + faddd %f28,%f38,%f28 + ldd [%l7+16],%f24 + + fmuld %f0,%f6,%f6 + faddd %f4,%f2,%f4 + ldd [%l4+8],%f32 + + fmuld %f8,%f14,%f14 + faddd %f12,%f10,%f12 + ldd [%l5+8],%f34 + + fmuld %f16,%f22,%f22 + faddd %f20,%f18,%f20 + ldd [%l6+8],%f36 + + fmuld %f24,%f30,%f30 + faddd %f28,%f26,%f28 + ldd [%l7+8],%f38 + + fmuld %f32,%f4,%f4 + + fmuld %f34,%f12,%f12 + + fmuld %f36,%f20,%f20 + + fmuld %f38,%f28,%f28 + + fsubd %f6,%f4,%f6 + + fsubd %f14,%f12,%f14 + + fsubd %f22,%f20,%f22 + + fsubd %f30,%f28,%f30 + + faddd %f6,%f0,%f6 + + faddd %f14,%f8,%f14 + + faddd %f22,%f16,%f22 + + faddd %f30,%f24,%f30 + mov %l0,%l4 + + fnegd %f6,%f4 + lda [%i1]%asi,%l0 ! 
preload next argument + + fnegd %f14,%f12 + lda [%i1]%asi,%f0 + + fnegd %f22,%f20 + lda [%i1+4]%asi,%f3 + + fnegd %f30,%f28 + andn %l0,%i5,%l0 + add %i1,%i2,%i1 + + andcc %l4,2,%g0 + fmovdnz %icc,%f4,%f6 + st %f6,[%o0] + + andcc %l1,2,%g0 + fmovdnz %icc,%f12,%f14 + st %f14,[%o1] + + andcc %l2,2,%g0 + fmovdnz %icc,%f20,%f22 + st %f22,[%o2] + + andcc %l3,2,%g0 + fmovdnz %icc,%f28,%f30 + st %f30,[%o3] + + addcc %i0,-1,%i0 + bg,pt %icc,.loop0 +! delay slot + st %f7,[%o0+4] + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case1: + fmuld %f24,pp3,%f30 ! sin(x3) + + fmuld %f0,qq3,%f6 ! cos(x0) + + fmuld %f8,qq3,%f14 ! cos(x1) + + fmuld %f16,qq3,%f22 ! cos(x2) + + faddd %f30,pp2,%f30 + fmuld %f24,qq2,%f28 + + faddd %f6,qq2,%f6 + fmuld %f0,pp2,%f4 + + faddd %f14,qq2,%f14 + fmuld %f8,pp2,%f12 + + faddd %f22,qq2,%f22 + fmuld %f16,pp2,%f20 + + fmuld %f24,%f30,%f30 + faddd %f28,qq1,%f28 + + fmuld %f0,%f6,%f6 + faddd %f4,pp1,%f4 + + fmuld %f8,%f14,%f14 + faddd %f12,pp1,%f12 + + fmuld %f16,%f22,%f22 + faddd %f20,pp1,%f20 + + faddd %f30,pp1,%f30 + fmuld %f24,%f28,%f28 + add %l7,%g1,%l7 + + faddd %f6,qq1,%f6 + fmuld %f0,%f4,%f4 + add %l4,%g1,%l4 + + faddd %f14,qq1,%f14 + fmuld %f8,%f12,%f12 + add %l5,%g1,%l5 + + faddd %f22,qq1,%f22 + fmuld %f16,%f20,%f20 + add %l6,%g1,%l6 + + fmuld %f24,%f30,%f30 + + fmuld %f2,%f4,%f4 + + fmuld %f10,%f12,%f12 + + fmuld %f18,%f20,%f20 + + fmuld %f26,%f30,%f30 + ldd [%l7+8],%f24 + + fmuld %f0,%f6,%f6 + faddd %f4,%f32,%f4 + ldd [%l4+16],%f0 + + fmuld %f8,%f14,%f14 + faddd %f12,%f34,%f12 + ldd [%l5+16],%f8 + + fmuld %f16,%f22,%f22 + faddd %f20,%f36,%f20 + ldd [%l6+16],%f16 + + fmuld %f24,%f28,%f28 + faddd %f38,%f30,%f30 + + fmuld %f0,%f6,%f6 + faddd %f4,%f2,%f4 + ldd [%l4+8],%f32 + + fmuld %f8,%f14,%f14 + faddd %f12,%f10,%f12 + ldd [%l5+8],%f34 + + fmuld %f16,%f22,%f22 + faddd %f20,%f18,%f20 + ldd [%l6+8],%f36 + + faddd %f26,%f30,%f30 + ldd [%l7+16],%f38 + + fmuld %f32,%f4,%f4 + + fmuld %f34,%f12,%f12 + + fmuld %f36,%f20,%f20 + + fmuld 
%f38,%f30,%f30 + + fsubd %f6,%f4,%f6 + + fsubd %f14,%f12,%f14 + + fsubd %f22,%f20,%f22 + + faddd %f30,%f28,%f30 + + faddd %f6,%f0,%f6 + + faddd %f14,%f8,%f14 + + faddd %f22,%f16,%f22 + + faddd %f30,%f24,%f30 + mov %l0,%l4 + + fnegd %f6,%f4 + lda [%i1]%asi,%l0 ! preload next argument + + fnegd %f14,%f12 + lda [%i1]%asi,%f0 + + fnegd %f22,%f20 + lda [%i1+4]%asi,%f3 + + fnegd %f30,%f28 + andn %l0,%i5,%l0 + add %i1,%i2,%i1 + + andcc %l4,2,%g0 + fmovdnz %icc,%f4,%f6 + st %f6,[%o0] + + andcc %l1,2,%g0 + fmovdnz %icc,%f12,%f14 + st %f14,[%o1] + + andcc %l2,2,%g0 + fmovdnz %icc,%f20,%f22 + st %f22,[%o2] + + andcc %l3,2,%g0 + fmovdnz %icc,%f28,%f30 + st %f30,[%o3] + + addcc %i0,-1,%i0 + bg,pt %icc,.loop0 +! delay slot + st %f7,[%o0+4] + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case2: + fmuld %f26,%f26,%f24 + andcc %l3,1,%g0 + bz,pn %icc,.case3 +! delay slot + fxor %f30,%f38,%f38 + + fmuld %f16,pp3,%f22 ! sin(x2) + + fmuld %f0,qq3,%f6 ! cos(x0) + + fmuld %f8,qq3,%f14 ! cos(x1) + + faddd %f22,pp2,%f22 + fmuld %f16,qq2,%f20 + + fmuld %f24,qq3,%f30 ! 
cos(x3) + + faddd %f6,qq2,%f6 + fmuld %f0,pp2,%f4 + + faddd %f14,qq2,%f14 + fmuld %f8,pp2,%f12 + + fmuld %f16,%f22,%f22 + faddd %f20,qq1,%f20 + + faddd %f30,qq2,%f30 + fmuld %f24,pp2,%f28 + + fmuld %f0,%f6,%f6 + faddd %f4,pp1,%f4 + + fmuld %f8,%f14,%f14 + faddd %f12,pp1,%f12 + + faddd %f22,pp1,%f22 + fmuld %f16,%f20,%f20 + add %l6,%g1,%l6 + + fmuld %f24,%f30,%f30 + faddd %f28,pp1,%f28 + + faddd %f6,qq1,%f6 + fmuld %f0,%f4,%f4 + add %l4,%g1,%l4 + + faddd %f14,qq1,%f14 + fmuld %f8,%f12,%f12 + add %l5,%g1,%l5 + + fmuld %f16,%f22,%f22 + + faddd %f30,qq1,%f30 + fmuld %f24,%f28,%f28 + add %l7,%g1,%l7 + + fmuld %f2,%f4,%f4 + + fmuld %f10,%f12,%f12 + + fmuld %f18,%f22,%f22 + ldd [%l6+8],%f16 + + fmuld %f26,%f28,%f28 + + fmuld %f0,%f6,%f6 + faddd %f4,%f32,%f4 + ldd [%l4+16],%f0 + + fmuld %f8,%f14,%f14 + faddd %f12,%f34,%f12 + ldd [%l5+16],%f8 + + fmuld %f16,%f20,%f20 + faddd %f36,%f22,%f22 + + fmuld %f24,%f30,%f30 + faddd %f28,%f38,%f28 + ldd [%l7+16],%f24 + + fmuld %f0,%f6,%f6 + faddd %f4,%f2,%f4 + ldd [%l4+8],%f32 + + fmuld %f8,%f14,%f14 + faddd %f12,%f10,%f12 + ldd [%l5+8],%f34 + + faddd %f18,%f22,%f22 + ldd [%l6+16],%f36 + + fmuld %f24,%f30,%f30 + faddd %f28,%f26,%f28 + ldd [%l7+8],%f38 + + fmuld %f32,%f4,%f4 + + fmuld %f34,%f12,%f12 + + fmuld %f36,%f22,%f22 + + fmuld %f38,%f28,%f28 + + fsubd %f6,%f4,%f6 + + fsubd %f14,%f12,%f14 + + faddd %f22,%f20,%f22 + + fsubd %f30,%f28,%f30 + + faddd %f6,%f0,%f6 + + faddd %f14,%f8,%f14 + + faddd %f22,%f16,%f22 + + faddd %f30,%f24,%f30 + mov %l0,%l4 + + fnegd %f6,%f4 + lda [%i1]%asi,%l0 ! preload next argument + + fnegd %f14,%f12 + lda [%i1]%asi,%f0 + + fnegd %f22,%f20 + lda [%i1+4]%asi,%f3 + + fnegd %f30,%f28 + andn %l0,%i5,%l0 + add %i1,%i2,%i1 + + andcc %l4,2,%g0 + fmovdnz %icc,%f4,%f6 + st %f6,[%o0] + + andcc %l1,2,%g0 + fmovdnz %icc,%f12,%f14 + st %f14,[%o1] + + andcc %l2,2,%g0 + fmovdnz %icc,%f20,%f22 + st %f22,[%o2] + + andcc %l3,2,%g0 + fmovdnz %icc,%f28,%f30 + st %f30,[%o3] + + addcc %i0,-1,%i0 + bg,pt %icc,.loop0 +! 
delay slot + st %f7,[%o0+4] + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case3: + fmuld %f16,pp3,%f22 ! sin(x2) + + fmuld %f24,pp3,%f30 ! sin(x3) + + fmuld %f0,qq3,%f6 ! cos(x0) + + fmuld %f8,qq3,%f14 ! cos(x1) + + faddd %f22,pp2,%f22 + fmuld %f16,qq2,%f20 + + faddd %f30,pp2,%f30 + fmuld %f24,qq2,%f28 + + faddd %f6,qq2,%f6 + fmuld %f0,pp2,%f4 + + faddd %f14,qq2,%f14 + fmuld %f8,pp2,%f12 + + fmuld %f16,%f22,%f22 + faddd %f20,qq1,%f20 + + fmuld %f24,%f30,%f30 + faddd %f28,qq1,%f28 + + fmuld %f0,%f6,%f6 + faddd %f4,pp1,%f4 + + fmuld %f8,%f14,%f14 + faddd %f12,pp1,%f12 + + faddd %f22,pp1,%f22 + fmuld %f16,%f20,%f20 + add %l6,%g1,%l6 + + faddd %f30,pp1,%f30 + fmuld %f24,%f28,%f28 + add %l7,%g1,%l7 + + faddd %f6,qq1,%f6 + fmuld %f0,%f4,%f4 + add %l4,%g1,%l4 + + faddd %f14,qq1,%f14 + fmuld %f8,%f12,%f12 + add %l5,%g1,%l5 + + fmuld %f16,%f22,%f22 + + fmuld %f24,%f30,%f30 + + fmuld %f2,%f4,%f4 + + fmuld %f10,%f12,%f12 + + fmuld %f18,%f22,%f22 + ldd [%l6+8],%f16 + + fmuld %f26,%f30,%f30 + ldd [%l7+8],%f24 + + fmuld %f0,%f6,%f6 + faddd %f4,%f32,%f4 + ldd [%l4+16],%f0 + + fmuld %f8,%f14,%f14 + faddd %f12,%f34,%f12 + ldd [%l5+16],%f8 + + fmuld %f16,%f20,%f20 + faddd %f36,%f22,%f22 + + fmuld %f24,%f28,%f28 + faddd %f38,%f30,%f30 + + fmuld %f0,%f6,%f6 + faddd %f4,%f2,%f4 + ldd [%l4+8],%f32 + + fmuld %f8,%f14,%f14 + faddd %f12,%f10,%f12 + ldd [%l5+8],%f34 + + faddd %f18,%f22,%f22 + ldd [%l6+16],%f36 + + faddd %f26,%f30,%f30 + ldd [%l7+16],%f38 + + fmuld %f32,%f4,%f4 + + fmuld %f34,%f12,%f12 + + fmuld %f36,%f22,%f22 + + fmuld %f38,%f30,%f30 + + fsubd %f6,%f4,%f6 + + fsubd %f14,%f12,%f14 + + faddd %f22,%f20,%f22 + + faddd %f30,%f28,%f30 + + faddd %f6,%f0,%f6 + + faddd %f14,%f8,%f14 + + faddd %f22,%f16,%f22 + + faddd %f30,%f24,%f30 + mov %l0,%l4 + + fnegd %f6,%f4 + lda [%i1]%asi,%l0 ! 
preload next argument + + fnegd %f14,%f12 + lda [%i1]%asi,%f0 + + fnegd %f22,%f20 + lda [%i1+4]%asi,%f3 + + fnegd %f30,%f28 + andn %l0,%i5,%l0 + add %i1,%i2,%i1 + + andcc %l4,2,%g0 + fmovdnz %icc,%f4,%f6 + st %f6,[%o0] + + andcc %l1,2,%g0 + fmovdnz %icc,%f12,%f14 + st %f14,[%o1] + + andcc %l2,2,%g0 + fmovdnz %icc,%f20,%f22 + st %f22,[%o2] + + andcc %l3,2,%g0 + fmovdnz %icc,%f28,%f30 + st %f30,[%o3] + + addcc %i0,-1,%i0 + bg,pt %icc,.loop0 +! delay slot + st %f7,[%o0+4] + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case4: + fmuld %f18,%f18,%f16 + andcc %l2,1,%g0 + bz,pn %icc,.case6 +! delay slot + fxor %f22,%f36,%f36 + + fmuld %f26,%f26,%f24 + andcc %l3,1,%g0 + bz,pn %icc,.case5 +! delay slot + fxor %f30,%f38,%f38 + + fmuld %f8,pp3,%f14 ! sin(x1) + + fmuld %f0,qq3,%f6 ! cos(x0) + + faddd %f14,pp2,%f14 + fmuld %f8,qq2,%f12 + + fmuld %f16,qq3,%f22 ! cos(x2) + + fmuld %f24,qq3,%f30 ! cos(x3) + + faddd %f6,qq2,%f6 + fmuld %f0,pp2,%f4 + + fmuld %f8,%f14,%f14 + faddd %f12,qq1,%f12 + + faddd %f22,qq2,%f22 + fmuld %f16,pp2,%f20 + + faddd %f30,qq2,%f30 + fmuld %f24,pp2,%f28 + + fmuld %f0,%f6,%f6 + faddd %f4,pp1,%f4 + + faddd %f14,pp1,%f14 + fmuld %f8,%f12,%f12 + add %l5,%g1,%l5 + + fmuld %f16,%f22,%f22 + faddd %f20,pp1,%f20 + + fmuld %f24,%f30,%f30 + faddd %f28,pp1,%f28 + + faddd %f6,qq1,%f6 + fmuld %f0,%f4,%f4 + add %l4,%g1,%l4 + + fmuld %f8,%f14,%f14 + + faddd %f22,qq1,%f22 + fmuld %f16,%f20,%f20 + add %l6,%g1,%l6 + + faddd %f30,qq1,%f30 + fmuld %f24,%f28,%f28 + add %l7,%g1,%l7 + + fmuld %f2,%f4,%f4 + + fmuld %f10,%f14,%f14 + ldd [%l5+8],%f8 + + fmuld %f18,%f20,%f20 + + fmuld %f26,%f28,%f28 + + fmuld %f0,%f6,%f6 + faddd %f4,%f32,%f4 + ldd [%l4+16],%f0 + + fmuld %f8,%f12,%f12 + faddd %f34,%f14,%f14 + + fmuld %f16,%f22,%f22 + faddd %f20,%f36,%f20 + ldd [%l6+16],%f16 + + fmuld %f24,%f30,%f30 + faddd %f28,%f38,%f28 + ldd [%l7+16],%f24 + + fmuld %f0,%f6,%f6 + faddd %f4,%f2,%f4 + ldd [%l4+8],%f32 + + faddd %f10,%f14,%f14 + ldd [%l5+16],%f34 + + fmuld %f16,%f22,%f22 + 
faddd %f20,%f18,%f20 + ldd [%l6+8],%f36 + + fmuld %f24,%f30,%f30 + faddd %f28,%f26,%f28 + ldd [%l7+8],%f38 + + fmuld %f32,%f4,%f4 + + fmuld %f34,%f14,%f14 + + fmuld %f36,%f20,%f20 + + fmuld %f38,%f28,%f28 + + fsubd %f6,%f4,%f6 + + faddd %f14,%f12,%f14 + + fsubd %f22,%f20,%f22 + + fsubd %f30,%f28,%f30 + + faddd %f6,%f0,%f6 + + faddd %f14,%f8,%f14 + + faddd %f22,%f16,%f22 + + faddd %f30,%f24,%f30 + mov %l0,%l4 + + fnegd %f6,%f4 + lda [%i1]%asi,%l0 ! preload next argument + + fnegd %f14,%f12 + lda [%i1]%asi,%f0 + + fnegd %f22,%f20 + lda [%i1+4]%asi,%f3 + + fnegd %f30,%f28 + andn %l0,%i5,%l0 + add %i1,%i2,%i1 + + andcc %l4,2,%g0 + fmovdnz %icc,%f4,%f6 + st %f6,[%o0] + + andcc %l1,2,%g0 + fmovdnz %icc,%f12,%f14 + st %f14,[%o1] + + andcc %l2,2,%g0 + fmovdnz %icc,%f20,%f22 + st %f22,[%o2] + + andcc %l3,2,%g0 + fmovdnz %icc,%f28,%f30 + st %f30,[%o3] + + addcc %i0,-1,%i0 + bg,pt %icc,.loop0 +! delay slot + st %f7,[%o0+4] + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case5: + fmuld %f8,pp3,%f14 ! sin(x1) + + fmuld %f24,pp3,%f30 ! sin(x3) + + fmuld %f0,qq3,%f6 ! cos(x0) + + faddd %f14,pp2,%f14 + fmuld %f8,qq2,%f12 + + fmuld %f16,qq3,%f22 ! 
cos(x2) + + faddd %f30,pp2,%f30 + fmuld %f24,qq2,%f28 + + faddd %f6,qq2,%f6 + fmuld %f0,pp2,%f4 + + fmuld %f8,%f14,%f14 + faddd %f12,qq1,%f12 + + faddd %f22,qq2,%f22 + fmuld %f16,pp2,%f20 + + fmuld %f24,%f30,%f30 + faddd %f28,qq1,%f28 + + fmuld %f0,%f6,%f6 + faddd %f4,pp1,%f4 + + faddd %f14,pp1,%f14 + fmuld %f8,%f12,%f12 + add %l5,%g1,%l5 + + fmuld %f16,%f22,%f22 + faddd %f20,pp1,%f20 + + faddd %f30,pp1,%f30 + fmuld %f24,%f28,%f28 + add %l7,%g1,%l7 + + faddd %f6,qq1,%f6 + fmuld %f0,%f4,%f4 + add %l4,%g1,%l4 + + fmuld %f8,%f14,%f14 + + faddd %f22,qq1,%f22 + fmuld %f16,%f20,%f20 + add %l6,%g1,%l6 + + fmuld %f24,%f30,%f30 + + fmuld %f2,%f4,%f4 + + fmuld %f10,%f14,%f14 + ldd [%l5+8],%f8 + + fmuld %f18,%f20,%f20 + + fmuld %f26,%f30,%f30 + ldd [%l7+8],%f24 + + fmuld %f0,%f6,%f6 + faddd %f4,%f32,%f4 + ldd [%l4+16],%f0 + + fmuld %f8,%f12,%f12 + faddd %f34,%f14,%f14 + + fmuld %f16,%f22,%f22 + faddd %f20,%f36,%f20 + ldd [%l6+16],%f16 + + fmuld %f24,%f28,%f28 + faddd %f38,%f30,%f30 + + fmuld %f0,%f6,%f6 + faddd %f4,%f2,%f4 + ldd [%l4+8],%f32 + + faddd %f10,%f14,%f14 + ldd [%l5+16],%f34 + + fmuld %f16,%f22,%f22 + faddd %f20,%f18,%f20 + ldd [%l6+8],%f36 + + faddd %f26,%f30,%f30 + ldd [%l7+16],%f38 + + fmuld %f32,%f4,%f4 + + fmuld %f34,%f14,%f14 + + fmuld %f36,%f20,%f20 + + fmuld %f38,%f30,%f30 + + fsubd %f6,%f4,%f6 + + faddd %f14,%f12,%f14 + + fsubd %f22,%f20,%f22 + + faddd %f30,%f28,%f30 + + faddd %f6,%f0,%f6 + + faddd %f14,%f8,%f14 + + faddd %f22,%f16,%f22 + + faddd %f30,%f24,%f30 + mov %l0,%l4 + + fnegd %f6,%f4 + lda [%i1]%asi,%l0 ! preload next argument + + fnegd %f14,%f12 + lda [%i1]%asi,%f0 + + fnegd %f22,%f20 + lda [%i1+4]%asi,%f3 + + fnegd %f30,%f28 + andn %l0,%i5,%l0 + add %i1,%i2,%i1 + + andcc %l4,2,%g0 + fmovdnz %icc,%f4,%f6 + st %f6,[%o0] + + andcc %l1,2,%g0 + fmovdnz %icc,%f12,%f14 + st %f14,[%o1] + + andcc %l2,2,%g0 + fmovdnz %icc,%f20,%f22 + st %f22,[%o2] + + andcc %l3,2,%g0 + fmovdnz %icc,%f28,%f30 + st %f30,[%o3] + + addcc %i0,-1,%i0 + bg,pt %icc,.loop0 +! 
delay slot + st %f7,[%o0+4] + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case6: + fmuld %f26,%f26,%f24 + andcc %l3,1,%g0 + bz,pn %icc,.case7 +! delay slot + fxor %f30,%f38,%f38 + + fmuld %f8,pp3,%f14 ! sin(x1) + + fmuld %f16,pp3,%f22 ! sin(x2) + + fmuld %f0,qq3,%f6 ! cos(x0) + + faddd %f14,pp2,%f14 + fmuld %f8,qq2,%f12 + + faddd %f22,pp2,%f22 + fmuld %f16,qq2,%f20 + + fmuld %f24,qq3,%f30 ! cos(x3) + + faddd %f6,qq2,%f6 + fmuld %f0,pp2,%f4 + + fmuld %f8,%f14,%f14 + faddd %f12,qq1,%f12 + + fmuld %f16,%f22,%f22 + faddd %f20,qq1,%f20 + + faddd %f30,qq2,%f30 + fmuld %f24,pp2,%f28 + + fmuld %f0,%f6,%f6 + faddd %f4,pp1,%f4 + + faddd %f14,pp1,%f14 + fmuld %f8,%f12,%f12 + add %l5,%g1,%l5 + + faddd %f22,pp1,%f22 + fmuld %f16,%f20,%f20 + add %l6,%g1,%l6 + + fmuld %f24,%f30,%f30 + faddd %f28,pp1,%f28 + + faddd %f6,qq1,%f6 + fmuld %f0,%f4,%f4 + add %l4,%g1,%l4 + + fmuld %f8,%f14,%f14 + + fmuld %f16,%f22,%f22 + + faddd %f30,qq1,%f30 + fmuld %f24,%f28,%f28 + add %l7,%g1,%l7 + + fmuld %f2,%f4,%f4 + + fmuld %f10,%f14,%f14 + ldd [%l5+8],%f8 + + fmuld %f18,%f22,%f22 + ldd [%l6+8],%f16 + + fmuld %f26,%f28,%f28 + + fmuld %f0,%f6,%f6 + faddd %f4,%f32,%f4 + ldd [%l4+16],%f0 + + fmuld %f8,%f12,%f12 + faddd %f34,%f14,%f14 + + fmuld %f16,%f20,%f20 + faddd %f36,%f22,%f22 + + fmuld %f24,%f30,%f30 + faddd %f28,%f38,%f28 + ldd [%l7+16],%f24 + + fmuld %f0,%f6,%f6 + faddd %f4,%f2,%f4 + ldd [%l4+8],%f32 + + faddd %f10,%f14,%f14 + ldd [%l5+16],%f34 + + faddd %f18,%f22,%f22 + ldd [%l6+16],%f36 + + fmuld %f24,%f30,%f30 + faddd %f28,%f26,%f28 + ldd [%l7+8],%f38 + + fmuld %f32,%f4,%f4 + + fmuld %f34,%f14,%f14 + + fmuld %f36,%f22,%f22 + + fmuld %f38,%f28,%f28 + + fsubd %f6,%f4,%f6 + + faddd %f14,%f12,%f14 + + faddd %f22,%f20,%f22 + + fsubd %f30,%f28,%f30 + + faddd %f6,%f0,%f6 + + faddd %f14,%f8,%f14 + + faddd %f22,%f16,%f22 + + faddd %f30,%f24,%f30 + mov %l0,%l4 + + fnegd %f6,%f4 + lda [%i1]%asi,%l0 ! 
preload next argument + + fnegd %f14,%f12 + lda [%i1]%asi,%f0 + + fnegd %f22,%f20 + lda [%i1+4]%asi,%f3 + + fnegd %f30,%f28 + andn %l0,%i5,%l0 + add %i1,%i2,%i1 + + andcc %l4,2,%g0 + fmovdnz %icc,%f4,%f6 + st %f6,[%o0] + + andcc %l1,2,%g0 + fmovdnz %icc,%f12,%f14 + st %f14,[%o1] + + andcc %l2,2,%g0 + fmovdnz %icc,%f20,%f22 + st %f22,[%o2] + + andcc %l3,2,%g0 + fmovdnz %icc,%f28,%f30 + st %f30,[%o3] + + addcc %i0,-1,%i0 + bg,pt %icc,.loop0 +! delay slot + st %f7,[%o0+4] + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case7: + fmuld %f8,pp3,%f14 ! sin(x1) + + fmuld %f16,pp3,%f22 ! sin(x2) + + fmuld %f24,pp3,%f30 ! sin(x3) + + fmuld %f0,qq3,%f6 ! cos(x0) + + faddd %f14,pp2,%f14 + fmuld %f8,qq2,%f12 + + faddd %f22,pp2,%f22 + fmuld %f16,qq2,%f20 + + faddd %f30,pp2,%f30 + fmuld %f24,qq2,%f28 + + faddd %f6,qq2,%f6 + fmuld %f0,pp2,%f4 + + fmuld %f8,%f14,%f14 + faddd %f12,qq1,%f12 + + fmuld %f16,%f22,%f22 + faddd %f20,qq1,%f20 + + fmuld %f24,%f30,%f30 + faddd %f28,qq1,%f28 + + fmuld %f0,%f6,%f6 + faddd %f4,pp1,%f4 + + faddd %f14,pp1,%f14 + fmuld %f8,%f12,%f12 + add %l5,%g1,%l5 + + faddd %f22,pp1,%f22 + fmuld %f16,%f20,%f20 + add %l6,%g1,%l6 + + faddd %f30,pp1,%f30 + fmuld %f24,%f28,%f28 + add %l7,%g1,%l7 + + faddd %f6,qq1,%f6 + fmuld %f0,%f4,%f4 + add %l4,%g1,%l4 + + fmuld %f8,%f14,%f14 + + fmuld %f16,%f22,%f22 + + fmuld %f24,%f30,%f30 + + fmuld %f2,%f4,%f4 + + fmuld %f10,%f14,%f14 + ldd [%l5+8],%f8 + + fmuld %f18,%f22,%f22 + ldd [%l6+8],%f16 + + fmuld %f26,%f30,%f30 + ldd [%l7+8],%f24 + + fmuld %f0,%f6,%f6 + faddd %f4,%f32,%f4 + ldd [%l4+16],%f0 + + fmuld %f8,%f12,%f12 + faddd %f34,%f14,%f14 + + fmuld %f16,%f20,%f20 + faddd %f36,%f22,%f22 + + fmuld %f24,%f28,%f28 + faddd %f38,%f30,%f30 + + fmuld %f0,%f6,%f6 + faddd %f4,%f2,%f4 + ldd [%l4+8],%f32 + + faddd %f10,%f14,%f14 + ldd [%l5+16],%f34 + + faddd %f18,%f22,%f22 + ldd [%l6+16],%f36 + + faddd %f26,%f30,%f30 + ldd [%l7+16],%f38 + + fmuld %f32,%f4,%f4 + + fmuld %f34,%f14,%f14 + + fmuld %f36,%f22,%f22 + + fmuld 
%f38,%f30,%f30 + + fsubd %f6,%f4,%f6 + + faddd %f14,%f12,%f14 + + faddd %f22,%f20,%f22 + + faddd %f30,%f28,%f30 + + faddd %f6,%f0,%f6 + + faddd %f14,%f8,%f14 + + faddd %f22,%f16,%f22 + + faddd %f30,%f24,%f30 + mov %l0,%l4 + + fnegd %f6,%f4 + lda [%i1]%asi,%l0 ! preload next argument + + fnegd %f14,%f12 + lda [%i1]%asi,%f0 + + fnegd %f22,%f20 + lda [%i1+4]%asi,%f3 + + fnegd %f30,%f28 + andn %l0,%i5,%l0 + add %i1,%i2,%i1 + + andcc %l4,2,%g0 + fmovdnz %icc,%f4,%f6 + st %f6,[%o0] + + andcc %l1,2,%g0 + fmovdnz %icc,%f12,%f14 + st %f14,[%o1] + + andcc %l2,2,%g0 + fmovdnz %icc,%f20,%f22 + st %f22,[%o2] + + andcc %l3,2,%g0 + fmovdnz %icc,%f28,%f30 + st %f30,[%o3] + + addcc %i0,-1,%i0 + bg,pt %icc,.loop0 +! delay slot + st %f7,[%o0+4] + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case8: + fmuld %f10,%f10,%f8 + andcc %l1,1,%g0 + bz,pn %icc,.case12 +! delay slot + fxor %f14,%f34,%f34 + + fmuld %f18,%f18,%f16 + andcc %l2,1,%g0 + bz,pn %icc,.case10 +! delay slot + fxor %f22,%f36,%f36 + + fmuld %f26,%f26,%f24 + andcc %l3,1,%g0 + bz,pn %icc,.case9 +! delay slot + fxor %f30,%f38,%f38 + + fmuld %f0,pp3,%f6 ! sin(x0) + + faddd %f6,pp2,%f6 + fmuld %f0,qq2,%f4 + + fmuld %f8,qq3,%f14 ! cos(x1) + + fmuld %f16,qq3,%f22 ! cos(x2) + + fmuld %f24,qq3,%f30 ! 
cos(x3) + + fmuld %f0,%f6,%f6 + faddd %f4,qq1,%f4 + + faddd %f14,qq2,%f14 + fmuld %f8,pp2,%f12 + + faddd %f22,qq2,%f22 + fmuld %f16,pp2,%f20 + + faddd %f30,qq2,%f30 + fmuld %f24,pp2,%f28 + + faddd %f6,pp1,%f6 + fmuld %f0,%f4,%f4 + add %l4,%g1,%l4 + + fmuld %f8,%f14,%f14 + faddd %f12,pp1,%f12 + + fmuld %f16,%f22,%f22 + faddd %f20,pp1,%f20 + + fmuld %f24,%f30,%f30 + faddd %f28,pp1,%f28 + + fmuld %f0,%f6,%f6 + + faddd %f14,qq1,%f14 + fmuld %f8,%f12,%f12 + add %l5,%g1,%l5 + + faddd %f22,qq1,%f22 + fmuld %f16,%f20,%f20 + add %l6,%g1,%l6 + + faddd %f30,qq1,%f30 + fmuld %f24,%f28,%f28 + add %l7,%g1,%l7 + + fmuld %f2,%f6,%f6 + ldd [%l4+8],%f0 + + fmuld %f10,%f12,%f12 + + fmuld %f18,%f20,%f20 + + fmuld %f26,%f28,%f28 + + fmuld %f0,%f4,%f4 + faddd %f32,%f6,%f6 + + fmuld %f8,%f14,%f14 + faddd %f12,%f34,%f12 + ldd [%l5+16],%f8 + + fmuld %f16,%f22,%f22 + faddd %f20,%f36,%f20 + ldd [%l6+16],%f16 + + fmuld %f24,%f30,%f30 + faddd %f28,%f38,%f28 + ldd [%l7+16],%f24 + + faddd %f2,%f6,%f6 + ldd [%l4+16],%f32 + + fmuld %f8,%f14,%f14 + faddd %f12,%f10,%f12 + ldd [%l5+8],%f34 + + fmuld %f16,%f22,%f22 + faddd %f20,%f18,%f20 + ldd [%l6+8],%f36 + + fmuld %f24,%f30,%f30 + faddd %f28,%f26,%f28 + ldd [%l7+8],%f38 + + fmuld %f32,%f6,%f6 + + fmuld %f34,%f12,%f12 + + fmuld %f36,%f20,%f20 + + fmuld %f38,%f28,%f28 + + faddd %f6,%f4,%f6 + + fsubd %f14,%f12,%f14 + + fsubd %f22,%f20,%f22 + + fsubd %f30,%f28,%f30 + + faddd %f6,%f0,%f6 + + faddd %f14,%f8,%f14 + + faddd %f22,%f16,%f22 + + faddd %f30,%f24,%f30 + mov %l0,%l4 + + fnegd %f6,%f4 + lda [%i1]%asi,%l0 ! preload next argument + + fnegd %f14,%f12 + lda [%i1]%asi,%f0 + + fnegd %f22,%f20 + lda [%i1+4]%asi,%f3 + + fnegd %f30,%f28 + andn %l0,%i5,%l0 + add %i1,%i2,%i1 + + andcc %l4,2,%g0 + fmovdnz %icc,%f4,%f6 + st %f6,[%o0] + + andcc %l1,2,%g0 + fmovdnz %icc,%f12,%f14 + st %f14,[%o1] + + andcc %l2,2,%g0 + fmovdnz %icc,%f20,%f22 + st %f22,[%o2] + + andcc %l3,2,%g0 + fmovdnz %icc,%f28,%f30 + st %f30,[%o3] + + addcc %i0,-1,%i0 + bg,pt %icc,.loop0 +! 
delay slot + st %f7,[%o0+4] + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case9: + fmuld %f0,pp3,%f6 ! sin(x0) + + fmuld %f24,pp3,%f30 ! sin(x3) + + faddd %f6,pp2,%f6 + fmuld %f0,qq2,%f4 + + fmuld %f8,qq3,%f14 ! cos(x1) + + fmuld %f16,qq3,%f22 ! cos(x2) + + faddd %f30,pp2,%f30 + fmuld %f24,qq2,%f28 + + fmuld %f0,%f6,%f6 + faddd %f4,qq1,%f4 + + faddd %f14,qq2,%f14 + fmuld %f8,pp2,%f12 + + faddd %f22,qq2,%f22 + fmuld %f16,pp2,%f20 + + fmuld %f24,%f30,%f30 + faddd %f28,qq1,%f28 + + faddd %f6,pp1,%f6 + fmuld %f0,%f4,%f4 + add %l4,%g1,%l4 + + fmuld %f8,%f14,%f14 + faddd %f12,pp1,%f12 + + fmuld %f16,%f22,%f22 + faddd %f20,pp1,%f20 + + faddd %f30,pp1,%f30 + fmuld %f24,%f28,%f28 + add %l7,%g1,%l7 + + fmuld %f0,%f6,%f6 + + faddd %f14,qq1,%f14 + fmuld %f8,%f12,%f12 + add %l5,%g1,%l5 + + faddd %f22,qq1,%f22 + fmuld %f16,%f20,%f20 + add %l6,%g1,%l6 + + fmuld %f24,%f30,%f30 + + fmuld %f2,%f6,%f6 + ldd [%l4+8],%f0 + + fmuld %f10,%f12,%f12 + + fmuld %f18,%f20,%f20 + + fmuld %f26,%f30,%f30 + ldd [%l7+8],%f24 + + fmuld %f0,%f4,%f4 + faddd %f32,%f6,%f6 + + fmuld %f8,%f14,%f14 + faddd %f12,%f34,%f12 + ldd [%l5+16],%f8 + + fmuld %f16,%f22,%f22 + faddd %f20,%f36,%f20 + ldd [%l6+16],%f16 + + fmuld %f24,%f28,%f28 + faddd %f38,%f30,%f30 + + faddd %f2,%f6,%f6 + ldd [%l4+16],%f32 + + fmuld %f8,%f14,%f14 + faddd %f12,%f10,%f12 + ldd [%l5+8],%f34 + + fmuld %f16,%f22,%f22 + faddd %f20,%f18,%f20 + ldd [%l6+8],%f36 + + faddd %f26,%f30,%f30 + ldd [%l7+16],%f38 + + fmuld %f32,%f6,%f6 + + fmuld %f34,%f12,%f12 + + fmuld %f36,%f20,%f20 + + fmuld %f38,%f30,%f30 + + faddd %f6,%f4,%f6 + + fsubd %f14,%f12,%f14 + + fsubd %f22,%f20,%f22 + + faddd %f30,%f28,%f30 + + faddd %f6,%f0,%f6 + + faddd %f14,%f8,%f14 + + faddd %f22,%f16,%f22 + + faddd %f30,%f24,%f30 + mov %l0,%l4 + + fnegd %f6,%f4 + lda [%i1]%asi,%l0 ! 
preload next argument + + fnegd %f14,%f12 + lda [%i1]%asi,%f0 + + fnegd %f22,%f20 + lda [%i1+4]%asi,%f3 + + fnegd %f30,%f28 + andn %l0,%i5,%l0 + add %i1,%i2,%i1 + + andcc %l4,2,%g0 + fmovdnz %icc,%f4,%f6 + st %f6,[%o0] + + andcc %l1,2,%g0 + fmovdnz %icc,%f12,%f14 + st %f14,[%o1] + + andcc %l2,2,%g0 + fmovdnz %icc,%f20,%f22 + st %f22,[%o2] + + andcc %l3,2,%g0 + fmovdnz %icc,%f28,%f30 + st %f30,[%o3] + + addcc %i0,-1,%i0 + bg,pt %icc,.loop0 +! delay slot + st %f7,[%o0+4] + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case10: + fmuld %f26,%f26,%f24 + andcc %l3,1,%g0 + bz,pn %icc,.case11 +! delay slot + fxor %f30,%f38,%f38 + + fmuld %f0,pp3,%f6 ! sin(x0) + + fmuld %f16,pp3,%f22 ! sin(x2) + + faddd %f6,pp2,%f6 + fmuld %f0,qq2,%f4 + + fmuld %f8,qq3,%f14 ! cos(x1) + + faddd %f22,pp2,%f22 + fmuld %f16,qq2,%f20 + + fmuld %f24,qq3,%f30 ! cos(x3) + + fmuld %f0,%f6,%f6 + faddd %f4,qq1,%f4 + + faddd %f14,qq2,%f14 + fmuld %f8,pp2,%f12 + + fmuld %f16,%f22,%f22 + faddd %f20,qq1,%f20 + + faddd %f30,qq2,%f30 + fmuld %f24,pp2,%f28 + + faddd %f6,pp1,%f6 + fmuld %f0,%f4,%f4 + add %l4,%g1,%l4 + + fmuld %f8,%f14,%f14 + faddd %f12,pp1,%f12 + + faddd %f22,pp1,%f22 + fmuld %f16,%f20,%f20 + add %l6,%g1,%l6 + + fmuld %f24,%f30,%f30 + faddd %f28,pp1,%f28 + + fmuld %f0,%f6,%f6 + + faddd %f14,qq1,%f14 + fmuld %f8,%f12,%f12 + add %l5,%g1,%l5 + + fmuld %f16,%f22,%f22 + + faddd %f30,qq1,%f30 + fmuld %f24,%f28,%f28 + add %l7,%g1,%l7 + + fmuld %f2,%f6,%f6 + ldd [%l4+8],%f0 + + fmuld %f10,%f12,%f12 + + fmuld %f18,%f22,%f22 + ldd [%l6+8],%f16 + + fmuld %f26,%f28,%f28 + + fmuld %f0,%f4,%f4 + faddd %f32,%f6,%f6 + + fmuld %f8,%f14,%f14 + faddd %f12,%f34,%f12 + ldd [%l5+16],%f8 + + fmuld %f16,%f20,%f20 + faddd %f36,%f22,%f22 + + fmuld %f24,%f30,%f30 + faddd %f28,%f38,%f28 + ldd [%l7+16],%f24 + + faddd %f2,%f6,%f6 + ldd [%l4+16],%f32 + + fmuld %f8,%f14,%f14 + faddd %f12,%f10,%f12 + ldd [%l5+8],%f34 + + faddd %f18,%f22,%f22 + ldd [%l6+16],%f36 + + fmuld %f24,%f30,%f30 + faddd %f28,%f26,%f28 + ldd 
[%l7+8],%f38 + + fmuld %f32,%f6,%f6 + + fmuld %f34,%f12,%f12 + + fmuld %f36,%f22,%f22 + + fmuld %f38,%f28,%f28 + + faddd %f6,%f4,%f6 + + fsubd %f14,%f12,%f14 + + faddd %f22,%f20,%f22 + + fsubd %f30,%f28,%f30 + + faddd %f6,%f0,%f6 + + faddd %f14,%f8,%f14 + + faddd %f22,%f16,%f22 + + faddd %f30,%f24,%f30 + mov %l0,%l4 + + fnegd %f6,%f4 + lda [%i1]%asi,%l0 ! preload next argument + + fnegd %f14,%f12 + lda [%i1]%asi,%f0 + + fnegd %f22,%f20 + lda [%i1+4]%asi,%f3 + + fnegd %f30,%f28 + andn %l0,%i5,%l0 + add %i1,%i2,%i1 + + andcc %l4,2,%g0 + fmovdnz %icc,%f4,%f6 + st %f6,[%o0] + + andcc %l1,2,%g0 + fmovdnz %icc,%f12,%f14 + st %f14,[%o1] + + andcc %l2,2,%g0 + fmovdnz %icc,%f20,%f22 + st %f22,[%o2] + + andcc %l3,2,%g0 + fmovdnz %icc,%f28,%f30 + st %f30,[%o3] + + addcc %i0,-1,%i0 + bg,pt %icc,.loop0 +! delay slot + st %f7,[%o0+4] + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case11: + fmuld %f0,pp3,%f6 ! sin(x0) + + fmuld %f16,pp3,%f22 ! sin(x2) + + fmuld %f24,pp3,%f30 ! sin(x3) + + faddd %f6,pp2,%f6 + fmuld %f0,qq2,%f4 + + fmuld %f8,qq3,%f14 ! 
cos(x1) + + faddd %f22,pp2,%f22 + fmuld %f16,qq2,%f20 + + faddd %f30,pp2,%f30 + fmuld %f24,qq2,%f28 + + fmuld %f0,%f6,%f6 + faddd %f4,qq1,%f4 + + faddd %f14,qq2,%f14 + fmuld %f8,pp2,%f12 + + fmuld %f16,%f22,%f22 + faddd %f20,qq1,%f20 + + fmuld %f24,%f30,%f30 + faddd %f28,qq1,%f28 + + faddd %f6,pp1,%f6 + fmuld %f0,%f4,%f4 + add %l4,%g1,%l4 + + fmuld %f8,%f14,%f14 + faddd %f12,pp1,%f12 + + faddd %f22,pp1,%f22 + fmuld %f16,%f20,%f20 + add %l6,%g1,%l6 + + faddd %f30,pp1,%f30 + fmuld %f24,%f28,%f28 + add %l7,%g1,%l7 + + fmuld %f0,%f6,%f6 + + faddd %f14,qq1,%f14 + fmuld %f8,%f12,%f12 + add %l5,%g1,%l5 + + fmuld %f16,%f22,%f22 + + fmuld %f24,%f30,%f30 + + fmuld %f2,%f6,%f6 + ldd [%l4+8],%f0 + + fmuld %f10,%f12,%f12 + + fmuld %f18,%f22,%f22 + ldd [%l6+8],%f16 + + fmuld %f26,%f30,%f30 + ldd [%l7+8],%f24 + + fmuld %f0,%f4,%f4 + faddd %f32,%f6,%f6 + + fmuld %f8,%f14,%f14 + faddd %f12,%f34,%f12 + ldd [%l5+16],%f8 + + fmuld %f16,%f20,%f20 + faddd %f36,%f22,%f22 + + fmuld %f24,%f28,%f28 + faddd %f38,%f30,%f30 + + faddd %f2,%f6,%f6 + ldd [%l4+16],%f32 + + fmuld %f8,%f14,%f14 + faddd %f12,%f10,%f12 + ldd [%l5+8],%f34 + + faddd %f18,%f22,%f22 + ldd [%l6+16],%f36 + + faddd %f26,%f30,%f30 + ldd [%l7+16],%f38 + + fmuld %f32,%f6,%f6 + + fmuld %f34,%f12,%f12 + + fmuld %f36,%f22,%f22 + + fmuld %f38,%f30,%f30 + + faddd %f6,%f4,%f6 + + fsubd %f14,%f12,%f14 + + faddd %f22,%f20,%f22 + + faddd %f30,%f28,%f30 + + faddd %f6,%f0,%f6 + + faddd %f14,%f8,%f14 + + faddd %f22,%f16,%f22 + + faddd %f30,%f24,%f30 + mov %l0,%l4 + + fnegd %f6,%f4 + lda [%i1]%asi,%l0 ! preload next argument + + fnegd %f14,%f12 + lda [%i1]%asi,%f0 + + fnegd %f22,%f20 + lda [%i1+4]%asi,%f3 + + fnegd %f30,%f28 + andn %l0,%i5,%l0 + add %i1,%i2,%i1 + + andcc %l4,2,%g0 + fmovdnz %icc,%f4,%f6 + st %f6,[%o0] + + andcc %l1,2,%g0 + fmovdnz %icc,%f12,%f14 + st %f14,[%o1] + + andcc %l2,2,%g0 + fmovdnz %icc,%f20,%f22 + st %f22,[%o2] + + andcc %l3,2,%g0 + fmovdnz %icc,%f28,%f30 + st %f30,[%o3] + + addcc %i0,-1,%i0 + bg,pt %icc,.loop0 +! 
delay slot + st %f7,[%o0+4] + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case12: + fmuld %f18,%f18,%f16 + andcc %l2,1,%g0 + bz,pn %icc,.case14 +! delay slot + fxor %f22,%f36,%f36 + + fmuld %f26,%f26,%f24 + andcc %l3,1,%g0 + bz,pn %icc,.case13 +! delay slot + fxor %f30,%f38,%f38 + + fmuld %f0,pp3,%f6 ! sin(x0) + + fmuld %f8,pp3,%f14 ! sin(x1) + + faddd %f6,pp2,%f6 + fmuld %f0,qq2,%f4 + + faddd %f14,pp2,%f14 + fmuld %f8,qq2,%f12 + + fmuld %f16,qq3,%f22 ! cos(x2) + + fmuld %f24,qq3,%f30 ! cos(x3) + + fmuld %f0,%f6,%f6 + faddd %f4,qq1,%f4 + + fmuld %f8,%f14,%f14 + faddd %f12,qq1,%f12 + + faddd %f22,qq2,%f22 + fmuld %f16,pp2,%f20 + + faddd %f30,qq2,%f30 + fmuld %f24,pp2,%f28 + + faddd %f6,pp1,%f6 + fmuld %f0,%f4,%f4 + add %l4,%g1,%l4 + + faddd %f14,pp1,%f14 + fmuld %f8,%f12,%f12 + add %l5,%g1,%l5 + + fmuld %f16,%f22,%f22 + faddd %f20,pp1,%f20 + + fmuld %f24,%f30,%f30 + faddd %f28,pp1,%f28 + + fmuld %f0,%f6,%f6 + + fmuld %f8,%f14,%f14 + + faddd %f22,qq1,%f22 + fmuld %f16,%f20,%f20 + add %l6,%g1,%l6 + + faddd %f30,qq1,%f30 + fmuld %f24,%f28,%f28 + add %l7,%g1,%l7 + + fmuld %f2,%f6,%f6 + ldd [%l4+8],%f0 + + fmuld %f10,%f14,%f14 + ldd [%l5+8],%f8 + + fmuld %f18,%f20,%f20 + + fmuld %f26,%f28,%f28 + + fmuld %f0,%f4,%f4 + faddd %f32,%f6,%f6 + + fmuld %f8,%f12,%f12 + faddd %f34,%f14,%f14 + + fmuld %f16,%f22,%f22 + faddd %f20,%f36,%f20 + ldd [%l6+16],%f16 + + fmuld %f24,%f30,%f30 + faddd %f28,%f38,%f28 + ldd [%l7+16],%f24 + + faddd %f2,%f6,%f6 + ldd [%l4+16],%f32 + + faddd %f10,%f14,%f14 + ldd [%l5+16],%f34 + + fmuld %f16,%f22,%f22 + faddd %f20,%f18,%f20 + ldd [%l6+8],%f36 + + fmuld %f24,%f30,%f30 + faddd %f28,%f26,%f28 + ldd [%l7+8],%f38 + + fmuld %f32,%f6,%f6 + + fmuld %f34,%f14,%f14 + + fmuld %f36,%f20,%f20 + + fmuld %f38,%f28,%f28 + + faddd %f6,%f4,%f6 + + faddd %f14,%f12,%f14 + + fsubd %f22,%f20,%f22 + + fsubd %f30,%f28,%f30 + + faddd %f6,%f0,%f6 + + faddd %f14,%f8,%f14 + + faddd %f22,%f16,%f22 + + faddd %f30,%f24,%f30 + mov %l0,%l4 + + fnegd %f6,%f4 + lda 
[%i1]%asi,%l0 ! preload next argument + + fnegd %f14,%f12 + lda [%i1]%asi,%f0 + + fnegd %f22,%f20 + lda [%i1+4]%asi,%f3 + + fnegd %f30,%f28 + andn %l0,%i5,%l0 + add %i1,%i2,%i1 + + andcc %l4,2,%g0 + fmovdnz %icc,%f4,%f6 + st %f6,[%o0] + + andcc %l1,2,%g0 + fmovdnz %icc,%f12,%f14 + st %f14,[%o1] + + andcc %l2,2,%g0 + fmovdnz %icc,%f20,%f22 + st %f22,[%o2] + + andcc %l3,2,%g0 + fmovdnz %icc,%f28,%f30 + st %f30,[%o3] + + addcc %i0,-1,%i0 + bg,pt %icc,.loop0 +! delay slot + st %f7,[%o0+4] + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case13: + fmuld %f0,pp3,%f6 ! sin(x0) + + fmuld %f8,pp3,%f14 ! sin(x1) + + fmuld %f24,pp3,%f30 ! sin(x3) + + faddd %f6,pp2,%f6 + fmuld %f0,qq2,%f4 + + faddd %f14,pp2,%f14 + fmuld %f8,qq2,%f12 + + fmuld %f16,qq3,%f22 ! cos(x2) + + faddd %f30,pp2,%f30 + fmuld %f24,qq2,%f28 + + fmuld %f0,%f6,%f6 + faddd %f4,qq1,%f4 + + fmuld %f8,%f14,%f14 + faddd %f12,qq1,%f12 + + faddd %f22,qq2,%f22 + fmuld %f16,pp2,%f20 + + fmuld %f24,%f30,%f30 + faddd %f28,qq1,%f28 + + faddd %f6,pp1,%f6 + fmuld %f0,%f4,%f4 + add %l4,%g1,%l4 + + faddd %f14,pp1,%f14 + fmuld %f8,%f12,%f12 + add %l5,%g1,%l5 + + fmuld %f16,%f22,%f22 + faddd %f20,pp1,%f20 + + faddd %f30,pp1,%f30 + fmuld %f24,%f28,%f28 + add %l7,%g1,%l7 + + fmuld %f0,%f6,%f6 + + fmuld %f8,%f14,%f14 + + faddd %f22,qq1,%f22 + fmuld %f16,%f20,%f20 + add %l6,%g1,%l6 + + fmuld %f24,%f30,%f30 + + fmuld %f2,%f6,%f6 + ldd [%l4+8],%f0 + + fmuld %f10,%f14,%f14 + ldd [%l5+8],%f8 + + fmuld %f18,%f20,%f20 + + fmuld %f26,%f30,%f30 + ldd [%l7+8],%f24 + + fmuld %f0,%f4,%f4 + faddd %f32,%f6,%f6 + + fmuld %f8,%f12,%f12 + faddd %f34,%f14,%f14 + + fmuld %f16,%f22,%f22 + faddd %f20,%f36,%f20 + ldd [%l6+16],%f16 + + fmuld %f24,%f28,%f28 + faddd %f38,%f30,%f30 + + faddd %f2,%f6,%f6 + ldd [%l4+16],%f32 + + faddd %f10,%f14,%f14 + ldd [%l5+16],%f34 + + fmuld %f16,%f22,%f22 + faddd %f20,%f18,%f20 + ldd [%l6+8],%f36 + + faddd %f26,%f30,%f30 + ldd [%l7+16],%f38 + + fmuld %f32,%f6,%f6 + + fmuld %f34,%f14,%f14 + + fmuld 
%f36,%f20,%f20 + + fmuld %f38,%f30,%f30 + + faddd %f6,%f4,%f6 + + faddd %f14,%f12,%f14 + + fsubd %f22,%f20,%f22 + + faddd %f30,%f28,%f30 + + faddd %f6,%f0,%f6 + + faddd %f14,%f8,%f14 + + faddd %f22,%f16,%f22 + + faddd %f30,%f24,%f30 + mov %l0,%l4 + + fnegd %f6,%f4 + lda [%i1]%asi,%l0 ! preload next argument + + fnegd %f14,%f12 + lda [%i1]%asi,%f0 + + fnegd %f22,%f20 + lda [%i1+4]%asi,%f3 + + fnegd %f30,%f28 + andn %l0,%i5,%l0 + add %i1,%i2,%i1 + + andcc %l4,2,%g0 + fmovdnz %icc,%f4,%f6 + st %f6,[%o0] + + andcc %l1,2,%g0 + fmovdnz %icc,%f12,%f14 + st %f14,[%o1] + + andcc %l2,2,%g0 + fmovdnz %icc,%f20,%f22 + st %f22,[%o2] + + andcc %l3,2,%g0 + fmovdnz %icc,%f28,%f30 + st %f30,[%o3] + + addcc %i0,-1,%i0 + bg,pt %icc,.loop0 +! delay slot + st %f7,[%o0+4] + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case14: + fmuld %f26,%f26,%f24 + andcc %l3,1,%g0 + bz,pn %icc,.case15 +! delay slot + fxor %f30,%f38,%f38 + + fmuld %f0,pp3,%f6 ! sin(x0) + + fmuld %f8,pp3,%f14 ! sin(x1) + + fmuld %f16,pp3,%f22 ! sin(x2) + + faddd %f6,pp2,%f6 + fmuld %f0,qq2,%f4 + + faddd %f14,pp2,%f14 + fmuld %f8,qq2,%f12 + + faddd %f22,pp2,%f22 + fmuld %f16,qq2,%f20 + + fmuld %f24,qq3,%f30 ! 
cos(x3) + + fmuld %f0,%f6,%f6 + faddd %f4,qq1,%f4 + + fmuld %f8,%f14,%f14 + faddd %f12,qq1,%f12 + + fmuld %f16,%f22,%f22 + faddd %f20,qq1,%f20 + + faddd %f30,qq2,%f30 + fmuld %f24,pp2,%f28 + + faddd %f6,pp1,%f6 + fmuld %f0,%f4,%f4 + add %l4,%g1,%l4 + + faddd %f14,pp1,%f14 + fmuld %f8,%f12,%f12 + add %l5,%g1,%l5 + + faddd %f22,pp1,%f22 + fmuld %f16,%f20,%f20 + add %l6,%g1,%l6 + + fmuld %f24,%f30,%f30 + faddd %f28,pp1,%f28 + + fmuld %f0,%f6,%f6 + + fmuld %f8,%f14,%f14 + + fmuld %f16,%f22,%f22 + + faddd %f30,qq1,%f30 + fmuld %f24,%f28,%f28 + add %l7,%g1,%l7 + + fmuld %f2,%f6,%f6 + ldd [%l4+8],%f0 + + fmuld %f10,%f14,%f14 + ldd [%l5+8],%f8 + + fmuld %f18,%f22,%f22 + ldd [%l6+8],%f16 + + fmuld %f26,%f28,%f28 + + fmuld %f0,%f4,%f4 + faddd %f32,%f6,%f6 + + fmuld %f8,%f12,%f12 + faddd %f34,%f14,%f14 + + fmuld %f16,%f20,%f20 + faddd %f36,%f22,%f22 + + fmuld %f24,%f30,%f30 + faddd %f28,%f38,%f28 + ldd [%l7+16],%f24 + + faddd %f2,%f6,%f6 + ldd [%l4+16],%f32 + + faddd %f10,%f14,%f14 + ldd [%l5+16],%f34 + + faddd %f18,%f22,%f22 + ldd [%l6+16],%f36 + + fmuld %f24,%f30,%f30 + faddd %f28,%f26,%f28 + ldd [%l7+8],%f38 + + fmuld %f32,%f6,%f6 + + fmuld %f34,%f14,%f14 + + fmuld %f36,%f22,%f22 + + fmuld %f38,%f28,%f28 + + faddd %f6,%f4,%f6 + + faddd %f14,%f12,%f14 + + faddd %f22,%f20,%f22 + + fsubd %f30,%f28,%f30 + + faddd %f6,%f0,%f6 + + faddd %f14,%f8,%f14 + + faddd %f22,%f16,%f22 + + faddd %f30,%f24,%f30 + mov %l0,%l4 + + fnegd %f6,%f4 + lda [%i1]%asi,%l0 ! preload next argument + + fnegd %f14,%f12 + lda [%i1]%asi,%f0 + + fnegd %f22,%f20 + lda [%i1+4]%asi,%f3 + + fnegd %f30,%f28 + andn %l0,%i5,%l0 + add %i1,%i2,%i1 + + andcc %l4,2,%g0 + fmovdnz %icc,%f4,%f6 + st %f6,[%o0] + + andcc %l1,2,%g0 + fmovdnz %icc,%f12,%f14 + st %f14,[%o1] + + andcc %l2,2,%g0 + fmovdnz %icc,%f20,%f22 + st %f22,[%o2] + + andcc %l3,2,%g0 + fmovdnz %icc,%f28,%f30 + st %f30,[%o3] + + addcc %i0,-1,%i0 + bg,pt %icc,.loop0 +! delay slot + st %f7,[%o0+4] + + ba,pt %icc,.end +! 
delay slot + nop + + .align 16 +.case15: + fmuld %f0,pp3,%f6 ! sin(x0) + + fmuld %f8,pp3,%f14 ! sin(x1) + + fmuld %f16,pp3,%f22 ! sin(x2) + + fmuld %f24,pp3,%f30 ! sin(x3) + + faddd %f6,pp2,%f6 + fmuld %f0,qq2,%f4 + + faddd %f14,pp2,%f14 + fmuld %f8,qq2,%f12 + + faddd %f22,pp2,%f22 + fmuld %f16,qq2,%f20 + + faddd %f30,pp2,%f30 + fmuld %f24,qq2,%f28 + + fmuld %f0,%f6,%f6 + faddd %f4,qq1,%f4 + + fmuld %f8,%f14,%f14 + faddd %f12,qq1,%f12 + + fmuld %f16,%f22,%f22 + faddd %f20,qq1,%f20 + + fmuld %f24,%f30,%f30 + faddd %f28,qq1,%f28 + + faddd %f6,pp1,%f6 + fmuld %f0,%f4,%f4 + add %l4,%g1,%l4 + + faddd %f14,pp1,%f14 + fmuld %f8,%f12,%f12 + add %l5,%g1,%l5 + + faddd %f22,pp1,%f22 + fmuld %f16,%f20,%f20 + add %l6,%g1,%l6 + + faddd %f30,pp1,%f30 + fmuld %f24,%f28,%f28 + add %l7,%g1,%l7 + + fmuld %f0,%f6,%f6 + + fmuld %f8,%f14,%f14 + + fmuld %f16,%f22,%f22 + + fmuld %f24,%f30,%f30 + + fmuld %f2,%f6,%f6 + ldd [%l4+8],%f0 + + fmuld %f10,%f14,%f14 + ldd [%l5+8],%f8 + + fmuld %f18,%f22,%f22 + ldd [%l6+8],%f16 + + fmuld %f26,%f30,%f30 + ldd [%l7+8],%f24 + + fmuld %f0,%f4,%f4 + faddd %f32,%f6,%f6 + + fmuld %f8,%f12,%f12 + faddd %f34,%f14,%f14 + + fmuld %f16,%f20,%f20 + faddd %f36,%f22,%f22 + + fmuld %f24,%f28,%f28 + faddd %f38,%f30,%f30 + + faddd %f2,%f6,%f6 + ldd [%l4+16],%f32 + + faddd %f10,%f14,%f14 + ldd [%l5+16],%f34 + + faddd %f18,%f22,%f22 + ldd [%l6+16],%f36 + + faddd %f26,%f30,%f30 + ldd [%l7+16],%f38 + + fmuld %f32,%f6,%f6 + + fmuld %f34,%f14,%f14 + + fmuld %f36,%f22,%f22 + + fmuld %f38,%f30,%f30 + + faddd %f6,%f4,%f6 + + faddd %f14,%f12,%f14 + + faddd %f22,%f20,%f22 + + faddd %f30,%f28,%f30 + + faddd %f6,%f0,%f6 + + faddd %f14,%f8,%f14 + + faddd %f22,%f16,%f22 + + faddd %f30,%f24,%f30 + mov %l0,%l4 + + fnegd %f6,%f4 + lda [%i1]%asi,%l0 ! 
preload next argument + + fnegd %f14,%f12 + lda [%i1]%asi,%f0 + + fnegd %f22,%f20 + lda [%i1+4]%asi,%f3 + + fnegd %f30,%f28 + andn %l0,%i5,%l0 + add %i1,%i2,%i1 + + andcc %l4,2,%g0 + fmovdnz %icc,%f4,%f6 + st %f6,[%o0] + + andcc %l1,2,%g0 + fmovdnz %icc,%f12,%f14 + st %f14,[%o1] + + andcc %l2,2,%g0 + fmovdnz %icc,%f20,%f22 + st %f22,[%o2] + + andcc %l3,2,%g0 + fmovdnz %icc,%f28,%f30 + st %f30,[%o3] + + addcc %i0,-1,%i0 + bg,pt %icc,.loop0 +! delay slot + st %f7,[%o0+4] + + ba,pt %icc,.end +! delay slot + nop + + + .align 16 +.end: + st %f15,[%o1+4] + st %f23,[%o2+4] + st %f31,[%o3+4] + ld [%fp+biguns],%i5 + tst %i5 ! check for huge arguments remaining + be,pt %icc,.exit +! delay slot + nop +#ifdef __sparcv9 + ldx [%fp+xsave],%o1 + ldx [%fp+ysave],%o3 +#else + ld [%fp+xsave],%o1 + ld [%fp+ysave],%o3 +#endif + ld [%fp+nsave],%o0 + ld [%fp+sxsave],%o2 + ld [%fp+sysave],%o4 + sra %o2,0,%o2 ! sign-extend for V9 + sra %o4,0,%o4 + call __vlibm_vcos_big_ultra3 + sra %o5,0,%o5 ! delay slot + +.exit: + ret + restore + + + .align 16 +.last1: + faddd %f2,c3two44,%f4 + st %f15,[%o1+4] +.last1_from_range1: + mov 0,%l1 + fzeros %f8 + fzero %f10 + add %fp,junk,%o1 +.last2: + faddd %f10,c3two44,%f12 + st %f23,[%o2+4] +.last2_from_range2: + mov 0,%l2 + fzeros %f16 + fzero %f18 + add %fp,junk,%o2 +.last3: + faddd %f18,c3two44,%f20 + st %f31,[%o3+4] + st %f5,[%fp+nk0] + st %f13,[%fp+nk1] +.last3_from_range3: + mov 0,%l3 + fzeros %f24 + fzero %f26 + ba,pt %icc,.cont +! delay slot + add %fp,junk,%o3 + + + .align 16 +.range0: + cmp %l0,%o4 + bl,pt %icc,1f ! hx < 0x3e400000 +! delay slot, harmless if branch taken + sethi %hi(0x7ff00000),%o7 + cmp %l0,%o7 + bl,a,pt %icc,2f ! branch if finite +! delay slot, squashed if branch not taken + st %o4,[%fp+biguns] ! set biguns + fzero %f0 + fmuld %f2,%f0,%f2 + st %f2,[%o0] + ba,pt %icc,2f +! delay slot + st %f3,[%o0+4] +1: + fdtoi %f2,%f4 ! 
raise inexact if not zero + sethi %hi(0x3ff00000),%o7 + st %o7,[%o0] + st %g0,[%o0+4] +2: + addcc %i0,-1,%i0 + ble,pn %icc,.end +! delay slot, harmless if branch taken + add %i3,%i4,%i3 ! y += stridey + andn %l1,%i5,%l0 ! hx &= ~0x80000000 + fmovs %f8,%f0 + fmovs %f11,%f3 + ba,pt %icc,.loop0 +! delay slot + add %i1,%i2,%i1 ! x += stridex + + + .align 16 +.range1: + cmp %l1,%o4 + bl,pt %icc,1f ! hx < 0x3e400000 +! delay slot, harmless if branch taken + sethi %hi(0x7ff00000),%o7 + cmp %l1,%o7 + bl,a,pt %icc,2f ! branch if finite +! delay slot, squashed if branch not taken + st %o4,[%fp+biguns] ! set biguns + fzero %f8 + fmuld %f10,%f8,%f10 + st %f10,[%o1] + ba,pt %icc,2f +! delay slot + st %f11,[%o1+4] +1: + fdtoi %f10,%f12 ! raise inexact if not zero + sethi %hi(0x3ff00000),%o7 + st %o7,[%o1] + st %g0,[%o1+4] +2: + addcc %i0,-1,%i0 + ble,pn %icc,.last1_from_range1 +! delay slot, harmless if branch taken + add %i3,%i4,%i3 ! y += stridey + andn %l2,%i5,%l1 ! hx &= ~0x80000000 + fmovs %f16,%f8 + fmovs %f19,%f11 + ba,pt %icc,.loop1 +! delay slot + add %i1,%i2,%i1 ! x += stridex + + + .align 16 +.range2: + cmp %l2,%o4 + bl,pt %icc,1f ! hx < 0x3e400000 +! delay slot, harmless if branch taken + sethi %hi(0x7ff00000),%o7 + cmp %l2,%o7 + bl,a,pt %icc,2f ! branch if finite +! delay slot, squashed if branch not taken + st %o4,[%fp+biguns] ! set biguns + fzero %f16 + fmuld %f18,%f16,%f18 + st %f18,[%o2] + ba,pt %icc,2f +! delay slot + st %f19,[%o2+4] +1: + fdtoi %f18,%f20 ! raise inexact if not zero + sethi %hi(0x3ff00000),%o7 + st %o7,[%o2] + st %g0,[%o2+4] +2: + addcc %i0,-1,%i0 + ble,pn %icc,.last2_from_range2 +! delay slot, harmless if branch taken + add %i3,%i4,%i3 ! y += stridey + andn %l3,%i5,%l2 ! hx &= ~0x80000000 + fmovs %f24,%f16 + fmovs %f27,%f19 + ba,pt %icc,.loop2 +! delay slot + add %i1,%i2,%i1 ! x += stridex + + + .align 16 +.range3: + cmp %l3,%o4 + bl,pt %icc,1f ! hx < 0x3e400000 +! 
delay slot, harmless if branch taken + sethi %hi(0x7ff00000),%o7 + cmp %l3,%o7 + bl,a,pt %icc,2f ! branch if finite +! delay slot, squashed if branch not taken + st %o4,[%fp+biguns] ! set biguns + fzero %f24 + fmuld %f26,%f24,%f26 + st %f26,[%o3] + ba,pt %icc,2f +! delay slot + st %f27,[%o3+4] +1: + fdtoi %f26,%f28 ! raise inexact if not zero + sethi %hi(0x3ff00000),%o7 + st %o7,[%o3] + st %g0,[%o3+4] +2: + addcc %i0,-1,%i0 + ble,pn %icc,.last3_from_range3 +! delay slot, harmless if branch taken + add %i3,%i4,%i3 ! y += stridey + ld [%i1],%l3 + ld [%i1],%f24 + ld [%i1+4],%f27 + andn %l3,%i5,%l3 ! hx &= ~0x80000000 + ba,pt %icc,.loop3 +! delay slot + add %i1,%i2,%i1 ! x += stridex + + SET_SIZE(__vcos_ultra3) + diff --git a/usr/src/lib/libmvec/common/vis/__vcosf.S b/usr/src/lib/libmvec/common/vis/__vcosf.S new file mode 100644 index 0000000000..a20550e23b --- /dev/null +++ b/usr/src/lib/libmvec/common/vis/__vcosf.S @@ -0,0 +1,2102 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + + .file "__vcosf.S" + +#include "libm.h" + + RO_DATA + .align 64 +constants: + .word 0xbfc55554,0x60000000 /* S0 (offset 0x00): double close to -1/6; presumably a sine-polynomial coefficient, its use is not fully visible here */ + .word 0x3f811077,0xe0000000 /* S1 (0x08): double close to 1/120; presumably a sine-polynomial coefficient */ + .word 0xbf29956b,0x60000000 /* S2 (0x10): double close to -1/5040; presumably a sine-polynomial coefficient */ + .word 0x3ff00000,0x00000000 /* one (0x18): 1.0 */ + .word 0xbfe00000,0x00000000 /* mhalf (0x20): -0.5 */ + .word 0x3fa55554,0xa0000000 /* C0 (0x28): double close to 1/24; cosine-polynomial coefficient, see the cos(x0) sequence after .checkprimary */ + .word 0xbf56c0c1,0xe0000000 /* C1 (0x30): double close to -1/720; cosine-polynomial coefficient */ + .word 0x3ef99e24,0xe0000000 /* C2 (0x38): double close to 1/40320; cosine-polynomial coefficient */ + .word 0x3fe45f30,0x6dc9c883 /* invpio2 (0x40): 2/pi; .medium multiplies each argument by it */ + .word 0x43380000,0x00000000 /* round (0x48): 1.5 * 2^52; .medium adds then subtracts it, and stores the low word of the sum as n0..n3 */ + .word 0x3ff921fb,0x54400000 /* pio2_1 (0x50): leading bits of pi/2, low-order mantissa bits zero */ + .word 0x3dd0b461,0x1a626331 /* pio2_t (0x58): about 6.077e-11, the remainder of pi/2 after pio2_1 */ + .word 0x3f490fdb,0 /* thresh1 (0x60): single-precision bit pattern of pi/4 in the high word; compared with fcmple32 */ + .word 0x49c90fdb,0 /* thresh2 (0x68): single-precision bit pattern of 2^19 * pi in the high word; compared with fcmple32 */ + .word 0x7f800000,0 /* inf (0x70): single-precision Inf bit pattern in the high word */ + .word 0x80000000,0 /* signbit (0x78): only the top bit set; xor-ed into results to flip their sign */ + +#define S0 0x0 +#define S1 0x08 +#define S2 0x10 +#define one 0x18 +#define mhalf 0x20 +#define C0 0x28 +#define C1 0x30 +#define C2 0x38 +#define invpio2 0x40 +#define round 0x48 +#define pio2_1 0x50 +#define pio2_t 0x58 +#define thresh1 0x60 +#define thresh2 0x68 +#define inf 0x70 +#define signbit 0x78 + +! local storage indices + +#define xsave STACK_BIAS-0x8 +#define ysave STACK_BIAS-0x10 +#define nsave STACK_BIAS-0x14 +#define sxsave STACK_BIAS-0x18 +#define sysave STACK_BIAS-0x1c +#define junk STACK_BIAS-0x20 +#define n3 STACK_BIAS-0x24 +#define n2 STACK_BIAS-0x28 +#define n1 STACK_BIAS-0x2c +#define n0 STACK_BIAS-0x30 +! sizeof temp storage - must be a multiple of 16 for V9 +#define tmps 0x30 + +! register use + +! i0 n +! i1 x +! i2 stridex +! i3 y +! i4 stridey +! i5 biguns + +! l0 n0 +! l1 n1 +! l2 n2 +! l3 n3 +! l4 +! l5 +! l6 +! l7 + +! the following are 64-bit registers in both V8+ and V9 + +! g1 +! g5 + +! o0 py0 +! o1 py1 +! o2 py2 +! o3 py3 +! o4 +! o5 +! o7 + +! f0 x0 +! f2 x1 +! f4 x2 +! f6 x3 +! f8 thresh1 (pi/4) +! f10 y0 +! f12 y1 +! f14 y2 +! f16 y3 +! f18 thresh2 (2^19 pi) +! f20 +! f22 +! f24 +! f26 +! f28 signbit +! f30 +! f32 +! f34 +! f36 +! f38 inf +! f40 S0 +! f42 S1 +! f44 S2 +! f46 one +! f48 mhalf +! f50 C0 +! f52 C1 +! f54 C2 +! f56 invpio2 +! f58 round +! f60 pio2_1 +! f62 pio2_t + + ENTRY(__vcosf) + save %sp,-SA(MINFRAME)-tmps,%sp + PIC_SETUP(l7) + PIC_SET(l7,constants,l0) + mov %l0,%g1 + wr %g0,0x82,%asi ! 
set %asi for non-faulting loads +#ifdef __sparcv9 + stx %i1,[%fp+xsave] ! save arguments + stx %i3,[%fp+ysave] +#else + st %i1,[%fp+xsave] ! save arguments + st %i3,[%fp+ysave] +#endif + st %i0,[%fp+nsave] + st %i2,[%fp+sxsave] + st %i4,[%fp+sysave] + mov 0,%i5 ! biguns = 0 + ldd [%g1+S0],%f40 ! load constants + ldd [%g1+S1],%f42 + ldd [%g1+S2],%f44 + ldd [%g1+one],%f46 + ldd [%g1+mhalf],%f48 + ldd [%g1+C0],%f50 + ldd [%g1+C1],%f52 + ldd [%g1+C2],%f54 + ldd [%g1+invpio2],%f56 + ldd [%g1+round],%f58 + ldd [%g1+pio2_1],%f60 + ldd [%g1+pio2_t],%f62 + ldd [%g1+thresh1],%f8 + ldd [%g1+thresh2],%f18 + ldd [%g1+inf],%f38 + ldd [%g1+signbit],%f28 + sll %i2,2,%i2 ! scale strides + sll %i4,2,%i4 + fzero %f10 ! loop prologue + add %fp,junk,%o0 + fzero %f12 + add %fp,junk,%o1 + fzero %f14 + add %fp,junk,%o2 + fzero %f16 + ba .start + add %fp,junk,%o3 + + .align 16 +! 16-byte aligned +.start: + ld [%i1],%f0 ! *x + add %i1,%i2,%i1 ! x += stridex + addcc %i0,-1,%i0 + fdtos %f10,%f10 + + st %f10,[%o0] + mov %i3,%o0 ! py0 = y + ble,pn %icc,.last1 +! delay slot + add %i3,%i4,%i3 ! y += stridey + + ld [%i1],%f2 ! *x + add %i1,%i2,%i1 ! x += stridex + addcc %i0,-1,%i0 + fdtos %f12,%f12 + + st %f12,[%o1] + mov %i3,%o1 ! py1 = y + ble,pn %icc,.last2 +! delay slot + add %i3,%i4,%i3 ! y += stridey + + ld [%i1],%f4 ! *x + add %i1,%i2,%i1 ! x += stridex + addcc %i0,-1,%i0 + fdtos %f14,%f14 + + st %f14,[%o2] + mov %i3,%o2 ! py2 = y + ble,pn %icc,.last3 +! delay slot + add %i3,%i4,%i3 ! y += stridey + + ld [%i1],%f6 ! *x + add %i1,%i2,%i1 ! x += stridex + nop + fdtos %f16,%f16 + + st %f16,[%o3] + mov %i3,%o3 ! py3 = y + add %i3,%i4,%i3 ! y += stridey +.cont: + fabsd %f0,%f30 + + fabsd %f2,%f32 + + fabsd %f4,%f34 + + fabsd %f6,%f36 + fcmple32 %f30,%f18,%l0 + + fcmple32 %f32,%f18,%l1 + + fcmple32 %f34,%f18,%l2 + + fcmple32 %f36,%f18,%l3 + nop + +! 16-byte aligned + andcc %l0,2,%g0 + bz,pn %icc,.range0 ! branch if > 2^19 pi +! 
delay slot + fcmple32 %f30,%f8,%l0 + +.check1: + andcc %l1,2,%g0 + bz,pn %icc,.range1 ! branch if > 2^19 pi +! delay slot + fcmple32 %f32,%f8,%l1 + +.check2: + andcc %l2,2,%g0 + bz,pn %icc,.range2 ! branch if > 2^19 pi +! delay slot + fcmple32 %f34,%f8,%l2 + +.check3: + andcc %l3,2,%g0 + bz,pn %icc,.range3 ! branch if > 2^19 pi +! delay slot + fcmple32 %f36,%f8,%l3 + +.checkprimary: + fsmuld %f0,%f0,%f30 + fstod %f0,%f0 + + fsmuld %f2,%f2,%f32 + fstod %f2,%f2 + and %l0,%l1,%o4 + + fsmuld %f4,%f4,%f34 + fstod %f4,%f4 + + fsmuld %f6,%f6,%f36 + fstod %f6,%f6 + and %l2,%l3,%o5 + + fmuld %f30,%f54,%f10 + and %o4,%o5,%o5 + + fmuld %f32,%f54,%f12 + andcc %o5,2,%g0 + bz,pn %icc,.medium ! branch if any argument is > pi/4 +! delay slot + nop + + fmuld %f34,%f54,%f14 + + fmuld %f36,%f54,%f16 + + fmuld %f30,%f48,%f20 + faddd %f10,%f52,%f10 + + fmuld %f32,%f48,%f22 + faddd %f12,%f52,%f12 + + fmuld %f34,%f48,%f24 + faddd %f14,%f52,%f14 + + fmuld %f36,%f48,%f26 + faddd %f16,%f52,%f16 + + fmuld %f30,%f10,%f10 + faddd %f20,%f46,%f20 + + fmuld %f32,%f12,%f12 + faddd %f22,%f46,%f22 + + fmuld %f34,%f14,%f14 + faddd %f24,%f46,%f24 + + fmuld %f36,%f16,%f16 + faddd %f26,%f46,%f26 + + fmuld %f30,%f30,%f30 + faddd %f10,%f50,%f10 + + fmuld %f32,%f32,%f32 + faddd %f12,%f50,%f12 + + fmuld %f34,%f34,%f34 + faddd %f14,%f50,%f14 + + fmuld %f36,%f36,%f36 + faddd %f16,%f50,%f16 + + fmuld %f30,%f10,%f10 + + fmuld %f32,%f12,%f12 + + fmuld %f34,%f14,%f14 + + fmuld %f36,%f16,%f16 + + faddd %f10,%f20,%f10 + + faddd %f12,%f22,%f12 + + faddd %f14,%f24,%f14 + + addcc %i0,-1,%i0 + bg,pt %icc,.start +! delay slot + faddd %f16,%f26,%f16 + + ba,pt %icc,.end +! 
delay slot + nop + + + .align 16 +.medium: + fmuld %f0,%f56,%f10 + + fmuld %f2,%f56,%f12 + + fmuld %f4,%f56,%f14 + + fmuld %f6,%f56,%f16 + + faddd %f10,%f58,%f10 + st %f11,[%fp+n0] + + faddd %f12,%f58,%f12 + st %f13,[%fp+n1] + + faddd %f14,%f58,%f14 + st %f15,[%fp+n2] + + faddd %f16,%f58,%f16 + st %f17,[%fp+n3] + + fsubd %f10,%f58,%f10 + + fsubd %f12,%f58,%f12 + + fsubd %f14,%f58,%f14 + + fsubd %f16,%f58,%f16 + + fmuld %f10,%f60,%f20 + ld [%fp+n0],%l0 + + fmuld %f12,%f60,%f22 + ld [%fp+n1],%l1 + + fmuld %f14,%f60,%f24 + ld [%fp+n2],%l2 + + fmuld %f16,%f60,%f26 + ld [%fp+n3],%l3 + + fsubd %f0,%f20,%f0 + fmuld %f10,%f62,%f30 + add %l0,1,%l0 + + fsubd %f2,%f22,%f2 + fmuld %f12,%f62,%f32 + add %l1,1,%l1 + + fsubd %f4,%f24,%f4 + fmuld %f14,%f62,%f34 + add %l2,1,%l2 + + fsubd %f6,%f26,%f6 + fmuld %f16,%f62,%f36 + add %l3,1,%l3 + + fsubd %f0,%f30,%f0 + + fsubd %f2,%f32,%f2 + + fsubd %f4,%f34,%f4 + + fsubd %f6,%f36,%f6 + andcc %l0,1,%g0 + + fmuld %f0,%f0,%f30 + bz,pn %icc,.case8 +! delay slot + andcc %l1,1,%g0 + + fmuld %f2,%f2,%f32 + bz,pn %icc,.case4 +! delay slot + andcc %l2,1,%g0 + + fmuld %f4,%f4,%f34 + bz,pn %icc,.case2 +! delay slot + andcc %l3,1,%g0 + + fmuld %f6,%f6,%f36 + bz,pn %icc,.case1 +! delay slot + nop + +!.case0: + fmuld %f30,%f54,%f10 ! cos(x0) + fzero %f0 + + fmuld %f32,%f54,%f12 ! cos(x1) + fzero %f2 + + fmuld %f34,%f54,%f14 ! cos(x2) + fzero %f4 + + fmuld %f36,%f54,%f16 ! 
cos(x3) + fzero %f6 + + fmuld %f30,%f48,%f20 + faddd %f10,%f52,%f10 + + fmuld %f32,%f48,%f22 + faddd %f12,%f52,%f12 + + fmuld %f34,%f48,%f24 + faddd %f14,%f52,%f14 + + fmuld %f36,%f48,%f26 + faddd %f16,%f52,%f16 + + fmuld %f30,%f10,%f10 + faddd %f20,%f46,%f20 + + fmuld %f32,%f12,%f12 + faddd %f22,%f46,%f22 + + fmuld %f34,%f14,%f14 + faddd %f24,%f46,%f24 + + fmuld %f36,%f16,%f16 + faddd %f26,%f46,%f26 + + fmuld %f30,%f30,%f30 + faddd %f10,%f50,%f10 + and %l0,2,%g1 + + fmuld %f32,%f32,%f32 + faddd %f12,%f50,%f12 + and %l1,2,%g5 + + fmuld %f34,%f34,%f34 + faddd %f14,%f50,%f14 + and %l2,2,%o4 + + fmuld %f36,%f36,%f36 + faddd %f16,%f50,%f16 + and %l3,2,%o5 + + fmuld %f30,%f10,%f10 + fmovrdnz %g1,%f28,%f0 + + fmuld %f32,%f12,%f12 + fmovrdnz %g5,%f28,%f2 + + fmuld %f34,%f14,%f14 + fmovrdnz %o4,%f28,%f4 + + fmuld %f36,%f16,%f16 + fmovrdnz %o5,%f28,%f6 + + faddd %f10,%f20,%f10 + + faddd %f12,%f22,%f12 + + faddd %f14,%f24,%f14 + + faddd %f16,%f26,%f16 + + fxor %f10,%f0,%f10 + + fxor %f12,%f2,%f12 + + fxor %f14,%f4,%f14 + + addcc %i0,-1,%i0 + bg,pt %icc,.start +! delay slot + fxor %f16,%f6,%f16 + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case1: + fmuld %f30,%f54,%f10 ! cos(x0) + fzero %f0 + + fmuld %f32,%f54,%f12 ! cos(x1) + fzero %f2 + + fmuld %f34,%f54,%f14 ! cos(x2) + fzero %f4 + + fmuld %f36,%f44,%f16 ! 
sin(x3) + + fmuld %f30,%f48,%f20 + faddd %f10,%f52,%f10 + + fmuld %f32,%f48,%f22 + faddd %f12,%f52,%f12 + + fmuld %f34,%f48,%f24 + faddd %f14,%f52,%f14 + + fmuld %f36,%f40,%f26 + faddd %f16,%f42,%f16 + + fmuld %f30,%f10,%f10 + faddd %f20,%f46,%f20 + + fmuld %f32,%f12,%f12 + faddd %f22,%f46,%f22 + + fmuld %f34,%f14,%f14 + faddd %f24,%f46,%f24 + + fmuld %f36,%f36,%f36 + faddd %f26,%f46,%f26 + + fmuld %f30,%f30,%f30 + faddd %f10,%f50,%f10 + and %l0,2,%g1 + + fmuld %f32,%f32,%f32 + faddd %f12,%f50,%f12 + and %l1,2,%g5 + + fmuld %f34,%f34,%f34 + faddd %f14,%f50,%f14 + and %l2,2,%o4 + + fmuld %f36,%f16,%f16 + fzero %f36 + + fmuld %f30,%f10,%f10 + fmovrdnz %g1,%f28,%f0 + + fmuld %f32,%f12,%f12 + fmovrdnz %g5,%f28,%f2 + + fmuld %f34,%f14,%f14 + fmovrdnz %o4,%f28,%f4 + + faddd %f16,%f26,%f16 + and %l3,2,%o5 + + faddd %f10,%f20,%f10 + + faddd %f12,%f22,%f12 + + faddd %f14,%f24,%f14 + + fmuld %f6,%f16,%f16 + fmovrdnz %o5,%f28,%f36 + + fxor %f10,%f0,%f10 + + fxor %f12,%f2,%f12 + + fxor %f14,%f4,%f14 + + addcc %i0,-1,%i0 + bg,pt %icc,.start +! delay slot + fxor %f16,%f36,%f16 + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case2: + fmuld %f6,%f6,%f36 + bz,pn %icc,.case3 +! delay slot + nop + + fmuld %f30,%f54,%f10 ! cos(x0) + fzero %f0 + + fmuld %f32,%f54,%f12 ! cos(x1) + fzero %f2 + + fmuld %f34,%f44,%f14 ! sin(x2) + + fmuld %f36,%f54,%f16 ! 
cos(x3) + fzero %f6 + + fmuld %f30,%f48,%f20 + faddd %f10,%f52,%f10 + + fmuld %f32,%f48,%f22 + faddd %f12,%f52,%f12 + + fmuld %f34,%f40,%f24 + faddd %f14,%f42,%f14 + + fmuld %f36,%f48,%f26 + faddd %f16,%f52,%f16 + + fmuld %f30,%f10,%f10 + faddd %f20,%f46,%f20 + + fmuld %f32,%f12,%f12 + faddd %f22,%f46,%f22 + + fmuld %f34,%f34,%f34 + faddd %f24,%f46,%f24 + + fmuld %f36,%f16,%f16 + faddd %f26,%f46,%f26 + + fmuld %f30,%f30,%f30 + faddd %f10,%f50,%f10 + and %l0,2,%g1 + + fmuld %f32,%f32,%f32 + faddd %f12,%f50,%f12 + and %l1,2,%g5 + + fmuld %f34,%f14,%f14 + fzero %f34 + + fmuld %f36,%f36,%f36 + faddd %f16,%f50,%f16 + and %l3,2,%o5 + + fmuld %f30,%f10,%f10 + fmovrdnz %g1,%f28,%f0 + + fmuld %f32,%f12,%f12 + fmovrdnz %g5,%f28,%f2 + + faddd %f14,%f24,%f14 + and %l2,2,%o4 + + fmuld %f36,%f16,%f16 + fmovrdnz %o5,%f28,%f6 + + faddd %f10,%f20,%f10 + + faddd %f12,%f22,%f12 + + fmuld %f4,%f14,%f14 + fmovrdnz %o4,%f28,%f34 + + faddd %f16,%f26,%f16 + + fxor %f10,%f0,%f10 + + fxor %f12,%f2,%f12 + + fxor %f14,%f34,%f14 + + addcc %i0,-1,%i0 + bg,pt %icc,.start +! delay slot + fxor %f16,%f6,%f16 + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case3: + fmuld %f30,%f54,%f10 ! cos(x0) + fzero %f0 + + fmuld %f32,%f54,%f12 ! cos(x1) + fzero %f2 + + fmuld %f34,%f44,%f14 ! sin(x2) + + fmuld %f36,%f44,%f16 ! 
sin(x3) + + fmuld %f30,%f48,%f20 + faddd %f10,%f52,%f10 + + fmuld %f32,%f48,%f22 + faddd %f12,%f52,%f12 + + fmuld %f34,%f40,%f24 + faddd %f14,%f42,%f14 + + fmuld %f36,%f40,%f26 + faddd %f16,%f42,%f16 + + fmuld %f30,%f10,%f10 + faddd %f20,%f46,%f20 + + fmuld %f32,%f12,%f12 + faddd %f22,%f46,%f22 + + fmuld %f34,%f34,%f34 + faddd %f24,%f46,%f24 + + fmuld %f36,%f36,%f36 + faddd %f26,%f46,%f26 + + fmuld %f30,%f30,%f30 + faddd %f10,%f50,%f10 + and %l0,2,%g1 + + fmuld %f32,%f32,%f32 + faddd %f12,%f50,%f12 + and %l1,2,%g5 + + fmuld %f34,%f14,%f14 + fzero %f34 + + fmuld %f36,%f16,%f16 + fzero %f36 + + fmuld %f30,%f10,%f10 + fmovrdnz %g1,%f28,%f0 + + fmuld %f32,%f12,%f12 + fmovrdnz %g5,%f28,%f2 + + faddd %f14,%f24,%f14 + and %l2,2,%o4 + + faddd %f16,%f26,%f16 + and %l3,2,%o5 + + faddd %f10,%f20,%f10 + + faddd %f12,%f22,%f12 + + fmuld %f4,%f14,%f14 + fmovrdnz %o4,%f28,%f34 + + fmuld %f6,%f16,%f16 + fmovrdnz %o5,%f28,%f36 + + fxor %f10,%f0,%f10 + + fxor %f12,%f2,%f12 + + fxor %f14,%f34,%f14 + + addcc %i0,-1,%i0 + bg,pt %icc,.start +! delay slot + fxor %f16,%f36,%f16 + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case4: + fmuld %f4,%f4,%f34 + bz,pn %icc,.case6 +! delay slot + andcc %l3,1,%g0 + + fmuld %f6,%f6,%f36 + bz,pn %icc,.case5 +! delay slot + nop + + fmuld %f30,%f54,%f10 ! cos(x0) + fzero %f0 + + fmuld %f32,%f44,%f12 ! sin(x1) + + fmuld %f34,%f54,%f14 ! cos(x2) + fzero %f4 + + fmuld %f36,%f54,%f16 ! 
cos(x3) + fzero %f6 + + fmuld %f30,%f48,%f20 + faddd %f10,%f52,%f10 + + fmuld %f32,%f40,%f22 + faddd %f12,%f42,%f12 + + fmuld %f34,%f48,%f24 + faddd %f14,%f52,%f14 + + fmuld %f36,%f48,%f26 + faddd %f16,%f52,%f16 + + fmuld %f30,%f10,%f10 + faddd %f20,%f46,%f20 + + fmuld %f32,%f32,%f32 + faddd %f22,%f46,%f22 + + fmuld %f34,%f14,%f14 + faddd %f24,%f46,%f24 + + fmuld %f36,%f16,%f16 + faddd %f26,%f46,%f26 + + fmuld %f30,%f30,%f30 + faddd %f10,%f50,%f10 + and %l0,2,%g1 + + fmuld %f32,%f12,%f12 + fzero %f32 + + fmuld %f34,%f34,%f34 + faddd %f14,%f50,%f14 + and %l2,2,%o4 + + fmuld %f36,%f36,%f36 + faddd %f16,%f50,%f16 + and %l3,2,%o5 + + fmuld %f30,%f10,%f10 + fmovrdnz %g1,%f28,%f0 + + faddd %f12,%f22,%f12 + and %l1,2,%g5 + + fmuld %f34,%f14,%f14 + fmovrdnz %o4,%f28,%f4 + + fmuld %f36,%f16,%f16 + fmovrdnz %o5,%f28,%f6 + + faddd %f10,%f20,%f10 + + fmuld %f2,%f12,%f12 + fmovrdnz %g5,%f28,%f32 + + faddd %f14,%f24,%f14 + + faddd %f16,%f26,%f16 + + fxor %f10,%f0,%f10 + + fxor %f12,%f32,%f12 + + fxor %f14,%f4,%f14 + + addcc %i0,-1,%i0 + bg,pt %icc,.start +! delay slot + fxor %f16,%f6,%f16 + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case5: + fmuld %f30,%f54,%f10 ! cos(x0) + fzero %f0 + + fmuld %f32,%f44,%f12 ! sin(x1) + + fmuld %f34,%f54,%f14 ! cos(x2) + fzero %f4 + + fmuld %f36,%f44,%f16 ! 
sin(x3) + + fmuld %f30,%f48,%f20 + faddd %f10,%f52,%f10 + + fmuld %f32,%f40,%f22 + faddd %f12,%f42,%f12 + + fmuld %f34,%f48,%f24 + faddd %f14,%f52,%f14 + + fmuld %f36,%f40,%f26 + faddd %f16,%f42,%f16 + + fmuld %f30,%f10,%f10 + faddd %f20,%f46,%f20 + + fmuld %f32,%f32,%f32 + faddd %f22,%f46,%f22 + + fmuld %f34,%f14,%f14 + faddd %f24,%f46,%f24 + + fmuld %f36,%f36,%f36 + faddd %f26,%f46,%f26 + + fmuld %f30,%f30,%f30 + faddd %f10,%f50,%f10 + and %l0,2,%g1 + + fmuld %f32,%f12,%f12 + fzero %f32 + + fmuld %f34,%f34,%f34 + faddd %f14,%f50,%f14 + and %l2,2,%o4 + + fmuld %f36,%f16,%f16 + fzero %f36 + + fmuld %f30,%f10,%f10 + fmovrdnz %g1,%f28,%f0 + + faddd %f12,%f22,%f12 + and %l1,2,%g5 + + fmuld %f34,%f14,%f14 + fmovrdnz %o4,%f28,%f4 + + faddd %f16,%f26,%f16 + and %l3,2,%o5 + + faddd %f10,%f20,%f10 + + fmuld %f2,%f12,%f12 + fmovrdnz %g5,%f28,%f32 + + faddd %f14,%f24,%f14 + + fmuld %f6,%f16,%f16 + fmovrdnz %o5,%f28,%f36 + + fxor %f10,%f0,%f10 + + fxor %f12,%f32,%f12 + + fxor %f14,%f4,%f14 + + addcc %i0,-1,%i0 + bg,pt %icc,.start +! delay slot + fxor %f16,%f36,%f16 + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case6: + fmuld %f6,%f6,%f36 + bz,pn %icc,.case7 +! delay slot + nop + + fmuld %f30,%f54,%f10 ! cos(x0) + fzero %f0 + + fmuld %f32,%f44,%f12 ! sin(x1) + + fmuld %f34,%f44,%f14 ! sin(x2) + + fmuld %f36,%f54,%f16 ! 
cos(x3) + fzero %f6 + + fmuld %f30,%f48,%f20 + faddd %f10,%f52,%f10 + + fmuld %f32,%f40,%f22 + faddd %f12,%f42,%f12 + + fmuld %f34,%f40,%f24 + faddd %f14,%f42,%f14 + + fmuld %f36,%f48,%f26 + faddd %f16,%f52,%f16 + + fmuld %f30,%f10,%f10 + faddd %f20,%f46,%f20 + + fmuld %f32,%f32,%f32 + faddd %f22,%f46,%f22 + + fmuld %f34,%f34,%f34 + faddd %f24,%f46,%f24 + + fmuld %f36,%f16,%f16 + faddd %f26,%f46,%f26 + + fmuld %f30,%f30,%f30 + faddd %f10,%f50,%f10 + and %l0,2,%g1 + + fmuld %f32,%f12,%f12 + fzero %f32 + + fmuld %f34,%f14,%f14 + fzero %f34 + + fmuld %f36,%f36,%f36 + faddd %f16,%f50,%f16 + and %l3,2,%o5 + + fmuld %f30,%f10,%f10 + fmovrdnz %g1,%f28,%f0 + + faddd %f12,%f22,%f12 + and %l1,2,%g5 + + faddd %f14,%f24,%f14 + and %l2,2,%o4 + + fmuld %f36,%f16,%f16 + fmovrdnz %o5,%f28,%f6 + + faddd %f10,%f20,%f10 + + fmuld %f2,%f12,%f12 + fmovrdnz %g5,%f28,%f32 + + fmuld %f4,%f14,%f14 + fmovrdnz %o4,%f28,%f34 + + faddd %f16,%f26,%f16 + + fxor %f10,%f0,%f10 + + fxor %f12,%f32,%f12 + + fxor %f14,%f34,%f14 + + addcc %i0,-1,%i0 + bg,pt %icc,.start +! delay slot + fxor %f16,%f6,%f16 + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case7: + fmuld %f30,%f54,%f10 ! cos(x0) + fzero %f0 + + fmuld %f32,%f44,%f12 ! sin(x1) + + fmuld %f34,%f44,%f14 ! sin(x2) + + fmuld %f36,%f44,%f16 ! 
sin(x3) + + fmuld %f30,%f48,%f20 + faddd %f10,%f52,%f10 + + fmuld %f32,%f40,%f22 + faddd %f12,%f42,%f12 + + fmuld %f34,%f40,%f24 + faddd %f14,%f42,%f14 + + fmuld %f36,%f40,%f26 + faddd %f16,%f42,%f16 + + fmuld %f30,%f10,%f10 + faddd %f20,%f46,%f20 + + fmuld %f32,%f32,%f32 + faddd %f22,%f46,%f22 + + fmuld %f34,%f34,%f34 + faddd %f24,%f46,%f24 + + fmuld %f36,%f36,%f36 + faddd %f26,%f46,%f26 + + fmuld %f30,%f30,%f30 + faddd %f10,%f50,%f10 + and %l0,2,%g1 + + fmuld %f32,%f12,%f12 + fzero %f32 + + fmuld %f34,%f14,%f14 + fzero %f34 + + fmuld %f36,%f16,%f16 + fzero %f36 + + fmuld %f30,%f10,%f10 + fmovrdnz %g1,%f28,%f0 + + faddd %f12,%f22,%f12 + and %l1,2,%g5 + + faddd %f14,%f24,%f14 + and %l2,2,%o4 + + faddd %f16,%f26,%f16 + and %l3,2,%o5 + + faddd %f10,%f20,%f10 + + fmuld %f2,%f12,%f12 + fmovrdnz %g5,%f28,%f32 + + fmuld %f4,%f14,%f14 + fmovrdnz %o4,%f28,%f34 + + fmuld %f6,%f16,%f16 + fmovrdnz %o5,%f28,%f36 + + fxor %f10,%f0,%f10 + + fxor %f12,%f32,%f12 + + fxor %f14,%f34,%f14 + + addcc %i0,-1,%i0 + bg,pt %icc,.start +! delay slot + fxor %f16,%f36,%f16 + + ba,pt %icc,.end +! delay slot + nop + + + .align 16 +.case8: + fmuld %f2,%f2,%f32 + bz,pn %icc,.case12 +! delay slot + andcc %l2,1,%g0 + + fmuld %f4,%f4,%f34 + bz,pn %icc,.case10 +! delay slot + andcc %l3,1,%g0 + + fmuld %f6,%f6,%f36 + bz,pn %icc,.case9 +! delay slot + nop + + fmuld %f30,%f44,%f10 ! sin(x0) + + fmuld %f32,%f54,%f12 ! cos(x1) + fzero %f2 + + fmuld %f34,%f54,%f14 ! cos(x2) + fzero %f4 + + fmuld %f36,%f54,%f16 ! 
cos(x3) + fzero %f6 + + fmuld %f30,%f40,%f20 + faddd %f10,%f42,%f10 + + fmuld %f32,%f48,%f22 + faddd %f12,%f52,%f12 + + fmuld %f34,%f48,%f24 + faddd %f14,%f52,%f14 + + fmuld %f36,%f48,%f26 + faddd %f16,%f52,%f16 + + fmuld %f30,%f30,%f30 + faddd %f20,%f46,%f20 + + fmuld %f32,%f12,%f12 + faddd %f22,%f46,%f22 + + fmuld %f34,%f14,%f14 + faddd %f24,%f46,%f24 + + fmuld %f36,%f16,%f16 + faddd %f26,%f46,%f26 + + fmuld %f30,%f10,%f10 + fzero %f30 + + fmuld %f32,%f32,%f32 + faddd %f12,%f50,%f12 + and %l1,2,%g5 + + fmuld %f34,%f34,%f34 + faddd %f14,%f50,%f14 + and %l2,2,%o4 + + fmuld %f36,%f36,%f36 + faddd %f16,%f50,%f16 + and %l3,2,%o5 + + faddd %f10,%f20,%f10 + and %l0,2,%g1 + + fmuld %f32,%f12,%f12 + fmovrdnz %g5,%f28,%f2 + + fmuld %f34,%f14,%f14 + fmovrdnz %o4,%f28,%f4 + + fmuld %f36,%f16,%f16 + fmovrdnz %o5,%f28,%f6 + + fmuld %f0,%f10,%f10 + fmovrdnz %g1,%f28,%f30 + + faddd %f12,%f22,%f12 + + faddd %f14,%f24,%f14 + + faddd %f16,%f26,%f16 + + fxor %f10,%f30,%f10 + + fxor %f12,%f2,%f12 + + fxor %f14,%f4,%f14 + + addcc %i0,-1,%i0 + bg,pt %icc,.start +! delay slot + fxor %f16,%f6,%f16 + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case9: + fmuld %f30,%f44,%f10 ! sin(x0) + + fmuld %f32,%f54,%f12 ! cos(x1) + fzero %f2 + + fmuld %f34,%f54,%f14 ! cos(x2) + fzero %f4 + + fmuld %f36,%f44,%f16 ! 
sin(x3) + + fmuld %f30,%f40,%f20 + faddd %f10,%f42,%f10 + + fmuld %f32,%f48,%f22 + faddd %f12,%f52,%f12 + + fmuld %f34,%f48,%f24 + faddd %f14,%f52,%f14 + + fmuld %f36,%f40,%f26 + faddd %f16,%f42,%f16 + + fmuld %f30,%f30,%f30 + faddd %f20,%f46,%f20 + + fmuld %f32,%f12,%f12 + faddd %f22,%f46,%f22 + + fmuld %f34,%f14,%f14 + faddd %f24,%f46,%f24 + + fmuld %f36,%f36,%f36 + faddd %f26,%f46,%f26 + + fmuld %f30,%f10,%f10 + fzero %f30 + + fmuld %f32,%f32,%f32 + faddd %f12,%f50,%f12 + and %l1,2,%g5 + + fmuld %f34,%f34,%f34 + faddd %f14,%f50,%f14 + and %l2,2,%o4 + + fmuld %f36,%f16,%f16 + fzero %f36 + + faddd %f10,%f20,%f10 + and %l0,2,%g1 + + fmuld %f32,%f12,%f12 + fmovrdnz %g5,%f28,%f2 + + fmuld %f34,%f14,%f14 + fmovrdnz %o4,%f28,%f4 + + faddd %f16,%f26,%f16 + and %l3,2,%o5 + + fmuld %f0,%f10,%f10 + fmovrdnz %g1,%f28,%f30 + + faddd %f12,%f22,%f12 + + faddd %f14,%f24,%f14 + + fmuld %f6,%f16,%f16 + fmovrdnz %o5,%f28,%f36 + + fxor %f10,%f30,%f10 + + fxor %f12,%f2,%f12 + + fxor %f14,%f4,%f14 + + addcc %i0,-1,%i0 + bg,pt %icc,.start +! delay slot + fxor %f16,%f36,%f16 + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case10: + fmuld %f6,%f6,%f36 + bz,pn %icc,.case11 +! delay slot + nop + + fmuld %f30,%f44,%f10 ! sin(x0) + + fmuld %f32,%f54,%f12 ! cos(x1) + fzero %f2 + + fmuld %f34,%f44,%f14 ! sin(x2) + + fmuld %f36,%f54,%f16 ! 
cos(x3) + fzero %f6 + + fmuld %f30,%f40,%f20 + faddd %f10,%f42,%f10 + + fmuld %f32,%f48,%f22 + faddd %f12,%f52,%f12 + + fmuld %f34,%f40,%f24 + faddd %f14,%f42,%f14 + + fmuld %f36,%f48,%f26 + faddd %f16,%f52,%f16 + + fmuld %f30,%f30,%f30 + faddd %f20,%f46,%f20 + + fmuld %f32,%f12,%f12 + faddd %f22,%f46,%f22 + + fmuld %f34,%f34,%f34 + faddd %f24,%f46,%f24 + + fmuld %f36,%f16,%f16 + faddd %f26,%f46,%f26 + + fmuld %f30,%f10,%f10 + fzero %f30 + + fmuld %f32,%f32,%f32 + faddd %f12,%f50,%f12 + and %l1,2,%g5 + + fmuld %f34,%f14,%f14 + fzero %f34 + + fmuld %f36,%f36,%f36 + faddd %f16,%f50,%f16 + and %l3,2,%o5 + + faddd %f10,%f20,%f10 + and %l0,2,%g1 + + fmuld %f32,%f12,%f12 + fmovrdnz %g5,%f28,%f2 + + faddd %f14,%f24,%f14 + and %l2,2,%o4 + + fmuld %f36,%f16,%f16 + fmovrdnz %o5,%f28,%f6 + + fmuld %f0,%f10,%f10 + fmovrdnz %g1,%f28,%f30 + + faddd %f12,%f22,%f12 + + fmuld %f4,%f14,%f14 + fmovrdnz %o4,%f28,%f34 + + faddd %f16,%f26,%f16 + + fxor %f10,%f30,%f10 + + fxor %f12,%f2,%f12 + + fxor %f14,%f34,%f14 + + addcc %i0,-1,%i0 + bg,pt %icc,.start +! delay slot + fxor %f16,%f6,%f16 + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case11: + fmuld %f30,%f44,%f10 ! sin(x0) + + fmuld %f32,%f54,%f12 ! cos(x1) + fzero %f2 + + fmuld %f34,%f44,%f14 ! sin(x2) + + fmuld %f36,%f44,%f16 ! 
sin(x3) + + fmuld %f30,%f40,%f20 + faddd %f10,%f42,%f10 + + fmuld %f32,%f48,%f22 + faddd %f12,%f52,%f12 + + fmuld %f34,%f40,%f24 + faddd %f14,%f42,%f14 + + fmuld %f36,%f40,%f26 + faddd %f16,%f42,%f16 + + fmuld %f30,%f30,%f30 + faddd %f20,%f46,%f20 + + fmuld %f32,%f12,%f12 + faddd %f22,%f46,%f22 + + fmuld %f34,%f34,%f34 + faddd %f24,%f46,%f24 + + fmuld %f36,%f36,%f36 + faddd %f26,%f46,%f26 + + fmuld %f30,%f10,%f10 + fzero %f30 + + fmuld %f32,%f32,%f32 + faddd %f12,%f50,%f12 + and %l1,2,%g5 + + fmuld %f34,%f14,%f14 + fzero %f34 + + fmuld %f36,%f16,%f16 + fzero %f36 + + faddd %f10,%f20,%f10 + and %l0,2,%g1 + + fmuld %f32,%f12,%f12 + fmovrdnz %g5,%f28,%f2 + + faddd %f14,%f24,%f14 + and %l2,2,%o4 + + faddd %f16,%f26,%f16 + and %l3,2,%o5 + + fmuld %f0,%f10,%f10 + fmovrdnz %g1,%f28,%f30 + + faddd %f12,%f22,%f12 + + fmuld %f4,%f14,%f14 + fmovrdnz %o4,%f28,%f34 + + fmuld %f6,%f16,%f16 + fmovrdnz %o5,%f28,%f36 + + fxor %f10,%f30,%f10 + + fxor %f12,%f2,%f12 + + fxor %f14,%f34,%f14 + + addcc %i0,-1,%i0 + bg,pt %icc,.start +! delay slot + fxor %f16,%f36,%f16 + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case12: + fmuld %f4,%f4,%f34 + bz,pn %icc,.case14 +! delay slot + andcc %l3,1,%g0 + + fmuld %f6,%f6,%f36 + bz,pn %icc,.case13 +! delay slot + nop + + fmuld %f30,%f44,%f10 ! sin(x0) + + fmuld %f32,%f44,%f12 ! sin(x1) + + fmuld %f34,%f54,%f14 ! cos(x2) + fzero %f4 + + fmuld %f36,%f54,%f16 ! 
cos(x3) + fzero %f6 + + fmuld %f30,%f40,%f20 + faddd %f10,%f42,%f10 + + fmuld %f32,%f40,%f22 + faddd %f12,%f42,%f12 + + fmuld %f34,%f48,%f24 + faddd %f14,%f52,%f14 + + fmuld %f36,%f48,%f26 + faddd %f16,%f52,%f16 + + fmuld %f30,%f30,%f30 + faddd %f20,%f46,%f20 + + fmuld %f32,%f32,%f32 + faddd %f22,%f46,%f22 + + fmuld %f34,%f14,%f14 + faddd %f24,%f46,%f24 + + fmuld %f36,%f16,%f16 + faddd %f26,%f46,%f26 + + fmuld %f30,%f10,%f10 + fzero %f30 + + fmuld %f32,%f12,%f12 + fzero %f32 + + fmuld %f34,%f34,%f34 + faddd %f14,%f50,%f14 + and %l2,2,%o4 + + fmuld %f36,%f36,%f36 + faddd %f16,%f50,%f16 + and %l3,2,%o5 + + faddd %f10,%f20,%f10 + and %l0,2,%g1 + + faddd %f12,%f22,%f12 + and %l1,2,%g5 + + fmuld %f34,%f14,%f14 + fmovrdnz %o4,%f28,%f4 + + fmuld %f36,%f16,%f16 + fmovrdnz %o5,%f28,%f6 + + fmuld %f0,%f10,%f10 + fmovrdnz %g1,%f28,%f30 + + fmuld %f2,%f12,%f12 + fmovrdnz %g5,%f28,%f32 + + faddd %f14,%f24,%f14 + + faddd %f16,%f26,%f16 + + fxor %f10,%f30,%f10 + + fxor %f12,%f32,%f12 + + fxor %f14,%f4,%f14 + + addcc %i0,-1,%i0 + bg,pt %icc,.start +! delay slot + fxor %f16,%f6,%f16 + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case13: + fmuld %f30,%f44,%f10 ! sin(x0) + + fmuld %f32,%f44,%f12 ! sin(x1) + + fmuld %f34,%f54,%f14 ! cos(x2) + fzero %f4 + + fmuld %f36,%f44,%f16 ! 
sin(x3) + + fmuld %f30,%f40,%f20 + faddd %f10,%f42,%f10 + + fmuld %f32,%f40,%f22 + faddd %f12,%f42,%f12 + + fmuld %f34,%f48,%f24 + faddd %f14,%f52,%f14 + + fmuld %f36,%f40,%f26 + faddd %f16,%f42,%f16 + + fmuld %f30,%f30,%f30 + faddd %f20,%f46,%f20 + + fmuld %f32,%f32,%f32 + faddd %f22,%f46,%f22 + + fmuld %f34,%f14,%f14 + faddd %f24,%f46,%f24 + + fmuld %f36,%f36,%f36 + faddd %f26,%f46,%f26 + + fmuld %f30,%f10,%f10 + fzero %f30 + + fmuld %f32,%f12,%f12 + fzero %f32 + + fmuld %f34,%f34,%f34 + faddd %f14,%f50,%f14 + and %l2,2,%o4 + + fmuld %f36,%f16,%f16 + fzero %f36 + + faddd %f10,%f20,%f10 + and %l0,2,%g1 + + faddd %f12,%f22,%f12 + and %l1,2,%g5 + + fmuld %f34,%f14,%f14 + fmovrdnz %o4,%f28,%f4 + + faddd %f16,%f26,%f16 + and %l3,2,%o5 + + fmuld %f0,%f10,%f10 + fmovrdnz %g1,%f28,%f30 + + fmuld %f2,%f12,%f12 + fmovrdnz %g5,%f28,%f32 + + faddd %f14,%f24,%f14 + + fmuld %f6,%f16,%f16 + fmovrdnz %o5,%f28,%f36 + + fxor %f10,%f30,%f10 + + fxor %f12,%f32,%f12 + + fxor %f14,%f4,%f14 + + addcc %i0,-1,%i0 + bg,pt %icc,.start +! delay slot + fxor %f16,%f36,%f16 + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case14: + fmuld %f6,%f6,%f36 + bz,pn %icc,.case15 +! delay slot + nop + + fmuld %f30,%f44,%f10 ! sin(x0) + + fmuld %f32,%f44,%f12 ! sin(x1) + + fmuld %f34,%f44,%f14 ! sin(x2) + + fmuld %f36,%f54,%f16 ! 
cos(x3) + fzero %f6 + + fmuld %f30,%f40,%f20 + faddd %f10,%f42,%f10 + + fmuld %f32,%f40,%f22 + faddd %f12,%f42,%f12 + + fmuld %f34,%f40,%f24 + faddd %f14,%f42,%f14 + + fmuld %f36,%f48,%f26 + faddd %f16,%f52,%f16 + + fmuld %f30,%f30,%f30 + faddd %f20,%f46,%f20 + + fmuld %f32,%f32,%f32 + faddd %f22,%f46,%f22 + + fmuld %f34,%f34,%f34 + faddd %f24,%f46,%f24 + + fmuld %f36,%f16,%f16 + faddd %f26,%f46,%f26 + + fmuld %f30,%f10,%f10 + fzero %f30 + + fmuld %f32,%f12,%f12 + fzero %f32 + + fmuld %f34,%f14,%f14 + fzero %f34 + + fmuld %f36,%f36,%f36 + faddd %f16,%f50,%f16 + and %l3,2,%o5 + + faddd %f10,%f20,%f10 + and %l0,2,%g1 + + faddd %f12,%f22,%f12 + and %l1,2,%g5 + + faddd %f14,%f24,%f14 + and %l2,2,%o4 + + fmuld %f36,%f16,%f16 + fmovrdnz %o5,%f28,%f6 + + fmuld %f0,%f10,%f10 + fmovrdnz %g1,%f28,%f30 + + fmuld %f2,%f12,%f12 + fmovrdnz %g5,%f28,%f32 + + fmuld %f4,%f14,%f14 + fmovrdnz %o4,%f28,%f34 + + faddd %f16,%f26,%f16 + + fxor %f10,%f30,%f10 + + fxor %f12,%f32,%f12 + + fxor %f14,%f34,%f14 + + addcc %i0,-1,%i0 + bg,pt %icc,.start +! delay slot + fxor %f16,%f6,%f16 + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case15: + fmuld %f30,%f44,%f10 ! sin(x0) + + fmuld %f32,%f44,%f12 ! sin(x1) + + fmuld %f34,%f44,%f14 ! sin(x2) + + fmuld %f36,%f44,%f16 ! 
sin(x3) + + fmuld %f30,%f40,%f20 + faddd %f10,%f42,%f10 + + fmuld %f32,%f40,%f22 + faddd %f12,%f42,%f12 + + fmuld %f34,%f40,%f24 + faddd %f14,%f42,%f14 + + fmuld %f36,%f40,%f26 + faddd %f16,%f42,%f16 + + fmuld %f30,%f30,%f30 + faddd %f20,%f46,%f20 + + fmuld %f32,%f32,%f32 + faddd %f22,%f46,%f22 + + fmuld %f34,%f34,%f34 + faddd %f24,%f46,%f24 + + fmuld %f36,%f36,%f36 + faddd %f26,%f46,%f26 + + fmuld %f30,%f10,%f10 + fzero %f30 + + fmuld %f32,%f12,%f12 + fzero %f32 + + fmuld %f34,%f14,%f14 + fzero %f34 + + fmuld %f36,%f16,%f16 + fzero %f36 + + faddd %f10,%f20,%f10 + and %l0,2,%g1 + + faddd %f12,%f22,%f12 + and %l1,2,%g5 + + faddd %f14,%f24,%f14 + and %l2,2,%o4 + + faddd %f16,%f26,%f16 + and %l3,2,%o5 + + fmuld %f0,%f10,%f10 + fmovrdnz %g1,%f28,%f30 + + fmuld %f2,%f12,%f12 + fmovrdnz %g5,%f28,%f32 + + fmuld %f4,%f14,%f14 + fmovrdnz %o4,%f28,%f34 + + fmuld %f6,%f16,%f16 + fmovrdnz %o5,%f28,%f36 + + fxor %f10,%f30,%f10 + + fxor %f12,%f32,%f12 + + fxor %f14,%f34,%f14 + + addcc %i0,-1,%i0 + bg,pt %icc,.start +! delay slot + fxor %f16,%f36,%f16 + + ba,pt %icc,.end +! delay slot + nop + + + .align 32 +.end: + fdtos %f10,%f10 + st %f10,[%o0] + fdtos %f12,%f12 + st %f12,[%o1] + fdtos %f14,%f14 + st %f14,[%o2] + fdtos %f16,%f16 + tst %i5 ! check for huge arguments remaining + be,pt %icc,.exit +! delay slot + st %f16,[%o3] +#ifdef __sparcv9 + ldx [%fp+xsave],%o1 + ldx [%fp+ysave],%o3 +#else + ld [%fp+xsave],%o1 + ld [%fp+ysave],%o3 +#endif + ld [%fp+nsave],%o0 + ld [%fp+sxsave],%o2 + ld [%fp+sysave],%o4 + sra %o2,0,%o2 ! sign-extend for V9 + call __vlibm_vcos_bigf + sra %o4,0,%o4 ! delay slot + +.exit: + ret + restore + + + .align 32 +.last1: + fdtos %f12,%f12 + st %f12,[%o1] + fzeros %f2 + add %fp,junk,%o1 +.last2: + fdtos %f14,%f14 + st %f14,[%o2] + fzeros %f4 + add %fp,junk,%o2 +.last3: + fdtos %f16,%f16 + st %f16,[%o3] + fzeros %f6 + ba,pt %icc,.cont +! delay slot + add %fp,junk,%o3 + + + .align 16 +.range0: + fcmpgt32 %f38,%f30,%l0 + andcc %l0,2,%g0 + bnz,a,pt %icc,1f ! 
branch if finite +! delay slot, squashed if branch not taken + mov 1,%i5 ! set biguns + fzeros %f1 + fmuls %f0,%f1,%f0 + st %f0,[%o0] +1: + addcc %i0,-1,%i0 + ble,pn %icc,1f +! delay slot + nop + ld [%i1],%f0 + add %i1,%i2,%i1 + mov %i3,%o0 + add %i3,%i4,%i3 + fabsd %f0,%f30 + fcmple32 %f30,%f18,%l0 + andcc %l0,2,%g0 + bz,pn %icc,.range0 +! delay slot + nop + ba,pt %icc,.check1 +! delay slot + fcmple32 %f30,%f8,%l0 +1: + fzero %f0 ! set up dummy argument + add %fp,junk,%o0 + mov 2,%l0 + ba,pt %icc,.check1 +! delay slot + fzero %f30 + + + .align 16 +.range1: + fcmpgt32 %f38,%f32,%l1 + andcc %l1,2,%g0 + bnz,a,pt %icc,1f ! branch if finite +! delay slot, squashed if branch not taken + mov 1,%i5 ! set biguns + fzeros %f3 + fmuls %f2,%f3,%f2 + st %f2,[%o1] +1: + addcc %i0,-1,%i0 + ble,pn %icc,1f +! delay slot + nop + ld [%i1],%f2 + add %i1,%i2,%i1 + mov %i3,%o1 + add %i3,%i4,%i3 + fabsd %f2,%f32 + fcmple32 %f32,%f18,%l1 + andcc %l1,2,%g0 + bz,pn %icc,.range1 +! delay slot + nop + ba,pt %icc,.check2 +! delay slot + fcmple32 %f32,%f8,%l1 +1: + fzero %f2 ! set up dummy argument + add %fp,junk,%o1 + mov 2,%l1 + ba,pt %icc,.check2 +! delay slot + fzero %f32 + + + .align 16 +.range2: + fcmpgt32 %f38,%f34,%l2 + andcc %l2,2,%g0 + bnz,a,pt %icc,1f ! branch if finite +! delay slot, squashed if branch not taken + mov 1,%i5 ! set biguns + fzeros %f5 + fmuls %f4,%f5,%f4 + st %f4,[%o2] +1: + addcc %i0,-1,%i0 + ble,pn %icc,1f +! delay slot + nop + ld [%i1],%f4 + add %i1,%i2,%i1 + mov %i3,%o2 + add %i3,%i4,%i3 + fabsd %f4,%f34 + fcmple32 %f34,%f18,%l2 + andcc %l2,2,%g0 + bz,pn %icc,.range2 +! delay slot + nop + ba,pt %icc,.check3 +! delay slot + fcmple32 %f34,%f8,%l2 +1: + fzero %f4 ! set up dummy argument + add %fp,junk,%o2 + mov 2,%l2 + ba,pt %icc,.check3 +! delay slot + fzero %f34 + + + .align 16 +.range3: + fcmpgt32 %f38,%f36,%l3 + andcc %l3,2,%g0 + bnz,a,pt %icc,1f ! branch if finite +! delay slot, squashed if branch not taken + mov 1,%i5 ! 
set biguns + fzeros %f7 + fmuls %f6,%f7,%f6 + st %f6,[%o3] +1: + addcc %i0,-1,%i0 + ble,pn %icc,1f +! delay slot + nop + ld [%i1],%f6 + add %i1,%i2,%i1 + mov %i3,%o3 + add %i3,%i4,%i3 + fabsd %f6,%f36 + fcmple32 %f36,%f18,%l3 + andcc %l3,2,%g0 + bz,pn %icc,.range3 +! delay slot + nop + ba,pt %icc,.checkprimary +! delay slot + fcmple32 %f36,%f8,%l3 +1: + fzero %f6 ! set up dummy argument + add %fp,junk,%o3 + mov 2,%l3 + ba,pt %icc,.checkprimary +! delay slot + fzero %f36 + + SET_SIZE(__vcosf) + diff --git a/usr/src/lib/libmvec/common/vis/__vexp.S b/usr/src/lib/libmvec/common/vis/__vexp.S new file mode 100644 index 0000000000..fc11df08ee --- /dev/null +++ b/usr/src/lib/libmvec/common/vis/__vexp.S @@ -0,0 +1,1282 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .file "__vexp.S" + +#include "libm.h" + + RO_DATA + +/******************************************************************** + * vexp() algorithm is from mopt:f_exp.c. Basics are included here + * to supplement comments within this file. 
vexp() has been unrolled + * to a depth of 3. Only element 0 is documented. + * + * Note 1: INVLN2_256, LN2_256H, and LN2_256L were originally scaled by + * 2^44 to allow *2^k w/o shifting within the FP registers. These + * had to be removed for CHEETAH to avoid the fdtox of a very large + * number, which would trap to kernel (2^52). + * + * Let x = (k + j/256)ln2 + r + * then exp(x) = exp(ln(2^(k+j/256))) * exp(r) + * = 2^k * 2^(j/256) * exp(r) + * where exp(r) is approximated by the polynomial + * exp(r) = 1 + r + r^2*B1 + r^3*B2 + r^4*B3 + * = 1 + r*(1+r*(B1+r*(B2+r*B3))) + * let + * p = r*(1+r*(B1+r*(B2+r*B3))) ! notice, not quite exp(r) + * q = 2^(j/256) (high 64 bits) + * t = 2^(j/256) (extra precision) ! both from _TBL_exp_z[] + * then + * 2^(j/256) * exp(r) = (q+t)(1+p) ~ q + ( t + q*p ) + * then actual computation is 2^k * ( q + ( t + q*p ) ) + * + ********************************************************************/ + + .align 16 +TBL: + .word 0x3ff00000,0x00000000 + .word 0x00000000,0x00000000 + .word 0x3ff00b1a,0xfa5abcbf + .word 0xbc84f6b2,0xa7609f71 + .word 0x3ff0163d,0xa9fb3335 + .word 0x3c9b6129,0x9ab8cdb7 + .word 0x3ff02168,0x143b0281 + .word 0xbc82bf31,0x0fc54eb6 + .word 0x3ff02c9a,0x3e778061 + .word 0xbc719083,0x535b085d + .word 0x3ff037d4,0x2e11bbcc + .word 0x3c656811,0xeeade11a + .word 0x3ff04315,0xe86e7f85 + .word 0xbc90a31c,0x1977c96e + .word 0x3ff04e5f,0x72f654b1 + .word 0x3c84c379,0x3aa0d08c + .word 0x3ff059b0,0xd3158574 + .word 0x3c8d73e2,0xa475b465 + .word 0x3ff0650a,0x0e3c1f89 + .word 0xbc95cb7b,0x5799c396 + .word 0x3ff0706b,0x29ddf6de + .word 0xbc8c91df,0xe2b13c26 + .word 0x3ff07bd4,0x2b72a836 + .word 0x3c832334,0x54458700 + .word 0x3ff08745,0x18759bc8 + .word 0x3c6186be,0x4bb284ff + .word 0x3ff092bd,0xf66607e0 + .word 0xbc968063,0x800a3fd1 + .word 0x3ff09e3e,0xcac6f383 + .word 0x3c914878,0x18316136 + .word 0x3ff0a9c7,0x9b1f3919 + .word 0x3c85d16c,0x873d1d38 + .word 0x3ff0b558,0x6cf9890f + .word 0x3c98a62e,0x4adc610a + .word 0x3ff0c0f1,0x45e46c85 + 
.word 0x3c94f989,0x06d21cef + .word 0x3ff0cc92,0x2b7247f7 + .word 0x3c901edc,0x16e24f71 + .word 0x3ff0d83b,0x23395dec + .word 0xbc9bc14d,0xe43f316a + .word 0x3ff0e3ec,0x32d3d1a2 + .word 0x3c403a17,0x27c57b53 + .word 0x3ff0efa5,0x5fdfa9c5 + .word 0xbc949db9,0xbc54021b + .word 0x3ff0fb66,0xaffed31b + .word 0xbc6b9bed,0xc44ebd7b + .word 0x3ff10730,0x28d7233e + .word 0x3c8d46eb,0x1692fdd5 + .word 0x3ff11301,0xd0125b51 + .word 0xbc96c510,0x39449b3a + .word 0x3ff11edb,0xab5e2ab6 + .word 0xbc9ca454,0xf703fb72 + .word 0x3ff12abd,0xc06c31cc + .word 0xbc51b514,0xb36ca5c7 + .word 0x3ff136a8,0x14f204ab + .word 0xbc67108f,0xba48dcf0 + .word 0x3ff1429a,0xaea92de0 + .word 0xbc932fbf,0x9af1369e + .word 0x3ff14e95,0x934f312e + .word 0xbc8b91e8,0x39bf44ab + .word 0x3ff15a98,0xc8a58e51 + .word 0x3c82406a,0xb9eeab0a + .word 0x3ff166a4,0x5471c3c2 + .word 0x3c58f23b,0x82ea1a32 + .word 0x3ff172b8,0x3c7d517b + .word 0xbc819041,0xb9d78a76 + .word 0x3ff17ed4,0x8695bbc0 + .word 0x3c709e3f,0xe2ac5a64 + .word 0x3ff18af9,0x388c8dea + .word 0xbc911023,0xd1970f6c + .word 0x3ff19726,0x58375d2f + .word 0x3c94aadd,0x85f17e08 + .word 0x3ff1a35b,0xeb6fcb75 + .word 0x3c8e5b4c,0x7b4968e4 + .word 0x3ff1af99,0xf8138a1c + .word 0x3c97bf85,0xa4b69280 + .word 0x3ff1bbe0,0x84045cd4 + .word 0xbc995386,0x352ef607 + .word 0x3ff1c82f,0x95281c6b + .word 0x3c900977,0x8010f8c9 + .word 0x3ff1d487,0x3168b9aa + .word 0x3c9e016e,0x00a2643c + .word 0x3ff1e0e7,0x5eb44027 + .word 0xbc96fdd8,0x088cb6de + .word 0x3ff1ed50,0x22fcd91d + .word 0xbc91df98,0x027bb78c + .word 0x3ff1f9c1,0x8438ce4d + .word 0xbc9bf524,0xa097af5c + .word 0x3ff2063b,0x88628cd6 + .word 0x3c8dc775,0x814a8494 + .word 0x3ff212be,0x3578a819 + .word 0x3c93592d,0x2cfcaac9 + .word 0x3ff21f49,0x917ddc96 + .word 0x3c82a97e,0x9494a5ee + .word 0x3ff22bdd,0xa27912d1 + .word 0x3c8d34fb,0x5577d69e + .word 0x3ff2387a,0x6e756238 + .word 0x3c99b07e,0xb6c70573 + .word 0x3ff2451f,0xfb82140a + .word 0x3c8acfcc,0x911ca996 + .word 0x3ff251ce,0x4fb2a63f + .word 
0x3c8ac155,0xbef4f4a4 + .word 0x3ff25e85,0x711ece75 + .word 0x3c93e1a2,0x4ac31b2c + .word 0x3ff26b45,0x65e27cdd + .word 0x3c82bd33,0x9940e9d9 + .word 0x3ff2780e,0x341ddf29 + .word 0x3c9e067c,0x05f9e76c + .word 0x3ff284df,0xe1f56381 + .word 0xbc9a4c3a,0x8c3f0d7e + .word 0x3ff291ba,0x7591bb70 + .word 0xbc82cc72,0x28401cbc + .word 0x3ff29e9d,0xf51fdee1 + .word 0x3c8612e8,0xafad1255 + .word 0x3ff2ab8a,0x66d10f13 + .word 0xbc995743,0x191690a7 + .word 0x3ff2b87f,0xd0dad990 + .word 0xbc410adc,0xd6381aa4 + .word 0x3ff2c57e,0x39771b2f + .word 0xbc950145,0xa6eb5124 + .word 0x3ff2d285,0xa6e4030b + .word 0x3c900247,0x54db41d5 + .word 0x3ff2df96,0x1f641589 + .word 0x3c9d16cf,0xfbbce198 + .word 0x3ff2ecaf,0xa93e2f56 + .word 0x3c71ca0f,0x45d52383 + .word 0x3ff2f9d2,0x4abd886b + .word 0xbc653c55,0x532bda93 + .word 0x3ff306fe,0x0a31b715 + .word 0x3c86f46a,0xd23182e4 + .word 0x3ff31432,0xedeeb2fd + .word 0x3c8959a3,0xf3f3fcd0 + .word 0x3ff32170,0xfc4cd831 + .word 0x3c8a9ce7,0x8e18047c + .word 0x3ff32eb8,0x3ba8ea32 + .word 0xbc9c45e8,0x3cb4f318 + .word 0x3ff33c08,0xb26416ff + .word 0x3c932721,0x843659a6 + .word 0x3ff34962,0x66e3fa2d + .word 0xbc835a75,0x930881a4 + .word 0x3ff356c5,0x5f929ff1 + .word 0xbc8b5cee,0x5c4e4628 + .word 0x3ff36431,0xa2de883b + .word 0xbc8c3144,0xa06cb85e + .word 0x3ff371a7,0x373aa9cb + .word 0xbc963aea,0xbf42eae2 + .word 0x3ff37f26,0x231e754a + .word 0xbc99f5ca,0x9eceb23c + .word 0x3ff38cae,0x6d05d866 + .word 0xbc9e958d,0x3c9904bd + .word 0x3ff39a40,0x1b7140ef + .word 0xbc99a9a5,0xfc8e2934 + .word 0x3ff3a7db,0x34e59ff7 + .word 0xbc75e436,0xd661f5e3 + .word 0x3ff3b57f,0xbfec6cf4 + .word 0x3c954c66,0xe26fff18 + .word 0x3ff3c32d,0xc313a8e5 + .word 0xbc9efff8,0x375d29c3 + .word 0x3ff3d0e5,0x44ede173 + .word 0x3c7fe8d0,0x8c284c71 + .word 0x3ff3dea6,0x4c123422 + .word 0x3c8ada09,0x11f09ebc + .word 0x3ff3ec70,0xdf1c5175 + .word 0xbc8af663,0x7b8c9bca + .word 0x3ff3fa45,0x04ac801c + .word 0xbc97d023,0xf956f9f3 + .word 0x3ff40822,0xc367a024 + .word 
0x3c8bddf8,0xb6f4d048 + .word 0x3ff4160a,0x21f72e2a + .word 0xbc5ef369,0x1c309278 + .word 0x3ff423fb,0x2709468a + .word 0xbc98462d,0xc0b314dd + .word 0x3ff431f5,0xd950a897 + .word 0xbc81c7dd,0xe35f7998 + .word 0x3ff43ffa,0x3f84b9d4 + .word 0x3c8880be,0x9704c002 + .word 0x3ff44e08,0x6061892d + .word 0x3c489b7a,0x04ef80d0 + .word 0x3ff45c20,0x42a7d232 + .word 0xbc686419,0x82fb1f8e + .word 0x3ff46a41,0xed1d0057 + .word 0x3c9c944b,0xd1648a76 + .word 0x3ff4786d,0x668b3237 + .word 0xbc9c20f0,0xed445733 + .word 0x3ff486a2,0xb5c13cd0 + .word 0x3c73c1a3,0xb69062f0 + .word 0x3ff494e1,0xe192aed2 + .word 0xbc83b289,0x5e499ea0 + .word 0x3ff4a32a,0xf0d7d3de + .word 0x3c99cb62,0xf3d1be56 + .word 0x3ff4b17d,0xea6db7d7 + .word 0xbc8125b8,0x7f2897f0 + .word 0x3ff4bfda,0xd5362a27 + .word 0x3c7d4397,0xafec42e2 + .word 0x3ff4ce41,0xb817c114 + .word 0x3c905e29,0x690abd5d + .word 0x3ff4dcb2,0x99fddd0d + .word 0x3c98ecdb,0xbc6a7833 + .word 0x3ff4eb2d,0x81d8abff + .word 0xbc95257d,0x2e5d7a52 + .word 0x3ff4f9b2,0x769d2ca7 + .word 0xbc94b309,0xd25957e3 + .word 0x3ff50841,0x7f4531ee + .word 0x3c7a249b,0x49b7465f + .word 0x3ff516da,0xa2cf6642 + .word 0xbc8f7685,0x69bd93ee + .word 0x3ff5257d,0xe83f4eef + .word 0xbc7c998d,0x43efef71 + .word 0x3ff5342b,0x569d4f82 + .word 0xbc807abe,0x1db13cac + .word 0x3ff542e2,0xf4f6ad27 + .word 0x3c87926d,0x192d5f7e + .word 0x3ff551a4,0xca5d920f + .word 0xbc8d689c,0xefede59a + .word 0x3ff56070,0xdde910d2 + .word 0xbc90fb6e,0x168eebf0 + .word 0x3ff56f47,0x36b527da + .word 0x3c99bb2c,0x011d93ad + .word 0x3ff57e27,0xdbe2c4cf + .word 0xbc90b98c,0x8a57b9c4 + .word 0x3ff58d12,0xd497c7fd + .word 0x3c8295e1,0x5b9a1de8 + .word 0x3ff59c08,0x27ff07cc + .word 0xbc97e2ce,0xe467e60f + .word 0x3ff5ab07,0xdd485429 + .word 0x3c96324c,0x054647ad + .word 0x3ff5ba11,0xfba87a03 + .word 0xbc9b77a1,0x4c233e1a + .word 0x3ff5c926,0x8a5946b7 + .word 0x3c3c4b1b,0x816986a2 + .word 0x3ff5d845,0x90998b93 + .word 0xbc9cd6a7,0xa8b45642 + .word 0x3ff5e76f,0x15ad2148 + .word 
0x3c9ba6f9,0x3080e65e + .word 0x3ff5f6a3,0x20dceb71 + .word 0xbc89eadd,0xe3cdcf92 + .word 0x3ff605e1,0xb976dc09 + .word 0xbc93e242,0x9b56de47 + .word 0x3ff6152a,0xe6cdf6f4 + .word 0x3c9e4b3e,0x4ab84c27 + .word 0x3ff6247e,0xb03a5585 + .word 0xbc9383c1,0x7e40b497 + .word 0x3ff633dd,0x1d1929fd + .word 0x3c984710,0xbeb964e5 + .word 0x3ff64346,0x34ccc320 + .word 0xbc8c483c,0x759d8932 + .word 0x3ff652b9,0xfebc8fb7 + .word 0xbc9ae3d5,0xc9a73e08 + .word 0x3ff66238,0x82552225 + .word 0xbc9bb609,0x87591c34 + .word 0x3ff671c1,0xc70833f6 + .word 0xbc8e8732,0x586c6134 + .word 0x3ff68155,0xd44ca973 + .word 0x3c6038ae,0x44f73e65 + .word 0x3ff690f4,0xb19e9538 + .word 0x3c8804bd,0x9aeb445c + .word 0x3ff6a09e,0x667f3bcd + .word 0xbc9bdd34,0x13b26456 + .word 0x3ff6b052,0xfa75173e + .word 0x3c7a38f5,0x2c9a9d0e + .word 0x3ff6c012,0x750bdabf + .word 0xbc728956,0x67ff0b0d + .word 0x3ff6cfdc,0xddd47645 + .word 0x3c9c7aa9,0xb6f17309 + .word 0x3ff6dfb2,0x3c651a2f + .word 0xbc6bbe3a,0x683c88ab + .word 0x3ff6ef92,0x98593ae5 + .word 0xbc90b974,0x9e1ac8b2 + .word 0x3ff6ff7d,0xf9519484 + .word 0xbc883c0f,0x25860ef6 + .word 0x3ff70f74,0x66f42e87 + .word 0x3c59d644,0xd45aa65f + .word 0x3ff71f75,0xe8ec5f74 + .word 0xbc816e47,0x86887a99 + .word 0x3ff72f82,0x86ead08a + .word 0xbc920aa0,0x2cd62c72 + .word 0x3ff73f9a,0x48a58174 + .word 0xbc90a8d9,0x6c65d53c + .word 0x3ff74fbd,0x35d7cbfd + .word 0x3c9047fd,0x618a6e1c + .word 0x3ff75feb,0x564267c9 + .word 0xbc902459,0x57316dd3 + .word 0x3ff77024,0xb1ab6e09 + .word 0x3c9b7877,0x169147f8 + .word 0x3ff78069,0x4fde5d3f + .word 0x3c9866b8,0x0a02162c + .word 0x3ff790b9,0x38ac1cf6 + .word 0x3c9349a8,0x62aadd3e + .word 0x3ff7a114,0x73eb0187 + .word 0xbc841577,0xee04992f + .word 0x3ff7b17b,0x0976cfdb + .word 0xbc9bebb5,0x8468dc88 + .word 0x3ff7c1ed,0x0130c132 + .word 0x3c9f124c,0xd1164dd6 + .word 0x3ff7d26a,0x62ff86f0 + .word 0x3c91bddb,0xfb72b8b4 + .word 0x3ff7e2f3,0x36cf4e62 + .word 0x3c705d02,0xba15797e + .word 0x3ff7f387,0x8491c491 + .word 
0xbc807f11,0xcf9311ae + .word 0x3ff80427,0x543e1a12 + .word 0xbc927c86,0x626d972b + .word 0x3ff814d2,0xadd106d9 + .word 0x3c946437,0x0d151d4d + .word 0x3ff82589,0x994cce13 + .word 0xbc9d4c1d,0xd41532d8 + .word 0x3ff8364c,0x1eb941f7 + .word 0x3c999b9a,0x31df2bd5 + .word 0x3ff8471a,0x4623c7ad + .word 0xbc88d684,0xa341cdfb + .word 0x3ff857f4,0x179f5b21 + .word 0xbc5ba748,0xf8b216d0 + .word 0x3ff868d9,0x9b4492ec + .word 0x3ca01c83,0xb21584a3 + .word 0x3ff879ca,0xd931a436 + .word 0x3c85d2d7,0xd2db47bc + .word 0x3ff88ac7,0xd98a6699 + .word 0x3c9994c2,0xf37cb53a + .word 0x3ff89bd0,0xa478580f + .word 0x3c9d5395,0x4475202a + .word 0x3ff8ace5,0x422aa0db + .word 0x3c96e9f1,0x56864b27 + .word 0x3ff8be05,0xbad61778 + .word 0x3c9ecb5e,0xfc43446e + .word 0x3ff8cf32,0x16b5448c + .word 0xbc70d55e,0x32e9e3aa + .word 0x3ff8e06a,0x5e0866d9 + .word 0xbc97114a,0x6fc9b2e6 + .word 0x3ff8f1ae,0x99157736 + .word 0x3c85cc13,0xa2e3976c + .word 0x3ff902fe,0xd0282c8a + .word 0x3c9592ca,0x85fe3fd2 + .word 0x3ff9145b,0x0b91ffc6 + .word 0xbc9dd679,0x2e582524 + .word 0x3ff925c3,0x53aa2fe2 + .word 0xbc83455f,0xa639db7f + .word 0x3ff93737,0xb0cdc5e5 + .word 0xbc675fc7,0x81b57ebc + .word 0x3ff948b8,0x2b5f98e5 + .word 0xbc8dc3d6,0x797d2d99 + .word 0x3ff95a44,0xcbc8520f + .word 0xbc764b7c,0x96a5f039 + .word 0x3ff96bdd,0x9a7670b3 + .word 0xbc5ba596,0x7f19c896 + .word 0x3ff97d82,0x9fde4e50 + .word 0xbc9d185b,0x7c1b85d0 + .word 0x3ff98f33,0xe47a22a2 + .word 0x3c7cabda,0xa24c78ed + .word 0x3ff9a0f1,0x70ca07ba + .word 0xbc9173bd,0x91cee632 + .word 0x3ff9b2bb,0x4d53fe0d + .word 0xbc9dd84e,0x4df6d518 + .word 0x3ff9c491,0x82a3f090 + .word 0x3c7c7c46,0xb071f2be + .word 0x3ff9d674,0x194bb8d5 + .word 0xbc9516be,0xa3dd8233 + .word 0x3ff9e863,0x19e32323 + .word 0x3c7824ca,0x78e64c6e + .word 0x3ff9fa5e,0x8d07f29e + .word 0xbc84a9ce,0xaaf1face + .word 0x3ffa0c66,0x7b5de565 + .word 0xbc935949,0x5d1cd533 + .word 0x3ffa1e7a,0xed8eb8bb + .word 0x3c9c6618,0xee8be70e + .word 0x3ffa309b,0xec4a2d33 + .word 
0x3c96305c,0x7ddc36ab + .word 0x3ffa42c9,0x80460ad8 + .word 0xbc9aa780,0x589fb120 + .word 0x3ffa5503,0xb23e255d + .word 0xbc9d2f6e,0xdb8d41e1 + .word 0x3ffa674a,0x8af46052 + .word 0x3c650f56,0x30670366 + .word 0x3ffa799e,0x1330b358 + .word 0x3c9bcb7e,0xcac563c6 + .word 0x3ffa8bfe,0x53c12e59 + .word 0xbc94f867,0xb2ba15a8 + .word 0x3ffa9e6b,0x5579fdbf + .word 0x3c90fac9,0x0ef7fd31 + .word 0x3ffab0e5,0x21356eba + .word 0x3c889c31,0xdae94544 + .word 0x3ffac36b,0xbfd3f37a + .word 0xbc8f9234,0xcae76cd0 + .word 0x3ffad5ff,0x3a3c2774 + .word 0x3c97ef3b,0xb6b1b8e4 + .word 0x3ffae89f,0x995ad3ad + .word 0x3c97a1cd,0x345dcc81 + .word 0x3ffafb4c,0xe622f2ff + .word 0xbc94b2fc,0x0f315ecc + .word 0x3ffb0e07,0x298db666 + .word 0xbc9bdef5,0x4c80e425 + .word 0x3ffb20ce,0x6c9a8952 + .word 0x3c94dd02,0x4a0756cc + .word 0x3ffb33a2,0xb84f15fb + .word 0xbc62805e,0x3084d708 + .word 0x3ffb4684,0x15b749b1 + .word 0xbc7f763d,0xe9df7c90 + .word 0x3ffb5972,0x8de5593a + .word 0xbc9c71df,0xbbba6de3 + .word 0x3ffb6c6e,0x29f1c52a + .word 0x3c92a8f3,0x52883f6e + .word 0x3ffb7f76,0xf2fb5e47 + .word 0xbc75584f,0x7e54ac3b + .word 0x3ffb928c,0xf22749e4 + .word 0xbc9b7216,0x54cb65c6 + .word 0x3ffba5b0,0x30a1064a + .word 0xbc9efcd3,0x0e54292e + .word 0x3ffbb8e0,0xb79a6f1f + .word 0xbc3f52d1,0xc9696205 + .word 0x3ffbcc1e,0x904bc1d2 + .word 0x3c823dd0,0x7a2d9e84 + .word 0x3ffbdf69,0xc3f3a207 + .word 0xbc3c2623,0x60ea5b52 + .word 0x3ffbf2c2,0x5bd71e09 + .word 0xbc9efdca,0x3f6b9c73 + .word 0x3ffc0628,0x6141b33d + .word 0xbc8d8a5a,0xa1fbca34 + .word 0x3ffc199b,0xdd85529c + .word 0x3c811065,0x895048dd + .word 0x3ffc2d1c,0xd9fa652c + .word 0xbc96e516,0x17c8a5d7 + .word 0x3ffc40ab,0x5fffd07a + .word 0x3c9b4537,0xe083c60a + .word 0x3ffc5447,0x78fafb22 + .word 0x3c912f07,0x2493b5af + .word 0x3ffc67f1,0x2e57d14b + .word 0x3c92884d,0xff483cad + .word 0x3ffc7ba8,0x8988c933 + .word 0xbc8e76bb,0xbe255559 + .word 0x3ffc8f6d,0x9406e7b5 + .word 0x3c71acbc,0x48805c44 + .word 0x3ffca340,0x5751c4db + .word 
0xbc87f2be,0xd10d08f4 + .word 0x3ffcb720,0xdcef9069 + .word 0x3c7503cb,0xd1e949db + .word 0x3ffccb0f,0x2e6d1675 + .word 0xbc7d220f,0x86009093 + .word 0x3ffcdf0b,0x555dc3fa + .word 0xbc8dd83b,0x53829d72 + .word 0x3ffcf315,0x5b5bab74 + .word 0xbc9a08e9,0xb86dff57 + .word 0x3ffd072d,0x4a07897c + .word 0xbc9cbc37,0x43797a9c + .word 0x3ffd1b53,0x2b08c968 + .word 0x3c955636,0x219a36ee + .word 0x3ffd2f87,0x080d89f2 + .word 0xbc9d487b,0x719d8578 + .word 0x3ffd43c8,0xeacaa1d6 + .word 0x3c93db53,0xbf5a1614 + .word 0x3ffd5818,0xdcfba487 + .word 0x3c82ed02,0xd75b3706 + .word 0x3ffd6c76,0xe862e6d3 + .word 0x3c5fe87a,0x4a8165a0 + .word 0x3ffd80e3,0x16c98398 + .word 0xbc911ec1,0x8beddfe8 + .word 0x3ffd955d,0x71ff6075 + .word 0x3c9a052d,0xbb9af6be + .word 0x3ffda9e6,0x03db3285 + .word 0x3c9c2300,0x696db532 + .word 0x3ffdbe7c,0xd63a8315 + .word 0xbc9b76f1,0x926b8be4 + .word 0x3ffdd321,0xf301b460 + .word 0x3c92da57,0x78f018c2 + .word 0x3ffde7d5,0x641c0658 + .word 0xbc9ca552,0x8e79ba8f + .word 0x3ffdfc97,0x337b9b5f + .word 0xbc91a5cd,0x4f184b5c + .word 0x3ffe1167,0x6b197d17 + .word 0xbc72b529,0xbd5c7f44 + .word 0x3ffe2646,0x14f5a129 + .word 0xbc97b627,0x817a1496 + .word 0x3ffe3b33,0x3b16ee12 + .word 0xbc99f4a4,0x31fdc68a + .word 0x3ffe502e,0xe78b3ff6 + .word 0x3c839e89,0x80a9cc8f + .word 0x3ffe6539,0x24676d76 + .word 0xbc863ff8,0x7522b734 + .word 0x3ffe7a51,0xfbc74c83 + .word 0x3c92d522,0xca0c8de2 + .word 0x3ffe8f79,0x77cdb740 + .word 0xbc910894,0x80b054b1 + .word 0x3ffea4af,0xa2a490da + .word 0xbc9e9c23,0x179c2893 + .word 0x3ffeb9f4,0x867cca6e + .word 0x3c94832f,0x2293e4f2 + .word 0x3ffecf48,0x2d8e67f1 + .word 0xbc9c93f3,0xb411ad8c + .word 0x3ffee4aa,0xa2188510 + .word 0x3c91c68d,0xa487568d + .word 0x3ffefa1b,0xee615a27 + .word 0x3c9dc7f4,0x86a4b6b0 + .word 0x3fff0f9c,0x1cb6412a + .word 0xbc932200,0x65181d45 + .word 0x3fff252b,0x376bba97 + .word 0x3c93a1a5,0xbf0d8e43 + .word 0x3fff3ac9,0x48dd7274 + .word 0xbc795a5a,0x3ed837de + .word 0x3fff5076,0x5b6e4540 + .word 
0x3c99d3e1,0x2dd8a18b + .word 0x3fff6632,0x798844f8 + .word 0x3c9fa37b,0x3539343e + .word 0x3fff7bfd,0xad9cbe14 + .word 0xbc9dbb12,0xd006350a + .word 0x3fff91d8,0x02243c89 + .word 0xbc612ea8,0xa779f689 + .word 0x3fffa7c1,0x819e90d8 + .word 0x3c874853,0xf3a5931e + .word 0x3fffbdba,0x3692d514 + .word 0xbc796773,0x15098eb6 + .word 0x3fffd3c2,0x2b8f71f1 + .word 0x3c62eb74,0x966579e7 + .word 0x3fffe9d9,0x6b2a23d9 + .word 0x3c74a603,0x7442fde3 + + .align 16 +constants: + .word 0x3ef00000,0x00000000 + .word 0x40862e42,0xfefa39ef + .word 0x01000000,0x00000000 + .word 0x7f000000,0x00000000 + .word 0x80000000,0x00000000 + .word 0x43f00000,0x00000000 ! scaling 2^12 two96 + .word 0xfff00000,0x00000000 + .word 0x3ff00000,0x00000000 + .word 0x3fdfffff,0xfffffff6 + .word 0x3fc55555,0x721a1d14 + .word 0x3fa55555,0x6e0896af + .word 0x41371547,0x652b82fe ! scaling 2^12 invln2_256 + .word 0x3ea62e42,0xfee00000 ! scaling 2^(-12) ln2_256h + .word 0x3caa39ef,0x35793c76 ! scaling 2^(-12) ln2_256l + + ! base set w/o scaling + ! .word 0x43300000,0x00000000 ! scaling two96 + ! .word 0x40771547,0x652b82fe ! scaling invln2_256 + ! .word 0x3f662e42,0xfee00000 ! scaling ln2_256h + ! .word 0x3d6a39ef,0x35793c76 ! scaling ln2_256l + +#define ox3ef 0x0 +#define thresh 0x8 +#define tiny 0x10 +#define huge 0x18 +#define signbit 0x20 +#define two96 0x28 +#define neginf 0x30 +#define one 0x38 +#define B1OFF 0x40 +#define B2OFF 0x48 +#define B3OFF 0x50 +#define invln2_256 0x58 +#define ln2_256h 0x60 +#define ln2_256l 0x68 + +! local storage indices + +#define m2 STACK_BIAS-0x4 +#define m1 STACK_BIAS-0x8 +#define m0 STACK_BIAS-0xc +#define jnk STACK_BIAS-0x20 +! sizeof temp storage - must be a multiple of 16 for V9 +#define tmps 0x20 + +! register use + +! i0 n +! i1 x +! i2 stridex +! i3 y +! i4 stridey +! i5 0x80000000 + +! g1 TBL + +! l0 m0 +! l1 m1 +! l2 m2 +! l3 j0,oy0 +! l4 j1,oy1 +! l5 j2,oy2 +! l6 0x3e300000 +! l7 0x40862e41 + +! o0 py0 +! o1 py1 +! o2 py2 +! o3 scratch +! o4 scratch +! 
o5 0x40874910 +! o7 0x7ff00000 + +! f0 x0 +! f2 +! f4 +! f6 +! f8 +! f10 x1 +! f12 +! f14 +! f16 +! f18 +! f20 x2 +! f22 +! f24 +! f26 +! f28 +! f30 +! f32 +! f34 +! f36 0x3ef0... +! f38 thresh +! f40 tiny +! f42 huge +! f44 signbit +! f46 two96 +! f48 neginf +! f50 one +! f52 B1 +! f54 B2 +! f56 B3 +! f58 invln2_256 +! f60 ln2_256h +! f62 ln2_256l +#define BOUNDRY %f36 +#define THRESH %f38 +#define TINY %f40 +#define HUGE %f42 +#define SIGNBIT %f44 +#define TWO96 %f46 +#define NEGINF %f48 +#define ONE %f50 +#define B1 %f52 +#define B2 %f54 +#define B3 %f56 +#define INVLN2_256 %f58 +#define LN2_256H %f60 +#define LN2_256L %f62 + + ENTRY(__vexp) + save %sp,-SA(MINFRAME)-tmps,%sp + PIC_SETUP(l7) + PIC_SET(l7,constants,o3) + PIC_SET(l7,TBL,o0) + mov %o0,%g1 + wr %g0,0x82,%asi ! set %asi for non-faulting loads + + sethi %hi(0x80000000),%i5 + sethi %hi(0x3e300000),%l6 + sethi %hi(0x40862e41),%l7 + or %l7,%lo(0x40862e41),%l7 + sethi %hi(0x40874910),%o5 + or %o5,%lo(0x40874910),%o5 + sethi %hi(0x7ff00000),%o7 + ldd [%o3+ox3ef],BOUNDRY + ldd [%o3+thresh],THRESH + ldd [%o3+tiny],TINY + ldd [%o3+huge],HUGE + ldd [%o3+signbit],SIGNBIT + ldd [%o3+two96],TWO96 + ldd [%o3+neginf],NEGINF + ldd [%o3+one],ONE + ldd [%o3+B1OFF],B1 + ldd [%o3+B2OFF],B2 + ldd [%o3+B3OFF],B3 + ldd [%o3+invln2_256],INVLN2_256 + ldd [%o3+ln2_256h],LN2_256H + ldd [%o3+ln2_256l],LN2_256L + sll %i2,3,%i2 ! scale strides + sll %i4,3,%i4 + add %fp,jnk,%l3 ! precondition loop + add %fp,jnk,%l4 + add %fp,jnk,%l5 + ld [%i1],%l0 ! hx = *x + ld [%i1],%f0 + ld [%i1+4],%f1 + andn %l0,%i5,%l0 ! hx &= ~0x80000000 + ba .loop0 + add %i1,%i2,%i1 ! x += stridex + + .align 16 +! -- 16 byte aligned +.loop0: + lda [%i1]%asi,%l1 ! preload next argument + sub %l0,%l6,%o3 + sub %l7,%l0,%o4 + fand %f0,SIGNBIT,%f2 ! get sign bit + + lda [%i1]%asi,%f10 + orcc %o3,%o4,%g0 + mov %i3,%o0 ! py0 = y + bl,pn %icc,.range0 ! if hx < 0x3e300000 or > 0x40862e41 + +! delay slot + lda [%i1+4]%asi,%f11 + addcc %i0,-1,%i0 + add %i3,%i4,%i3 ! 
y += stridey + ble,pn %icc,.endloop1 + +! delay slot + andn %l1,%i5,%l1 + add %i1,%i2,%i1 ! x += stridex + for %f2,TWO96,%f2 ! used to strip least sig bits + fmuld %f0,INVLN2_256,%f4 ! x/ (ln2/256) , creating k + +.loop1: + lda [%i1]%asi,%l2 ! preload next argument + sub %l1,%l6,%o3 + sub %l7,%l1,%o4 + fand %f10,SIGNBIT,%f12 + + lda [%i1]%asi,%f20 + orcc %o3,%o4,%g0 + mov %i3,%o1 ! py1 = y + bl,pn %icc,.range1 ! if hx < 0x3e300000 or > 0x40862e41 + +! delay slot + lda [%i1+4]%asi,%f21 + addcc %i0,-1,%i0 + add %i3,%i4,%i3 ! y += stridey + ble,pn %icc,.endloop2 + +! delay slot + andn %l2,%i5,%l2 + add %i1,%i2,%i1 ! x += stridex + for %f12,TWO96,%f12 + fmuld %f10,INVLN2_256,%f14 + +.loop2: + sub %l2,%l6,%o3 + sub %l7,%l2,%o4 + fand %f20,SIGNBIT,%f22 + fmuld %f20,INVLN2_256,%f24 ! okay to put this here; for alignment + + orcc %o3,%o4,%g0 + bl,pn %icc,.range2 ! if hx < 0x3e300000 or > 0x40862e41 +! delay slot + for %f22,TWO96,%f22 + faddd %f4,%f2,%f4 ! creating k+j/256, sra to zero bits + +.cont: + faddd %f14,%f12,%f14 + mov %i3,%o2 ! py2 = y + + faddd %f24,%f22,%f24 + add %i3,%i4,%i3 ! y += stridey + + ! BUBBLE USIII + + fsubd %f4,%f2,%f8 ! creating k+j/256: sll + st %f6,[%l3] ! store previous loop x0 + + fsubd %f14,%f12,%f18 + st %f7,[%l3+4] ! store previous loop x0 + + fsubd %f24,%f22,%f28 + st %f16,[%l4] + + ! BUBBLE USIII + + fmuld %f8,LN2_256H,%f2 ! closest LN2_256 to x + st %f17,[%l4+4] + + fmuld %f18,LN2_256H,%f12 + st %f26,[%l5] + + fmuld %f28,LN2_256H,%f22 + st %f27,[%l5+4] + + ! BUBBLE USIII + + fsubd %f0,%f2,%f0 ! r = x - p*LN2_256H + fmuld %f8,LN2_256L,%f4 ! closest LN2_256 to x , added prec + + fsubd %f10,%f12,%f10 + fmuld %f18,LN2_256L,%f14 + + fsubd %f20,%f22,%f20 + fmuld %f28,LN2_256L,%f24 + + ! BUBBLE USIII + + fsubd %f0,%f4,%f0 ! r -= p*LN2_256L + + fsubd %f10,%f14,%f10 + + fsubd %f20,%f24,%f20 + +!!!!!!!!!!!!!!!!!!! New polynomial reorder starts here + + ! Alternate polynomial grouping allowing non-sequential calc of p + ! 
OLD : p = r * ( 1 + r * ( B1 + r * ( B2 + r * B3) ) ) + ! NEW : p = r * [ (1+r*B1) + (r*r) * ( B2 + r * B3) ) ] + ! + ! let SLi Ri SRi be accumulators + + fmuld %f0,B3,%f2 ! SR1 = r1 * B3 + fdtoi %f8,%f8 ! convert k+j/256 to int + st %f8,[%fp+m0] ! store k, to shift return/use + + fmuld %f10,B3,%f12 ! SR2 = r2 * B3 + fdtoi %f18,%f18 ! convert k+j/256 to int + st %f18,[%fp+m1] ! store k, to shift return/use + + fmuld %f20,B3,%f22 ! SR3 = r3 * B3 + fdtoi %f28,%f28 ! convert k+j/256 to int + st %f28,[%fp+m2] ! store k, to shift return/use + + fmuld %f0,%f0,%f4 ! R1 = r1 * r1 + + fmuld %f10,%f10,%f14 ! R2 = r2 * r2 + faddd %f2,B2,%f2 ! SR1 += B2 + + fmuld %f20,%f20,%f24 ! R3 = r3 * r3 + faddd %f12,B2,%f12 ! SR2 += B2 + + faddd %f22,B2,%f22 ! SR3 += B2 + fmuld %f0,B1,%f6 ! SL1 = r1 * B1 + + fmuld %f10,B1,%f32 ! SL2 = r2 * B1 + fand %f8,NEGINF,%f8 + ! best here for RAW BYPASS + ld [%fp+m0],%l0 ! get nonshifted k into intreg + + fmuld %f20,B1,%f34 ! SL3 = r3 * B1 + fand %f18,NEGINF,%f18 + ld [%fp+m1],%l1 ! get nonshifted k into intreg + + fmuld %f4,%f2,%f4 ! R1 = R1 * SR1 + fand %f28,NEGINF,%f28 + ld [%fp+m2],%l2 ! get nonshifted k into intreg + + fmuld %f14,%f12,%f14 ! R2 = R2 * SR2 + faddd %f6,ONE,%f6 ! SL1 += 1 + + fmuld %f24,%f22,%f24 ! R3 = R3 * SR3 + faddd %f32,ONE,%f32 ! SL2 += 1 + sra %l0,8,%l3 ! shift k tobe offset 256-8byte + + faddd %f34,ONE,%f34 ! SL3 += 1 + sra %l1,8,%l4 ! shift k tobe offset 256-8byte + sra %l2,8,%l5 ! shift k tobe offset 256-8byte + + ! BUBBLE in USIII + and %l3,0xff0,%l3 + and %l4,0xff0,%l4 + + + + faddd %f6,%f4,%f6 ! R1 = SL1 + R1 + ldd [%g1+%l3],%f4 ! tbl[j] + add %l3,8,%l3 ! inc j + and %l5,0xff0,%l5 + + + faddd %f32,%f14,%f32 ! R2 = SL2 + R2 + ldd [%g1+%l4],%f14 ! tbl[j] + add %l4,8,%l4 ! inc j + sra %l0,20,%o3 + + faddd %f34,%f24,%f34 ! R3 = SL3 + R3 + ldd [%g1+%l5],%f24 ! tbl[j] + add %l5,8,%l5 ! inc j + sra %l1,20,%l1 + + ! BUBBLE in USIII + ldd [%g1+%l4],%f16 ! tbl[j+1] + add %o3,1021,%o3 ! inc j + + fmuld %f0,%f6,%f0 ! 
p1 = r1 * R1 + ldd [%g1+%l3],%f6 ! tbl[j+1] + add %l1,1021,%l1 ! inc j + sra %l2,20,%l2 + + fmuld %f10,%f32,%f10 ! p2 = r2 * R2 + ldd [%g1+%l5],%f26 ! tbl[j+1] + add %l2,1021,%l2 ! inc j + + fmuld %f20,%f34,%f20 ! p3 = r3 * R3 + + + + + +!!!!!!!!!!!!!!!!!!! poly-reorder - ends here + + fmuld %f0,%f4,%f0 ! start exp(x) = exp(r) * tbl[j] + mov %o0,%l3 + + fmuld %f10,%f14,%f10 + mov %o1,%l4 + + fmuld %f20,%f24,%f20 + mov %o2,%l5 + + faddd %f0,%f6,%f6 ! cont exp(x) : apply tbl[j] high bits + lda [%i1]%asi,%l0 ! preload next argument + + faddd %f10,%f16,%f16 + lda [%i1]%asi,%f0 + + faddd %f20,%f26,%f26 + lda [%i1+4]%asi,%f1 + + faddd %f6,%f4,%f6 ! cont exp(x) : apply tbl[j+1] low bits + add %i1,%i2,%i1 ! x += stridex + + faddd %f16,%f14,%f16 + andn %l0,%i5,%l0 + or %o3,%l1,%o4 + +! -- 16 byte aligned + orcc %o4,%l2,%o4 + bl,pn %icc,.small +! delay slot + faddd %f26,%f24,%f26 + + fpadd32 %f6,%f8,%f6 ! done exp(x) : apply 2^k + fpadd32 %f16,%f18,%f16 + + + addcc %i0,-1,%i0 + bg,pn %icc,.loop0 +! delay slot + fpadd32 %f26,%f28,%f26 + + ba,pt %icc,.endloop0 +! delay slot + nop + + + .align 16 +.small: + tst %o3 + bge,pt %icc,1f +! delay slot + fpadd32 %f6,%f8,%f6 + fpadd32 %f6,BOUNDRY,%f6 + fmuld %f6,TINY,%f6 +1: + tst %l1 + bge,pt %icc,1f +! delay slot + fpadd32 %f16,%f18,%f16 + fpadd32 %f16,BOUNDRY,%f16 + fmuld %f16,TINY,%f16 +1: + tst %l2 + bge,pt %icc,1f +! delay slot + fpadd32 %f26,%f28,%f26 + fpadd32 %f26,BOUNDRY,%f26 + fmuld %f26,TINY,%f26 +1: + addcc %i0,-1,%i0 + bg,pn %icc,.loop0 +! delay slot + nop + ba,pt %icc,.endloop0 +! 
delay slot + nop + + +.endloop2: + for %f12,TWO96,%f12 + fmuld %f10,INVLN2_256,%f14 + faddd %f14,%f12,%f14 + fsubd %f14,%f12,%f18 + fmuld %f18,LN2_256H,%f12 + fsubd %f10,%f12,%f10 + fmuld %f18,LN2_256L,%f14 + fsubd %f10,%f14,%f10 + fmuld %f10,B3,%f12 + fdtoi %f18,%f18 + st %f18,[%fp+m1] + fmuld %f10,%f10,%f14 + faddd %f12,B2,%f12 + fmuld %f10,B1,%f32 + fand %f18,NEGINF,%f18 + ld [%fp+m1],%l1 + fmuld %f14,%f12,%f14 + faddd %f32,ONE,%f32 + sra %l1,8,%o4 + and %o4,0xff0,%o4 + faddd %f32,%f14,%f32 + ldd [%g1+%o4],%f14 + add %o4,8,%o4 + sra %l1,20,%l1 + ldd [%g1+%o4],%f30 + addcc %l1,1021,%l1 + fmuld %f10,%f32,%f10 + fmuld %f10,%f14,%f10 + faddd %f10,%f30,%f30 + faddd %f30,%f14,%f30 + bge,pt %icc,1f +! delay slot + fpadd32 %f30,%f18,%f30 + fpadd32 %f30,BOUNDRY,%f30 + fmuld %f30,TINY,%f30 +1: + st %f30,[%o1] + st %f31,[%o1+4] + +.endloop1: + for %f2,TWO96,%f2 + fmuld %f0,INVLN2_256,%f4 + faddd %f4,%f2,%f4 + fsubd %f4,%f2,%f8 + fmuld %f8,LN2_256H,%f2 + fsubd %f0,%f2,%f0 + fmuld %f8,LN2_256L,%f4 + fsubd %f0,%f4,%f0 + fmuld %f0,B3,%f2 + fdtoi %f8,%f8 + st %f8,[%fp+m0] + fmuld %f0,%f0,%f4 + faddd %f2,B2,%f2 + fmuld %f0,B1,%f32 + fand %f8,NEGINF,%f8 + ld [%fp+m0],%l0 + fmuld %f4,%f2,%f4 + faddd %f32,ONE,%f32 + sra %l0,8,%o4 + and %o4,0xff0,%o4 + faddd %f32,%f4,%f32 + ldd [%g1+%o4],%f4 + add %o4,8,%o4 + sra %l0,20,%o3 + ldd [%g1+%o4],%f30 + addcc %o3,1021,%o3 + fmuld %f0,%f32,%f0 + fmuld %f0,%f4,%f0 + faddd %f0,%f30,%f30 + faddd %f30,%f4,%f30 + bge,pt %icc,1f +! delay slot + fpadd32 %f30,%f8,%f30 + fpadd32 %f30,BOUNDRY,%f30 + fmuld %f30,TINY,%f30 +1: + st %f30,[%o0] + st %f31,[%o0+4] + +.endloop0: + st %f6,[%l3] + st %f7,[%l3+4] + st %f16,[%l4] + st %f17,[%l4+4] + st %f26,[%l5] + st %f27,[%l5+4] + ret + restore + + +.range0: + cmp %l0,%l6 + bl,a,pt %icc,3f ! if x is tiny +! delay slot, annulled if branch not taken + faddd %f0,ONE,%f4 + + cmp %l0,%o5 + bg,pt %icc,1f ! if x is huge, inf, nan +! delay slot + nop + + fcmpd %fcc0,%f0,THRESH + fbg,a,pt %fcc0,3f ! 
if x is huge and positive +! delay slot, annulled if branch not taken + fmuld HUGE,HUGE,%f4 + +! x is near the extremes but within range; return to the loop + addcc %i0,-1,%i0 + add %i3,%i4,%i3 ! y += stridey + ble,pn %icc,.endloop1 +! delay slot + andn %l1,%i5,%l1 + add %i1,%i2,%i1 ! x += stridex + for %f2,TWO96,%f2 + ba,pt %icc,.loop1 +! delay slot + fmuld %f0,INVLN2_256,%f4 + +1: + cmp %l0,%o7 + bl,pn %icc,2f ! if x is finite +! delay slot + nop + fzero %f4 + fcmpd %fcc0,%f0,NEGINF + fmovdne %fcc0,%f0,%f4 + ba,pt %icc,3f + fmuld %f4,%f4,%f4 ! x*x or zero*zero +2: + fmovd HUGE,%f4 + fcmpd %fcc0,%f0,ONE + fmovdl %fcc0,TINY,%f4 + fmuld %f4,%f4,%f4 ! huge*huge or tiny*tiny +3: + st %f4,[%o0] + andn %l1,%i5,%l0 + add %i1,%i2,%i1 ! x += stridex + fmovd %f10,%f0 + st %f5,[%o0+4] + addcc %i0,-1,%i0 + bg,pt %icc,.loop0 +! delay slot + add %i3,%i4,%i3 ! y += stridey + ba,pt %icc,.endloop0 +! delay slot + nop + + +.range1: + cmp %l1,%l6 + bl,a,pt %icc,3f ! if x is tiny +! delay slot, annulled if branch not taken + faddd %f10,ONE,%f14 + + cmp %l1,%o5 + bg,pt %icc,1f ! if x is huge, inf, nan +! delay slot + nop + + fcmpd %fcc0,%f10,THRESH + fbg,a,pt %fcc0,3f ! if x is huge and positive +! delay slot, annulled if branch not taken + fmuld HUGE,HUGE,%f14 + +! x is near the extremes but within range; return to the loop + addcc %i0,-1,%i0 + add %i3,%i4,%i3 ! y += stridey + ble,pn %icc,.endloop2 +! delay slot + andn %l2,%i5,%l2 + add %i1,%i2,%i1 ! x += stridex + for %f12,TWO96,%f12 + ba,pt %icc,.loop2 +! delay slot + fmuld %f10,INVLN2_256,%f14 + +1: + cmp %l1,%o7 + bl,pn %icc,2f ! if x is finite +! delay slot + nop + fzero %f14 + fcmpd %fcc0,%f10,NEGINF + fmovdne %fcc0,%f10,%f14 + ba,pt %icc,3f + fmuld %f14,%f14,%f14 ! x*x or zero*zero +2: + fmovd HUGE,%f14 + fcmpd %fcc0,%f10,ONE + fmovdl %fcc0,TINY,%f14 + fmuld %f14,%f14,%f14 ! huge*huge or tiny*tiny +3: + st %f14,[%o1] + andn %l2,%i5,%l1 + add %i1,%i2,%i1 ! 
x += stridex + fmovd %f20,%f10 + st %f15,[%o1+4] + addcc %i0,-1,%i0 + bg,pt %icc,.loop1 +! delay slot + add %i3,%i4,%i3 ! y += stridey + ba,pt %icc,.endloop1 +! delay slot + nop + + +.range2: + cmp %l2,%l6 + bl,a,pt %icc,3f ! if x is tiny +! delay slot, annulled if branch not taken + faddd %f20,ONE,%f24 + + cmp %l2,%o5 + bg,pt %icc,1f ! if x is huge, inf, nan +! delay slot + nop + + fcmpd %fcc0,%f20,THRESH + fbg,a,pt %fcc0,3f ! if x is huge and positive +! delay slot, annulled if branch not taken + fmuld HUGE,HUGE,%f24 + +! x is near the extremes but within range; return to the loop + ba,pt %icc,.cont +! delay slot + faddd %f4,%f2,%f4 + +1: + cmp %l2,%o7 + bl,pn %icc,2f ! if x is finite +! delay slot + nop + fzero %f24 + fcmpd %fcc0,%f20,NEGINF + fmovdne %fcc0,%f20,%f24 + ba,pt %icc,3f + fmuld %f24,%f24,%f24 ! x*x or zero*zero +2: + fmovd HUGE,%f24 + fcmpd %fcc0,%f20,ONE + fmovdl %fcc0,TINY,%f24 + fmuld %f24,%f24,%f24 ! huge*huge or tiny*tiny +3: + st %f24,[%i3] + st %f25,[%i3+4] + lda [%i1]%asi,%l2 ! preload next argument + lda [%i1]%asi,%f20 + lda [%i1+4]%asi,%f21 + andn %l2,%i5,%l2 + add %i1,%i2,%i1 ! x += stridex + addcc %i0,-1,%i0 + bg,pt %icc,.loop2 +! delay slot + add %i3,%i4,%i3 ! y += stridey + ba,pt %icc,.endloop2 +! delay slot + nop + + SET_SIZE(__vexp) + diff --git a/usr/src/lib/libmvec/common/vis/__vexpf.S b/usr/src/lib/libmvec/common/vis/__vexpf.S new file mode 100644 index 0000000000..76ae2752b0 --- /dev/null +++ b/usr/src/lib/libmvec/common/vis/__vexpf.S @@ -0,0 +1,2114 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .file "__vexpf.S" + +#include "libm.h" + + RO_DATA + .align 64 +!! 2^(i/256) - ((i & 0xf0) << 44), i = [0, 255] +.CONST_TBL: + .word 0x3ff00000, 0x00000000, 0x3ff00b1a, 0xfa5abcbf + .word 0x3ff0163d, 0xa9fb3335, 0x3ff02168, 0x143b0281 + .word 0x3ff02c9a, 0x3e778061, 0x3ff037d4, 0x2e11bbcc + .word 0x3ff04315, 0xe86e7f85, 0x3ff04e5f, 0x72f654b1 + .word 0x3ff059b0, 0xd3158574, 0x3ff0650a, 0x0e3c1f89 + .word 0x3ff0706b, 0x29ddf6de, 0x3ff07bd4, 0x2b72a836 + .word 0x3ff08745, 0x18759bc8, 0x3ff092bd, 0xf66607e0 + .word 0x3ff09e3e, 0xcac6f383, 0x3ff0a9c7, 0x9b1f3919 + .word 0x3fefb558, 0x6cf9890f, 0x3fefc0f1, 0x45e46c85 + .word 0x3fefcc92, 0x2b7247f7, 0x3fefd83b, 0x23395dec + .word 0x3fefe3ec, 0x32d3d1a2, 0x3fefefa5, 0x5fdfa9c5 + .word 0x3feffb66, 0xaffed31b, 0x3ff00730, 0x28d7233e + .word 0x3ff01301, 0xd0125b51, 0x3ff01edb, 0xab5e2ab6 + .word 0x3ff02abd, 0xc06c31cc, 0x3ff036a8, 0x14f204ab + .word 0x3ff0429a, 0xaea92de0, 0x3ff04e95, 0x934f312e + .word 0x3ff05a98, 0xc8a58e51, 0x3ff066a4, 0x5471c3c2 + .word 0x3fef72b8, 0x3c7d517b, 0x3fef7ed4, 0x8695bbc0 + .word 0x3fef8af9, 0x388c8dea, 0x3fef9726, 0x58375d2f + .word 0x3fefa35b, 0xeb6fcb75, 0x3fefaf99, 0xf8138a1c + .word 0x3fefbbe0, 0x84045cd4, 0x3fefc82f, 0x95281c6b + .word 0x3fefd487, 0x3168b9aa, 0x3fefe0e7, 0x5eb44027 + .word 0x3fefed50, 0x22fcd91d, 0x3feff9c1, 0x8438ce4d + .word 0x3ff0063b, 0x88628cd6, 0x3ff012be, 0x3578a819 + .word 0x3ff01f49, 0x917ddc96, 
0x3ff02bdd, 0xa27912d1 + .word 0x3fef387a, 0x6e756238, 0x3fef451f, 0xfb82140a + .word 0x3fef51ce, 0x4fb2a63f, 0x3fef5e85, 0x711ece75 + .word 0x3fef6b45, 0x65e27cdd, 0x3fef780e, 0x341ddf29 + .word 0x3fef84df, 0xe1f56381, 0x3fef91ba, 0x7591bb70 + .word 0x3fef9e9d, 0xf51fdee1, 0x3fefab8a, 0x66d10f13 + .word 0x3fefb87f, 0xd0dad990, 0x3fefc57e, 0x39771b2f + .word 0x3fefd285, 0xa6e4030b, 0x3fefdf96, 0x1f641589 + .word 0x3fefecaf, 0xa93e2f56, 0x3feff9d2, 0x4abd886b + .word 0x3fef06fe, 0x0a31b715, 0x3fef1432, 0xedeeb2fd + .word 0x3fef2170, 0xfc4cd831, 0x3fef2eb8, 0x3ba8ea32 + .word 0x3fef3c08, 0xb26416ff, 0x3fef4962, 0x66e3fa2d + .word 0x3fef56c5, 0x5f929ff1, 0x3fef6431, 0xa2de883b + .word 0x3fef71a7, 0x373aa9cb, 0x3fef7f26, 0x231e754a + .word 0x3fef8cae, 0x6d05d866, 0x3fef9a40, 0x1b7140ef + .word 0x3fefa7db, 0x34e59ff7, 0x3fefb57f, 0xbfec6cf4 + .word 0x3fefc32d, 0xc313a8e5, 0x3fefd0e5, 0x44ede173 + .word 0x3feedea6, 0x4c123422, 0x3feeec70, 0xdf1c5175 + .word 0x3feefa45, 0x04ac801c, 0x3fef0822, 0xc367a024 + .word 0x3fef160a, 0x21f72e2a, 0x3fef23fb, 0x2709468a + .word 0x3fef31f5, 0xd950a897, 0x3fef3ffa, 0x3f84b9d4 + .word 0x3fef4e08, 0x6061892d, 0x3fef5c20, 0x42a7d232 + .word 0x3fef6a41, 0xed1d0057, 0x3fef786d, 0x668b3237 + .word 0x3fef86a2, 0xb5c13cd0, 0x3fef94e1, 0xe192aed2 + .word 0x3fefa32a, 0xf0d7d3de, 0x3fefb17d, 0xea6db7d7 + .word 0x3feebfda, 0xd5362a27, 0x3feece41, 0xb817c114 + .word 0x3feedcb2, 0x99fddd0d, 0x3feeeb2d, 0x81d8abff + .word 0x3feef9b2, 0x769d2ca7, 0x3fef0841, 0x7f4531ee + .word 0x3fef16da, 0xa2cf6642, 0x3fef257d, 0xe83f4eef + .word 0x3fef342b, 0x569d4f82, 0x3fef42e2, 0xf4f6ad27 + .word 0x3fef51a4, 0xca5d920f, 0x3fef6070, 0xdde910d2 + .word 0x3fef6f47, 0x36b527da, 0x3fef7e27, 0xdbe2c4cf + .word 0x3fef8d12, 0xd497c7fd, 0x3fef9c08, 0x27ff07cc + .word 0x3feeab07, 0xdd485429, 0x3feeba11, 0xfba87a03 + .word 0x3feec926, 0x8a5946b7, 0x3feed845, 0x90998b93 + .word 0x3feee76f, 0x15ad2148, 0x3feef6a3, 0x20dceb71 + .word 0x3fef05e1, 0xb976dc09, 0x3fef152a, 
0xe6cdf6f4 + .word 0x3fef247e, 0xb03a5585, 0x3fef33dd, 0x1d1929fd + .word 0x3fef4346, 0x34ccc320, 0x3fef52b9, 0xfebc8fb7 + .word 0x3fef6238, 0x82552225, 0x3fef71c1, 0xc70833f6 + .word 0x3fef8155, 0xd44ca973, 0x3fef90f4, 0xb19e9538 + .word 0x3feea09e, 0x667f3bcd, 0x3feeb052, 0xfa75173e + .word 0x3feec012, 0x750bdabf, 0x3feecfdc, 0xddd47645 + .word 0x3feedfb2, 0x3c651a2f, 0x3feeef92, 0x98593ae5 + .word 0x3feeff7d, 0xf9519484, 0x3fef0f74, 0x66f42e87 + .word 0x3fef1f75, 0xe8ec5f74, 0x3fef2f82, 0x86ead08a + .word 0x3fef3f9a, 0x48a58174, 0x3fef4fbd, 0x35d7cbfd + .word 0x3fef5feb, 0x564267c9, 0x3fef7024, 0xb1ab6e09 + .word 0x3fef8069, 0x4fde5d3f, 0x3fef90b9, 0x38ac1cf6 + .word 0x3feea114, 0x73eb0187, 0x3feeb17b, 0x0976cfdb + .word 0x3feec1ed, 0x0130c132, 0x3feed26a, 0x62ff86f0 + .word 0x3feee2f3, 0x36cf4e62, 0x3feef387, 0x8491c491 + .word 0x3fef0427, 0x543e1a12, 0x3fef14d2, 0xadd106d9 + .word 0x3fef2589, 0x994cce13, 0x3fef364c, 0x1eb941f7 + .word 0x3fef471a, 0x4623c7ad, 0x3fef57f4, 0x179f5b21 + .word 0x3fef68d9, 0x9b4492ed, 0x3fef79ca, 0xd931a436 + .word 0x3fef8ac7, 0xd98a6699, 0x3fef9bd0, 0xa478580f + .word 0x3feeace5, 0x422aa0db, 0x3feebe05, 0xbad61778 + .word 0x3feecf32, 0x16b5448c, 0x3feee06a, 0x5e0866d9 + .word 0x3feef1ae, 0x99157736, 0x3fef02fe, 0xd0282c8a + .word 0x3fef145b, 0x0b91ffc6, 0x3fef25c3, 0x53aa2fe2 + .word 0x3fef3737, 0xb0cdc5e5, 0x3fef48b8, 0x2b5f98e5 + .word 0x3fef5a44, 0xcbc8520f, 0x3fef6bdd, 0x9a7670b3 + .word 0x3fef7d82, 0x9fde4e50, 0x3fef8f33, 0xe47a22a2 + .word 0x3fefa0f1, 0x70ca07ba, 0x3fefb2bb, 0x4d53fe0d + .word 0x3feec491, 0x82a3f090, 0x3feed674, 0x194bb8d5 + .word 0x3feee863, 0x19e32323, 0x3feefa5e, 0x8d07f29e + .word 0x3fef0c66, 0x7b5de565, 0x3fef1e7a, 0xed8eb8bb + .word 0x3fef309b, 0xec4a2d33, 0x3fef42c9, 0x80460ad8 + .word 0x3fef5503, 0xb23e255d, 0x3fef674a, 0x8af46052 + .word 0x3fef799e, 0x1330b358, 0x3fef8bfe, 0x53c12e59 + .word 0x3fef9e6b, 0x5579fdbf, 0x3fefb0e5, 0x21356eba + .word 0x3fefc36b, 0xbfd3f37a, 0x3fefd5ff, 0x3a3c2774 + .word 
0x3feee89f, 0x995ad3ad, 0x3feefb4c, 0xe622f2ff + .word 0x3fef0e07, 0x298db666, 0x3fef20ce, 0x6c9a8952 + .word 0x3fef33a2, 0xb84f15fb, 0x3fef4684, 0x15b749b1 + .word 0x3fef5972, 0x8de5593a, 0x3fef6c6e, 0x29f1c52a + .word 0x3fef7f76, 0xf2fb5e47, 0x3fef928c, 0xf22749e4 + .word 0x3fefa5b0, 0x30a1064a, 0x3fefb8e0, 0xb79a6f1f + .word 0x3fefcc1e, 0x904bc1d2, 0x3fefdf69, 0xc3f3a207 + .word 0x3feff2c2, 0x5bd71e09, 0x3ff00628, 0x6141b33d + .word 0x3fef199b, 0xdd85529c, 0x3fef2d1c, 0xd9fa652c + .word 0x3fef40ab, 0x5fffd07a, 0x3fef5447, 0x78fafb22 + .word 0x3fef67f1, 0x2e57d14b, 0x3fef7ba8, 0x8988c933 + .word 0x3fef8f6d, 0x9406e7b5, 0x3fefa340, 0x5751c4db + .word 0x3fefb720, 0xdcef9069, 0x3fefcb0f, 0x2e6d1675 + .word 0x3fefdf0b, 0x555dc3fa, 0x3feff315, 0x5b5bab74 + .word 0x3ff0072d, 0x4a07897c, 0x3ff01b53, 0x2b08c968 + .word 0x3ff02f87, 0x080d89f2, 0x3ff043c8, 0xeacaa1d6 + .word 0x3fef5818, 0xdcfba487, 0x3fef6c76, 0xe862e6d3 + .word 0x3fef80e3, 0x16c98398, 0x3fef955d, 0x71ff6075 + .word 0x3fefa9e6, 0x03db3285, 0x3fefbe7c, 0xd63a8315 + .word 0x3fefd321, 0xf301b460, 0x3fefe7d5, 0x641c0658 + .word 0x3feffc97, 0x337b9b5f, 0x3ff01167, 0x6b197d17 + .word 0x3ff02646, 0x14f5a129, 0x3ff03b33, 0x3b16ee12 + .word 0x3ff0502e, 0xe78b3ff6, 0x3ff06539, 0x24676d76 + .word 0x3ff07a51, 0xfbc74c83, 0x3ff08f79, 0x77cdb740 + .word 0x3fefa4af, 0xa2a490da, 0x3fefb9f4, 0x867cca6e + .word 0x3fefcf48, 0x2d8e67f1, 0x3fefe4aa, 0xa2188510 + .word 0x3feffa1b, 0xee615a27, 0x3ff00f9c, 0x1cb6412a + .word 0x3ff0252b, 0x376bba97, 0x3ff03ac9, 0x48dd7274 + .word 0x3ff05076, 0x5b6e4540, 0x3ff06632, 0x798844f8 + .word 0x3ff07bfd, 0xad9cbe14, 0x3ff091d8, 0x02243c89 + .word 0x3ff0a7c1, 0x819e90d8, 0x3ff0bdba, 0x3692d514 + .word 0x3ff0d3c2, 0x2b8f71f1, 0x3ff0e9d9, 0x6b2a23d9 + + .word 0x7149f2ca, 0x0da24260 ! 1.0e30f, 1.0e-30f + .word 0x3ecebfbe, 0x9d182250 ! KA2 = 3.66556671660783833261e-06 + .word 0x3f662e43, 0xe2528362 ! KA1 = 2.70760782821392980564e-03 + .word 0x40771547, 0x652b82fe ! 
K256ONLN2 = 369.3299304675746271
+ .word 0x42aeac4f, 0x42b17218 ! THRESHOLD = 87.3365402f
+ ! THRESHOLDL = 88.7228394f
+! local storage indices: eight 4-byte scratch slots addressed off %fp;
+! the loops store k from an FP register there and reload it into an
+! integer register (st %fN,[%fp+tmpN] / ld [%fp+tmpN],%oN)
+
+#define tmp0 STACK_BIAS-32
+#define tmp1 STACK_BIAS-28
+#define tmp2 STACK_BIAS-24
+#define tmp3 STACK_BIAS-20
+#define tmp4 STACK_BIAS-16
+#define tmp5 STACK_BIAS-12
+#define tmp6 STACK_BIAS-8
+#define tmp7 STACK_BIAS-4
+
+! sizeof temp storage - must be a multiple of 16 for V9
+#define tmps 0x20
+
+#define I5_THRESHOLD %i5
+#define G1_CONST_TBL %g5
+#define G5_CONST %g1
+
+#define F62_K256ONLN2 %f62
+#define F60_KA2 %f60
+#define F58_KA1 %f58
+
+#define THRESHOLDL %f0
+
+! register use
+! i0 n
+! i1 x
+! i2 stridex
+! i3 y
+! i4 stridey
+
+! i5 THRESHOLD = 0x42aeac4f (87.3365402f), compared as an integer
+
+! NOTE: the macro names do not match the registers they expand to.
+! g5 CONST_TBL (macro G1_CONST_TBL, set by PIC_SET)
+! g1 0x7fffffff (macro G5_CONST)
+
+! f62 K256ONLN2 = 369.3299304675746271
+! f60 KA2 = 3.66556671660783833261e-06
+! f58 KA1 = 2.70760782821392980564e-03
+! f0 THRESHOLDL = 88.7228394f
+
+
+! !!!!! Algorithm !!!!!
+!
+! double y, dtmp, drez;
+! int k, sign, Xi;
+! float X, Y;
+! int THRESHOLD = 0x42aeac4f; /* 87.3365402f */
+! float THRESHOLDL = 88.7228394f;
+! double KA2 = 3.66556671660783833261e-06;
+! double KA1 = 2.70760782821392980564e-03;
+! double K256ONLN2 = 369.3299304675746271;
+! char *CONST_TBL;
+!
+! X = px[0];
+! Xi = ((int*)px)[0];
+! ax = Xi & 0x7fffffff;
+!
+! if (ax > THRESHOLD) {
+!   sign = ((unsigned)Xi >> 29) & 4;
+!   if (ax >= 0x7f800000) { /* Inf or NaN */
+!     if (ax > 0x7f800000) { /* NaN */
+!       Y = X * X; /* NaN -> NaN */
+!       return Y;
+!     }
+!     Y = (sign) ? zero : X; /* +Inf -> +Inf , -Inf -> zero */
+!     return Y;
+!   }
+!
+!   if ( X < 0.0f || X >= THRESHOLDL ) {
+!     Y = ((float*)(CONST_TBL + 2048 + sign))[0];
+!     /* X >= THRESHOLDL : Y = 1.0e+30f */
+!     /* X < -THRESHOLD : Y = 1.0e-30f */
+!     Y = Y * Y;
+!     /* X >= THRESHOLDL : +Inf + overflow */
+!     /* X < -THRESHOLD : +0 + underflow */
+!     return Y;
+!   }
+! }
+! vis_write_gsr(12 << 3);
+! y = (double) X;
+! y = K256ONLN2 * y;
+! k = (int) y;
+! dtmp = (double) k;
+! y -= dtmp;
+! dtmp = y * KA2;
+! dtmp += KA1;
+! 
y *= dtmp;
+! (the three statements above are: y = (y * KA2 + KA1) * y, applied once)
+! ((int*)&drez)[0] = k;
+! ((int*)&drez)[1] = 0;
+! ((float*)&drez)[0] = vis_fpackfix(drez);
+! k &= 255;
+! k <<= 3;
+! dtmp = ((double*)(CONST_TBL + k))[0];
+! drez = vis_fpadd32(drez,dtmp);
+! y *= drez;
+! y += drez;
+! Y = (float) y;
+!
+! Straight-line instruction sequence for one element (the loops below
+! interleave eight copies of it):
+! fstod %f16,%f40 ! y = (double) X
+! fmuld F62_K256ONLN2,%f40,%f40 ! y *= K256ONLN2
+! fdtoi %f40,%f16 ! k = (int) y
+! st %f16,[%fp+tmp0] ! store k
+! fitod %f16,%f34 ! dtmp = (double) k
+! fpackfix %f16,%f16 ! ((float*)&drez)[0] = vis_fpackfix(drez)
+! fsubd %f40,%f34,%f40 ! y -= dtmp
+! fmuld F60_KA2,%f40,%f34 ! dtmp = y * KA2
+! faddd F58_KA1,%f34,%f34 ! dtmp += KA1
+! ld [%fp+tmp0],%o0 ! load k
+! fmuld %f34,%f40,%f40 ! y *= dtmp
+! and %o0,255,%o0 ! k &= 255
+! sll %o0,3,%o0 ! k <<= 3
+! ldd [G1_CONST_TBL+%o0],%f34 ! dtmp = ((double*)(CONST_TBL + k))[0]
+! fpadd32 %f16,%f34,%f34 ! drez = vis_fpadd32(drez,dtmp)
+! fmuld %f34,%f40,%f40 ! y *= drez
+! faddd %f34,%f40,%f40 ! y += drez
+! fdtos %f40,%f26 ! (float) y
+!--------------------------------------------------------------------
+!
+! __vexpf: apply the algorithm above to n single-precision inputs.
+! The name and K256ONLN2 (256/ln2) indicate this is vector expf;
+! the C prototype is not visible here - TODO confirm against the header.
+!
+! Arguments (see register use): %i0 = n, %i1 = x, %i2 = stridex,
+! %i3 = y, %i4 = stridey. Both strides are multiplied by 4 on entry,
+! i.e. they are counted in 4-byte elements.
+!
+! Structure: .main_loop handles eight elements per pass with the next
+! eight inputs loaded ahead through %asi; .tail handles what is left
+! one element at a time. An element with ax > THRESHOLD leaves the
+! fast path through .spec0 ... .spec7 (or .tail_spec in the tail).
+!
+
+ ENTRY(__vexpf)
+ save %sp,-SA(MINFRAME)-tmps,%sp ! frame plus tmps bytes for tmp0..tmp7
+ PIC_SETUP(l7)
+ PIC_SET(l7,.CONST_TBL,g5)
+
+ wr %g0,0x82,%asi ! set %asi for non-faulting loads
+ wr %g0,0x60,%gsr ! 0x60 = 12 << 3, cf. vis_write_gsr(12 << 3)
+
+ sll %i2,2,%i2 ! stridex *= 4 (byte stride)
+ sll %i4,2,%i4 ! stridey *= 4 (byte stride)
+
+ ldd [G1_CONST_TBL+2056],F60_KA2 ! KA2
+ sethi %hi(0x7ffffc00),G5_CONST ! upper bits of the abs-value mask
+ ldd [G1_CONST_TBL+2064],F58_KA1 ! KA1
+ add G5_CONST,1023,G5_CONST ! mask = 0x7ffffc00 + 0x3ff = 0x7fffffff
+ ldd [G1_CONST_TBL+2072],F62_K256ONLN2 ! K256ONLN2
+ ld [G1_CONST_TBL+2080],I5_THRESHOLD ! 0x42aeac4f, integer compare value
+ ld [G1_CONST_TBL+2084],THRESHOLDL ! 88.7228394f, float compare value
+
+ subcc %i0,8,%i0 ! n -= 8
+ bneg,pn %icc,.tail ! fewer than 8 elements: tail loop only
+ fzeros %f3 ! (delay slot) %f3 = 0.0f, used as the zero constant
+
+.main_loop_preload:
+
+! preload 8 elements and get absolute values
+ ld [%i1],%l0 ! (0) Xi = ((int*)px)[0]
+ fzeros %f5 ! odd half of the %f4 pair = 0; presumably ((int*)&drez)[1] = 0 - TODO confirm
+ ld [%i1],%f16 ! (0) X = px[0]
+ fzeros %f7
+ add %i1,%i2,%o5 ! px += stridex
+ ld [%o5],%l1 ! (1) Xi = ((int*)px)[0]
+ and %l0,G5_CONST,%l0 ! (0) ax = Xi & 0x7fffffff
+ fzeros %f9
+ ld [%o5],%f2 ! (1) X = px[0]
+ fzeros %f11
+ add %o5,%i2,%i1 ! px += stridex
+ ld [%i1],%l2 ! 
(2) Xi = ((int*)px)[0] + and %l1,G5_CONST,%l1 ! (1) ax = Xi & 0x7fffffff + fzeros %f13 + ld [%i1],%f4 ! (2) X = px[0] + fzeros %f15 + add %i1,%i2,%o5 ! px += stridex + ld [%o5],%l3 ! (3) Xi = ((int*)px)[0] + and %l2,G5_CONST,%l2 ! (2) ax = Xi & 0x7fffffff + fzeros %f17 + ld [%o5],%f6 ! (3) X = px[0] + add %o5,%i2,%o0 ! px += stridex + ld [%o0],%l4 ! (4) Xi = ((int*)px)[0] + and %l3,G5_CONST,%l3 ! (3) ax = Xi & 0x7fffffff + add %o0,%i2,%o1 ! px += stridex + ld [%o1],%l5 ! (5) Xi = ((int*)px)[0] + add %o1,%i2,%o2 ! px += stridex + ld [%o2],%l6 ! (6) Xi = ((int*)px)[0] + and %l4,G5_CONST,%l4 ! (4) ax = Xi & 0x7fffffff + add %o2,%i2,%o3 ! px += stridex + ld [%o3],%l7 ! (7) Xi = ((int*)px)[0] + add %o3,%i2,%i1 ! px += stridex + and %l5,G5_CONST,%l5 ! (5) ax = Xi & 0x7fffffff + and %l6,G5_CONST,%l6 ! (6) ax = Xi & 0x7fffffff + ba .main_loop + and %l7,G5_CONST,%l7 ! (7) ax = Xi & 0x7fffffff + + .align 16 +.main_loop: + cmp %l0,I5_THRESHOLD + bg,pn %icc,.spec0 ! (0) if (ax > THRESHOLD) + lda [%o0]%asi,%f8 ! (4) X = px[0] + fstod %f16,%f40 ! (0) y = (double) X +.spec0_cont: + cmp %l1,I5_THRESHOLD + bg,pn %icc,.spec1 ! (1) if (ax > THRESHOLD) + lda [%o1]%asi,%f10 ! (5) X = px[0] + fstod %f2,%f42 ! (1) y = (double) X +.spec1_cont: + cmp %l2,I5_THRESHOLD + bg,pn %icc,.spec2 ! (2) if (ax > THRESHOLD) + lda [%o2]%asi,%f12 ! (6) X = px[0] + fstod %f4,%f44 ! (2) y = (double) X +.spec2_cont: + cmp %l3,I5_THRESHOLD + bg,pn %icc,.spec3 ! (3) if (ax > THRESHOLD) + lda [%o3]%asi,%f14 ! (7) X = px[0] + fstod %f6,%f46 ! (3) y = (double) X +.spec3_cont: + cmp %l4,I5_THRESHOLD + bg,pn %icc,.spec4 ! (4) if (ax > THRESHOLD) + fmuld F62_K256ONLN2,%f40,%f40 ! (0) y *= K256ONLN2 + fstod %f8,%f48 ! (4) y = (double) X +.spec4_cont: + cmp %l5,I5_THRESHOLD + bg,pn %icc,.spec5 ! (5) if (ax > THRESHOLD) + fmuld F62_K256ONLN2,%f42,%f42 ! (1) y *= K256ONLN2 + fstod %f10,%f50 ! (5) y = (double) X +.spec5_cont: + cmp %l6,I5_THRESHOLD + bg,pn %icc,.spec6 ! 
(6) if (ax > THRESHOLD) + fmuld F62_K256ONLN2,%f44,%f44 ! (2) y *= K256ONLN2 + fstod %f12,%f52 ! (6) y = (double) X +.spec6_cont: + cmp %l7,I5_THRESHOLD + bg,pn %icc,.spec7 ! (7) if (ax > THRESHOLD) + fmuld F62_K256ONLN2,%f46,%f46 ! (3) y *= K256ONLN2 + fstod %f14,%f54 ! (7) y = (double) X +.spec7_cont: + fdtoi %f40,%f16 ! (0) k = (int) y + st %f16,[%fp+tmp0] + fmuld F62_K256ONLN2,%f48,%f48 ! (4) y *= K256ONLN2 + + fdtoi %f42,%f2 ! (1) k = (int) y + st %f2,[%fp+tmp1] + fmuld F62_K256ONLN2,%f50,%f50 ! (5) y *= K256ONLN2 + + fdtoi %f44,%f4 ! (2) k = (int) y + st %f4,[%fp+tmp2] + fmuld F62_K256ONLN2,%f52,%f52 ! (6) y *= K256ONLN2 + + fdtoi %f46,%f6 ! (3) k = (int) y + st %f6,[%fp+tmp3] + fmuld F62_K256ONLN2,%f54,%f54 ! (7) y *= K256ONLN2 + + fdtoi %f48,%f8 ! (4) k = (int) y + st %f8,[%fp+tmp4] + + fdtoi %f50,%f10 ! (5) k = (int) y + st %f10,[%fp+tmp5] + + fitod %f16,%f34 ! (0) dtmp = (double) k + fpackfix %f16,%f16 ! (0) ((float*)&drez)[0] = vis_fpackfix(drez) + nop + nop + + fdtoi %f52,%f12 ! (6) k = (int) y + st %f12,[%fp+tmp6] + + fdtoi %f54,%f14 ! (7) k = (int) y + st %f14,[%fp+tmp7] + + lda [%i1]%asi,%l0 ! (8) Xi = ((int*)px)[0] + add %i1,%i2,%o5 ! px += stridex + fitod %f2,%f18 ! (1) dtmp = (double) k + fpackfix %f2,%f2 ! (1) ((float*)&drez)[0] = vis_fpackfix(drez) + + lda [%o5]%asi,%l1 ! (9) Xi = ((int*)px)[0] + add %o5,%i2,%i1 ! px += stridex + fitod %f4,%f20 ! (2) dtmp = (double) k + fpackfix %f4,%f4 ! (2) ((float*)&drez)[0] = vis_fpackfix(drez) + + lda [%i1]%asi,%l2 ! (10) Xi = ((int*)px)[0] + add %i1,%i2,%o5 ! px += stridex + fitod %f6,%f22 ! (3) dtmp = (double) k + fpackfix %f6,%f6 ! (3) ((float*)&drez)[0] = vis_fpackfix(drez) + + lda [%o5]%asi,%l3 ! (11) Xi = ((int*)px)[0] + add %o5,%i2,%i1 ! px += stridex + fitod %f8,%f24 ! (4) dtmp = (double) k + fpackfix %f8,%f8 ! (4) ((float*)&drez)[0] = vis_fpackfix(drez) + + fitod %f10,%f26 ! (5) dtmp = (double) k + fpackfix %f10,%f10 ! (5) ((float*)&drez)[0] = vis_fpackfix(drez) + + fitod %f12,%f28 ! 
(6) dtmp = (double) k + fpackfix %f12,%f12 ! (6) ((float*)&drez)[0] = vis_fpackfix(drez) + + fitod %f14,%f30 ! (7) dtmp = (double) k + fpackfix %f14,%f14 ! (7) ((float*)&drez)[0] = vis_fpackfix(drez) + + ld [%fp+tmp0],%o0 ! (0) load k + and %l0,G5_CONST,%l0 ! (8) ax = Xi & 0x7fffffff + fsubd %f40,%f34,%f40 ! (0) y -= dtmp + + ld [%fp+tmp1],%o1 ! (1) load k + and %l1,G5_CONST,%l1 ! (9) ax = Xi & 0x7fffffff + fsubd %f42,%f18,%f42 ! (1) y -= dtmp + + ld [%fp+tmp2],%o2 ! (2) load k + and %l2,G5_CONST,%l2 ! (10) ax = Xi & 0x7fffffff + and %o0,255,%o0 ! (0) k &= 255 + fsubd %f44,%f20,%f44 ! (2) y -= dtmp + + ld [%fp+tmp3],%o3 ! (3) load k + and %o1,255,%o1 ! (1) k &= 255 + fsubd %f46,%f22,%f46 ! (3) y -= dtmp + + sll %o0,3,%o0 ! (0) k <<= 3 + sll %o1,3,%o1 ! (1) k <<= 3 + fmuld F60_KA2,%f40,%f34 ! (0) dtmp = y * KA2 + fsubd %f48,%f24,%f48 ! (4) y -= dtmp + + and %l3,G5_CONST,%l3 ! (11) ax = Xi & 0x7fffffff + and %o2,255,%o2 ! (2) k &= 255 + fmuld F60_KA2,%f42,%f18 ! (1) dtmp = y * KA2 + fsubd %f50,%f26,%f50 ! (5) y -= dtmp + + sll %o2,3,%o2 ! (2) k <<= 3 + fmuld F60_KA2,%f44,%f20 ! (2) dtmp = y * KA2 + fsubd %f52,%f28,%f52 ! (6) y -= dtmp + + ld [%fp+tmp4],%o4 ! (4) load k + and %o3,255,%o3 ! (3) k &= 255 + fmuld F60_KA2,%f46,%f22 ! (3) dtmp = y * KA2 + fsubd %f54,%f30,%f54 ! (7) y -= dtmp + + ld [%fp+tmp5],%o5 ! (5) load k + sll %o3,3,%o3 ! (3) k <<= 3 + fmuld F60_KA2,%f48,%f24 ! (4) dtmp = y * KA2 + faddd F58_KA1,%f34,%f34 ! (0) dtmp += KA1 + + ld [%fp+tmp6],%o7 ! (6) load k + and %o4,255,%o4 ! (4) k &= 255 + fmuld F60_KA2,%f50,%f26 ! (5) dtmp = y * KA2 + faddd F58_KA1,%f18,%f18 ! (1) dtmp += KA1 + + ld [%fp+tmp7],%l4 ! (7) load k + and %o5,255,%o5 ! (5) k &= 255 + fmuld F60_KA2,%f52,%f28 ! (6) dtmp = y * KA2 + faddd F58_KA1,%f20,%f20 ! (2) dtmp += KA1 + + sll %o5,3,%o5 ! (5) k <<= 3 + fmuld F60_KA2,%f54,%f30 ! (7) dtmp = y * KA2 + faddd F58_KA1,%f22,%f22 ! (3) dtmp += KA1 + + fmuld %f34,%f40,%f40 ! (0) y *= dtmp + ldd [G1_CONST_TBL+%o0],%f34 ! 
(0) dtmp = ((double*)(CONST_TBL + k))[0] + and %l4,255,%l4 ! (7) k &= 255 + faddd F58_KA1,%f24,%f24 ! (4) dtmp += KA1 + + fmuld %f18,%f42,%f42 ! (1) y *= dtmp + ldd [G1_CONST_TBL+%o1],%f18 ! (1) dtmp = ((double*)(CONST_TBL + k))[0] + sll %l4,3,%l4 ! (7) k <<= 3 + faddd F58_KA1,%f26,%f26 ! (5) dtmp += KA1 + + fmuld %f20,%f44,%f44 ! (2) y *= dtmp + ldd [G1_CONST_TBL+%o2],%f20 ! (2) dtmp = ((double*)(CONST_TBL + k))[0] + faddd F58_KA1,%f28,%f28 ! (6) dtmp += KA1 + + fmuld %f22,%f46,%f46 ! (3) y *= dtmp + ldd [G1_CONST_TBL+%o3],%f22 ! (3) dtmp = ((double*)(CONST_TBL + k))[0] + sll %o4,3,%o4 ! (4) k <<= 3 + faddd F58_KA1,%f30,%f30 ! (7) dtmp += KA1 + + fmuld %f24,%f48,%f48 ! (4) y *= dtmp + ldd [G1_CONST_TBL+%o4],%f24 ! (4) dtmp = ((double*)(CONST_TBL + k))[0] + and %o7,255,%o7 ! (6) k &= 255 + fpadd32 %f16,%f34,%f34 ! (0) drez = vis_fpadd32(drez,dtmp) + + fmuld %f26,%f50,%f50 ! (5) y *= dtmp + ldd [G1_CONST_TBL+%o5],%f26 ! (5) dtmp = ((double*)(CONST_TBL + k))[0] + sll %o7,3,%o7 ! (6) k <<= 3 + fpadd32 %f2,%f18,%f18 ! (1) drez = vis_fpadd32(drez,dtmp) + + fmuld %f28,%f52,%f52 ! (6) y *= dtmp + ldd [G1_CONST_TBL+%o7],%f28 ! (6) dtmp = ((double*)(CONST_TBL + k))[0] + sll %i2,2,%o0 + fpadd32 %f4,%f20,%f20 ! (2) drez = vis_fpadd32(drez,dtmp) + + fmuld %f30,%f54,%f54 ! (7) y *= dtmp + ldd [G1_CONST_TBL+%l4],%f30 ! (7) dtmp = ((double*)(CONST_TBL + k))[0] + sub %i1,%o0,%o0 + fpadd32 %f6,%f22,%f22 ! (3) drez = vis_fpadd32(drez,dtmp) + + lda [%i1]%asi,%l4 ! (12) Xi = ((int*)px)[0] + add %i1,%i2,%o1 ! px += stridex + fpadd32 %f8,%f24,%f24 ! (4) drez = vis_fpadd32(drez,dtmp) + fmuld %f34,%f40,%f40 ! (0) y *= drez + + lda [%o1]%asi,%l5 ! (13) Xi = ((int*)px)[0] + add %o1,%i2,%o2 ! px += stridex + fpadd32 %f10,%f26,%f26 ! (5) drez = vis_fpadd32(drez,dtmp) + fmuld %f18,%f42,%f42 ! (1) y *= drez + + lda [%o2]%asi,%l6 ! (14) Xi = ((int*)px)[0] + add %o2,%i2,%o3 ! px += stridex + fpadd32 %f12,%f28,%f28 ! (6) drez = vis_fpadd32(drez,dtmp) + fmuld %f20,%f44,%f44 ! 
(2) y *= drez + + lda [%o3]%asi,%l7 ! (15) Xi = ((int*)px)[0] + add %o3,%i2,%i1 ! px += stridex + fpadd32 %f14,%f30,%f30 ! (7) drez = vis_fpadd32(drez,dtmp) + fmuld %f22,%f46,%f46 ! (3) y *= drez + + lda [%o0]%asi,%f16 ! (8) X = px[0] + add %o0,%i2,%o5 + fmuld %f24,%f48,%f48 ! (4) y *= drez + faddd %f34,%f40,%f40 ! (0) y += drez + + lda [%o5]%asi,%f2 ! (9) X = px[0] + add %o5,%i2,%o0 + fmuld %f26,%f50,%f50 ! (5) y *= drez + faddd %f18,%f42,%f42 ! (1) y += drez + + lda [%o0]%asi,%f4 ! (10) X = px[0] + add %o0,%i2,%o5 + fmuld %f28,%f52,%f52 ! (6) y *= drez + faddd %f20,%f44,%f44 ! (2) y += drez + + lda [%o5]%asi,%f6 ! (11) X = px[0] + add %o5,%i2,%o0 + fmuld %f30,%f54,%f54 ! (7) y *= drez + faddd %f22,%f46,%f46 ! (3) y += drez + + and %l4,G5_CONST,%l4 ! (12) ax = Xi & 0x7fffffff + faddd %f24,%f48,%f48 ! (4) y += drez + + and %l5,G5_CONST,%l5 ! (13) ax = Xi & 0x7fffffff + faddd %f26,%f50,%f50 ! (5) y += drez + + and %l6,G5_CONST,%l6 ! (14) ax = Xi & 0x7fffffff + faddd %f28,%f52,%f52 ! (6) y += drez + + and %l7,G5_CONST,%l7 ! (15) ax = Xi & 0x7fffffff + faddd %f30,%f54,%f54 ! (7) y += drez + + fdtos %f40,%f26 ! (0) (float) y + st %f26,[%i3] + add %i3,%i4,%o4 ! py += stridey + + fdtos %f42,%f18 ! (1) (float) y + st %f18,[%o4] + add %o4,%i4,%i3 ! py += stridey + + fdtos %f44,%f20 ! (2) (float) y + st %f20,[%i3] + add %i3,%i4,%o4 ! py += stridey + + fdtos %f46,%f22 ! (3) (float) y + st %f22,[%o4] + add %o4,%i4,%i3 ! py += stridey + + fdtos %f48,%f24 ! (4) (float) y + st %f24,[%i3] + subcc %i0,8,%i0 + add %i3,%i4,%o4 ! py += stridey + + fdtos %f50,%f26 ! (5) (float) y + st %f26,[%o4] + add %o4,%i4,%o5 ! py += stridey + add %i4,%i4,%o7 + + fdtos %f52,%f28 ! (6) (float) y + st %f28,[%o5] + add %o5,%i4,%o4 ! py += stridey + add %o5,%o7,%i3 ! py += stridey + + fdtos %f54,%f30 ! 
(7) (float) y + st %f30,[%o4] + bpos,pt %icc,.main_loop + nop +.after_main_loop: + sll %i2,3,%o2 + sub %i1,%o2,%i1 + +.tail: + add %i0,8,%i0 + subcc %i0,1,%i0 + bneg,pn %icc,.exit + + ld [%i1],%l0 + ld [%i1],%f2 + add %i1,%i2,%i1 + +.tail_loop: + and %l0,G5_CONST,%l1 + cmp %l1,I5_THRESHOLD + bg,pn %icc,.tail_spec + nop +.tail_spec_cont: + fstod %f2,%f40 + fmuld F62_K256ONLN2,%f40,%f40 + fdtoi %f40,%f2 + st %f2,[%fp+tmp0] + fitod %f2,%f16 + fpackfix %f2,%f2 + fsubd %f40,%f16,%f40 + fmuld F60_KA2,%f40,%f16 + faddd F58_KA1,%f16,%f16 + ld [%fp+tmp0],%o0 + fmuld %f16,%f40,%f40 + and %o0,255,%o0 + sll %o0,3,%o0 + ldd [G1_CONST_TBL+%o0],%f16 + fpadd32 %f2,%f16,%f16 + lda [%i1]%asi,%l0 + fmuld %f16,%f40,%f40 + lda [%i1]%asi,%f2 + faddd %f16,%f40,%f40 + add %i1,%i2,%i1 + fdtos %f40,%f16 + st %f16,[%i3] + add %i3,%i4,%i3 + subcc %i0,1,%i0 + bpos,pt %icc,.tail_loop + nop + +.exit: + ret + restore + +.tail_spec: + sethi %hi(0x7f800000),%o4 + cmp %l1,%o4 + bl,pt %icc,.tail_spec_out_of_range + nop + + srl %l0,29,%l0 + ble,pn %icc,.tail_spec_inf + andcc %l0,4,%g0 + +! NaN -> NaN + + fmuls %f2,%f2,%f2 + ba .tail_spec_exit + st %f2,[%i3] + +.tail_spec_inf: + be,a,pn %icc,.tail_spec_exit + st %f2,[%i3] + + ba .tail_spec_exit + st %f3,[%i3] + +.tail_spec_out_of_range: + fcmpes %fcc0,%f2,%f3 + fcmpes %fcc1,%f2,THRESHOLDL + fbl,pn %fcc0,1f ! if ( X < 0.0f ) + nop + fbl,pt %fcc1,.tail_spec_cont ! if ( X < THRESHOLDL ) + nop +1: + srl %l0,29,%l0 + and %l0,4,%l0 + add %l0,2048,%l0 + ld [G1_CONST_TBL+%l0],%f2 + fmuls %f2,%f2,%f2 + st %f2,[%i3] + +.tail_spec_exit: + lda [%i1]%asi,%l0 + lda [%i1]%asi,%f2 + add %i1,%i2,%i1 + + subcc %i0,1,%i0 + bpos,pt %icc,.tail_loop + add %i3,%i4,%i3 + ba .exit + nop + + .align 16 +.spec0: + sethi %hi(0x7f800000),%o5 + cmp %l0,%o5 + bl,pt %icc,.spec0_out_of_range + sll %i2,3,%o4 + + ble,pn %icc,.spec0_inf + sub %i1,%o4,%o4 + +! 
NaN -> NaN + + fmuls %f16,%f16,%f16 + ba .spec0_exit + st %f16,[%i3] + +.spec0_inf: + ld [%o4],%l0 + srl %l0,29,%l0 + andcc %l0,4,%l0 + be,a,pn %icc,.spec0_exit + st %f16,[%i3] + + ba .spec0_exit + st %f3,[%i3] + +.spec0_out_of_range: + fcmpes %fcc0,%f16,%f3 + fcmpes %fcc1,%f16,THRESHOLDL + fbl,a,pn %fcc0,1f ! if ( X < 0.0f ) + fstod %f16,%f40 ! (0) y = (double) X + fbl,a,pt %fcc1,.spec0_cont ! if ( X < THRESHOLDL ) + fstod %f16,%f40 ! (0) y = (double) X +1: + sub %i1,%o4,%o4 + ld [%o4],%l0 + srl %l0,29,%l0 + and %l0,4,%l0 + add %l0,2048,%l0 + ld [G1_CONST_TBL+%l0],%f16 + fmuls %f16,%f16,%f16 + st %f16,[%i3] + +.spec0_exit: + fmovs %f2,%f16 + mov %l1,%l0 + fmovs %f4,%f2 + mov %l2,%l1 + fmovs %f6,%f4 + mov %l3,%l2 + fmovs %f8,%f6 + mov %l4,%l3 + mov %l5,%l4 + mov %l6,%l5 + mov %l7,%l6 + lda [%i1]%asi,%l7 + add %i1,%i2,%i1 + mov %o1,%o0 + mov %o2,%o1 + mov %o3,%o2 + and %l7,G5_CONST,%l7 + add %o2,%i2,%o3 + + subcc %i0,1,%i0 + bpos,pt %icc,.main_loop + add %i3,%i4,%i3 + ba .after_main_loop + nop + + .align 16 +.spec1: + sethi %hi(0x7f800000),%o5 + cmp %l1,%o5 + bge,pn %icc,1f + nop + fcmpes %fcc0,%f2,%f3 + fcmpes %fcc1,%f2,THRESHOLDL + fbl,a,pn %fcc0,1f ! if ( X < 0.0f ) + fstod %f2,%f42 ! (1) y = (double) X + fbl,a,pt %fcc1,.spec1_cont ! if ( X < THRESHOLDL ) + fstod %f2,%f42 ! (1) y = (double) X +1: + fmuld F62_K256ONLN2,%f40,%f40 + fdtoi %f40,%f16 + st %f16,[%fp+tmp0] + fitod %f16,%f34 + fpackfix %f16,%f16 + fsubd %f40,%f34,%f40 + fmuld F60_KA2,%f40,%f34 + faddd F58_KA1,%f34,%f34 + ld [%fp+tmp0],%o0 + fmuld %f34,%f40,%f40 + and %o0,255,%o0 + sll %o0,3,%o0 + ldd [G1_CONST_TBL+%o0],%f34 + fpadd32 %f16,%f34,%f34 + fmuld %f34,%f40,%f40 + faddd %f34,%f40,%f40 + fdtos %f40,%f26 + st %f26,[%i3] + add %i3,%i4,%i3 + + cmp %l1,%o5 + bl,pt %icc,.spec1_out_of_range + sll %i2,3,%o4 + + ble,pn %icc,.spec1_inf + sub %i1,%o4,%o4 + +! 
NaN -> NaN + + fmuls %f2,%f2,%f2 + ba .spec1_exit + st %f2,[%i3] + +.spec1_inf: + add %o4,%i2,%o4 + ld [%o4],%l0 + srl %l0,29,%l0 + andcc %l0,4,%l0 + be,a,pn %icc,.spec1_exit + st %f2,[%i3] + + ba .spec1_exit + st %f3,[%i3] + +.spec1_out_of_range: + sub %i1,%o4,%o4 + add %o4,%i2,%o4 + ld [%o4],%l0 + srl %l0,29,%l0 + and %l0,4,%l0 + add %l0,2048,%l0 + ld [G1_CONST_TBL+%l0],%f2 + fmuls %f2,%f2,%f2 + st %f2,[%i3] + +.spec1_exit: + fmovs %f4,%f16 + mov %l2,%l0 + fmovs %f6,%f2 + mov %l3,%l1 + fmovs %f8,%f4 + mov %l4,%l2 + fmovs %f10,%f6 + mov %l5,%l3 + mov %l6,%l4 + mov %l7,%l5 + lda [%i1]%asi,%l6 + add %i1,%i2,%i1 + lda [%i1]%asi,%l7 + add %i1,%i2,%i1 + and %l6,G5_CONST,%l6 + and %l7,G5_CONST,%l7 + mov %o2,%o0 + mov %o3,%o1 + add %o1,%i2,%o2 + add %o2,%i2,%o3 + + subcc %i0,2,%i0 + bpos,pt %icc,.main_loop + add %i3,%i4,%i3 + ba .after_main_loop + nop + + .align 16 +.spec2: + sethi %hi(0x7f800000),%o5 + cmp %l2,%o5 + bge,pn %icc,1f + nop + fcmpes %fcc0,%f4,%f3 + fcmpes %fcc1,%f4,THRESHOLDL + fbl,a,pn %fcc0,1f ! if ( X < 0.0f ) + fstod %f4,%f44 ! (2) y = (double) X + fbl,a,pt %fcc1,.spec2_cont ! if ( X < THRESHOLDL ) + fstod %f4,%f44 ! 
(2) y = (double) X +1: + fmuld F62_K256ONLN2,%f40,%f40 + + fmuld F62_K256ONLN2,%f42,%f42 + + fdtoi %f40,%f16 + st %f16,[%fp+tmp0] + + fdtoi %f42,%f2 + st %f2,[%fp+tmp1] + + fitod %f16,%f34 + fpackfix %f16,%f16 + + fitod %f2,%f18 + fpackfix %f2,%f2 + + fsubd %f40,%f34,%f40 + + fsubd %f42,%f18,%f42 + + fmuld F60_KA2,%f40,%f34 + + fmuld F60_KA2,%f42,%f18 + + faddd F58_KA1,%f34,%f34 + + faddd F58_KA1,%f18,%f18 + + ld [%fp+tmp0],%o0 + fmuld %f34,%f40,%f40 + + ld [%fp+tmp1],%o1 + fmuld %f18,%f42,%f42 + + and %o0,255,%o0 + + and %o1,255,%o1 + + sll %o0,3,%o0 + + sll %o1,3,%o1 + + ldd [G1_CONST_TBL+%o0],%f34 + + ldd [G1_CONST_TBL+%o1],%f18 + + fpadd32 %f16,%f34,%f34 + + fpadd32 %f2,%f18,%f18 + + fmuld %f34,%f40,%f40 + + fmuld %f18,%f42,%f42 + + faddd %f34,%f40,%f40 + + faddd %f18,%f42,%f42 + + fdtos %f40,%f26 + st %f26,[%i3] + add %i3,%i4,%o4 + + fdtos %f42,%f18 + st %f18,[%o4] + add %o4,%i4,%i3 + + cmp %l2,%o5 + sll %i2,1,%o5 + bl,pt %icc,.spec2_out_of_range + sll %i2,2,%o4 + + ble,pn %icc,.spec2_inf + add %o4,%o5,%o4 + +! 
NaN -> NaN + + fmuls %f4,%f4,%f4 + ba .spec2_exit + st %f4,[%i3] + +.spec2_inf: + sub %i1,%o4,%o4 + ld [%o4],%l0 + srl %l0,29,%l0 + andcc %l0,4,%l0 + be,a,pn %icc,.spec2_exit + st %f4,[%i3] + + ba .spec2_exit + st %f3,[%i3] + +.spec2_out_of_range: + add %o4,%o5,%o4 + sub %i1,%o4,%o4 + ld [%o4],%l0 + srl %l0,29,%l0 + and %l0,4,%l0 + add %l0,2048,%l0 + ld [G1_CONST_TBL+%l0],%f2 + fmuls %f2,%f2,%f2 + st %f2,[%i3] + +.spec2_exit: + fmovs %f6,%f16 + mov %l3,%l0 + mov %o3,%o0 + fmovs %f8,%f2 + mov %l4,%l1 + add %o0,%i2,%o1 + fmovs %f10,%f4 + mov %l5,%l2 + add %o1,%i2,%o2 + fmovs %f12,%f6 + mov %l6,%l3 + mov %l7,%l4 + lda [%i1]%asi,%l5 + add %i1,%i2,%i1 + add %o2,%i2,%o3 + lda [%i1]%asi,%l6 + add %i1,%i2,%i1 + lda [%i1]%asi,%l7 + add %i1,%i2,%i1 + and %l5,G5_CONST,%l5 + and %l6,G5_CONST,%l6 + and %l7,G5_CONST,%l7 + + subcc %i0,3,%i0 + bpos,pt %icc,.main_loop + add %i3,%i4,%i3 + ba .after_main_loop + nop +.spec3: + sethi %hi(0x7f800000),%o5 + cmp %l3,%o5 + bge,pn %icc,1f + nop + fcmpes %fcc0,%f6,%f3 + fcmpes %fcc1,%f6,THRESHOLDL + fbl,a,pn %fcc0,1f ! if ( X < 0.0f ) + fstod %f6,%f46 ! (3) y = (double) X + fbl,a,pt %fcc1,.spec3_cont ! if ( X < THRESHOLDL ) + fstod %f6,%f46 ! 
(3) y = (double) X +1: + fmuld F62_K256ONLN2,%f40,%f40 + + fmuld F62_K256ONLN2,%f42,%f42 + + fmuld F62_K256ONLN2,%f44,%f44 + + fdtoi %f40,%f16 + st %f16,[%fp+tmp0] + + fdtoi %f42,%f2 + st %f2,[%fp+tmp1] + + fdtoi %f44,%f4 + st %f4,[%fp+tmp2] + + fitod %f16,%f34 + fpackfix %f16,%f16 + + fitod %f2,%f18 + fpackfix %f2,%f2 + + fitod %f4,%f20 + fpackfix %f4,%f4 + + fsubd %f40,%f34,%f40 + + fsubd %f42,%f18,%f42 + + fsubd %f44,%f20,%f44 + + fmuld F60_KA2,%f40,%f34 + + fmuld F60_KA2,%f42,%f18 + + fmuld F60_KA2,%f44,%f20 + + faddd F58_KA1,%f34,%f34 + + faddd F58_KA1,%f18,%f18 + + faddd F58_KA1,%f20,%f20 + + ld [%fp+tmp0],%o0 + fmuld %f34,%f40,%f40 + + ld [%fp+tmp1],%o1 + fmuld %f18,%f42,%f42 + + ld [%fp+tmp2],%o2 + fmuld %f20,%f44,%f44 + + and %o0,255,%o0 + and %o1,255,%o1 + + and %o2,255,%o2 + sll %o0,3,%o0 + + sll %o1,3,%o1 + sll %o2,3,%o2 + + ldd [G1_CONST_TBL+%o0],%f34 + + ldd [G1_CONST_TBL+%o1],%f18 + + ldd [G1_CONST_TBL+%o2],%f20 + + fpadd32 %f16,%f34,%f34 + + fpadd32 %f2,%f18,%f18 + + fpadd32 %f4,%f20,%f20 + + fmuld %f34,%f40,%f40 + + fmuld %f18,%f42,%f42 + + fmuld %f20,%f44,%f44 + + faddd %f34,%f40,%f40 + + faddd %f18,%f42,%f42 + + faddd %f20,%f44,%f44 + + fdtos %f40,%f26 + st %f26,[%i3] + add %i3,%i4,%o4 + + fdtos %f42,%f18 + st %f18,[%o4] + add %o4,%i4,%i3 + + fdtos %f44,%f20 + st %f20,[%i3] + add %i3,%i4,%i3 + + cmp %l3,%o5 + bl,pt %icc,.spec3_out_of_range + sll %i2,2,%o4 + + ble,pn %icc,.spec3_inf + add %o4,%i2,%o4 + +! 
NaN -> NaN + + fmuls %f6,%f6,%f6 + ba .spec3_exit + st %f6,[%i3] + +.spec3_inf: + sub %i1,%o4,%o4 + ld [%o4],%l0 + srl %l0,29,%l0 + andcc %l0,4,%l0 + be,a,pn %icc,.spec3_exit + st %f6,[%i3] + + ba .spec3_exit + st %f3,[%i3] + +.spec3_out_of_range: + add %o4,%i2,%o4 + sub %i1,%o4,%o4 + ld [%o4],%l0 + srl %l0,29,%l0 + and %l0,4,%l0 + add %l0,2048,%l0 + ld [G1_CONST_TBL+%l0],%f2 + fmuls %f2,%f2,%f2 + st %f2,[%i3] + +.spec3_exit: + fmovs %f8,%f16 + mov %l4,%l0 + fmovs %f10,%f2 + mov %l5,%l1 + fmovs %f12,%f4 + mov %l6,%l2 + fmovs %f14,%f6 + mov %l7,%l3 + mov %i1,%o0 + lda [%o0]%asi,%l4 + add %o0,%i2,%o1 + lda [%o1]%asi,%l5 + add %o1,%i2,%o2 + lda [%o2]%asi,%l6 + add %o2,%i2,%o3 + lda [%o3]%asi,%l7 + add %o3,%i2,%i1 + and %l4,G5_CONST,%l4 + and %l5,G5_CONST,%l5 + and %l6,G5_CONST,%l6 + and %l7,G5_CONST,%l7 + + subcc %i0,4,%i0 + bpos,pt %icc,.main_loop + add %i3,%i4,%i3 + ba .after_main_loop + nop + + .align 16 +.spec4: + sethi %hi(0x7f800000),%o5 + cmp %l4,%o5 + bge,pn %icc,1f + nop + fcmpes %fcc0,%f8,%f3 + fcmpes %fcc1,%f8,THRESHOLDL + fbl,a,pn %fcc0,1f ! if ( X < 0.0f ) + fstod %f8,%f48 ! (4) y = (double) X + fbl,a,pt %fcc1,.spec4_cont ! if ( X < THRESHOLDL ) + fstod %f8,%f48 ! 
(4) y = (double) X +1: + fmuld F62_K256ONLN2,%f42,%f42 + + fmuld F62_K256ONLN2,%f44,%f44 + + fmuld F62_K256ONLN2,%f46,%f46 + + fdtoi %f40,%f16 + st %f16,[%fp+tmp0] + + fdtoi %f42,%f2 + st %f2,[%fp+tmp1] + + fdtoi %f44,%f4 + st %f4,[%fp+tmp2] + + fdtoi %f46,%f6 + st %f6,[%fp+tmp3] + + fitod %f16,%f34 + fpackfix %f16,%f16 + + fitod %f2,%f18 + fpackfix %f2,%f2 + + fitod %f4,%f20 + fpackfix %f4,%f4 + + fitod %f6,%f22 + fpackfix %f6,%f6 + + fsubd %f40,%f34,%f40 + + fsubd %f42,%f18,%f42 + + fsubd %f44,%f20,%f44 + + fsubd %f46,%f22,%f46 + + fmuld F60_KA2,%f40,%f34 + + fmuld F60_KA2,%f42,%f18 + + fmuld F60_KA2,%f44,%f20 + + fmuld F60_KA2,%f46,%f22 + + faddd F58_KA1,%f34,%f34 + + faddd F58_KA1,%f18,%f18 + + faddd F58_KA1,%f20,%f20 + + faddd F58_KA1,%f22,%f22 + + ld [%fp+tmp0],%o0 + fmuld %f34,%f40,%f40 + + ld [%fp+tmp1],%o1 + fmuld %f18,%f42,%f42 + + ld [%fp+tmp2],%o2 + fmuld %f20,%f44,%f44 + + ld [%fp+tmp3],%o3 + fmuld %f22,%f46,%f46 + + and %o0,255,%o0 + and %o1,255,%o1 + + and %o2,255,%o2 + and %o3,255,%o3 + + sll %o0,3,%o0 + sll %o1,3,%o1 + + sll %o2,3,%o2 + sll %o3,3,%o3 + + ldd [G1_CONST_TBL+%o0],%f34 + + ldd [G1_CONST_TBL+%o1],%f18 + + ldd [G1_CONST_TBL+%o2],%f20 + + ldd [G1_CONST_TBL+%o3],%f22 + + fpadd32 %f16,%f34,%f34 + + fpadd32 %f2,%f18,%f18 + + fpadd32 %f4,%f20,%f20 + + fpadd32 %f6,%f22,%f22 + + fmuld %f34,%f40,%f40 + + fmuld %f18,%f42,%f42 + + fmuld %f20,%f44,%f44 + + fmuld %f22,%f46,%f46 + + faddd %f34,%f40,%f40 + + faddd %f18,%f42,%f42 + + faddd %f20,%f44,%f44 + + faddd %f22,%f46,%f46 + + fdtos %f40,%f26 + st %f26,[%i3] + add %i3,%i4,%o4 + + fdtos %f42,%f18 + st %f18,[%o4] + add %o4,%i4,%i3 + + fdtos %f44,%f20 + st %f20,[%i3] + add %i3,%i4,%o4 + + fdtos %f46,%f22 + st %f22,[%o4] + add %o4,%i4,%i3 + + cmp %l4,%o5 + bl,pt %icc,.spec4_out_of_range + sll %i2,2,%o4 + + ble,pn %icc,.spec4_inf + sub %i1,%o4,%o4 + +! 
NaN -> NaN + + fmuls %f8,%f8,%f8 + ba .spec4_exit + st %f8,[%i3] + +.spec4_inf: + ld [%o4],%l0 + srl %l0,29,%l0 + andcc %l0,4,%l0 + be,a,pn %icc,.spec4_exit + st %f8,[%i3] + + ba .spec4_exit + st %f3,[%i3] + +.spec4_out_of_range: + sub %i1,%o4,%o4 + ld [%o4],%l0 + srl %l0,29,%l0 + and %l0,4,%l0 + add %l0,2048,%l0 + ld [G1_CONST_TBL+%l0],%f2 + fmuls %f2,%f2,%f2 + st %f2,[%i3] + +.spec4_exit: + fmovs %f10,%f16 + mov %l5,%l0 + fmovs %f12,%f2 + mov %l6,%l1 + fmovs %f14,%f4 + mov %l7,%l2 + lda [%i1]%asi,%l3 + lda [%i1]%asi,%f6 + add %i1,%i2,%o0 + lda [%o0]%asi,%l4 + add %o0,%i2,%o1 + lda [%o1]%asi,%l5 + add %o1,%i2,%o2 + lda [%o2]%asi,%l6 + add %o2,%i2,%o3 + lda [%o3]%asi,%l7 + add %o3,%i2,%i1 + and %l3,G5_CONST,%l3 + and %l4,G5_CONST,%l4 + and %l5,G5_CONST,%l5 + and %l6,G5_CONST,%l6 + and %l7,G5_CONST,%l7 + + subcc %i0,5,%i0 + bpos,pt %icc,.main_loop + add %i3,%i4,%i3 + ba .after_main_loop + nop + + .align 16 +.spec5: + sethi %hi(0x7f800000),%o5 + cmp %l5,%o5 + bge,pn %icc,1f + nop + fcmpes %fcc0,%f10,%f3 + fcmpes %fcc1,%f10,THRESHOLDL + fbl,a,pn %fcc0,1f ! if ( X < 0.0f ) + fstod %f10,%f50 ! (5) y = (double) X + fbl,a,pt %fcc1,.spec5_cont ! if ( X < THRESHOLDL ) + fstod %f10,%f50 ! 
(5) y = (double) X +1: + fmuld F62_K256ONLN2,%f44,%f44 + + fmuld F62_K256ONLN2,%f46,%f46 + + fdtoi %f40,%f16 + st %f16,[%fp+tmp0] + fmuld F62_K256ONLN2,%f48,%f48 + + fdtoi %f42,%f2 + st %f2,[%fp+tmp1] + + fdtoi %f44,%f4 + st %f4,[%fp+tmp2] + + fdtoi %f46,%f6 + st %f6,[%fp+tmp3] + + fdtoi %f48,%f8 + st %f8,[%fp+tmp4] + + fitod %f16,%f34 + fpackfix %f16,%f16 + + fitod %f2,%f18 + fpackfix %f2,%f2 + + fitod %f4,%f20 + fpackfix %f4,%f4 + + fitod %f6,%f22 + fpackfix %f6,%f6 + + fitod %f8,%f24 + fpackfix %f8,%f8 + + ld [%fp+tmp0],%o0 + fsubd %f40,%f34,%f40 + + ld [%fp+tmp1],%o1 + fsubd %f42,%f18,%f42 + + ld [%fp+tmp2],%o2 + and %o0,255,%o0 + fsubd %f44,%f20,%f44 + + ld [%fp+tmp3],%o3 + and %o1,255,%o1 + fsubd %f46,%f22,%f46 + + sll %o0,3,%o0 + sll %o1,3,%o1 + fmuld F60_KA2,%f40,%f34 + fsubd %f48,%f24,%f48 + + and %o2,255,%o2 + fmuld F60_KA2,%f42,%f18 + + sll %o2,3,%o2 + fmuld F60_KA2,%f44,%f20 + + ld [%fp+tmp4],%o4 + and %o3,255,%o3 + fmuld F60_KA2,%f46,%f22 + + sll %o3,3,%o3 + fmuld F60_KA2,%f48,%f24 + faddd F58_KA1,%f34,%f34 + + and %o4,255,%o4 + faddd F58_KA1,%f18,%f18 + + faddd F58_KA1,%f20,%f20 + + faddd F58_KA1,%f22,%f22 + + fmuld %f34,%f40,%f40 + ldd [G1_CONST_TBL+%o0],%f34 + faddd F58_KA1,%f24,%f24 + + fmuld %f18,%f42,%f42 + ldd [G1_CONST_TBL+%o1],%f18 + + fmuld %f20,%f44,%f44 + ldd [G1_CONST_TBL+%o2],%f20 + + fmuld %f22,%f46,%f46 + ldd [G1_CONST_TBL+%o3],%f22 + sll %o4,3,%o4 + + fmuld %f24,%f48,%f48 + ldd [G1_CONST_TBL+%o4],%f24 + fpadd32 %f16,%f34,%f34 + + fpadd32 %f2,%f18,%f18 + + fpadd32 %f4,%f20,%f20 + + fpadd32 %f6,%f22,%f22 + + fpadd32 %f8,%f24,%f24 + fmuld %f34,%f40,%f40 + + fmuld %f18,%f42,%f42 + + fmuld %f20,%f44,%f44 + + fmuld %f22,%f46,%f46 + + fmuld %f24,%f48,%f48 + faddd %f34,%f40,%f40 + + faddd %f18,%f42,%f42 + + faddd %f20,%f44,%f44 + + faddd %f22,%f46,%f46 + + faddd %f24,%f48,%f48 + + fdtos %f40,%f26 + st %f26,[%i3] + add %i3,%i4,%o4 + + fdtos %f42,%f18 + st %f18,[%o4] + add %o4,%i4,%i3 + + fdtos %f44,%f20 + st %f20,[%i3] + add %i3,%i4,%o4 + + 
fdtos %f46,%f22 + st %f22,[%o4] + add %o4,%i4,%i3 + + fdtos %f48,%f24 + st %f24,[%i3] + add %i3,%i4,%i3 + + cmp %l5,%o5 + bl,pt %icc,.spec5_out_of_range + sll %i2,2,%o4 + + ble,pn %icc,.spec5_inf + sub %o4,%i2,%o4 + +! NaN -> NaN + + fmuls %f10,%f10,%f10 + ba .spec5_exit + st %f10,[%i3] + +.spec5_inf: + sub %i1,%o4,%o4 + ld [%o4],%l0 + srl %l0,29,%l0 + andcc %l0,4,%l0 + be,a,pn %icc,.spec5_exit + st %f10,[%i3] + + ba .spec5_exit + st %f3,[%i3] + +.spec5_out_of_range: + sub %o4,%i2,%o4 + sub %i1,%o4,%o4 + ld [%o4],%l0 + srl %l0,29,%l0 + and %l0,4,%l0 + add %l0,2048,%l0 + ld [G1_CONST_TBL+%l0],%f2 + fmuls %f2,%f2,%f2 + st %f2,[%i3] + +.spec5_exit: + fmovs %f12,%f16 + mov %l6,%l0 + fmovs %f14,%f2 + mov %l7,%l1 + lda [%i1]%asi,%l2 + lda [%i1]%asi,%f4 + add %i1,%i2,%i1 + lda [%i1]%asi,%l3 + lda [%i1]%asi,%f6 + add %i1,%i2,%o0 + lda [%o0]%asi,%l4 + add %o0,%i2,%o1 + lda [%o1]%asi,%l5 + add %o1,%i2,%o2 + lda [%o2]%asi,%l6 + add %o2,%i2,%o3 + lda [%o3]%asi,%l7 + add %o3,%i2,%i1 + and %l2,G5_CONST,%l2 + and %l3,G5_CONST,%l3 + and %l4,G5_CONST,%l4 + and %l5,G5_CONST,%l5 + and %l6,G5_CONST,%l6 + and %l7,G5_CONST,%l7 + + subcc %i0,6,%i0 + bpos,pt %icc,.main_loop + add %i3,%i4,%i3 + ba .after_main_loop + nop +.spec6: + sethi %hi(0x7f800000),%o5 + cmp %l6,%o5 + bge,pn %icc,1f + nop + fcmpes %fcc0,%f12,%f3 + fcmpes %fcc1,%f12,THRESHOLDL + fbl,a,pn %fcc0,1f ! if ( X < 0.0f ) + fstod %f12,%f52 ! (6) y = (double) X + fbl,a,pt %fcc1,.spec6_cont ! if ( X < THRESHOLDL ) + fstod %f12,%f52 ! 
(6) y = (double) X +1: + fmuld F62_K256ONLN2,%f46,%f46 + + fdtoi %f40,%f16 + st %f16,[%fp+tmp0] + fmuld F62_K256ONLN2,%f48,%f48 + + fdtoi %f42,%f2 + st %f2,[%fp+tmp1] + fmuld F62_K256ONLN2,%f50,%f50 + + fdtoi %f44,%f4 + st %f4,[%fp+tmp2] + + fdtoi %f46,%f6 + st %f6,[%fp+tmp3] + + fdtoi %f48,%f8 + st %f8,[%fp+tmp4] + + fdtoi %f50,%f10 + st %f10,[%fp+tmp5] + + fitod %f16,%f34 + fpackfix %f16,%f16 + + fitod %f2,%f18 + fpackfix %f2,%f2 + + fitod %f4,%f20 + fpackfix %f4,%f4 + + fitod %f6,%f22 + fpackfix %f6,%f6 + + fitod %f8,%f24 + fpackfix %f8,%f8 + + fitod %f10,%f26 + fpackfix %f10,%f10 + + ld [%fp+tmp0],%o0 + fsubd %f40,%f34,%f40 + + ld [%fp+tmp1],%o1 + fsubd %f42,%f18,%f42 + + ld [%fp+tmp2],%o2 + and %o0,255,%o0 + fsubd %f44,%f20,%f44 + + ld [%fp+tmp3],%o3 + and %o1,255,%o1 + fsubd %f46,%f22,%f46 + + sll %o0,3,%o0 + sll %o1,3,%o1 + fmuld F60_KA2,%f40,%f34 + fsubd %f48,%f24,%f48 + + and %o2,255,%o2 + fmuld F60_KA2,%f42,%f18 + fsubd %f50,%f26,%f50 + + sll %o2,3,%o2 + fmuld F60_KA2,%f44,%f20 + + ld [%fp+tmp4],%o4 + and %o3,255,%o3 + fmuld F60_KA2,%f46,%f22 + + ld [%fp+tmp5],%o5 + sll %o3,3,%o3 + fmuld F60_KA2,%f48,%f24 + faddd F58_KA1,%f34,%f34 + + and %o4,255,%o4 + fmuld F60_KA2,%f50,%f26 + faddd F58_KA1,%f18,%f18 + + and %o5,255,%o5 + faddd F58_KA1,%f20,%f20 + + sll %o5,3,%o5 + faddd F58_KA1,%f22,%f22 + + fmuld %f34,%f40,%f40 + ldd [G1_CONST_TBL+%o0],%f34 + faddd F58_KA1,%f24,%f24 + + fmuld %f18,%f42,%f42 + ldd [G1_CONST_TBL+%o1],%f18 + faddd F58_KA1,%f26,%f26 + + fmuld %f20,%f44,%f44 + ldd [G1_CONST_TBL+%o2],%f20 + + fmuld %f22,%f46,%f46 + ldd [G1_CONST_TBL+%o3],%f22 + sll %o4,3,%o4 + + fmuld %f24,%f48,%f48 + ldd [G1_CONST_TBL+%o4],%f24 + fpadd32 %f16,%f34,%f34 + + fmuld %f26,%f50,%f50 + ldd [G1_CONST_TBL+%o5],%f26 + fpadd32 %f2,%f18,%f18 + + fpadd32 %f4,%f20,%f20 + + fpadd32 %f6,%f22,%f22 + + fpadd32 %f8,%f24,%f24 + fmuld %f34,%f40,%f40 + + fpadd32 %f10,%f26,%f26 + fmuld %f18,%f42,%f42 + + fmuld %f20,%f44,%f44 + + fmuld %f22,%f46,%f46 + + fmuld %f24,%f48,%f48 + 
faddd %f34,%f40,%f40 + + fmuld %f26,%f50,%f50 + faddd %f18,%f42,%f42 + + faddd %f20,%f44,%f44 + + faddd %f22,%f46,%f46 + + faddd %f24,%f48,%f48 + + faddd %f26,%f50,%f50 + + fdtos %f40,%f26 + st %f26,[%i3] + add %i3,%i4,%o4 + + fdtos %f42,%f18 + st %f18,[%o4] + add %o4,%i4,%i3 + + fdtos %f44,%f20 + st %f20,[%i3] + add %i3,%i4,%o4 + + fdtos %f46,%f22 + st %f22,[%o4] + add %o4,%i4,%i3 + + fdtos %f48,%f24 + st %f24,[%i3] + add %i3,%i4,%o4 + + fdtos %f50,%f26 + st %f26,[%o4] + add %o4,%i4,%i3 + + sethi %hi(0x7f800000),%o5 + cmp %l6,%o5 + bl,pt %icc,.spec6_out_of_range + sll %i2,1,%o4 + + ble,pn %icc,.spec6_inf + sub %i1,%o4,%o4 + +! NaN -> NaN + + fmuls %f12,%f12,%f12 + ba .spec6_exit + st %f12,[%i3] + +.spec6_inf: + ld [%o4],%l0 + srl %l0,29,%l0 + andcc %l0,4,%l0 + be,a,pn %icc,.spec6_exit + st %f12,[%i3] + + ba .spec6_exit + st %f3,[%i3] + +.spec6_out_of_range: + sub %i1,%o4,%o4 + ld [%o4],%l0 + srl %l0,29,%l0 + and %l0,4,%l0 + add %l0,2048,%l0 + ld [G1_CONST_TBL+%l0],%f2 + fmuls %f2,%f2,%f2 + st %f2,[%i3] + +.spec6_exit: + fmovs %f14,%f16 + mov %l7,%l0 + lda [%i1]%asi,%l1 + lda [%i1]%asi,%f2 + add %i1,%i2,%i1 + lda [%i1]%asi,%l2 + lda [%i1]%asi,%f4 + add %i1,%i2,%i1 + lda [%i1]%asi,%l3 + lda [%i1]%asi,%f6 + add %i1,%i2,%o0 + lda [%o0]%asi,%l4 + add %o0,%i2,%o1 + lda [%o1]%asi,%l5 + add %o1,%i2,%o2 + lda [%o2]%asi,%l6 + add %o2,%i2,%o3 + lda [%o3]%asi,%l7 + add %o3,%i2,%i1 + and %l1,G5_CONST,%l1 + and %l2,G5_CONST,%l2 + and %l3,G5_CONST,%l3 + and %l4,G5_CONST,%l4 + and %l5,G5_CONST,%l5 + and %l6,G5_CONST,%l6 + and %l7,G5_CONST,%l7 + + subcc %i0,7,%i0 + bpos,pt %icc,.main_loop + add %i3,%i4,%i3 + ba .after_main_loop + nop + + .align 16 +.spec7: + sethi %hi(0x7f800000),%o5 + cmp %l7,%o5 + bge,pn %icc,1f + nop + fcmpes %fcc0,%f14,%f3 + fcmpes %fcc1,%f14,THRESHOLDL + fbl,a,pn %fcc0,1f ! if ( X < 0.0f ) + fstod %f14,%f54 ! (7) y = (double) X + fbl,a,pt %fcc1,.spec7_cont ! if ( X < THRESHOLDL ) + fstod %f14,%f54 ! 
(7) y = (double) X +1: + fdtoi %f40,%f16 + st %f16,[%fp+tmp0] + fmuld F62_K256ONLN2,%f48,%f48 + + fdtoi %f42,%f2 + st %f2,[%fp+tmp1] + fmuld F62_K256ONLN2,%f50,%f50 + + fdtoi %f44,%f4 + st %f4,[%fp+tmp2] + fmuld F62_K256ONLN2,%f52,%f52 + + fdtoi %f46,%f6 + st %f6,[%fp+tmp3] + + fdtoi %f48,%f8 + st %f8,[%fp+tmp4] + + fdtoi %f50,%f10 + st %f10,[%fp+tmp5] + + fdtoi %f52,%f12 + st %f12,[%fp+tmp6] + + fitod %f16,%f34 + fpackfix %f16,%f16 + + fitod %f2,%f18 + fpackfix %f2,%f2 + + fitod %f4,%f20 + fpackfix %f4,%f4 + + fitod %f6,%f22 + fpackfix %f6,%f6 + + fitod %f8,%f24 + fpackfix %f8,%f8 + + fitod %f10,%f26 + fpackfix %f10,%f10 + + fitod %f12,%f28 + fpackfix %f12,%f12 + + ld [%fp+tmp0],%o0 + fsubd %f40,%f34,%f40 + + ld [%fp+tmp1],%o1 + fsubd %f42,%f18,%f42 + + ld [%fp+tmp2],%o2 + and %o0,255,%o0 + fsubd %f44,%f20,%f44 + + ld [%fp+tmp3],%o3 + and %o1,255,%o1 + fsubd %f46,%f22,%f46 + + sll %o0,3,%o0 + sll %o1,3,%o1 + fmuld F60_KA2,%f40,%f34 + fsubd %f48,%f24,%f48 + + and %o2,255,%o2 + fmuld F60_KA2,%f42,%f18 + fsubd %f50,%f26,%f50 + + sll %o2,3,%o2 + fmuld F60_KA2,%f44,%f20 + fsubd %f52,%f28,%f52 + + ld [%fp+tmp4],%o4 + and %o3,255,%o3 + fmuld F60_KA2,%f46,%f22 + + ld [%fp+tmp5],%o5 + sll %o3,3,%o3 + fmuld F60_KA2,%f48,%f24 + faddd F58_KA1,%f34,%f34 + + ld [%fp+tmp6],%o7 + and %o4,255,%o4 + fmuld F60_KA2,%f50,%f26 + faddd F58_KA1,%f18,%f18 + + and %o5,255,%o5 + fmuld F60_KA2,%f52,%f28 + faddd F58_KA1,%f20,%f20 + + sll %o5,3,%o5 + faddd F58_KA1,%f22,%f22 + + fmuld %f34,%f40,%f40 + ldd [G1_CONST_TBL+%o0],%f34 + faddd F58_KA1,%f24,%f24 + + fmuld %f18,%f42,%f42 + ldd [G1_CONST_TBL+%o1],%f18 + faddd F58_KA1,%f26,%f26 + + fmuld %f20,%f44,%f44 + ldd [G1_CONST_TBL+%o2],%f20 + faddd F58_KA1,%f28,%f28 + + fmuld %f22,%f46,%f46 + ldd [G1_CONST_TBL+%o3],%f22 + sll %o4,3,%o4 + + fmuld %f24,%f48,%f48 + ldd [G1_CONST_TBL+%o4],%f24 + and %o7,255,%o7 + fpadd32 %f16,%f34,%f34 + + fmuld %f26,%f50,%f50 + ldd [G1_CONST_TBL+%o5],%f26 + sll %o7,3,%o7 + fpadd32 %f2,%f18,%f18 + + fmuld 
%f28,%f52,%f52 + ldd [G1_CONST_TBL+%o7],%f28 + fpadd32 %f4,%f20,%f20 + + fpadd32 %f6,%f22,%f22 + + fpadd32 %f8,%f24,%f24 + fmuld %f34,%f40,%f40 + + fpadd32 %f10,%f26,%f26 + fmuld %f18,%f42,%f42 + + fpadd32 %f12,%f28,%f28 + fmuld %f20,%f44,%f44 + + fmuld %f22,%f46,%f46 + + fmuld %f24,%f48,%f48 + faddd %f34,%f40,%f40 + + fmuld %f26,%f50,%f50 + faddd %f18,%f42,%f42 + + fmuld %f28,%f52,%f52 + faddd %f20,%f44,%f44 + + faddd %f22,%f46,%f46 + + faddd %f24,%f48,%f48 + + faddd %f26,%f50,%f50 + + faddd %f28,%f52,%f52 + + fdtos %f40,%f26 + st %f26,[%i3] + add %i3,%i4,%o4 + + fdtos %f42,%f18 + st %f18,[%o4] + add %o4,%i4,%i3 + + fdtos %f44,%f20 + st %f20,[%i3] + add %i3,%i4,%o4 + + fdtos %f46,%f22 + st %f22,[%o4] + add %o4,%i4,%i3 + + fdtos %f48,%f24 + st %f24,[%i3] + add %i3,%i4,%o4 + + fdtos %f50,%f26 + st %f26,[%o4] + add %o4,%i4,%i3 + + fdtos %f52,%f28 + st %f28,[%i3] + add %i3,%i4,%i3 + + sethi %hi(0x7f800000),%o5 + cmp %l7,%o5 + bl,pt %icc,.spec7_out_of_range + sub %i1,%i2,%o4 + + ble,pn %icc,.spec7_inf + ld [%o4],%l0 + +! NaN -> NaN + + fmuls %f14,%f14,%f14 + ba .spec7_exit + st %f14,[%i3] + +.spec7_inf: + srl %l0,29,%l0 + andcc %l0,4,%l0 + be,a,pn %icc,.spec7_exit + st %f14,[%i3] + + ba .spec7_exit + st %f3,[%i3] + +.spec7_out_of_range: + ld [%o4],%l0 + srl %l0,29,%l0 + and %l0,4,%l0 + add %l0,2048,%l0 + ld [G1_CONST_TBL+%l0],%f2 + fmuls %f2,%f2,%f2 + st %f2,[%i3] + +.spec7_exit: + subcc %i0,8,%i0 + bpos,pt %icc,.main_loop_preload + add %i3,%i4,%i3 + + ba .tail + nop + SET_SIZE(__vexpf) + diff --git a/usr/src/lib/libmvec/common/vis/__vhypot.S b/usr/src/lib/libmvec/common/vis/__vhypot.S new file mode 100644 index 0000000000..ef8436d33b --- /dev/null +++ b/usr/src/lib/libmvec/common/vis/__vhypot.S @@ -0,0 +1,1243 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .file "__vhypot.S" + +#include "libm.h" + + RO_DATA + .align 64 + +.CONST_TBL: + .word 0x7ff00000, 0 ! DC0 + .word 0x7fe00000, 0 ! DC1 + .word 0x00100000, 0 ! DC2 + .word 0x41b00000, 0 ! D2ON28 = 268435456.0 + .word 0x7fd00000, 0 ! DC3 + +#define counter %i0 +#define tmp_counter %l3 +#define tmp_px %l5 +#define tmp_py %o7 +#define stridex %i2 +#define stridey %i4 +#define stridez %l0 + +#define DC0 %f8 +#define DC0_HI %f8 +#define DC0_LO %f9 +#define DC1 %f46 +#define DC2 %f48 +#define DC3 %f0 +#define D2ON28 %f62 + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +! !!!!! algorithm !!!!! +! ((float*)&x)[0] = ((float*)px)[0]; +! ((float*)&x)[1] = ((float*)px)[1]; +! +! ((float*)&y)[0] = ((float*)py)[0]; +! ((float*)&y)[1] = ((float*)py)[1]; +! +! x = fabs(x); +! y = fabs(y); +! +! c0 = vis_fcmple32(DC1,x); +! c2 = vis_fcmple32(DC1,y); +! c1 = vis_fcmpgt32(DC2,x); +! c3 = vis_fcmpgt32(DC2,y); +! +! c0 |= c2; +! c1 &= c3; +! if ( (c0 & 2) != 0 ) +! { +! lx = ((int*)px)[1]; +! ly = ((int*)py)[1]; +! hx = *(int*)px; +! hy = *(int*)py; +! +! hx &= 0x7fffffff; +! hy &= 0x7fffffff; +! +! j0 = hx; +! if ( j0 < hy ) j0 = hy; +! j0 &= 0x7ff00000; +! 
if ( j0 >= 0x7ff00000 ) +! { +! if ( hx == 0x7ff00000 && lx == 0 ) res = x == y ? y : x; +! else if ( hy == 0x7ff00000 && ly == 0 ) res = x == y ? x : y; +! else res = x * y; +! +! ((float*)pz)[0] = ((float*)&res)[0]; +! ((float*)pz)[1] = ((float*)&res)[1]; +! } +! else +! { +! diff = hy - hx; +! j0 = diff >> 31; +! if ( ((diff ^ j0) - j0) < 0x03600000 ) +! { +! x *= D2ONM1022; +! y *= D2ONM1022; +! +! x_hi = ( x + two28 ) - two28; +! x_lo = x - x_hi; +! y_hi = ( y + two28 ) - two28; +! y_lo = y - y_hi; +! res = (x_hi * x_hi + y_hi * y_hi); +! res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo); +! +! res = sqrt(res); +! +! res = D2ONP1022 * res; +! ((float*)pz)[0] = ((float*)&res)[0]; +! ((float*)pz)[1] = ((float*)&res)[1]; +! } +! else +! { +! res = x + y; +! ((float*)pz)[0] = ((float*)&res)[0]; +! ((float*)pz)[1] = ((float*)&res)[1]; +! } +! } +! px += stridex; +! py += stridey; +! pz += stridez; +! continue; +! } +! if ( (c1 & 2) != 0 ) +! { +! x *= D2ONP1022; +! y *= D2ONP1022; +! +! x_hi = ( x + two28 ) - two28; +! x_lo = x - x_hi; +! y_hi = ( y + two28 ) - two28; +! y_lo = y - y_hi; +! res = (x_hi * x_hi + y_hi * y_hi); +! res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo); +! +! res = sqrt(res); +! +! res = D2ONM1022 * res; +! ((float*)pz)[0] = ((float*)&res)[0]; +! ((float*)pz)[1] = ((float*)&res)[1]; +! px += stridex; +! py += stridey; +! pz += stridez; +! continue; +! } +! +! dmax = x; +! if ( dmax < y ) dmax = y; +! +! dmax = vis_fand(dmax,DC0); +! dnorm = vis_fpsub32(DC1,dmax); +! +! x *= dnorm; +! y *= dnorm; +! +! x_hi = x + D2ON28; +! x_hi -= D2ON28; +! x_lo = x - x_hi; +! +! y_hi = y + D2ON28; +! y_hi -= D2ON28; +! y_lo = y - y_hi; +! +! res = x_hi * x_hi; +! dtmp1 = x + x_hi; +! dtmp0 = y_hi * y_hi; +! dtmp2 = y + y_hi; +! +! res += dtmp0; +! dtmp1 *= x_lo; +! dtmp2 *= y_lo; +! dtmp1 += dtmp2; +! res += dtmp1; +! +! res = sqrt(res); +! +! res = dmax * res; +! ((float*)pz)[0] = ((float*)&res)[0]; +! ((float*)pz)[1] = ((float*)&res)[1]; +! +!
px += stridex; +! py += stridey; +! pz += stridez; +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + ENTRY(__vhypot) + save %sp,-SA(MINFRAME),%sp + PIC_SETUP(l7) + PIC_SET(l7,.CONST_TBL,o3) + wr %g0,0x82,%asi + +#ifdef __sparcv9 + ldx [%fp+STACK_BIAS+176],%l0 +#else + ld [%fp+STACK_BIAS+92],%l0 +#endif + ldd [%o3],DC0 + sll %i2,3,stridex + mov %i0,tmp_counter + + ldd [%o3+8],DC1 + sll %i4,3,stridey + mov %i1,tmp_px + + ldd [%o3+16],DC2 + sll %l0,3,stridez + mov %i3,tmp_py + + ldd [%o3+24],D2ON28 + + ldd [%o3+32],DC3 + +.begin: + mov tmp_counter,counter + mov tmp_px,%i1 + mov tmp_py,%i3 + clr tmp_counter +.begin1: + cmp counter,0 + ble,pn %icc,.exit + nop + + lda [%i1]%asi,%o0 + sethi %hi(0x7ffffc00),%o5 + + lda [%i3]%asi,%o2 + add %o5,1023,%o5 + + lda [%i1]%asi,%f26 ! (1_0) ((float*)&x)[0] = ((float*)px)[0]; + + lda [%i1+4]%asi,%f27 ! (1_0) ((float*)&x)[1] = ((float*)px)[1]; + add %i1,stridex,%o1 ! px += stridex + + lda [%i3]%asi,%f24 ! (1_0) ((float*)&y)[0] = ((float*)py)[0]; + sethi %hi(0x00100000),%l7 + and %o0,%o5,%o0 + + lda [%i3+4]%asi,%f25 ! (1_0) ((float*)&y)[1] = ((float*)py)[1]; + and %o2,%o5,%o2 + sethi %hi(0x7fe00000),%l6 + + fabsd %f26,%f36 ! (1_0) x = fabs(x); + cmp %o0,%o2 + mov %o2,%l4 + + fabsd %f24,%f54 ! (1_0) y = fabs(y); + add %i3,stridey,%o5 ! py += stridey + movg %icc,%o0,%o2 + lda [%o5]%asi,%f28 ! (2_0) ((float*)&y)[0] = ((float*)py)[0]; + + cmp %o2,%l6 + sethi %hi(0x7ff00000),%o4 + bge,pn %icc,.spec0 + lda [%o5+4]%asi,%f29 ! (2_0) ((float*)&y)[1] = ((float*)py)[1]; + + cmp %o2,%l7 + bl,pn %icc,.spec1 + nop + lda [%o1]%asi,%f26 ! (2_0) ((float*)&x)[0] = ((float*)px)[0]; + + lda [%o1+4]%asi,%f27 ! (2_0) ((float*)&x)[1] = ((float*)px)[1]; + add %i3,stridey,%i3 ! py += stridey + + fabsd %f28,%f34 ! (2_0) y = fabs(y); + + fabsd %f26,%f50 ! (2_0) x = fabs(x); + + fcmple32 DC1,%f50,%o3 ! (2_0) c0 = vis_fcmple32(DC1,x); + + fcmple32 DC1,%f34,%o0 ! (2_0) c2 = vis_fcmple32(DC1,y); + + fcmpgt32 DC2,%f50,%o4 ! 
(2_0) c1 = vis_fcmpgt32(DC2,x); + + fcmpgt32 DC2,%f34,%o5 ! (2_0) c3 = vis_fcmpgt32(DC2,y); + + or %o3,%o0,%o3 ! (2_0) c0 |= c2; + + andcc %o3,2,%g0 ! (2_0) c0 & 2 + bnz,pn %icc,.update0 ! (2_0) if ( (c0 & 2) != 0 ) + and %o4,%o5,%o4 ! (2_0) c1 &= c3; +.cont0: + add %i3,stridey,%l4 ! py += stridey + andcc %o4,2,%g0 ! (2_0) c1 & 2 + bnz,pn %icc,.update1 ! (2_0) if ( (c1 & 2) != 0 ) + fmovd %f36,%f56 ! (1_0) dmax = x; +.cont1: + lda [%l4]%asi,%f30 ! (3_0) ((float*)&y)[0] = ((float*)py)[0]; + add %o1,stridex,%l2 ! px += stridex + + lda [%l4+4]%asi,%f31 ! (3_0) ((float*)&y)[1] = ((float*)py)[1]; + + lda [%l2]%asi,%f18 ! (3_1) ((float*)&x)[0] = ((float*)px)[0]; + + lda [%l2+4]%asi,%f19 ! (3_1) ((float*)&x)[1] = ((float*)px)[1]; + + fabsd %f30,%f30 ! (3_1) y = fabs(y); + + fabsd %f18,%f18 ! (3_1) x = fabs(x); + + fcmped %fcc2,%f54,%f56 ! (1_1) dmax ? y + + fmovdg %fcc2,%f54,%f56 ! (1_1) if ( dmax < y ) dmax = y; + + fcmple32 DC1,%f18,%o3 ! (3_1) c0 = vis_fcmple32(DC1,x); + + fcmple32 DC1,%f30,%o0 ! (3_1) c2 = vis_fcmple32(DC1,y); + + fcmpgt32 DC2,%f18,%o4 ! (3_1) c1 = vis_fcmpgt32(DC2,x); + + fcmpgt32 DC2,%f30,%o1 ! (3_1) c3 = vis_fcmpgt32(DC2,y); + + fand %f56,DC0,%f38 ! (1_1) dmax = vis_fand(dmax,DC0); + + or %o3,%o0,%o3 ! (3_1) c0 |= c2; + + andcc %o3,2,%g0 ! (3_1) c0 & 2 + bnz,pn %icc,.update2 ! (3_1) if ( (c0 & 2) != 0 ) + and %o4,%o1,%o4 ! (3_1) c1 &= c3; +.cont2: + add %l4,stridey,%i3 ! py += stridey + andcc %o4,2,%g0 ! (3_1) c1 & 2 + bnz,pn %icc,.update3 ! (3_1) if ( (c1 & 2) != 0 ) + fmovd %f50,%f32 ! (2_1) dmax = x; +.cont3: + fpsub32 DC1,%f38,%f10 ! (1_1) dnorm = vis_fpsub32(DC1,dmax); + lda [%i3]%asi,%f20 ! (0_0) ((float*)&y)[0] = ((float*)py)[0]; + + lda [%i3+4]%asi,%f21 ! (0_0) ((float*)&y)[1] = ((float*)py)[1]; + + add %l2,stridex,%l1 ! px += stridex + + fmuld %f36,%f10,%f36 ! (1_1) x *= dnorm; + lda [%l1]%asi,%f22 ! (0_0) ((float*)&x)[0] = ((float*)px)[0] + + lda [%l1+4]%asi,%f23 ! (0_0) ((float*)&x)[1] = ((float*)px)[1]; + + fmuld %f54,%f10,%f56 ! 
(1_1) y *= dnorm; + fabsd %f20,%f40 ! (0_0) y = fabs(y); + + fabsd %f22,%f20 ! (0_0) x = fabs(x); + + fcmped %fcc3,%f34,%f32 ! (2_1) dmax ? y + + + fmovdg %fcc3,%f34,%f32 ! (2_1) if ( dmax < y ) dmax = y; + + faddd %f36,D2ON28,%f58 ! (1_1) x_hi = x + D2ON28; + fcmple32 DC1,%f20,%g5 ! (0_0) c0 = vis_fcmple32(DC1,x); + + faddd %f56,D2ON28,%f22 ! (1_1) y_hi = y + D2ON28; + fcmple32 DC1,%f40,%o2 ! (0_0) c2 = vis_fcmple32(DC1,y); + + fcmpgt32 DC2,%f20,%g1 ! (0_0) c1 = vis_fcmpgt32(DC2,x); + + fcmpgt32 DC2,%f40,%o4 ! (0_0) c3 = vis_fcmpgt32(DC2,y); + + fand %f32,DC0,%f52 ! (2_1) dmax = vis_fand(dmax,DC0); + + or %g5,%o2,%g5 ! (0_0) c0 |= c2; + fsubd %f58,D2ON28,%f58 ! (1_1) x_hi -= D2ON28; + + andcc %g5,2,%g0 ! (0_0) c0 & 2 + bnz,pn %icc,.update4 ! (0_0) if ( (c0 & 2) != 0 ) + fsubd %f22,D2ON28,%f22 ! (1_1) y_hi -= D2ON28; +.cont4: + and %g1,%o4,%g1 ! (0_0) c1 &= c3; + + add %i3,stridey,%l2 ! py += stridey + andcc %g1,2,%g0 ! (0_0) c1 & 2 + bnz,pn %icc,.update5 ! (0_0) if ( (c1 & 2) != 0 ) + fmovd %f18,%f44 ! (3_1) dmax = x; +.cont5: + fpsub32 DC1,%f52,%f10 ! (2_1) dnorm = vis_fpsub32(DC1,dmax); + lda [%l2]%asi,%f24 ! (1_0) ((float*)&y)[0] = ((float*)py)[0]; + + fmuld %f58,%f58,%f60 ! (1_1) res = x_hi * x_hi; + lda [%l2+4]%asi,%f25 ! (1_0) ((float*)&y)[1] = ((float*)py)[1]; + add %l1,stridex,%l7 ! px += stridex + faddd %f56,%f22,%f28 ! (1_1) dtmp2 = y + y_hi; + + faddd %f36,%f58,%f6 ! (1_1) dtmp1 = x + x_hi; + lda [%l7]%asi,%f26 ! (1_0) ((float*)&x)[0] = ((float*)px)[0]; + + fmuld %f50,%f10,%f50 ! (2_1) x *= dnorm; + fsubd %f36,%f58,%f58 ! (1_1) x_lo = x - x_hi; + lda [%l7+4]%asi,%f27 ! (1_0) ((float*)&x)[1] = ((float*)px)[1]; + + fmuld %f22,%f22,%f2 ! (1_1) dtmp0 = y_hi * y_hi; + fsubd %f56,%f22,%f56 ! (1_1) y_lo = y - y_hi; + + fmuld %f34,%f10,%f34 ! (2_1) y *= dnorm; + fabsd %f24,%f54 ! (1_0) y = fabs(y); + + fabsd %f26,%f36 ! (1_0) x = fabs(x); + + fmuld %f6,%f58,%f10 ! (1_1) dtmp1 *= x_lo; + fcmped %fcc0,%f30,%f44 ! (3_1) dmax ? y + + fmuld %f28,%f56,%f26 ! 
(1_1) dtmp2 *= y_lo; + + fmovdg %fcc0,%f30,%f44 ! (3_1) if ( dmax < y ) dmax = y; + + faddd %f50,D2ON28,%f58 ! (2_1) x_hi = x + D2ON28; + fcmple32 DC1,%f36,%g1 ! (1_0) c0 = vis_fcmple32(DC1,x); + + faddd %f34,D2ON28,%f22 ! (2_1) y_hi = y + D2ON28; + fcmple32 DC1,%f54,%g5 ! (1_0) c2 = vis_fcmple32(DC1,y); + + faddd %f60,%f2,%f24 ! (1_1) res += dtmp0; + fcmpgt32 DC2,%f36,%o5 ! (1_0) c1 = vis_fcmpgt32(DC2,x); + + faddd %f10,%f26,%f28 ! (1_1) dtmp1 += dtmp2; + fcmpgt32 DC2,%f54,%o1 ! (1_0) c3 = vis_fcmpgt32(DC2,y); + + fand %f44,DC0,%f14 ! (3_1) dmax = vis_fand(dmax,DC0); + + or %g1,%g5,%g1 ! (1_0) c0 |= c2; + fsubd %f58,D2ON28,%f44 ! (2_1) x_hi -= D2ON28; + + andcc %g1,2,%g0 ! (1_0) c0 & 2 + bnz,pn %icc,.update6 ! (1_0) if ( (c0 & 2) != 0 ) + fsubd %f22,D2ON28,%f58 ! (2_1) y_hi -= D2ON28; +.cont6: + and %o5,%o1,%o5 ! (1_0) c1 &= c3; + faddd %f24,%f28,%f26 ! (1_1) res += dtmp1; + + add %l2,stridey,%i3 ! py += stridey + andcc %o5,2,%g0 ! (1_0) c1 & 2 + bnz,pn %icc,.update7 ! (1_0) if ( (c1 & 2) != 0 ) + fmovd %f20,%f4 ! (0_0) dmax = x; +.cont7: + fpsub32 DC1,%f14,%f10 ! (3_1) dnorm = vis_fpsub32(DC1,dmax); + lda [%i3]%asi,%f28 ! (2_0) ((float*)&y)[0] = ((float*)py)[0]; + + fmuld %f44,%f44,%f2 ! (2_1) res = x_hi * x_hi; + lda [%i3+4]%asi,%f29 ! (2_0) ((float*)&y)[1] = ((float*)py)[1]; + add %l7,stridex,%o1 ! px += stridex + faddd %f34,%f58,%f60 ! (2_1) dtmp2 = y + y_hi; + + fsqrtd %f26,%f24 ! (1_1) res = sqrt(res); + lda [%o1]%asi,%f26 ! (2_0) ((float*)&x)[0] = ((float*)px)[0]; + faddd %f50,%f44,%f56 ! (2_1) dtmp1 = x + x_hi; + + fmuld %f18,%f10,%f6 ! (3_1) x *= dnorm; + fsubd %f50,%f44,%f18 ! (2_1) x_lo = x - x_hi; + lda [%o1+4]%asi,%f27 ! (2_0) ((float*)&x)[1] = ((float*)px)[1]; + + fmuld %f58,%f58,%f44 ! (2_1) dtmp0 = y_hi * y_hi; + fsubd %f34,%f58,%f22 ! (2_1) y_lo = y - y_hi; + + fmuld %f30,%f10,%f58 ! (3_1) y *= dnorm; + fabsd %f28,%f34 ! (2_0) y = fabs(y); + + fabsd %f26,%f50 ! (2_0) x = fabs(x); + + fmuld %f56,%f18,%f10 ! 
(2_1) dtmp1 *= x_lo; + fcmped %fcc1,%f40,%f4 ! (0_0) dmax ? y + + fmuld %f60,%f22,%f12 ! (2_1) dtmp2 *= y_lo; + + fmovdg %fcc1,%f40,%f4 ! (0_0) if ( dmax < y ) dmax = y; + + faddd %f6,D2ON28,%f56 ! (3_1) x_hi = x + D2ON28; + fcmple32 DC1,%f50,%o3 ! (2_0) c0 = vis_fcmple32(DC1,x); + + faddd %f58,D2ON28,%f28 ! (3_1) y_hi = y + D2ON28; + fcmple32 DC1,%f34,%o0 ! (2_0) c2 = vis_fcmple32(DC1,y); + + faddd %f2,%f44,%f30 ! (2_1) res += dtmp0; + fcmpgt32 DC2,%f50,%o4 ! (2_0) c1 = vis_fcmpgt32(DC2,x); + + faddd %f10,%f12,%f26 ! (2_1) dtmp1 += dtmp2; + fcmpgt32 DC2,%f34,%o5 ! (2_0) c3 = vis_fcmpgt32(DC2,y); + + fand %f4,DC0,%f16 ! (0_0) dmax = vis_fand(dmax,DC0); + + or %o3,%o0,%o3 ! (2_0) c0 |= c2; + fsubd %f56,D2ON28,%f18 ! (3_1) x_hi -= D2ON28; + + andcc %o3,2,%g0 ! (2_0) c0 & 2 + bnz,pn %icc,.update8 ! (2_0) if ( (c0 & 2) != 0 ) + fsubd %f28,D2ON28,%f4 ! (3_1) y_hi -= D2ON28; +.cont8: + and %o4,%o5,%o4 ! (2_0) c1 &= c3; + faddd %f30,%f26,%f12 ! (2_1) res += dtmp1; + + add %i3,stridey,%l4 ! py += stridey + andcc %o4,2,%g0 ! (2_0) c1 & 2 + bnz,pn %icc,.update9 ! (2_0) if ( (c1 & 2) != 0 ) + fmovd %f36,%f56 ! (1_0) dmax = x; +.cont9: + lda [%l4]%asi,%f30 ! (3_0) ((float*)&y)[0] = ((float*)py)[0]; + add %o1,stridex,%l2 ! px += stridex + fpsub32 DC1,%f16,%f44 ! (0_0) dnorm = vis_fpsub32(DC1,dmax); + + fmuld %f18,%f18,%f60 ! (3_1) res = x_hi * x_hi; + lda [%l4+4]%asi,%f31 ! (3_0) ((float*)&y)[1] = ((float*)py)[1]; + faddd %f58,%f4,%f32 ! (3_1) dtmp2 = y + y_hi; + + fsqrtd %f12,%f12 ! (2_1) res = sqrt(res); + faddd %f6,%f18,%f28 ! (3_1) dtmp1 = x + x_hi; + + cmp counter,4 + bl,pn %icc,.tail + nop + + ba .main_loop + sub counter,4,counter + + .align 16 +.main_loop: + fmuld %f20,%f44,%f2 ! (0_1) x *= dnorm; + fsubd %f6,%f18,%f20 ! (3_2) x_lo = x - x_hi; + lda [%l2]%asi,%f18 ! (3_1) ((float*)&x)[0] = ((float*)px)[0]; + + fmuld %f4,%f4,%f22 ! (3_2) dtmp0 = y_hi * y_hi; + lda [%l2+4]%asi,%f19 ! (3_1) ((float*)&x)[1] = ((float*)px)[1]; + fsubd %f58,%f4,%f58 ! 
(3_2) y_lo = y - y_hi; + + fmuld %f40,%f44,%f44 ! (0_1) y *= dnorm; + fabsd %f30,%f30 ! (3_1) y = fabs(y); + + fmuld %f38,%f24,%f10 ! (1_2) res = dmax * res; + fabsd %f18,%f18 ! (3_1) x = fabs(x); + st %f10,[%i5] ! (1_2) ((float*)pz)[0] = ((float*)&res)[0]; + + fmuld %f28,%f20,%f28 ! (3_2) dtmp1 *= x_lo; + st %f11,[%i5+4] ! (1_2) ((float*)pz)[1] = ((float*)&res)[1]; + fcmped %fcc2,%f54,%f56 ! (1_1) dmax ? y + + fmuld %f32,%f58,%f24 ! (3_2) dtmp2 *= y_lo; + + fmovdg %fcc2,%f54,%f56 ! (1_1) if ( dmax < y ) dmax = y; + + faddd %f2,D2ON28,%f10 ! (0_1) x_hi = x + D2ON28; + fcmple32 DC1,%f18,%o3 ! (3_1) c0 = vis_fcmple32(DC1,x); + + faddd %f44,D2ON28,%f20 ! (0_1) y_hi = y + D2ON28; + fcmple32 DC1,%f30,%o0 ! (3_1) c2 = vis_fcmple32(DC1,y); + + faddd %f60,%f22,%f22 ! (3_2) res += dtmp0; + fcmpgt32 DC2,%f18,%o4 ! (3_1) c1 = vis_fcmpgt32(DC2,x); + + faddd %f28,%f24,%f26 ! (3_2) dtmp1 += dtmp2; + fcmpgt32 DC2,%f30,%o1 ! (3_1) c3 = vis_fcmpgt32(DC2,y); + + fand %f56,DC0,%f38 ! (1_1) dmax = vis_fand(dmax,DC0); + + or %o3,%o0,%o3 ! (3_1) c0 |= c2; + fsubd %f10,D2ON28,%f58 ! (0_1) x_hi -= D2ON28; + + andcc %o3,2,%g0 ! (3_1) c0 & 2 + bnz,pn %icc,.update10 ! (3_1) if ( (c0 & 2) != 0 ) + fsubd %f20,D2ON28,%f56 ! (0_1) y_hi -= D2ON28; +.cont10: + faddd %f22,%f26,%f28 ! (3_2) res += dtmp1; + and %o4,%o1,%o4 ! (3_1) c1 &= c3; + + add %l4,stridey,%i3 ! py += stridey + andcc %o4,2,%g0 ! (3_1) c1 & 2 + bnz,pn %icc,.update11 ! (3_1) if ( (c1 & 2) != 0 ) + fmovd %f50,%f32 ! (2_1) dmax = x; +.cont11: + fpsub32 DC1,%f38,%f10 ! (1_1) dnorm = vis_fpsub32(DC1,dmax); + add %l2,stridex,%l1 ! px += stridex + lda [%i3]%asi,%f20 ! (0_0) ((float*)&y)[0] = ((float*)py)[0]; + + fmuld %f58,%f58,%f6 ! (0_1) res = x_hi * x_hi; + lda [%i3+4]%asi,%f21 ! (0_0) ((float*)&y)[1] = ((float*)py)[1]; + add %i5,stridez,%l6 ! pz += stridez + faddd %f44,%f56,%f60 ! (0_1) dtmp2 = y + y_hi; + + fsqrtd %f28,%f4 ! (3_2) res = sqrt(res); + lda [%l1]%asi,%f22 ! 
(0_0) ((float*)&x)[0] = ((float*)px)[0]; + faddd %f2,%f58,%f24 ! (0_1) dtmp1 = x + x_hi; + + fmuld %f36,%f10,%f36 ! (1_1) x *= dnorm; + fsubd %f2,%f58,%f26 ! (0_1) x_lo = x - x_hi; + lda [%l1+4]%asi,%f23 ! (0_0) ((float*)&x)[1] = ((float*)px)[1]; + + fmuld %f56,%f56,%f28 ! (0_1) dtmp0 = y_hi * y_hi; + fsubd %f44,%f56,%f44 ! (0_1) y_lo = y - y_hi; + + fmuld %f54,%f10,%f56 ! (1_1) y *= dnorm; + fabsd %f20,%f40 ! (0_0) y = fabs(y); + + fmuld %f52,%f12,%f12 ! (2_2) res = dmax * res; + fabsd %f22,%f20 ! (0_0) x = fabs(x); + st %f12,[%l6] ! (2_2) ((float*)pz)[0] = ((float*)&res)[0]; + + fmuld %f24,%f26,%f10 ! (0_1) dtmp1 *= x_lo; + st %f13,[%l6+4] ! (2_2) ((float*)pz)[1] = ((float*)&res)[1]; + fcmped %fcc3,%f34,%f32 ! (2_1) dmax ? y + + fmuld %f60,%f44,%f12 ! (0_1) dtmp2 *= y_lo; + + fmovdg %fcc3,%f34,%f32 ! (2_1) if ( dmax < y ) dmax = y; + + faddd %f36,D2ON28,%f58 ! (1_1) x_hi = x + D2ON28; + fcmple32 DC1,%f20,%g5 ! (0_0) c0 = vis_fcmple32(DC1,x); + + faddd %f56,D2ON28,%f22 ! (1_1) y_hi = y + D2ON28; + fcmple32 DC1,%f40,%o2 ! (0_0) c2 = vis_fcmple32(DC1,y); + + faddd %f6,%f28,%f24 ! (0_1) res += dtmp0; + fcmpgt32 DC2,%f20,%g1 ! (0_0) c1 = vis_fcmpgt32(DC2,x); + + faddd %f10,%f12,%f26 ! (0_1) dtmp1 += dtmp2; + fcmpgt32 DC2,%f40,%o4 ! (0_0) c3 = vis_fcmpgt32(DC2,y); + + fand %f32,DC0,%f52 ! (2_1) dmax = vis_fand(dmax,DC0); + + or %g5,%o2,%g5 ! (0_0) c0 |= c2; + fsubd %f58,D2ON28,%f58 ! (1_1) x_hi -= D2ON28; + + andcc %g5,2,%g0 ! (0_0) c0 & 2 + bnz,pn %icc,.update12 ! (0_0) if ( (c0 & 2) != 0 ) + fsubd %f22,D2ON28,%f22 ! (1_1) y_hi -= D2ON28; +.cont12: + and %g1,%o4,%g1 ! (0_0) c1 &= c3; + faddd %f24,%f26,%f12 ! (0_1) res += dtmp1; + + add %i3,stridey,%l2 ! py += stridey + andcc %g1,2,%g0 ! (0_0) c1 & 2 + bnz,pn %icc,.update13 ! (0_0) if ( (c1 & 2) != 0 ) + fmovd %f18,%f44 ! (3_1) dmax = x; +.cont13: + fpsub32 DC1,%f52,%f10 ! (2_1) dnorm = vis_fpsub32(DC1,dmax); + add %l1,stridex,%l7 ! px += stridex + lda [%l2]%asi,%f24 ! 
(1_0) ((float*)&y)[0] = ((float*)py)[0]; + + fmuld %f58,%f58,%f60 ! (1_1) res = x_hi * x_hi; + add %l6,stridez,%i5 ! pz += stridez + lda [%l2+4]%asi,%f25 ! (1_0) ((float*)&y)[1] = ((float*)py)[1]; + faddd %f56,%f22,%f28 ! (1_1) dtmp2 = y + y_hi; + + fsqrtd %f12,%f12 ! (0_1) res = sqrt(res); + lda [%l7]%asi,%f26 ! (1_0) ((float*)&x)[0] = ((float*)px)[0]; + faddd %f36,%f58,%f6 ! (1_1) dtmp1 = x + x_hi; + + fmuld %f50,%f10,%f50 ! (2_1) x *= dnorm; + fsubd %f36,%f58,%f58 ! (1_1) x_lo = x - x_hi; + lda [%l7+4]%asi,%f27 ! (1_0) ((float*)&x)[1] = ((float*)px)[1]; + + fmuld %f22,%f22,%f2 ! (1_1) dtmp0 = y_hi * y_hi; + fsubd %f56,%f22,%f56 ! (1_1) y_lo = y - y_hi; + + fmuld %f34,%f10,%f34 ! (2_1) y *= dnorm; + fabsd %f24,%f54 ! (1_0) y = fabs(y); + + fmuld %f14,%f4,%f14 ! (3_2) res = dmax * res; + fabsd %f26,%f36 ! (1_0) x = fabs(x); + st %f14,[%i5] ! (3_2) ((float*)pz)[0] = ((float*)&res)[0]; + + fmuld %f6,%f58,%f10 ! (1_1) dtmp1 *= x_lo; + st %f15,[%i5+4] ! (3_2) ((float*)pz)[1] = ((float*)&res)[1]; + fcmped %fcc0,%f30,%f44 ! (3_1) dmax ? y + + fmuld %f28,%f56,%f26 ! (1_1) dtmp2 *= y_lo; + + fmovdg %fcc0,%f30,%f44 ! (3_1) if ( dmax < y ) dmax = y; + + faddd %f50,D2ON28,%f58 ! (2_1) x_hi = x + D2ON28; + fcmple32 DC1,%f36,%g1 ! (1_0) c0 = vis_fcmple32(DC1,x); + + faddd %f34,D2ON28,%f22 ! (2_1) y_hi = y + D2ON28; + fcmple32 DC1,%f54,%g5 ! (1_0) c2 = vis_fcmple32(DC1,y); + + faddd %f60,%f2,%f24 ! (1_1) res += dtmp0; + fcmpgt32 DC2,%f36,%o5 ! (1_0) c1 = vis_fcmpgt32(DC2,x); + + faddd %f10,%f26,%f28 ! (1_1) dtmp1 += dtmp2; + fcmpgt32 DC2,%f54,%o1 ! (1_0) c3 = vis_fcmpgt32(DC2,y); + + fand %f44,DC0,%f14 ! (3_1) dmax = vis_fand(dmax,DC0); + + or %g1,%g5,%g1 ! (1_0) c0 |= c2; + fsubd %f58,D2ON28,%f44 ! (2_1) x_hi -= D2ON28; + + andcc %g1,2,%g0 ! (1_0) c0 & 2 + bnz,pn %icc,.update14 ! (1_0) if ( (c0 & 2) != 0 ) + fsubd %f22,D2ON28,%f58 ! (2_1) y_hi -= D2ON28; +.cont14: + and %o5,%o1,%o5 ! (1_0) c1 &= c3; + faddd %f24,%f28,%f26 ! (1_1) res += dtmp1; + + add %l2,stridey,%i3 ! 
py += stridey + andcc %o5,2,%g0 ! (1_0) c1 & 2 + bnz,pn %icc,.update15 ! (1_0) if ( (c1 & 2) != 0 ) + fmovd %f20,%f4 ! (0_0) dmax = x; +.cont15: + fpsub32 DC1,%f14,%f10 ! (3_1) dnorm = vis_fpsub32(DC1,dmax); + add %l7,stridex,%o1 ! px += stridex + lda [%i3]%asi,%f28 ! (2_0) ((float*)&y)[0] = ((float*)py)[0]; + + fmuld %f44,%f44,%f2 ! (2_1) res = x_hi * x_hi; + add %i5,stridez,%g5 ! pz += stridez + lda [%i3+4]%asi,%f29 ! (2_0) ((float*)&y)[1] = ((float*)py)[1]; + faddd %f34,%f58,%f60 ! (2_1) dtmp2 = y + y_hi; + + fsqrtd %f26,%f24 ! (1_1) res = sqrt(res); + lda [%o1]%asi,%f26 ! (2_0) ((float*)&x)[0] = ((float*)px)[0]; + faddd %f50,%f44,%f56 ! (2_1) dtmp1 = x + x_hi; + + fmuld %f18,%f10,%f6 ! (3_1) x *= dnorm; + fsubd %f50,%f44,%f18 ! (2_1) x_lo = x - x_hi; + lda [%o1+4]%asi,%f27 ! (2_0) ((float*)&x)[1] = ((float*)px)[1]; + + fmuld %f58,%f58,%f44 ! (2_1) dtmp0 = y_hi * y_hi; + fsubd %f34,%f58,%f22 ! (2_1) y_lo = y - y_hi; + + fmuld %f30,%f10,%f58 ! (3_1) y *= dnorm; + fabsd %f28,%f34 ! (2_0) y = fabs(y); + + fmuld %f16,%f12,%f16 ! (0_1) res = dmax * res; + fabsd %f26,%f50 ! (2_0) x = fabs(x); + st %f16,[%g5] ! (0_1) ((float*)pz)[0] = ((float*)&res)[0]; + + fmuld %f56,%f18,%f10 ! (2_1) dtmp1 *= x_lo; + st %f17,[%g5+4] ! (0_1) ((float*)pz)[1] = ((float*)&res)[1]; + fcmped %fcc1,%f40,%f4 ! (0_0) dmax ? y + + fmuld %f60,%f22,%f12 ! (2_1) dtmp2 *= y_lo; + + fmovdg %fcc1,%f40,%f4 ! (0_0) if ( dmax < y ) dmax = y; + + faddd %f6,D2ON28,%f56 ! (3_1) x_hi = x + D2ON28; + fcmple32 DC1,%f50,%o3 ! (2_0) c0 = vis_fcmple32(DC1,x); + + faddd %f58,D2ON28,%f28 ! (3_1) y_hi = y + D2ON28; + fcmple32 DC1,%f34,%o0 ! (2_0) c2 = vis_fcmple32(DC1,y); + + faddd %f2,%f44,%f30 ! (2_1) res += dtmp0; + fcmpgt32 DC2,%f50,%o4 ! (2_0) c1 = vis_fcmpgt32(DC2,x); + + faddd %f10,%f12,%f26 ! (2_1) dtmp1 += dtmp2; + fcmpgt32 DC2,%f34,%o5 ! (2_0) c3 = vis_fcmpgt32(DC2,y); + + fand %f4,DC0,%f16 ! (0_0) dmax = vis_fand(dmax,DC0); + + or %o3,%o0,%o3 ! (2_0) c0 |= c2; + fsubd %f56,D2ON28,%f18 ! 
(3_1) x_hi -= D2ON28; + + andcc %o3,2,%g0 ! (2_0) c0 & 2 + bnz,pn %icc,.update16 ! (2_0) if ( (c0 & 2) != 0 ) + fsubd %f28,D2ON28,%f4 ! (3_1) y_hi -= D2ON28; +.cont16: + and %o4,%o5,%o4 ! (2_0) c1 &= c3; + faddd %f30,%f26,%f12 ! (2_1) res += dtmp1; + + add %i3,stridey,%l4 ! py += stridey + andcc %o4,2,%g0 ! (2_0) c1 & 2 + bnz,pn %icc,.update17 ! (2_0) if ( (c1 & 2) != 0 ) + fmovd %f36,%f56 ! (1_0) dmax = x; +.cont17: + lda [%l4]%asi,%f30 ! (3_0) ((float*)&y)[0] = ((float*)py)[0]; + add %o1,stridex,%l2 ! px += stridex + fpsub32 DC1,%f16,%f44 ! (0_0) dnorm = vis_fpsub32(DC1,dmax); + + fmuld %f18,%f18,%f60 ! (3_1) res = x_hi * x_hi; + add %g5,stridez,%i5 ! pz += stridez + lda [%l4+4]%asi,%f31 ! (3_0) ((float*)&y)[1] = ((float*)py)[1]; + faddd %f58,%f4,%f32 ! (3_1) dtmp2 = y + y_hi; + + fsqrtd %f12,%f12 ! (2_1) res = sqrt(res); + subcc counter,4,counter ! counter -= 4; + bpos,pt %icc,.main_loop + faddd %f6,%f18,%f28 ! (3_1) dtmp1 = x + x_hi; + + add counter,4,counter + +.tail: + subcc counter,1,counter + bneg,a .begin + nop + + fsubd %f6,%f18,%f20 ! (3_2) x_lo = x - x_hi; + + fmuld %f4,%f4,%f22 ! (3_2) dtmp0 = y_hi * y_hi; + fsubd %f58,%f4,%f58 ! (3_2) y_lo = y - y_hi; + + fmuld %f38,%f24,%f10 ! (1_2) res = dmax * res; + st %f10,[%i5] ! (1_2) ((float*)pz)[0] = ((float*)&res)[0]; + + st %f11,[%i5+4] ! (1_2) ((float*)pz)[1] = ((float*)&res)[1]; + + subcc counter,1,counter + bneg,a .begin + add %i5,stridez,%i5 + + fmuld %f28,%f20,%f28 ! (3_2) dtmp1 *= x_lo; + + fmuld %f32,%f58,%f24 ! (3_2) dtmp2 *= y_lo; + + faddd %f60,%f22,%f22 ! (3_2) res += dtmp0; + + faddd %f28,%f24,%f26 ! (3_2) dtmp1 += dtmp2; + + faddd %f22,%f26,%f28 ! (3_2) res += dtmp1; + + add %i5,stridez,%l6 ! pz += stridez + + fsqrtd %f28,%f4 ! (3_2) res = sqrt(res); + add %l2,stridex,%l1 ! px += stridex + + fmuld %f52,%f12,%f12 ! (2_2) res = dmax * res; + st %f12,[%l6] ! (2_2) ((float*)pz)[0] = ((float*)&res)[0]; + + st %f13,[%l6+4] ! 
(2_2) ((float*)pz)[1] = ((float*)&res)[1]; + + subcc counter,1,counter + bneg .begin + add %l6,stridez,%i5 + + fmuld %f14,%f4,%f14 ! (3_2) res = dmax * res; + st %f14,[%i5] ! (3_2) ((float*)pz)[0] = ((float*)&res)[0]; + + st %f15,[%i5+4] ! (3_2) ((float*)pz)[1] = ((float*)&res)[1]; + + ba .begin + add %i5,stridez,%i5 + + .align 16 +! .spec0: slow path entered from .begin1 when max(hx,hy) >= 0x7fe00000; handles one element, then returns to .begin1. +.spec0: + ld [%i1+4],%l1 ! lx = ((int*)px)[1]; + cmp %o2,%o4 ! j0 ? 0x7ff00000 + bge,pn %icc,1f ! if ( j0 >= 0x7ff00000 ) + fabsd %f26,%f26 ! x = fabs(x); + + sub %o0,%l4,%o0 ! diff = hx - hy; (pseudo-code writes hy - hx; only |diff| is used below) + fabsd %f24,%f24 ! y = fabs(y); + + sra %o0,31,%l4 ! j0 = diff >> 31; + + xor %o0,%l4,%o0 ! diff ^ j0 + + sethi %hi(0x03600000),%l1 + sub %o0,%l4,%o0 ! (diff ^ j0) - j0 + + cmp %o0,%l1 ! ((diff ^ j0) - j0) ? 0x03600000 + bge,a,pn %icc,2f ! if ( ((diff ^ j0) - j0) >= 0x03600000 ) + faddd %f26,%f24,%f24 ! *pz = x + y + + fmuld %f26,DC2,%f36 ! (1_1) x *= D2ONM1022; (DC2 = 2^-1022, scale down) + + fmuld %f24,DC2,%f56 ! (1_1) y *= D2ONM1022; (DC2 = 2^-1022, scale down) + + faddd %f36,D2ON28,%f58 ! (1_1) x_hi = x + D2ON28; + + faddd %f56,D2ON28,%f22 ! (1_1) y_hi = y + D2ON28; + + fsubd %f58,D2ON28,%f58 ! (1_1) x_hi -= D2ON28; + + fsubd %f22,D2ON28,%f22 ! (1_1) y_hi -= D2ON28; + + fmuld %f58,%f58,%f60 ! (1_1) res = x_hi * x_hi; + faddd %f56,%f22,%f28 ! (1_1) dtmp2 = y + y_hi; + + faddd %f36,%f58,%f6 ! (1_1) dtmp1 = x + x_hi; + + fsubd %f36,%f58,%f58 ! (1_1) x_lo = x - x_hi; + + fmuld %f22,%f22,%f2 ! (1_1) dtmp0 = y_hi * y_hi; + fsubd %f56,%f22,%f56 ! (1_1) y_lo = y - y_hi; + + fmuld %f6,%f58,%f10 ! (1_1) dtmp1 *= x_lo; + + fmuld %f28,%f56,%f26 ! (1_1) dtmp2 *= y_lo; + + faddd %f60,%f2,%f24 ! (1_1) res += dtmp0; + + faddd %f10,%f26,%f28 ! (1_1) dtmp1 += dtmp2; + + faddd %f24,%f28,%f26 ! (1_1) res += dtmp1; + + fsqrtd %f26,%f24 ! (1_1) res = sqrt(res); + + fmuld DC3,%f24,%f24 ! (1_2) res = D2ONP1022 * res; (DC3 = 2^1022, undo the scaling) +2: + add %i3,stridey,%i3 + add %i1,stridex,%i1 + st %f24,[%i5] ! ((float*)pz)[0] = ((float*)&res)[0]; + st %f25,[%i5+4] !
((float*)pz)[1] = ((float*)&res)[1]; + + add %i5,stridez,%i5 + ba .begin1 + sub counter,1,counter + +1: + ld [%i3+4],%l2 ! ly = ((int*)py)[1]; + cmp %o0,%o4 ! hx ? 0x7ff00000 + bne,pn %icc,1f ! if ( hx != 0x7ff00000 ) + fabsd %f24,%f24 ! y = fabs(y); + + cmp %l1,0 ! lx ? 0 + be,pn %icc,2f ! if ( lx == 0 ) + nop +1: + cmp %l4,%o4 ! hy ? 0x7ff00000 + bne,pn %icc,1f ! if ( hy != 0x7ff00000 ) + nop + + cmp %l2,0 ! ly ? 0 + be,pn %icc,2f ! if ( ly == 0 ) + nop +1: + add %i3,stridey,%i3 + add %i1,stridex,%i1 + fmuld %f26,%f24,%f24 ! res = x * y; + st %f24,[%i5] ! ((float*)pz)[0] = ((float*)&res)[0]; + + st %f25,[%i5+4] ! ((float*)pz)[1] = ((float*)&res)[1]; + + add %i5,stridez,%i5 + ba .begin1 + sub counter,1,counter + +2: + add %i1,stridex,%i1 + add %i3,stridey,%i3 + st DC0_HI,[%i5] ! ((int*)pz)[0] = 0x7ff00000; + st DC0_LO,[%i5+4] ! ((int*)pz)[1] = 0; + fcmpd %f26,%f24 ! x ? y + + add %i5,stridez,%i5 + ba .begin1 + sub counter,1,counter + + .align 16 +! .spec1: slow path entered from .begin1 when max(hx,hy) < 0x00100000; handles one element, then returns to .begin1. +.spec1: + fmuld %f26,DC3,%f36 ! (1_1) x *= D2ONP1022; (DC3 = 2^1022, scale up) + + fmuld %f24,DC3,%f56 ! (1_1) y *= D2ONP1022; (DC3 = 2^1022, scale up) + + faddd %f36,D2ON28,%f58 ! (1_1) x_hi = x + D2ON28; + + faddd %f56,D2ON28,%f22 ! (1_1) y_hi = y + D2ON28; + + fsubd %f58,D2ON28,%f58 ! (1_1) x_hi -= D2ON28; + + fsubd %f22,D2ON28,%f22 ! (1_1) y_hi -= D2ON28; + + fmuld %f58,%f58,%f60 ! (1_1) res = x_hi * x_hi; + faddd %f56,%f22,%f28 ! (1_1) dtmp2 = y + y_hi; + + faddd %f36,%f58,%f6 ! (1_1) dtmp1 = x + x_hi; + + fsubd %f36,%f58,%f58 ! (1_1) x_lo = x - x_hi; + + fmuld %f22,%f22,%f2 ! (1_1) dtmp0 = y_hi * y_hi; + fsubd %f56,%f22,%f56 ! (1_1) y_lo = y - y_hi; + + fmuld %f6,%f58,%f10 ! (1_1) dtmp1 *= x_lo; + + fmuld %f28,%f56,%f26 ! (1_1) dtmp2 *= y_lo; + + faddd %f60,%f2,%f24 ! (1_1) res += dtmp0; + + faddd %f10,%f26,%f28 ! (1_1) dtmp1 += dtmp2; + + faddd %f24,%f28,%f26 ! (1_1) res += dtmp1; + + fsqrtd %f26,%f24 ! (1_1) res = sqrt(res); + + fmuld DC2,%f24,%f24 ! (1_2) res = D2ONM1022 * res; (DC2 = 2^-1022, undo the scaling) + + add %i3,stridey,%i3 + add %i1,stridex,%i1 + st %f24,[%i5] !
((float*)pz)[0] = ((float*)&res)[0]; + + st %f25,[%i5+4] ! ((float*)pz)[1] = ((float*)&res)[1]; + add %i5,stridez,%i5 + ba .begin1 + sub counter,1,counter + + .align 16 +! .updateN stubs: reached when the c0/c1 range check fails for an element being loaded. +! Each one zeroes that element's x and y registers; if more than N elements remain it also +! saves the element's px/py and the leftover count in tmp_px/tmp_py/tmp_counter and cuts +! counter to N, so that .begin restarts at that element. It then resumes at its own .contN. +.update0: + fzero %f50 + cmp counter,1 + ble .cont0 + fzero %f34 + + mov %o1,tmp_px + mov %i3,tmp_py + + sub counter,1,tmp_counter + ba .cont0 + mov 1,counter + + .align 16 +.update1: + fzero %f50 + cmp counter,1 + ble .cont1 + fzero %f34 + + mov %o1,tmp_px + mov %i3,tmp_py + + sub counter,1,tmp_counter + ba .cont1 + mov 1,counter + + .align 16 +.update2: + fzero %f18 + cmp counter,2 + ble .cont2 + fzero %f30 + + mov %l2,tmp_px + mov %l4,tmp_py + + sub counter,2,tmp_counter + ba .cont2 ! was .cont1: that re-ran the loads with %o1 already overwritten by fcmpgt32 + mov 2,counter + + .align 16 +.update3: + fzero %f18 + cmp counter,2 + ble .cont3 + fzero %f30 + + mov %l2,tmp_px + mov %l4,tmp_py + + sub counter,2,tmp_counter + ba .cont3 + mov 2,counter + + .align 16 +.update4: + fzero %f20 + cmp counter,3 + ble .cont4 + fzero %f40 + + mov %l1,tmp_px + mov %i3,tmp_py + + sub counter,3,tmp_counter + ba .cont4 + mov 3,counter + + .align 16 +.update5: + fzero %f20 + cmp counter,3 + ble .cont5 + fzero %f40 + + mov %l1,tmp_px + mov %i3,tmp_py + + sub counter,3,tmp_counter + ba .cont5 + mov 3,counter + + .align 16 +.update6: + fzero %f36 + cmp counter,4 + ble .cont6 + fzero %f54 + + mov %l7,tmp_px + mov %l2,tmp_py + + sub counter,4,tmp_counter + ba .cont6 + mov 4,counter + + .align 16 +.update7: + fzero %f36 + cmp counter,4 + ble .cont7 + fzero %f54 + + mov %l7,tmp_px + mov %l2,tmp_py + + sub counter,4,tmp_counter + ba .cont7 + mov 4,counter + + .align 16 +.update8: + fzero %f50 + cmp counter,5 + ble .cont8 + fzero %f34 + + mov %o1,tmp_px + mov %i3,tmp_py + + sub counter,5,tmp_counter + ba .cont8 + mov 5,counter + + .align 16 +.update9: + fzero %f50 + cmp counter,5 + ble .cont9 + fzero %f34 + + mov %o1,tmp_px + mov %i3,tmp_py + + sub counter,5,tmp_counter + ba .cont9 + mov 5,counter + + + .align 16 +.update10: + fzero %f18 + cmp counter,2 + ble .cont10 + fzero %f30 + + mov %l2,tmp_px +
mov %l4,tmp_py + + sub counter,2,tmp_counter + ba .cont10 + mov 2,counter + + .align 16 +.update11: + fzero %f18 + cmp counter,2 + ble .cont11 + fzero %f30 + + mov %l2,tmp_px + mov %l4,tmp_py + + sub counter,2,tmp_counter + ba .cont11 + mov 2,counter + + .align 16 +.update12: + fzero %f20 + cmp counter,3 + ble .cont12 + fzero %f40 + + mov %l1,tmp_px + mov %i3,tmp_py + + sub counter,3,tmp_counter + ba .cont12 + mov 3,counter + + .align 16 +.update13: + fzero %f20 + cmp counter,3 + ble .cont13 + fzero %f40 + + mov %l1,tmp_px + mov %i3,tmp_py + + sub counter,3,tmp_counter + ba .cont13 + mov 3,counter + + .align 16 +.update14: + fzero %f54 + cmp counter,4 + ble .cont14 + fzero %f36 + + mov %l7,tmp_px + mov %l2,tmp_py + + sub counter,4,tmp_counter + ba .cont14 + mov 4,counter + + .align 16 +.update15: + fzero %f54 + cmp counter,4 + ble .cont15 + fzero %f36 + + mov %l7,tmp_px + mov %l2,tmp_py + + sub counter,4,tmp_counter + ba .cont15 + mov 4,counter + + .align 16 +.update16: + fzero %f50 + cmp counter,5 + ble .cont16 + fzero %f34 + + mov %o1,tmp_px + mov %i3,tmp_py + + sub counter,5,tmp_counter + ba .cont16 + mov 5,counter + + .align 16 +.update17: + fzero %f50 + cmp counter,5 + ble .cont17 + fzero %f34 + + mov %o1,tmp_px + mov %i3,tmp_py + + sub counter,5,tmp_counter + ba .cont17 + mov 5,counter + + .align 16 +.exit: + ret + restore + SET_SIZE(__vhypot) + diff --git a/usr/src/lib/libmvec/common/vis/__vhypotf.S b/usr/src/lib/libmvec/common/vis/__vhypotf.S new file mode 100644 index 0000000000..4be65b8199 --- /dev/null +++ b/usr/src/lib/libmvec/common/vis/__vhypotf.S @@ -0,0 +1,1227 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .file "__vhypotf.S" + +#include "libm.h" + + RO_DATA + .align 64 + +.CONST_TBL: + .word 0x3fe00001, 0x80007e00 ! K1 = 5.00000715259318464227e-01 + .word 0xbfc00003, 0xc0017a01 ! K2 = -1.25000447037521686593e-01 + .word 0x000fffff, 0xffffffff ! DC0 = 0x000fffffffffffff + .word 0x3ff00000, 0x00000000 ! DC1 = 0x3ff0000000000000 + .word 0x7ffff000, 0x00000000 ! DC2 = 0x7ffff00000000000 + .word 0x7fe00000, 0x00000000 ! DA0 = 0x7fe0000000000000 + .word 0x47efffff, 0xe0000000 ! DFMAX = 3.402823e+38 + .word 0x7f7fffff, 0x80808080 ! FMAX = 3.402823e+38 , SCALE = 0x80808080 + .word 0x20000000, 0x00000000 ! DA1 = 0x2000000000000000 + +#define DC0 %f12 +#define DC1 %f10 +#define DC2 %f42 +#define DA0 %f6 +#define DA1 %f4 +#define K2 %f26 +#define K1 %f28 +#define SCALE %f3 +#define FMAX %f2 +#define DFMAX %f50 + +#define stridex %l6 +#define stridey %i4 +#define stridez %l5 +#define _0x7fffffff %o1 +#define _0x7f3504f3 %o2 +#define _0x1ff0 %l2 +#define TBL %l1 + +#define counter %l0 + +#define tmp_px STACK_BIAS-0x30 +#define tmp_py STACK_BIAS-0x28 +#define tmp_counter STACK_BIAS-0x20 +#define tmp0 STACK_BIAS-0x18 +#define tmp1 STACK_BIAS-0x10 +#define tmp2 STACK_BIAS-0x0c +#define tmp3 STACK_BIAS-0x08 +#define tmp4 STACK_BIAS-0x04 + +! 
sizeof temp storage - must be a multiple of 16 for V9 +#define tmps 0x30 + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +! !!!!! algorithm !!!!! +! hx0 = *(int*)px; +! x0 = *px; +! px += stridex; +! +! hy0 = *(int*)py; +! y0 = *py; +! py += stridey; +! +! hx0 &= 0x7fffffff; +! hy0 &= 0x7fffffff; +! +! if ( hx0 >= 0x7f3504f3 || hy0 >= 0x7f3504f3 ) +! { +! if ( hx0 >= 0x7f800000 || hy0 >= 0x7f800000 ) +! { +! if ( hx0 == 0x7f800000 || hy0 == 0x7f800000 ) +! *(int*)pz = 0x7f800000; +! else *pz = x0 * y0; +! } +! else +! { +! hyp = sqrt(x0 * (double)x0 + y0 * (double)y0); +! if ( hyp <= DFMAX ) ftmp0 = (float)hyp; +! else ftmp0 = FMAX * FMAX; +! *pz = ftmp0; +! } +! pz += stridez; +! continue; +! } +! if ( (hx0 | hy0) == 0 ) +! { +! *pz = 0; +! pz += stridez; +! continue; +! } +! dx0 = x0 * (double)x0; +! dy0 = y0 * (double)y0; +! db0 = dx0 + dy0; +! +! iexp0 = ((int*)&db0)[0]; +! +! h0 = vis_fand(db0,DC0); +! h0 = vis_for(h0,DC1); +! h_hi0 = vis_fand(h0,DC2); +! +! db0 = vis_fand(db0,DA0); +! db0 = vis_fmul8x16(SCALE, db0); +! db0 = vis_fpadd32(db0,DA1); +! +! iexp0 >>= 8; +! di0 = iexp0 & 0x1ff0; +! si0 = (char*)sqrt_arr + di0; +! +! dtmp0 = ((double*)((char*)div_arr + di0))[0]; +! xx0 = h0 - h_hi0; +! xx0 *= dtmp0; +! +! dtmp0 = ((double*)si0)[1]; +! res0 = K2 * xx0; +! res0 += K1; +! res0 *= xx0; +! res0 += DC1; +! res0 = dtmp0 * res0; +! res0 *= db0; +! ftmp0 = (float)res0; +! *pz = ftmp0; +! pz += stridez; +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
+ + ENTRY(__vhypotf) + save %sp,-SA(MINFRAME)-tmps,%sp + PIC_SETUP(l7) + PIC_SET(l7,.CONST_TBL,o3) + PIC_SET(l7,__vlibm_TBL_sqrtf,l1) + +#ifdef __sparcv9 + ldx [%fp+STACK_BIAS+176],stridez +#else + ld [%fp+STACK_BIAS+92],stridez +#endif + st %i0,[%fp+tmp_counter] + + stx %i1,[%fp+tmp_px] + + stx %i3,[%fp+tmp_py] + + ldd [%o3],K1 + sethi %hi(0x7ffffc00),%o1 + + ldd [%o3+8],K2 + sethi %hi(0x7f350400),%o2 + + ldd [%o3+16],DC0 + add %o1,1023,_0x7fffffff + add %o2,0xf3,_0x7f3504f3 + + ldd [%o3+24],DC1 + sll %i2,2,stridex + + ld [%o3+56],FMAX + + ldd [%o3+32],DC2 + sll %i4,2,stridey + + ldd [%o3+40],DA0 + sll stridez,2,stridez + + ldd [%o3+48],DFMAX + + ld [%o3+60],SCALE + or %g0,0xff8,%l2 + + ldd [%o3+64],DA1 + sll %l2,1,_0x1ff0 + or %g0,%i5,%l7 + +.begin: + ld [%fp+tmp_counter],counter + ldx [%fp+tmp_px],%i1 + ldx [%fp+tmp_py],%i2 + st %g0,[%fp+tmp_counter] +.begin1: + cmp counter,0 + ble,pn %icc,.exit + lda [%i1]0x82,%l3 ! (3_0) hx0 = *(int*)px; + + lda [%i2]0x82,%l4 ! (3_0) hy0 = *(int*)py; + + lda [%i1]0x82,%f17 ! (3_0) x0 = *px; + and %l3,_0x7fffffff,%l3 ! (3_0) hx0 &= 0x7fffffff; + + cmp %l3,_0x7f3504f3 ! (3_0) hx ? 0x7f3504f3 + bge,pn %icc,.spec ! (3_0) if ( hx >= 0x7f3504f3 ) + and %l4,_0x7fffffff,%l4 ! (3_0) hy0 &= 0x7fffffff; + + cmp %l4,_0x7f3504f3 ! (3_0) hy ? 0x7f3504f3 + bge,pn %icc,.spec ! (3_0) if ( hy >= 0x7f3504f3 ) + or %g0,%i2,%o7 + + orcc %l3,%l4,%g0 + bz,pn %icc,.spec1 + + add %i1,stridex,%i1 ! px += stridex + fsmuld %f17,%f17,%f44 ! (3_0) dx0 = x0 * (double)x0; + lda [%i2]0x82,%f17 ! (3_0) y0 = *py; + + lda [%i1]0x82,%l3 ! (4_0) hx0 = *(int*)px; + + lda [stridey+%o7]0x82,%l4 ! (4_0) hy0 = *(int*)py; + + and %l3,_0x7fffffff,%l3 ! (4_0) hx0 &= 0x7fffffff; + + fsmuld %f17,%f17,%f24 ! (3_0) dy0 = y0 * (double)y0; + cmp %l3,_0x7f3504f3 ! (4_0) hx ? 0x7f3504f3 + bge,pn %icc,.update0 ! (4_0) if ( hx >= 0x7f3504f3 ) + and %l4,_0x7fffffff,%l4 ! (4_0) hy0 &= 0x7fffffff; + + orcc %l3,%l4,%g0 + bz,pn %icc,.update0 + lda [%i1]0x82,%f17 ! 
(4_0) x0 = *px; +.cont0: + faddd %f44,%f24,%f24 ! (3_0) db0 = dx0 + dy0; + + fsmuld %f17,%f17,%f40 ! (4_1) dy0 = x0 * (double)x0; + cmp %l4,_0x7f3504f3 ! (4_1) hy ? 0x7f3504f3 + lda [stridey+%o7]0x82,%f17 ! (4_1) hy0 = *py; + + add %o7,stridey,%i5 ! py += stridey + lda [%i1+stridex]0x82,%l3 ! (0_0) hx0 = *(int*)px; + + bge,pn %icc,.update1 ! (4_1) if ( hy >= 0x7f3504f3 ) + st %f24,[%fp+tmp0] ! (3_1) iexp0 = ((int*)&db0)[0]; +.cont1: + and %l3,_0x7fffffff,%l3 ! (0_0) hx0 &= 0x7fffffff; + + fsmuld %f17,%f17,%f48 ! (4_1) dy0 = y0 * (double)y0; + lda [%i1+stridex]0x82,%f8 ! (0_0) x0 = *px; + + add %i1,stridex,%i1 ! px += stridex + + lda [%i5+stridey]0x82,%l4 ! (0_0) hy0 = *(int*)py; + cmp %l3,_0x7f3504f3 ! (0_0) hx ? 0x7f3504f3 + bge,pn %icc,.update2 ! (0_0) if ( hx >= 0x7f3504f3 ) + add %i5,stridey,%o4 ! py += stridey +.cont2: + faddd %f40,%f48,%f20 ! (4_1) db0 = dx0 + dy0; + + fsmuld %f8,%f8,%f40 ! (0_0) dx0 = x0 * (double)x0; + and %l4,_0x7fffffff,%l4 ! (0_0) hy0 &= 0x7fffffff; + lda [%i5+stridey]0x82,%f17 ! (0_0) hy0 = *py; + + cmp %l4,_0x7f3504f3 ! (0_0) hy ? 0x7f3504f3 + bge,pn %icc,.update3 ! (0_0) if ( hy >= 0x7f3504f3 ) + st %f20,[%fp+tmp1] ! (4_1) iexp0 = ((int*)&db0)[0]; + + orcc %l3,%l4,%g0 + bz,pn %icc,.update3 +.cont3: + lda [%i1+stridex]0x82,%l3 ! (1_0) hx0 = *(int*)px; + + fand %f24,DC0,%f60 ! (3_1) h0 = vis_fand(db0,DC0); + + and %l3,_0x7fffffff,%l3 ! (1_0) hx0 &= 0x7fffffff; + + fsmuld %f17,%f17,%f34 ! (0_0) dy0 = y0 * (double)y0; + cmp %l3,_0x7f3504f3 ! (1_0) hx ? 0x7f3504f3 + lda [%o4+stridey]0x82,%l4 ! (1_0) hy0 = *(int*)py; + + add %i1,stridex,%i1 ! px += stridex + + lda [%i1]0x82,%f17 ! (1_0) x0 = *px; + bge,pn %icc,.update4 ! (1_0) if ( hx >= 0x7f3504f3 ) + add %o4,stridey,%i5 ! py += stridey +.cont4: + and %l4,_0x7fffffff,%l4 ! (1_0) hy0 &= 0x7fffffff; + for %f60,DC1,%f46 ! (3_1) h0 = vis_for(h0,DC1); + + cmp %l4,_0x7f3504f3 ! (1_0) hy ? 0x7f3504f3 + ld [%fp+tmp0],%o0 ! (3_1) iexp0 = ((int*)&db0)[0]; + faddd %f40,%f34,%f0 ! 
(0_0) db0 = dx0 + dy0; + + fsmuld %f17,%f17,%f40 ! (1_0) dx0 = x0 * (double)x0; + add %i1,stridex,%i1 ! px += stridex + lda [%o4+stridey]0x82,%f17 ! (1_0) y0 = *py; + + srax %o0,8,%o0 ! (3_1) iexp0 >>= 8; + bge,pn %icc,.update5 ! (1_0) if ( hy >= 0x7f3504f3 ) + fand %f46,DC2,%f38 ! (3_1) h_hi0 = vis_fand(h0,DC2); + + orcc %l3,%l4,%g0 + bz,pn %icc,.update5 +.cont5: + lda [%i1]0x82,%l3 ! (2_0) hx0 = *(int*)px; + + and %o0,_0x1ff0,%o0 ! (3_1) di0 = iexp0 & 0x1ff0; + st %f0,[%fp+tmp2] ! (0_0) iexp0 = ((int*)&db0)[0]; + fand %f20,DC0,%f60 ! (4_1) h0 = vis_fand(db0,DC0); + + ldd [TBL+%o0],%f22 ! (3_1) dtmp0 = ((double*)((char*)div_arr + di0))[0]; + fsubd %f46,%f38,%f38 ! (3_1) xx0 = h0 - h_hi0; + + fsmuld %f17,%f17,%f32 ! (1_0) dy0 = y0 * (double)y0; + add %i5,stridey,%i2 ! py += stridey + lda [stridey+%i5]0x82,%l4 ! (2_0) hy0 = *(int*)py; + + and %l3,_0x7fffffff,%l3 ! (2_0) hx0 &= 0x7fffffff; + + lda [%i1]0x82,%f17 ! (2_0) x0 = *px; + cmp %l3,_0x7f3504f3 ! (2_0) hx ? 0x7f3504f3 + + fmuld %f38,%f22,%f38 ! (3_1) xx0 *= dmp0; + and %l4,_0x7fffffff,%l4 ! (2_0) hy0 &= 0x7fffffff; + for %f60,DC1,%f46 ! (4_1) h0 = vis_for(h0,DC1); + + bge,pn %icc,.update6 ! (2_0) if ( hx >= 0x7f3504f3 ) + ld [%fp+tmp1],%o3 ! (4_1) iexp0 = ((int*)&db0)[0]; +.cont6: + faddd %f40,%f32,%f18 ! (1_0) db0 = dx0 + dy0; + + fsmuld %f17,%f17,%f44 ! (2_0) dx0 = x0 * (double)x0; + cmp %l4,_0x7f3504f3 ! (2_0) hy ? 0x7f3504f3 + lda [stridey+%i5]0x82,%f17 ! (2_0) y0 = *py; + + add %i1,stridex,%i1 ! px += stridex + bge,pn %icc,.update7 ! (2_0) if ( hy >= 0x7f3504f3 ) + fand %f46,DC2,%f58 ! (4_1) h_hi0 = vis_fand(h0,DC2); + + orcc %l3,%l4,%g0 + bz,pn %icc,.update7 + nop +.cont7: + fmuld K2,%f38,%f56 ! (3_1) res0 = K2 * xx0; + srax %o3,8,%o3 ! (4_1) iexp0 >>= 8; + lda [%i1]0x82,%l3 ! (3_0) hx0 = *(int*)px; + + and %o3,_0x1ff0,%o3 ! (4_1) di0 = iexp0 & 0x1ff0; + st %f18,[%fp+tmp3] ! (1_0) iexp0 = ((int*)&db0)[0]; + fand %f0,DC0,%f60 ! (0_0) h0 = vis_fand(db0,DC0); + + ldd [TBL+%o3],%f22 ! 
(4_1) dtmp0 = ((double*)((char*)div_arr + di0))[0]; + add %i2,stridey,%o7 ! py += stridey + fsubd %f46,%f58,%f58 ! (4_1) xx0 = h0 - h_hi0; + + fsmuld %f17,%f17,%f30 ! (2_0) dy0 = y0 * (double)y0; + lda [stridey+%i2]0x82,%l4 ! (3_0) hy0 = *(int*)py; + and %l3,_0x7fffffff,%l3 ! (3_0) hx0 &= 0x7fffffff; + + faddd %f56,K1,%f54 ! (3_1) res0 += K1; + cmp %l3,_0x7f3504f3 ! (3_0) hx ? 0x7f3504f3 + + lda [%i1]0x82,%f17 ! (3_0) x0 = *px; + add %i1,stridex,%i1 ! px += stridex + bge,pn %icc,.update8 ! (3_0) if ( hx >= 0x7f3504f3 ) + + fmuld %f58,%f22,%f58 ! (4_1) xx0 *= dmp0; +.cont8: + and %l4,_0x7fffffff,%l4 ! (3_0) hy0 &= 0x7fffffff; + for %f60,DC1,%f46 ! (0_0) h0 = vis_for(h0,DC1); + + cmp %l4,_0x7f3504f3 ! (3_0) hy ? 0x7f3504f3 + ld [%fp+tmp2],%g1 ! (0_0) iexp0 = ((int*)&db0)[0]; + faddd %f44,%f30,%f30 ! (2_0) db0 = dx0 + dy0; + + fsmuld %f17,%f17,%f44 ! (3_0) dx0 = x0 * (double)x0; + bge,pn %icc,.update9 ! (3_0) if ( hy >= 0x7f3504f3 ) + lda [stridey+%i2]0x82,%f17 ! (3_0) y0 = *py; + + orcc %l3,%l4,%g0 + bz,pn %icc,.update9 + nop +.cont9: + fmuld %f54,%f38,%f40 ! (3_1) res0 *= xx0; + lda [%i1]0x82,%l3 ! (4_0) hx0 = *(int*)px; + fand %f46,DC2,%f38 ! (0_0) h_hi0 = vis_fand(h0,DC2); + + fmuld K2,%f58,%f54 ! (4_1) res0 = K2 * xx0; + srax %g1,8,%o5 ! (0_0) iexp0 >>= 8; + lda [stridey+%o7]0x82,%l4 ! (4_0) hy0 = *(int*)py; + fand %f24,DA0,%f56 ! (3_1) db0 = vis_fand(db0,DA0); + + and %o5,_0x1ff0,%o5 ! (0_0) di0 = iexp0 & 0x1ff0; + st %f30,[%fp+tmp4] ! (2_0) iexp0 = ((int*)&db0)[0]; + fand %f18,DC0,%f60 ! (1_0) h0 = vis_fand(db0,DC0); + + ldd [TBL+%o5],%f22 ! (0_0) dtmp0 = ((double*)((char*)div_arr + di0))[0]; + add %o0,TBL,%g1 ! (3_1) si0 = (char*)sqrt_arr + di0; + and %l3,_0x7fffffff,%l3 ! (4_0) hx0 &= 0x7fffffff; + fsubd %f46,%f38,%f38 ! (0_0) xx0 = h0 - h_hi0; + + fsmuld %f17,%f17,%f24 ! (3_0) dy0 = y0 * (double)y0; + cmp %l3,_0x7f3504f3 ! (4_0) hx ? 0x7f3504f3 + bge,pn %icc,.update10 ! (4_0) if ( hx >= 0x7f3504f3 ) + faddd %f40,DC1,%f40 ! 
(3_1) res0 += DC1; + + fmul8x16 SCALE,%f56,%f36 ! (3_1) db0 = vis_fmul8x16(SCALE, db0); + and %l4,_0x7fffffff,%l4 ! (4_0) hy0 &= 0x7fffffff; + ldd [%g1+8],%f56 ! (3_1) dtmp0 = ((double*)si0)[1]; + faddd %f54,K1,%f54 ! (4_1) res0 += K1; + + lda [%i1]0x82,%f17 ! (4_0) x0 = *px; +.cont10: + fmuld %f38,%f22,%f38 ! (0_0) xx0 *= dmp0; + cmp counter,5 + for %f60,DC1,%f46 ! (1_0) h0 = vis_for(h0,DC1); + + ld [%fp+tmp3],%g1 ! (1_0) iexp0 = ((int*)&db0)[0]; + fmuld %f56,%f40,%f62 ! (3_1) res0 = dtmp0 * res0; + faddd %f44,%f24,%f24 ! (3_0) db0 = dx0 + dy0; + + bl,pn %icc,.tail + nop + + ba .main_loop + sub counter,5,counter + + .align 16 +.main_loop: + fsmuld %f17,%f17,%f40 ! (4_1) dy0 = x0 * (double)x0; + cmp %l4,_0x7f3504f3 ! (4_1) hy ? 0x7f3504f3 + lda [stridey+%o7]0x82,%f17 ! (4_1) hy0 = *py; + fpadd32 %f36,DA1,%f36 ! (3_2) db0 = vis_fpadd32(db0,DA1); + + fmuld %f54,%f58,%f58 ! (4_2) res0 *= xx0; + add %o7,stridey,%i5 ! py += stridey + st %f24,[%fp+tmp0] ! (3_1) iexp0 = ((int*)&db0)[0]; + fand %f46,DC2,%f44 ! (1_1) h_hi0 = vis_fand(h0,DC2); + + fmuld K2,%f38,%f56 ! (0_1) res0 = K2 * xx0; + srax %g1,8,%g5 ! (1_1) iexp0 >>= 8; + bge,pn %icc,.update11 ! (4_1) if ( hy >= 0x7f3504f3 ) + fand %f20,DA0,%f54 ! (4_2) db0 = vis_fand(db0,DA0); + + orcc %l3,%l4,%g0 + nop + bz,pn %icc,.update11 + fzero %f52 +.cont11: + fmuld %f62,%f36,%f62 ! (3_2) res0 *= db0; + and %g5,_0x1ff0,%g5 ! (1_1) di0 = iexp0 & 0x1ff0; + lda [%i1+stridex]0x82,%l3 ! (0_0) hx0 = *(int*)px; + fand %f30,DC0,%f60 ! (2_1) h0 = vis_fand(db0,DC0); + + ldd [%g5+TBL],%f22 ! (1_1) dtmp0 = ((double*)((char*)div_arr + di0))[0]; + add %o3,TBL,%g1 ! (4_2) si0 = (char*)sqrt_arr + di0; + add %i1,stridex,%i0 ! px += stridex + fsubd %f46,%f44,%f44 ! (1_1) xx0 = h0 - h_hi0; + + fsmuld %f17,%f17,%f48 ! (4_1) dy0 = y0 * (double)y0; + nop + lda [%i1+stridex]0x82,%f8 ! (0_0) x0 = *px; + faddd %f58,DC1,%f36 ! (4_2) res0 += DC1; + + faddd %f56,K1,%f58 ! (0_1) res0 += K1; + and %l3,_0x7fffffff,%l3 ! 
(0_0) hx0 &= 0x7fffffff; + ldd [%g1+8],%f56 ! (4_2) dtmp0 = ((double*)si0)[1]; + fmul8x16 SCALE,%f54,%f54 ! (4_2) db0 = vis_fmul8x16(SCALE, db0); + + lda [%i5+stridey]0x82,%l4 ! (0_0) hy0 = *(int*)py; + cmp %l3,_0x7f3504f3 ! (0_0) hx ? 0x7f3504f3 + bge,pn %icc,.update12 ! (0_0) if ( hx >= 0x7f3504f3 ) + fdtos %f62,%f14 ! (3_2) ftmp0 = (float)res0; +.cont12: + fmuld %f44,%f22,%f44 ! (1_1) xx0 *= dmp0; + add %l7,stridez,%o7 ! pz += stridez + st %f14,[%l7] ! (3_2) *pz = ftmp0; + for %f60,DC1,%f46 ! (2_1) h0 = vis_for(h0,DC1); + + fmuld %f56,%f36,%f36 ! (4_2) res0 = dtmp0 * res0; + add %i5,stridey,%o4 ! py += stridey + ld [%fp+tmp4],%g1 ! (2_1) iexp0 = ((int*)&db0)[0]; + faddd %f40,%f48,%f20 ! (4_1) db0 = dx0 + dy0; + + fsmuld %f8,%f8,%f40 ! (0_0) dx0 = x0 * (double)x0; + and %l4,_0x7fffffff,%l4 ! (0_0) hy0 &= 0x7fffffff; + lda [%i5+stridey]0x82,%f17 ! (0_0) hy0 = *py; + fpadd32 %f54,DA1,%f62 ! (4_2) db0 = vis_fpadd32(db0,DA1); + + fmuld %f58,%f38,%f38 ! (0_1) res0 *= xx0; + cmp %l4,_0x7f3504f3 ! (0_0) hy ? 0x7f3504f3 + st %f20,[%fp+tmp1] ! (4_1) iexp0 = ((int*)&db0)[0]; + fand %f46,DC2,%f58 ! (2_1) h_hi0 = vis_fand(h0,DC2); + + fmuld K2,%f44,%f56 ! (1_1) res0 = K2 * xx0; + srax %g1,8,%g1 ! (2_1) iexp0 >>= 8; + bge,pn %icc,.update13 ! (0_0) if ( hy >= 0x7f3504f3 ) + fand %f0,DA0,%f54 ! (0_1) db0 = vis_fand(db0,DA0); + + orcc %l3,%l4,%g0 + nop + bz,pn %icc,.update13 + fzero %f52 +.cont13: + fmuld %f36,%f62,%f62 ! (4_2) res0 *= db0; + and %g1,_0x1ff0,%g1 ! (2_1) di0 = iexp0 & 0x1ff0; + lda [%i0+stridex]0x82,%l3 ! (1_0) hx0 = *(int*)px; + fand %f24,DC0,%f60 ! (3_1) h0 = vis_fand(db0,DC0); + + ldd [TBL+%g1],%f22 ! (2_1) dtmp0 = ((double*)((char*)div_arr + di0))[0]; + add %o5,TBL,%o0 ! (0_1) si0 = (char*)sqrt_arr + di0; + add %i0,stridex,%i1 ! px += stridex + fsubd %f46,%f58,%f58 ! (2_1) xx0 = h0 - h_hi0; + + fsmuld %f17,%f17,%f34 ! (0_0) dy0 = y0 * (double)y0; + add %o7,stridez,%i0 ! pz += stridez + lda [%o4+stridey]0x82,%l4 ! (1_0) hy0 = *(int*)py; + faddd %f38,DC1,%f36 ! 
(0_1) res0 += DC1; + + faddd %f56,K1,%f38 ! (1_1) res0 += K1; + and %l3,_0x7fffffff,%l3 ! (1_0) hx0 &= 0x7fffffff; + ldd [%o0+8],%f56 ! (0_1) dtmp0 = ((double*)si0)[1]; + fmul8x16 SCALE,%f54,%f54 ! (0_1) db0 = vis_fmul8x16(SCALE, db0); + + lda [%i1]0x82,%f17 ! (1_0) x0 = *px; + cmp %l3,_0x7f3504f3 ! (1_0) hx ? 0x7f3504f3 + bge,pn %icc,.update14 ! (1_0) if ( hx >= 0x7f3504f3 ) + fdtos %f62,%f14 ! (4_2) ftmp0 = (float)res0; +.cont14: + fmuld %f58,%f22,%f58 ! (2_1) xx0 *= dmp0; + and %l4,_0x7fffffff,%l4 ! (1_0) hy0 &= 0x7fffffff; + add %o4,stridey,%i5 ! py += stridey + for %f60,DC1,%f46 ! (3_1) h0 = vis_for(h0,DC1); + + fmuld %f56,%f36,%f36 ! (0_1) res0 = dtmp0 * res0; + cmp %l4,_0x7f3504f3 ! (1_0) hy ? 0x7f3504f3 + ld [%fp+tmp0],%o0 ! (3_1) iexp0 = ((int*)&db0)[0]; + faddd %f40,%f34,%f0 ! (0_0) db0 = dx0 + dy0; + + fsmuld %f17,%f17,%f40 ! (1_0) dx0 = x0 * (double)x0; + add %i1,stridex,%i1 ! px += stridex + lda [%o4+stridey]0x82,%f17 ! (1_0) y0 = *py; + fpadd32 %f54,DA1,%f62 ! (0_1) db0 = vis_fpadd32(db0,DA1); + + fmuld %f38,%f44,%f44 ! (1_1) res0 *= xx0; + st %f14,[%o7] ! (4_2) *pz = ftmp0; + bge,pn %icc,.update15 ! (1_0) if ( hy >= 0x7f3504f3 ) + fand %f46,DC2,%f38 ! (3_1) h_hi0 = vis_fand(h0,DC2); + + orcc %l3,%l4,%g0 + bz,pn %icc,.update15 + nop +.cont15: + fmuld K2,%f58,%f54 ! (2_1) res0 = K2 * xx0; + srax %o0,8,%o0 ! (3_1) iexp0 >>= 8; + st %f0,[%fp+tmp2] ! (0_0) iexp0 = ((int*)&db0)[0]; + fand %f18,DA0,%f56 ! (1_1) db0 = vis_fand(db0,DA0); + + fmuld %f36,%f62,%f62 ! (0_1) res0 *= db0; + and %o0,_0x1ff0,%o0 ! (3_1) di0 = iexp0 & 0x1ff0; + lda [%i1]0x82,%l3 ! (2_0) hx0 = *(int*)px; + fand %f20,DC0,%f60 ! (4_1) h0 = vis_fand(db0,DC0); + + ldd [TBL+%o0],%f22 ! (3_1) dtmp0 = ((double*)((char*)div_arr + di0))[0]; + add %g5,TBL,%o3 ! (1_1) si0 = (char*)sqrt_arr + di0; + add %i0,stridez,%i3 ! pz += stridez + fsubd %f46,%f38,%f38 ! (3_1) xx0 = h0 - h_hi0; + + fsmuld %f17,%f17,%f32 ! (1_0) dy0 = y0 * (double)y0; + add %i5,stridey,%i2 ! 
py += stridey + lda [stridey+%i5]0x82,%l4 ! (2_0) hy0 = *(int*)py; + faddd %f44,DC1,%f44 ! (1_1) res0 += DC1; + + fmul8x16 SCALE,%f56,%f36 ! (1_1) db0 = vis_fmul8x16(SCALE, db0); + and %l3,_0x7fffffff,%l3 ! (2_0) hx0 &= 0x7fffffff; + ldd [%o3+8],%f56 ! (1_1) dtmp0 = ((double*)si0)[1]; + faddd %f54,K1,%f54 ! (2_1) res0 += K1; + + lda [%i1]0x82,%f17 ! (2_0) x0 = *px; + cmp %l3,_0x7f3504f3 ! (2_0) hx ? 0x7f3504f3 + add %i3,stridez,%o4 ! pz += stridez + fdtos %f62,%f14 ! (0_1) ftmp0 = (float)res0; + + fmuld %f38,%f22,%f38 ! (3_1) xx0 *= dmp0; + and %l4,_0x7fffffff,%l4 ! (2_0) hy0 &= 0x7fffffff; + st %f14,[%i0] ! (0_1) *pz = ftmp0; + for %f60,DC1,%f46 ! (4_1) h0 = vis_for(h0,DC1); + + fmuld %f56,%f44,%f62 ! (1_1) res0 = dtmp0 * res0; + bge,pn %icc,.update16 ! (2_0) if ( hx >= 0x7f3504f3 ) + ld [%fp+tmp1],%o3 ! (4_1) iexp0 = ((int*)&db0)[0]; + faddd %f40,%f32,%f18 ! (1_0) db0 = dx0 + dy0; +.cont16: + fsmuld %f17,%f17,%f44 ! (2_0) dx0 = x0 * (double)x0; + cmp %l4,_0x7f3504f3 ! (2_0) hy ? 0x7f3504f3 + lda [stridey+%i5]0x82,%f17 ! (2_0) y0 = *py; + fpadd32 %f36,DA1,%f36 ! (1_1) db0 = vis_fpadd32(db0,DA1); + + fmuld %f54,%f58,%f54 ! (2_1) res0 *= xx0; + add %i1,stridex,%l7 ! px += stridex + bge,pn %icc,.update17 ! (2_0) if ( hy >= 0x7f3504f3 ) + fand %f46,DC2,%f58 ! (4_1) h_hi0 = vis_fand(h0,DC2); + + orcc %l3,%l4,%g0 + nop + bz,pn %icc,.update17 + fzero %f52 +.cont17: + fmuld K2,%f38,%f56 ! (3_1) res0 = K2 * xx0; + srax %o3,8,%o3 ! (4_1) iexp0 >>= 8; + st %f18,[%fp+tmp3] ! (1_0) iexp0 = ((int*)&db0)[0]; + fand %f30,DA0,%f40 ! (2_1) db0 = vis_fand(db0,DA0); + + fmuld %f62,%f36,%f62 ! (1_1) res0 *= db0; + and %o3,_0x1ff0,%o3 ! (4_1) di0 = iexp0 & 0x1ff0; + lda [%l7]0x82,%l3 ! (3_0) hx0 = *(int*)px; + fand %f0,DC0,%f60 ! (0_0) h0 = vis_fand(db0,DC0); + + ldd [TBL+%o3],%f22 ! (4_1) dtmp0 = ((double*)((char*)div_arr + di0))[0]; + add %g1,TBL,%g1 ! (2_1) si0 = (char*)sqrt_arr + di0; + add %i2,stridey,%o7 ! py += stridey + fsubd %f46,%f58,%f58 ! 
(4_1) xx0 = h0 - h_hi0; + + fsmuld %f17,%f17,%f30 ! (2_0) dy0 = y0 * (double)y0; + lda [stridey+%i2]0x82,%l4 ! (3_0) hy0 = *(int*)py; + add %l7,stridex,%i1 ! px += stridex + faddd %f54,DC1,%f36 ! (2_1) res0 += DC1; + + faddd %f56,K1,%f54 ! (3_1) res0 += K1; + and %l3,_0x7fffffff,%l3 ! (3_0) hx0 &= 0x7fffffff; + ldd [%g1+8],%f56 ! (2_1) dtmp0 = ((double*)si0)[1]; + fmul8x16 SCALE,%f40,%f40 ! (2_1) db0 = vis_fmul8x16(SCALE, db0); + + lda [%l7]0x82,%f17 ! (3_0) x0 = *px; + cmp %l3,_0x7f3504f3 ! (3_0) hx ? 0x7f3504f3 + bge,pn %icc,.update18 ! (3_0) if ( hx >= 0x7f3504f3 ) + fdtos %f62,%f14 ! (1_1) ftmp0 = (float)res0; +.cont18: + fmuld %f58,%f22,%f58 ! (4_1) xx0 *= dmp0; + and %l4,_0x7fffffff,%l4 ! (3_0) hy0 &= 0x7fffffff; + st %f14,[%i3] ! (1_1) *pz = ftmp0; + for %f60,DC1,%f46 ! (0_0) h0 = vis_for(h0,DC1); + + fmuld %f56,%f36,%f36 ! (2_1) res0 = dtmp0 * res0; + cmp %l4,_0x7f3504f3 ! (3_0) hy ? 0x7f3504f3 + ld [%fp+tmp2],%g1 ! (0_0) iexp0 = ((int*)&db0)[0]; + faddd %f44,%f30,%f30 ! (2_0) db0 = dx0 + dy0; + + fsmuld %f17,%f17,%f44 ! (3_0) dx0 = x0 * (double)x0; + bge,pn %icc,.update19 ! (3_0) if ( hy >= 0x7f3504f3 ) + lda [stridey+%i2]0x82,%f17 ! (3_0) y0 = *py; + fpadd32 %f40,DA1,%f62 ! (2_1) db0 = vis_fpadd32(db0,DA1); + +.cont19: + fmuld %f54,%f38,%f40 ! (3_1) res0 *= xx0; + orcc %l3,%l4,%g0 + st %f30,[%fp+tmp4] ! (2_0) iexp0 = ((int*)&db0)[0]; + fand %f46,DC2,%f38 ! (0_0) h_hi0 = vis_fand(h0,DC2); + + fmuld K2,%f58,%f54 ! (4_1) res0 = K2 * xx0; + srax %g1,8,%o5 ! (0_0) iexp0 >>= 8; + lda [%i1]0x82,%l3 ! (4_0) hx0 = *(int*)px; + fand %f24,DA0,%f56 ! (3_1) db0 = vis_fand(db0,DA0); + + fmuld %f36,%f62,%f62 ! (2_1) res0 *= db0; + and %o5,_0x1ff0,%o5 ! (0_0) di0 = iexp0 & 0x1ff0; + bz,pn %icc,.update19a + fand %f18,DC0,%f60 ! (1_0) h0 = vis_fand(db0,DC0); +.cont19a: + ldd [TBL+%o5],%f22 ! (0_0) dtmp0 = ((double*)((char*)div_arr + di0))[0]; + add %o0,TBL,%g1 ! (3_1) si0 = (char*)sqrt_arr + di0; + and %l3,_0x7fffffff,%l3 ! (4_0) hx0 &= 0x7fffffff; + fsubd %f46,%f38,%f38 ! 
(0_0) xx0 = h0 - h_hi0; + + fsmuld %f17,%f17,%f24 ! (3_0) dy0 = y0 * (double)y0; + cmp %l3,_0x7f3504f3 ! (4_0) hx ? 0x7f3504f3 + lda [stridey+%o7]0x82,%l4 ! (4_0) hy0 = *(int*)py; + faddd %f40,DC1,%f40 ! (3_1) res0 += DC1; + + fmul8x16 SCALE,%f56,%f36 ! (3_1) db0 = vis_fmul8x16(SCALE, db0); + bge,pn %icc,.update20 ! (4_0) if ( hx >= 0x7f3504f3 ) + ldd [%g1+8],%f56 ! (3_1) dtmp0 = ((double*)si0)[1]; + faddd %f54,K1,%f54 ! (4_1) res0 += K1; + + lda [%i1]0x82,%f17 ! (4_0) x0 = *px; +.cont20: + subcc counter,5,counter ! counter -= 5 + add %o4,stridez,%l7 ! pz += stridez + fdtos %f62,%f14 ! (2_1) ftmp0 = (float)res0; + + fmuld %f38,%f22,%f38 ! (0_0) xx0 *= dmp0; + and %l4,_0x7fffffff,%l4 ! (4_0) hy0 &= 0x7fffffff; + st %f14,[%o4] ! (2_1) *pz = ftmp0; + for %f60,DC1,%f46 ! (1_0) h0 = vis_for(h0,DC1); + + ld [%fp+tmp3],%g1 ! (1_0) iexp0 = ((int*)&db0)[0]; + fmuld %f56,%f40,%f62 ! (3_1) res0 = dtmp0 * res0; + bpos,pt %icc,.main_loop + faddd %f44,%f24,%f24 ! (3_0) db0 = dx0 + dy0; + + add counter,5,counter + +.tail: + subcc counter,1,counter + bneg .begin + nop + + fpadd32 %f36,DA1,%f36 ! (3_2) db0 = vis_fpadd32(db0,DA1); + + fmuld %f54,%f58,%f58 ! (4_2) res0 *= xx0; + fand %f46,DC2,%f44 ! (1_1) h_hi0 = vis_fand(h0,DC2); + + fmuld K2,%f38,%f56 ! (0_1) res0 = K2 * xx0; + srax %g1,8,%g5 ! (1_1) iexp0 >>= 8; + fand %f20,DA0,%f54 ! (4_2) db0 = vis_fand(db0,DA0); + + fmuld %f62,%f36,%f62 ! (3_2) res0 *= db0; + and %g5,_0x1ff0,%g5 ! (1_1) di0 = iexp0 & 0x1ff0; + + ldd [%g5+TBL],%f22 ! (1_1) dtmp0 = ((double*)((char*)div_arr + di0))[0]; + add %o3,TBL,%g1 ! (4_2) si0 = (char*)sqrt_arr + di0; + fsubd %f46,%f44,%f44 ! (1_1) xx0 = h0 - h_hi0; + + faddd %f58,DC1,%f36 ! (4_2) res0 += DC1; + + faddd %f56,K1,%f58 ! (0_1) res0 += K1; + ldd [%g1+8],%f56 ! (4_2) dtmp0 = ((double*)si0)[1]; + fmul8x16 SCALE,%f54,%f54 ! (4_2) db0 = vis_fmul8x16(SCALE, db0); + + fdtos %f62,%f14 ! (3_2) ftmp0 = (float)res0; + + fmuld %f44,%f22,%f44 ! (1_1) xx0 *= dmp0; + add %l7,stridez,%o7 ! 
pz += stridez + st %f14,[%l7] ! (3_2) *pz = ftmp0; + + subcc counter,1,counter + bneg .begin + or %g0,%o7,%l7 + + fmuld %f56,%f36,%f36 ! (4_2) res0 = dtmp0 * res0; + + fpadd32 %f54,DA1,%f62 ! (4_2) db0 = vis_fpadd32(db0,DA1); + + fmuld %f58,%f38,%f38 ! (0_1) res0 *= xx0; + + fmuld K2,%f44,%f56 ! (1_1) res0 = K2 * xx0; + fand %f0,DA0,%f54 ! (0_1) db0 = vis_fand(db0,DA0); + + fmuld %f36,%f62,%f62 ! (4_2) res0 *= db0; + + add %o5,TBL,%o0 ! (0_1) si0 = (char*)sqrt_arr + di0; + + faddd %f38,DC1,%f36 ! (0_1) res0 += DC1; + + faddd %f56,K1,%f38 ! (1_1) res0 += K1; + ldd [%o0+8],%f56 ! (0_1) dtmp0 = ((double*)si0)[1]; + fmul8x16 SCALE,%f54,%f54 ! (0_1) db0 = vis_fmul8x16(SCALE, db0); + + add %o7,stridez,%i0 ! pz += stridez + fdtos %f62,%f14 ! (4_2) ftmp0 = (float)res0; + + fmuld %f56,%f36,%f36 ! (0_1) res0 = dtmp0 * res0; + + fpadd32 %f54,DA1,%f62 ! (0_1) db0 = vis_fpadd32(db0,DA1); + + fmuld %f38,%f44,%f44 ! (1_1) res0 *= xx0; + add %i0,stridez,%i3 ! pz += stridez + st %f14,[%o7] ! (4_2) *pz = ftmp0; + + subcc counter,1,counter + bneg .begin + or %g0,%i0,%l7 + + fand %f18,DA0,%f56 ! (1_1) db0 = vis_fand(db0,DA0); + + fmuld %f36,%f62,%f62 ! (0_1) res0 *= db0; + + add %g5,TBL,%o3 ! (1_1) si0 = (char*)sqrt_arr + di0; + + faddd %f44,DC1,%f44 ! (1_1) res0 += DC1; + + fmul8x16 SCALE,%f56,%f36 ! (1_1) db0 = vis_fmul8x16(SCALE, db0); + ldd [%o3+8],%f56 ! (1_1) dtmp0 = ((double*)si0)[1]; + + add %i3,stridez,%o4 ! pz += stridez + fdtos %f62,%f14 ! (0_1) ftmp0 = (float)res0; + + st %f14,[%i0] ! (0_1) *pz = ftmp0; + + subcc counter,1,counter + bneg .begin + or %g0,%i3,%l7 + + fmuld %f56,%f44,%f62 ! (1_1) res0 = dtmp0 * res0; + + fpadd32 %f36,DA1,%f36 ! (1_1) db0 = vis_fpadd32(db0,DA1); + + fmuld %f62,%f36,%f62 ! (1_1) res0 *= db0; + + fdtos %f62,%f14 ! (1_1) ftmp0 = (float)res0; + + st %f14,[%i3] ! (1_1) *pz = ftmp0; + + ba .begin + or %g0,%o4,%l7 + + .align 16 +.spec1: + st %g0,[%l7] ! *pz = 0; + add %l7,stridez,%l7 ! pz += stridez + + add %i2,stridey,%i2 ! 
py += stridey + ba .begin1 + sub counter,1,counter ! counter-- + + .align 16 +.spec: + sethi %hi(0x7f800000),%i0 + cmp %l3,%i0 ! hx ? 0x7f800000 + bge,pt %icc,2f ! if ( hx >= 0x7f800000 ) + ld [%i2],%f8 + + cmp %l4,%i0 ! hy ? 0x7f800000 + bge,pt %icc,2f ! if ( hy >= 0x7f800000 ) + nop + + fsmuld %f17,%f17,%f44 ! x * (double)x + fsmuld %f8,%f8,%f24 ! y * (double)y + faddd %f44,%f24,%f24 ! x * (double)x + y * (double)y + fsqrtd %f24,%f24 ! hyp = sqrt(x * (double)x + y * (double)y); + fcmped %f24,DFMAX ! hyp ? DMAX + fbug,a 1f ! if ( hyp > DMAX ) + fmuls FMAX,FMAX,%f20 ! ftmp0 = FMAX * FMAX; + + fdtos %f24,%f20 ! ftmp0 = (float)hyp; +1: + st %f20,[%l7] ! *pz = ftmp0; + add %l7,stridez,%l7 ! pz += stridez + add %i1,stridex,%i1 ! px += stridex + + add %i2,stridey,%i2 ! py += stridey + ba .begin1 + sub counter,1,counter ! counter-- +2: + fcmps %f17,%f8 ! exceptions + cmp %l3,%i0 ! hx ? 0x7f800000 + be,a %icc,1f ! if ( hx == 0x7f800000 ) + st %i0,[%l7] ! *(int*)pz = 0x7f800000; + + cmp %l4,%i0 ! hy ? 0x7f800000 + be,a %icc,1f ! if ( hy == 0x7f800000 + st %i0,[%l7] ! *(int*)pz = 0x7f800000; + + fmuls %f17,%f8,%f8 ! x * y + st %f8,[%l7] ! *pz = x * y; + +1: + add %l7,stridez,%l7 ! pz += stridez + add %i1,stridex,%i1 ! px += stridex + + add %i2,stridey,%i2 ! py += stridey + ba .begin1 + sub counter,1,counter ! 
counter-- + + .align 16 +.update0: + cmp counter,1 + ble .cont0 + fzeros %f17 + + stx %i1,[%fp+tmp_px] + + add %o7,stridey,%i5 + stx %i5,[%fp+tmp_py] + + sub counter,1,counter + st counter,[%fp+tmp_counter] + + ba .cont0 + or %g0,1,counter + + .align 16 +.update1: + cmp counter,1 + ble .cont1 + fzeros %f17 + + stx %i1,[%fp+tmp_px] + stx %i5,[%fp+tmp_py] + + sub counter,1,counter + st counter,[%fp+tmp_counter] + + ba .cont1 + or %g0,1,counter + + .align 16 +.update2: + cmp counter,2 + ble .cont2 + fzeros %f8 + + stx %i1,[%fp+tmp_px] + stx %o4,[%fp+tmp_py] + + sub counter,2,counter + st counter,[%fp+tmp_counter] + + ba .cont2 + or %g0,2,counter + + .align 16 +.update3: + cmp counter,2 + ble .cont3 + fzeros %f17 + + stx %i1,[%fp+tmp_px] + stx %o4,[%fp+tmp_py] + + sub counter,2,counter + st counter,[%fp+tmp_counter] + + ba .cont3 + or %g0,2,counter + + .align 16 +.update4: + cmp counter,3 + ble .cont4 + fzeros %f17 + + stx %i1,[%fp+tmp_px] + stx %i5,[%fp+tmp_py] + + sub counter,3,counter + st counter,[%fp+tmp_counter] + + ba .cont4 + or %g0,3,counter + + .align 16 +.update5: + cmp counter,3 + ble .cont5 + fzeros %f17 + + sub %i1,stridex,%i2 + stx %i2,[%fp+tmp_px] + stx %i5,[%fp+tmp_py] + + sub counter,3,counter + st counter,[%fp+tmp_counter] + + ba .cont5 + or %g0,3,counter + + .align 16 +.update6: + cmp counter,4 + ble .cont6 + fzeros %f17 + + stx %i1,[%fp+tmp_px] + stx %i2,[%fp+tmp_py] + + sub counter,4,counter + st counter,[%fp+tmp_counter] + + ba .cont6 + or %g0,4,counter + + .align 16 +.update7: + cmp counter,4 + ble .cont7 + fzeros %f17 + + sub %i1,stridex,%o7 + stx %o7,[%fp+tmp_px] + stx %i2,[%fp+tmp_py] + + sub counter,4,counter + st counter,[%fp+tmp_counter] + + ba .cont7 + or %g0,4,counter + + .align 16 +.update8: + cmp counter,5 + ble .cont8 + fzeros %f17 + + sub %i1,stridex,%o5 + stx %o5,[%fp+tmp_px] + stx %o7,[%fp+tmp_py] + + sub counter,5,counter + st counter,[%fp+tmp_counter] + + ba .cont8 + or %g0,5,counter + + .align 16 +.update9: + cmp counter,5 + ble 
.cont9 + fzeros %f17 + + sub %i1,stridex,%o5 + stx %o5,[%fp+tmp_px] + stx %o7,[%fp+tmp_py] + + sub counter,5,counter + st counter,[%fp+tmp_counter] + + ba .cont9 + or %g0,5,counter + + .align 16 +.update10: + fmul8x16 SCALE,%f56,%f36 ! (3_1) db0 = vis_fmul8x16(SCALE, db0); + and %l4,_0x7fffffff,%l4 ! (4_0) hy0 &= 0x7fffffff; + ldd [%g1+8],%f56 ! (3_1) dtmp0 = ((double*)si0)[1]; + faddd %f54,K1,%f54 ! (4_1) res0 += K1; + + cmp counter,6 + ble .cont10 + fzeros %f17 + + stx %i1,[%fp+tmp_px] + add %o7,stridey,%i5 + stx %i5,[%fp+tmp_py] + + sub counter,6,counter + st counter,[%fp+tmp_counter] + + ba .cont10 + or %g0,6,counter + + .align 16 +.update11: + cmp counter,1 + ble .cont11 + fzeros %f17 + + stx %i1,[%fp+tmp_px] + stx %i5,[%fp+tmp_py] + + sub counter,1,counter + st counter,[%fp+tmp_counter] + + ba .cont11 + or %g0,1,counter + + .align 16 +.update12: + cmp counter,2 + ble .cont12 + fzeros %f8 + + stx %i0,[%fp+tmp_px] + add %i5,stridey,%o4 + stx %o4,[%fp+tmp_py] + + sub counter,2,counter + st counter,[%fp+tmp_counter] + + ba .cont12 + or %g0,2,counter + + .align 16 +.update13: + cmp counter,2 + ble .cont13 + fzeros %f17 + + stx %i0,[%fp+tmp_px] + stx %o4,[%fp+tmp_py] + + sub counter,2,counter + st counter,[%fp+tmp_counter] + + ba .cont13 + or %g0,2,counter + + .align 16 +.update14: + cmp counter,3 + ble .cont14 + fzeros %f17 + + stx %i1,[%fp+tmp_px] + add %o4,stridey,%i5 + stx %i5,[%fp+tmp_py] + + sub counter,3,counter + st counter,[%fp+tmp_counter] + + ba .cont14 + or %g0,3,counter + + .align 16 +.update15: + cmp counter,3 + ble .cont15 + fzeros %f17 + + sub %i1,stridex,%i2 + stx %i2,[%fp+tmp_px] + stx %i5,[%fp+tmp_py] + + sub counter,3,counter + st counter,[%fp+tmp_counter] + + ba .cont15 + or %g0,3,counter + + .align 16 +.update16: + faddd %f40,%f32,%f18 ! 
(1_0) db0 = dx0 + dy0; + cmp counter,4 + ble .cont16 + fzeros %f17 + + stx %i1,[%fp+tmp_px] + stx %i2,[%fp+tmp_py] + + sub counter,4,counter + st counter,[%fp+tmp_counter] + + ba .cont16 + or %g0,4,counter + + .align 16 +.update17: + cmp counter,4 + ble .cont17 + fzeros %f17 + + stx %i1,[%fp+tmp_px] + stx %i2,[%fp+tmp_py] + + sub counter,4,counter + st counter,[%fp+tmp_counter] + + ba .cont17 + or %g0,4,counter + + .align 16 +.update18: + cmp counter,5 + ble .cont18 + fzeros %f17 + + stx %l7,[%fp+tmp_px] + stx %o7,[%fp+tmp_py] + + sub counter,5,counter + st counter,[%fp+tmp_counter] + + ba .cont18 + or %g0,5,counter + + .align 16 +.update19: + fpadd32 %f40,DA1,%f62 ! (2_1) db0 = vis_fpadd32(db0,DA1); + cmp counter,5 + ble .cont19 + fzeros %f17 + + stx %l7,[%fp+tmp_px] + stx %o7,[%fp+tmp_py] + + sub counter,5,counter + st counter,[%fp+tmp_counter] + + ba .cont19 + or %g0,5,counter + + .align 16 +.update19a: + cmp counter,5 + ble .cont19a + fzeros %f17 + + stx %l7,[%fp+tmp_px] + stx %o7,[%fp+tmp_py] + + sub counter,5,counter + st counter,[%fp+tmp_counter] + + ba .cont19a + or %g0,5,counter + + .align 16 +.update20: + faddd %f54,K1,%f54 ! (4_1) res0 += K1; + cmp counter,6 + ble .cont20 + fzeros %f17 + + stx %i1,[%fp+tmp_px] + add %o7,stridey,%g1 + stx %g1,[%fp+tmp_py] + + sub counter,6,counter + st counter,[%fp+tmp_counter] + + ba .cont20 + or %g0,6,counter + +.exit: + ret + restore + SET_SIZE(__vhypotf) + diff --git a/usr/src/lib/libmvec/common/vis/__vlog.S b/usr/src/lib/libmvec/common/vis/__vlog.S new file mode 100644 index 0000000000..9229323d7b --- /dev/null +++ b/usr/src/lib/libmvec/common/vis/__vlog.S @@ -0,0 +1,671 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .file "__vlog.S" + +#include "libm.h" + + RO_DATA + .align 32 +TBL: + .word 0xbfd522ae, 0x0738a000 + .word 0xbd2ebe70, 0x8164c759 + .word 0xbfd3c252, 0x77333000 + .word 0xbd183b54, 0xb606bd5c + .word 0xbfd26962, 0x1134e000 + .word 0x3d31b61f, 0x10522625 + .word 0xbfd1178e, 0x8227e000 + .word 0xbd31ef78, 0xce2d07f2 + .word 0xbfcf991c, 0x6cb3c000 + .word 0x3d390d04, 0xcd7cc834 + .word 0xbfcd1037, 0xf2656000 + .word 0x3d084a7e, 0x75b6f6e4 + .word 0xbfca93ed, 0x3c8ae000 + .word 0x3d287243, 0x50562169 + .word 0xbfc823c1, 0x6551a000 + .word 0xbd1e0ddb, 0x9a631e83 + .word 0xbfc5bf40, 0x6b544000 + .word 0x3d127023, 0xeb68981c + .word 0xbfc365fc, 0xb015a000 + .word 0x3d3fd3a0, 0xafb9691b + .word 0xbfc1178e, 0x8227e000 + .word 0xbd21ef78, 0xce2d07f2 + .word 0xbfbda727, 0x63844000 + .word 0xbd1a8940, 0x1fa71733 + .word 0xbfb9335e, 0x5d594000 + .word 0xbd23115c, 0x3abd47da + .word 0xbfb4d311, 0x5d208000 + .word 0x3cf53a25, 0x82f4e1ef + .word 0xbfb08598, 0xb59e4000 + .word 0x3d17e5dd, 0x7009902c + .word 0xbfa894aa, 0x149f8000 + .word 0xbd39a19a, 0x8be97661 + .word 0xbfa0415d, 0x89e78000 + .word 0x3d3dddc7, 0xf461c516 + .word 0xbf902056, 0x58930000 + .word 0xbd3611d2, 0x7c8e8417 + .word 0x00000000, 0x00000000 + .word 0x00000000, 0x00000000 + .word 0x3f9f829b, 0x0e780000 + .word 0x3d298026, 0x7c7e09e4 + .word 
0x3faf0a30, 0xc0110000 + .word 0x3d48a998, 0x5f325c5c + .word 0x3fb6f0d2, 0x8ae58000 + .word 0xbd34b464, 0x1b664613 + .word 0x3fbe2707, 0x6e2b0000 + .word 0xbd2a342c, 0x2af0003c + .word 0x3fc29552, 0xf8200000 + .word 0xbd35b967, 0xf4471dfc + .word 0x3fc5ff30, 0x70a78000 + .word 0x3d43d3c8, 0x73e20a07 + .word 0x3fc9525a, 0x9cf44000 + .word 0x3d46b476, 0x41307539 + .word 0x3fcc8ff7, 0xc79a8000 + .word 0x3d4a21ac, 0x25d81ef3 + .word 0x3fcfb918, 0x6d5e4000 + .word 0xbd0d572a, 0xab993c87 + .word 0x3fd1675c, 0xababa000 + .word 0x3d38380e, 0x731f55c4 + .word 0x3fd2e8e2, 0xbae12000 + .word 0xbd267b1e, 0x99b72bd8 + .word 0x3fd4618b, 0xc21c6000 + .word 0xbd13d82f, 0x484c84cc + .word 0x3fd5d1bd, 0xbf580000 + .word 0x3d4394a1, 0x1b1c1ee4 +! constants: + .word 0x40000000,0x00000000 + .word 0x3fe55555,0x555571da + .word 0x3fd99999,0x8702be3a + .word 0x3fd24af7,0x3f4569b1 + .word 0x3ea62e42,0xfee00000 ! scaled by 2**-20 + .word 0x3caa39ef,0x35793c76 ! scaled by 2**-20 + .word 0xffff8000,0x00000000 + .word 0x43200000 + .word 0xfff00000 + .word 0xc0194000 + .word 0x4000 + +#define two 0x200 +#define A1 0x208 +#define A2 0x210 +#define A3 0x218 +#define ln2hi 0x220 +#define ln2lo 0x228 +#define mask 0x230 +#define ox43200000 0x238 +#define oxfff00000 0x23c +#define oxc0194000 0x240 +#define ox4000 0x244 + +! local storage indices + +#define jnk STACK_BIAS-0x8 +#define tmp2 STACK_BIAS-0x10 +#define tmp1 STACK_BIAS-0x18 +#define tmp0 STACK_BIAS-0x20 +! sizeof temp storage - must be a multiple of 16 for V9 +#define tmps 0x20 + +! register use + +! i0 n +! i1 x +! i2 stridex +! i3 y +! i4 stridey +! i5 + +! g1 TBL + +! l0 j0 +! l1 j1 +! l2 j2 +! l3 +! l4 0x94000 +! l5 +! l6 0x000fffff +! l7 0x7ff00000 + +! o0 py0 +! o1 py1 +! o2 py2 +! o3 +! o4 +! o5 +! o7 + +! f0 u0,q0 +! f2 v0,(two-v0)-u0,z0 +! f4 n0,f0,q0 +! f6 s0 +! f8 q +! f10 u1,q1 +! f12 v1,(two-v1)-u1,z1 +! f14 n1,f1,q1 +! f16 s1 +! f18 t +! f20 u2,q2 +! f22 v2,(two-v2)-u2,z2 +! f24 n2,f2,q2 +! f26 s2 +! f28 0xfff00000 +!
f29 0x43200000 +! f30 0x4000 +! f31 0xc0194000 +! f32 t0 +! f34 h0,f0-(c0-h0) +! f36 c0 +! f38 A1 +! f40 two +! f42 t1 +! f44 h1,f1-(c1-h1) +! f46 c1 +! f48 A2 +! f50 0xffff8000... +! f52 t2 +! f54 h2,f2-(c2-h2) +! f56 c2 +! f58 A3 +! f60 ln2hi +! f62 ln2lo + + ENTRY(__vlog) + save %sp,-SA(MINFRAME)-tmps,%sp + PIC_SETUP(l7) + PIC_SET(l7,TBL,o0) + mov %o0,%g1 + wr %g0,0x82,%asi ! set %asi for non-faulting loads + sethi %hi(0x94000),%l4 + sethi %hi(0x000fffff),%l6 + or %l6,%lo(0x000fffff),%l6 + sethi %hi(0x7ff00000),%l7 + ldd [%g1+two],%f40 + ldd [%g1+A1],%f38 + ldd [%g1+A2],%f48 + ldd [%g1+A3],%f58 + ldd [%g1+ln2hi],%f60 + ldd [%g1+ln2lo],%f62 + ldd [%g1+mask],%f50 + ld [%g1+ox43200000],%f29 + ld [%g1+oxfff00000],%f28 + ld [%g1+oxc0194000],%f31 + ld [%g1+ox4000],%f30 + sll %i2,3,%i2 ! scale strides + sll %i4,3,%i4 + add %fp,jnk,%o0 ! precondition loop + add %fp,jnk,%o1 + add %fp,jnk,%o2 + fzero %f2 + fzero %f6 + fzero %f18 + fzero %f36 + fzero %f12 + fzero %f14 + fzero %f16 + fzero %f42 + fzero %f44 + fzero %f46 + std %f46,[%fp+tmp1] + fzero %f24 + fzero %f26 + fzero %f52 + fzero %f54 + std %f54,[%fp+tmp2] + sub %i3,%i4,%i3 + ld [%i1],%l0 ! ix + ld [%i1],%f0 ! u.l[0] = *x + ba .loop0 + ld [%i1+4],%f1 ! u.l[1] = *(1+x) + + .align 16 +! -- 16 byte aligned +.loop0: + sub %l0,%l7,%o3 + sub %l6,%l0,%o4 + fpadd32s %f0,%f31,%f4 ! n = (ix + 0xc0194000) & 0xfff00000 + fmuld %f6,%f2,%f8 ! (previous iteration) + + andcc %o3,%o4,%o4 + bge,pn %icc,.range0 ! ix <= 0x000fffff or >= 0x7ff00000 +! delay slot + fands %f4,%f28,%f4 + + add %i1,%i2,%i1 ! x += stridex + add %i3,%i4,%i3 ! y += stridey + fpsub32s %f0,%f4,%f0 ! u.l[0] -= n + +.cont0: + lda [%i1]%asi,%l1 ! preload next argument + add %l0,%l4,%l0 ! j = ix + 0x94000 + fpadd32s %f0,%f30,%f2 ! v.l[0] = u.l[0] + 0x4000 + + lda [%i1]%asi,%f10 + srl %l0,11,%l0 ! j = (j >> 11) & 0x1f0 + fand %f2,%f50,%f2 ! v.l &= 0xffff8000... + + lda [%i1+4]%asi,%f11 + and %l0,0x1f0,%l0 + fitod %f4,%f32 ! 
(double) n + + add %l0,8,%l3 + fsubd %f0,%f2,%f4 ! f = u.d - v.d + + faddd %f0,%f2,%f6 ! s = f / (u.d + v.d) + + fsubd %f40,%f2,%f2 ! two - v.d + fmuld %f32,%f60,%f34 ! h = n * ln2hi + TBL[j] + + faddd %f8,%f18,%f8 ! y = c + (t + q) + fmuld %f32,%f62,%f32 ! t = n * ln2lo + TBL[j+1] + + fdivd %f4,%f6,%f6 + + faddd %f54,%f24,%f56 ! c = h + f + fmuld %f26,%f26,%f22 ! z = s * s + + faddd %f8,%f36,%f8 + st %f8,[%o0] + + st %f9,[%o0+4] + mov %i3,%o0 + faddd %f14,%f38,%f14 + + fsubd %f56,%f54,%f54 ! t += f - (c - h) + fmuld %f22,%f58,%f20 ! q = ... + + fsubd %f2,%f0,%f2 ! (two - v.d) - u.d + ldd [%g1+%l0],%f36 + + faddd %f42,%f44,%f18 + fmuld %f12,%f14,%f14 + ldd [%fp+tmp1],%f12 + + faddd %f20,%f48,%f20 + nop + + faddd %f34,%f36,%f34 + ldd [%g1+%l3],%f0 + + faddd %f14,%f12,%f12 + + fsubd %f24,%f54,%f54 + fmuld %f22,%f20,%f24 + + std %f2,[%fp+tmp0] + addcc %i0,-1,%i0 + ble,pn %icc,.endloop0 +! delay slot + faddd %f32,%f0,%f32 + +! -- 16 byte aligned +.loop1: + sub %l1,%l7,%o3 + sub %l6,%l1,%o4 + fpadd32s %f10,%f31,%f14 ! n = (ix + 0xc0194000) & 0xfff00000 + fmuld %f16,%f12,%f8 ! (previous iteration) + + andcc %o3,%o4,%o4 + bge,pn %icc,.range1 ! ix <= 0x000fffff or >= 0x7ff00000 +! delay slot + fands %f14,%f28,%f14 + + add %i1,%i2,%i1 ! x += stridex + add %i3,%i4,%i3 ! y += stridey + fpsub32s %f10,%f14,%f10 ! u.l[0] -= n + +.cont1: + lda [%i1]%asi,%l2 ! preload next argument + add %l1,%l4,%l1 ! j = ix + 0x94000 + fpadd32s %f10,%f30,%f12 ! v.l[0] = u.l[0] + 0x4000 + + lda [%i1]%asi,%f20 + srl %l1,11,%l1 ! j = (j >> 11) & 0x1f0 + fand %f12,%f50,%f12 ! v.l &= 0xffff8000... + + lda [%i1+4]%asi,%f21 + and %l1,0x1f0,%l1 + fitod %f14,%f42 ! (double) n + + add %l1,8,%l3 + fsubd %f10,%f12,%f14 ! f = u.d - v.d + + faddd %f10,%f12,%f16 ! s = f / (u.d + v.d) + + fsubd %f40,%f12,%f12 ! two - v.d + fmuld %f42,%f60,%f44 ! h = n * ln2hi + TBL[j] + + faddd %f8,%f18,%f8 ! y = c + (t + q) + fmuld %f42,%f62,%f42 ! t = n * ln2lo + TBL[j+1] + + fdivd %f14,%f16,%f16 + + faddd %f34,%f4,%f36 ! 
c = h + f + fmuld %f6,%f6,%f2 ! z = s * s + + faddd %f8,%f46,%f8 + st %f8,[%o1] + + st %f9,[%o1+4] + mov %i3,%o1 + faddd %f24,%f38,%f24 + + fsubd %f36,%f34,%f34 ! t += f - (c - h) + fmuld %f2,%f58,%f0 ! q = ... + + fsubd %f12,%f10,%f12 ! (two - v.d) - u.d + ldd [%g1+%l1],%f46 + + faddd %f52,%f54,%f18 + fmuld %f22,%f24,%f24 + ldd [%fp+tmp2],%f22 + + faddd %f0,%f48,%f0 + nop + + faddd %f44,%f46,%f44 + ldd [%g1+%l3],%f10 + + faddd %f24,%f22,%f22 + + fsubd %f4,%f34,%f34 + fmuld %f2,%f0,%f4 + + std %f12,[%fp+tmp1] + addcc %i0,-1,%i0 + ble,pn %icc,.endloop1 +! delay slot + faddd %f42,%f10,%f42 + +! -- 16 byte aligned +.loop2: + sub %l2,%l7,%o3 + sub %l6,%l2,%o4 + fpadd32s %f20,%f31,%f24 ! n = (ix + 0xc0194000) & 0xfff00000 + fmuld %f26,%f22,%f8 ! (previous iteration) + + andcc %o3,%o4,%o4 + bge,pn %icc,.range2 ! ix <= 0x000fffff or >= 0x7ff00000 +! delay slot + fands %f24,%f28,%f24 + + add %i1,%i2,%i1 ! x += stridex + add %i3,%i4,%i3 ! y += stridey + fpsub32s %f20,%f24,%f20 ! u.l[0] -= n + +.cont2: + lda [%i1]%asi,%l0 ! preload next argument + add %l2,%l4,%l2 ! j = ix + 0x94000 + fpadd32s %f20,%f30,%f22 ! v.l[0] = u.l[0] + 0x4000 + + lda [%i1]%asi,%f0 + srl %l2,11,%l2 ! j = (j >> 11) & 0x1f0 + fand %f22,%f50,%f22 ! v.l &= 0xffff8000... + + lda [%i1+4]%asi,%f1 + and %l2,0x1f0,%l2 + fitod %f24,%f52 ! (double) n + + add %l2,8,%l3 + fsubd %f20,%f22,%f24 ! f = u.d - v.d + + faddd %f20,%f22,%f26 ! s = f / (u.d + v.d) + + fsubd %f40,%f22,%f22 ! two - v.d + fmuld %f52,%f60,%f54 ! h = n * ln2hi + TBL[j] + + faddd %f8,%f18,%f8 ! y = c + (t + q) + fmuld %f52,%f62,%f52 ! t = n * ln2lo + TBL[j+1] + + fdivd %f24,%f26,%f26 + + faddd %f44,%f14,%f46 ! c = h + f + fmuld %f16,%f16,%f12 ! z = s * s + + faddd %f8,%f56,%f8 + st %f8,[%o2] + + st %f9,[%o2+4] + mov %i3,%o2 + faddd %f4,%f38,%f4 + + fsubd %f46,%f44,%f44 ! t += f - (c - h) + fmuld %f12,%f58,%f10 ! q = ... + + fsubd %f22,%f20,%f22 ! 
(two - v.d) - u.d + ldd [%g1+%l2],%f56 + + faddd %f32,%f34,%f18 + fmuld %f2,%f4,%f4 + ldd [%fp+tmp0],%f2 + + faddd %f10,%f48,%f10 + nop + + faddd %f54,%f56,%f54 + ldd [%g1+%l3],%f20 + + faddd %f4,%f2,%f2 + + fsubd %f14,%f44,%f44 + fmuld %f12,%f10,%f14 + + std %f22,[%fp+tmp2] + addcc %i0,-1,%i0 + bg,pt %icc,.loop0 +! delay slot + faddd %f52,%f20,%f52 + + +! Once we get to the last element, we loop three more times to finish +! the computations in progress. This means we will load past the end +! of the argument vector, but since we use non-faulting loads and never +! use the data, the only potential problem is cache miss. (Note that +! when the argument is 2, the only exception that occurs in the compu- +! tation is an inexact result in the final addition, and we break out +! of the "extra" iterations before then.) +.endloop2: + sethi %hi(0x40000000),%l0 ! "next argument" = two + cmp %i0,-3 + bg,a,pt %icc,.loop0 +! delay slot + fmovd %f40,%f0 + ret + restore + + .align 16 +.endloop0: + sethi %hi(0x40000000),%l1 ! "next argument" = two + cmp %i0,-3 + bg,a,pt %icc,.loop1 +! delay slot + fmovd %f40,%f10 + ret + restore + + .align 16 +.endloop1: + sethi %hi(0x40000000),%l2 ! "next argument" = two + cmp %i0,-3 + bg,a,pt %icc,.loop2 +! delay slot + fmovd %f40,%f20 + ret + restore + + + .align 16 +.range0: + cmp %l0,%l7 + bgeu,pn %icc,2f ! if (unsigned) ix >= 0x7ff00000 +! delay slot + ld [%i1+4],%o5 + fxtod %f0,%f0 ! scale by 2**1074 w/o trapping + st %f0,[%fp+tmp0] + add %i1,%i2,%i1 ! x += stridex + orcc %l0,%o5,%g0 + be,pn %icc,1f ! if x == 0 +! delay slot + add %i3,%i4,%i3 ! y += stridey + fpadd32s %f0,%f31,%f4 ! n = (ix + 0xc0194000) & 0xfff00000 + fands %f4,%f28,%f4 + fpsub32s %f0,%f4,%f0 ! u.l[0] -= n + ld [%fp+tmp0],%l0 + ba,pt %icc,.cont0 +! delay slot + fpsub32s %f4,%f29,%f4 ! n -= 0x43200000 +1: + fdivs %f29,%f1,%f4 ! raise div-by-zero + ba,pt %icc,3f +! delay slot + st %f28,[%i3] ! store -inf +2: + sll %l0,1,%l0 ! lop off sign bit + add %i1,%i2,%i1 ! 
x += stridex + orcc %l0,%o5,%g0 + be,pn %icc,1b ! if x == -0 +! delay slot + add %i3,%i4,%i3 ! y += stridey + fabsd %f0,%f4 ! *y = (x + |x|) * inf + faddd %f0,%f4,%f0 + fand %f28,%f50,%f4 + fnegd %f4,%f4 + fmuld %f0,%f4,%f0 + st %f0,[%i3] +3: + addcc %i0,-1,%i0 + ble,pn %icc,.endloop2 +! delay slot + st %f1,[%i3+4] + ld [%i1],%l0 ! get next argument + ld [%i1],%f0 + ba,pt %icc,.loop0 +! delay slot + ld [%i1+4],%f1 + + + .align 16 +.range1: + cmp %l1,%l7 + bgeu,pn %icc,2f ! if (unsigned) ix >= 0x7ff00000 +! delay slot + ld [%i1+4],%o5 + fxtod %f10,%f10 ! scale by 2**1074 w/o trapping + st %f10,[%fp+tmp1] + add %i1,%i2,%i1 ! x += stridex + orcc %l1,%o5,%g0 + be,pn %icc,1f ! if x == 0 +! delay slot + add %i3,%i4,%i3 ! y += stridey + fpadd32s %f10,%f31,%f14 ! n = (ix + 0xc0194000) & 0xfff00000 + fands %f14,%f28,%f14 + fpsub32s %f10,%f14,%f10 ! u.l[0] -= n + ld [%fp+tmp1],%l1 + ba,pt %icc,.cont1 +! delay slot + fpsub32s %f14,%f29,%f14 ! n -= 0x43200000 +1: + fdivs %f29,%f11,%f14 ! raise div-by-zero + ba,pt %icc,3f +! delay slot + st %f28,[%i3] ! store -inf +2: + sll %l1,1,%l1 ! lop off sign bit + add %i1,%i2,%i1 ! x += stridex + orcc %l1,%o5,%g0 + be,pn %icc,1b ! if x == -0 +! delay slot + add %i3,%i4,%i3 ! y += stridey + fabsd %f10,%f14 ! *y = (x + |x|) * inf + faddd %f10,%f14,%f10 + fand %f28,%f50,%f14 + fnegd %f14,%f14 + fmuld %f10,%f14,%f10 + st %f10,[%i3] +3: + addcc %i0,-1,%i0 + ble,pn %icc,.endloop0 +! delay slot + st %f11,[%i3+4] + ld [%i1],%l1 ! get next argument + ld [%i1],%f10 + ba,pt %icc,.loop1 +! delay slot + ld [%i1+4],%f11 + + + .align 16 +.range2: + cmp %l2,%l7 + bgeu,pn %icc,2f ! if (unsigned) ix >= 0x7ff00000 +! delay slot + ld [%i1+4],%o5 + fxtod %f20,%f20 ! scale by 2**1074 w/o trapping + st %f20,[%fp+tmp2] + add %i1,%i2,%i1 ! x += stridex + orcc %l2,%o5,%g0 + be,pn %icc,1f ! if x == 0 +! delay slot + add %i3,%i4,%i3 ! y += stridey + fpadd32s %f20,%f31,%f24 ! n = (ix + 0xc0194000) & 0xfff00000 + fands %f24,%f28,%f24 + fpsub32s %f20,%f24,%f20 ! 
u.l[0] -= n + ld [%fp+tmp2],%l2 + ba,pt %icc,.cont2 +! delay slot + fpsub32s %f24,%f29,%f24 ! n -= 0x43200000 +1: + fdivs %f29,%f21,%f24 ! raise div-by-zero + ba,pt %icc,3f +! delay slot + st %f28,[%i3] ! store -inf +2: + sll %l2,1,%l2 ! lop off sign bit + add %i1,%i2,%i1 ! x += stridex + orcc %l2,%o5,%g0 + be,pn %icc,1b ! if x == -0 +! delay slot + add %i3,%i4,%i3 ! y += stridey + fabsd %f20,%f24 ! *y = (x + |x|) * inf + faddd %f20,%f24,%f20 + fand %f28,%f50,%f24 + fnegd %f24,%f24 + fmuld %f20,%f24,%f20 + st %f20,[%i3] +3: + addcc %i0,-1,%i0 + ble,pn %icc,.endloop1 +! delay slot + st %f21,[%i3+4] + ld [%i1],%l2 ! get next argument + ld [%i1],%f20 + ba,pt %icc,.loop2 +! delay slot + ld [%i1+4],%f21 + + SET_SIZE(__vlog) + diff --git a/usr/src/lib/libmvec/common/vis/__vlog_ultra3.S b/usr/src/lib/libmvec/common/vis/__vlog_ultra3.S new file mode 100644 index 0000000000..87c299bfda --- /dev/null +++ b/usr/src/lib/libmvec/common/vis/__vlog_ultra3.S @@ -0,0 +1,2905 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + + .file "__vlog_ultra3.S" + +#include "libm.h" +#if defined(LIBMVEC_SO_BUILD) + .weak __vlog + .type __vlog,#function + __vlog = __vlog_ultra3 +#endif + +/* + * ELEVENBIT table and order 5 POLYNOMIAL no explicit correction t + */ + + RO_DATA + .align 64 +!! this is a new 11 bit table. +TBL: + .word 0xbfd522ae, 0x0738a000 + .word 0xbd2ebe70, 0x8164c759 + .word 0xbfd5178d, 0x9ab55000 + .word 0xbd35c153, 0x0fe963b3 + .word 0xbfd50c6f, 0x1d11b000 + .word 0xbd42f8ca, 0x40bec1ea + .word 0xbfd50152, 0x8da1f000 + .word 0xbd42cfac, 0x6d29f4d7 + .word 0xbfd4f637, 0xebba9000 + .word 0xbd401f53, 0x9a676da3 + .word 0xbfd4eb1f, 0x36b07000 + .word 0xbd184047, 0x46e5797b + .word 0xbfd4e008, 0x6dd8b000 + .word 0xbd4594b6, 0xaf0ddc3c + .word 0xbfd4d4f3, 0x90890000 + .word 0xbd19fd79, 0x3a9f1441 + .word 0xbfd4c9e0, 0x9e172000 + .word 0xbd4877dd, 0xb93d49d7 + .word 0xbfd4becf, 0x95d97000 + .word 0xbd422662, 0x6ffee2c8 + .word 0xbfd4b3c0, 0x77267000 + .word 0xbd4d3497, 0x2fdf5a8c + .word 0xbfd4a8b3, 0x41552000 + .word 0xbd46127e, 0x3d0dc8d1 + .word 0xbfd49da7, 0xf3bcc000 + .word 0xbd307b33, 0x4daf4b9a + .word 0xbfd4929e, 0x8db4e000 + .word 0xbd3b9056, 0x556c70de + .word 0xbfd48797, 0x0e958000 + .word 0xbd3dc1b8, 0x465cf25f + .word 0xbfd47c91, 0x75b6f000 + .word 0xbd05acd1, 0x7009e35b + .word 0xbfd4718d, 0xc271c000 + .word 0xbd306c18, 0xfb4c14c5 + .word 0xbfd4668b, 0xf41ef000 + .word 0xbd432874, 0x4e9d2b85 + .word 0xbfd45b8c, 0x0a17d000 + .word 0xbd4e26ed, 0xf182f57b + .word 0xbfd4508e, 0x03b61000 + .word 0xbd40ef1c, 0x2579199c + .word 0xbfd44591, 0xe0539000 + .word 0xbd4e916a, 0x76d6dc28 + .word 0xbfd43a97, 0x9f4ac000 + .word 0xbd23ee07, 0x6a81f88e + .word 0xbfd42f9f, 0x3ff62000 + .word 0xbd390644, 0x0f7d3354 + .word 0xbfd424a8, 0xc1b0c000 + .word 0xbd2dc57c, 0x99ae2a25 + .word 0xbfd419b4, 0x23d5e000 + .word 0xbd418e43, 0x6ec90e0a + .word 0xbfd40ec1, 0x65c13000 + .word 0xbd3f59a8, 0xa01757f6 + .word 0xbfd403d0, 0x86cea000 + .word 0xbd3e6ef5, 0x74487308 + .word 0xbfd3f8e1, 
0x865a8000 + .word 0xbd26f338, 0x912773e3 + .word 0xbfd3edf4, 0x63c16000 + .word 0xbd407cc1, 0xeb4069e1 + .word 0xbfd3e309, 0x1e604000 + .word 0xbd43f634, 0xa2afb68d + .word 0xbfd3d81f, 0xb5946000 + .word 0xbd4b74e0, 0xf558b217 + .word 0xbfd3cd38, 0x28bb6000 + .word 0xbd489faf, 0xb06c8342 + .word 0xbfd3c252, 0x77333000 + .word 0xbd183b54, 0xb606bd5c + .word 0xbfd3b76e, 0xa059f000 + .word 0xbd47b5cf, 0x9912c7cb + .word 0xbfd3ac8c, 0xa38e5000 + .word 0xbd48bd04, 0x10ff506d + .word 0xbfd3a1ac, 0x802f3000 + .word 0xbd398ecf, 0x399abd8d + .word 0xbfd396ce, 0x359bb000 + .word 0xbd4ea7c6, 0x3a99c99c + .word 0xbfd38bf1, 0xc3337000 + .word 0xbd4ce9e9, 0x41e9516d + .word 0xbfd38117, 0x28564000 + .word 0xbd496386, 0xdb17e3f5 + .word 0xbfd3763e, 0x64645000 + .word 0xbd318b1f, 0x291dcb56 + .word 0xbfd36b67, 0x76be1000 + .word 0xbd116ecd, 0xb0f177c8 + .word 0xbfd36092, 0x5ec44000 + .word 0xbd4eb929, 0xf344bbd1 + .word 0xbfd355bf, 0x1bd82000 + .word 0xbd491599, 0x1da6c3c6 + .word 0xbfd34aed, 0xad5b1000 + .word 0xbd3a2aac, 0xf2be1fdd + .word 0xbfd3401e, 0x12aec000 + .word 0xbd4741c6, 0x5548eb71 + .word 0xbfd33550, 0x4b355000 + .word 0xbd446efc, 0x89cefc92 + .word 0xbfd32a84, 0x56512000 + .word 0xbd04f928, 0x139af5d6 + .word 0xbfd31fba, 0x3364c000 + .word 0xbd4a08d8, 0x6ce5a16e + .word 0xbfd314f1, 0xe1d35000 + .word 0xbd49c761, 0x4b37b0d2 + .word 0xbfd30a2b, 0x61001000 + .word 0xbd4a53e9, 0x6290ef5b + .word 0xbfd2ff66, 0xb04ea000 + .word 0xbd43a896, 0xd5f0c8e9 + .word 0xbfd2f4a3, 0xcf22e000 + .word 0xbd4b8693, 0xf85f2705 + .word 0xbfd2e9e2, 0xbce12000 + .word 0xbd24300c, 0x128d1dc2 + .word 0xbfd2df23, 0x78edd000 + .word 0xbce292b7, 0xcd95c595 + .word 0xbfd2d466, 0x02adc000 + .word 0xbd49dcbc, 0x88caaf9b + .word 0xbfd2c9aa, 0x59863000 + .word 0xbd4a7f90, 0xe829d4d2 + .word 0xbfd2bef0, 0x7cdc9000 + .word 0xbd2a9cfa, 0x4a5004f4 + .word 0xbfd2b438, 0x6c168000 + .word 0xbd4e1827, 0x3a343630 + .word 0xbfd2a982, 0x269a3000 + .word 0xbd4b7e9c, 0x6aa35e8c + .word 0xbfd29ecd, 0xabcdf000 + 
.word 0xbd44073b, 0x3bdc2243 + .word 0xbfd2941a, 0xfb186000 + .word 0xbd46f79e, 0xa4678ebb + .word 0xbfd2896a, 0x13e08000 + .word 0xbd3a8ed0, 0x27e16952 + .word 0xbfd27eba, 0xf58d8000 + .word 0xbd49399d, 0xffd2d096 + .word 0xbfd2740d, 0x9f870000 + .word 0xbd45f660, 0x0b9a802a + .word 0xbfd26962, 0x1134d000 + .word 0xbd4724f0, 0x77d6ecee + .word 0xbfd25eb8, 0x49ff2000 + .word 0xbd310c25, 0x03f76b8e + .word 0xbfd25410, 0x494e5000 + .word 0xbd3b1d7a, 0xc0ef77f2 + .word 0xbfd2496a, 0x0e8b3000 + .word 0xbd003238, 0x687cfe2e + .word 0xbfd23ec5, 0x991eb000 + .word 0xbd44920d, 0xdbae8d6f + .word 0xbfd23422, 0xe8724000 + .word 0xbd40708a, 0x931c895b + .word 0xbfd22981, 0xfbef7000 + .word 0xbd42f5ef, 0x4fb53f93 + .word 0xbfd21ee2, 0xd3003000 + .word 0xbd40382e, 0x41be00e3 + .word 0xbfd21445, 0x6d0eb000 + .word 0xbd41a87d, 0xeba46baf + .word 0xbfd209a9, 0xc9857000 + .word 0xbd45b053, 0x3ba9c94d + .word 0xbfd1ff0f, 0xe7cf4000 + .word 0xbd3e9d5b, 0x513ff0c1 + .word 0xbfd1f477, 0xc7573000 + .word 0xbd26d6d4, 0x010d751a + .word 0xbfd1e9e1, 0x67889000 + .word 0xbd43e8a8, 0x961ba4d1 + .word 0xbfd1df4c, 0xc7cf2000 + .word 0xbd30b43f, 0x0455f7e4 + .word 0xbfd1d4b9, 0xe796c000 + .word 0xbd222a66, 0x7c42e56d + .word 0xbfd1ca28, 0xc64ba000 + .word 0xbd4ca760, 0xf7a15533 + .word 0xbfd1bf99, 0x635a6000 + .word 0xbd4729bb, 0x5451ef6e + .word 0xbfd1b50b, 0xbe2fc000 + .word 0xbd38ecd7, 0x3263201f + .word 0xbfd1aa7f, 0xd638d000 + .word 0xbd29f60a, 0x9616f7a0 + .word 0xbfd19ff5, 0xaae2f000 + .word 0xbce69fd9, 0x9ec05ba8 + .word 0xbfd1956d, 0x3b9bc000 + .word 0xbd27d2f7, 0x3ad1aa14 + .word 0xbfd18ae6, 0x87d13000 + .word 0xbd43a034, 0x64df39ff + .word 0xbfd18061, 0x8ef18000 + .word 0xbd45be80, 0x1bc9638d + .word 0xbfd175de, 0x506b3000 + .word 0xbd30c07c, 0x4da5752f + .word 0xbfd16b5c, 0xcbacf000 + .word 0xbd46e6b3, 0x7de945a0 + .word 0xbfd160dd, 0x0025e000 + .word 0xbd4ba5c1, 0xc499684a + .word 0xbfd1565e, 0xed455000 + .word 0xbd4f8629, 0x48125517 + .word 0xbfd14be2, 0x927ae000 + .word 
0xbd49a817, 0xc85685e2 + .word 0xbfd14167, 0xef367000 + .word 0xbd3e0c07, 0x824daaf5 + .word 0xbfd136ef, 0x02e82000 + .word 0xbd4217d3, 0xe78d3ed8 + .word 0xbfd12c77, 0xcd007000 + .word 0xbd13b294, 0x8a11f797 + .word 0xbfd12202, 0x4cf00000 + .word 0xbd38fdd9, 0x76fabda5 + .word 0xbfd1178e, 0x8227e000 + .word 0xbd31ef78, 0xce2d07f2 + .word 0xbfd10d1c, 0x6c194000 + .word 0xbd4cb3de, 0x00324ee4 + .word 0xbfd102ac, 0x0a35c000 + .word 0xbd483810, 0x88080a5e + .word 0xbfd0f83d, 0x5bef2000 + .word 0xbd475fa0, 0x37a37ba8 + .word 0xbfd0edd0, 0x60b78000 + .word 0xbd0019b5, 0x2d8435f5 + .word 0xbfd0e365, 0x18012000 + .word 0xbd2a5943, 0x8bbdca93 + .word 0xbfd0d8fb, 0x813eb000 + .word 0xbd1ee8c8, 0x8753fa35 + .word 0xbfd0ce93, 0x9be30000 + .word 0xbd4e8266, 0xd788ddf1 + .word 0xbfd0c42d, 0x67616000 + .word 0xbd27188b, 0x163ceae9 + .word 0xbfd0b9c8, 0xe32d1000 + .word 0xbd42224e, 0x89208f94 + .word 0xbfd0af66, 0x0eb9e000 + .word 0xbd23c7c3, 0xf528d80a + .word 0xbfd0a504, 0xe97bb000 + .word 0xbd303094, 0xe6690c44 + .word 0xbfd09aa5, 0x72e6c000 + .word 0xbd3b50a1, 0xe1734342 + .word 0xbfd09047, 0xaa6f9000 + .word 0xbd3f18e8, 0x3ce75c0e + .word 0xbfd085eb, 0x8f8ae000 + .word 0xbd3e5d51, 0x3f45fe7b + .word 0xbfd07b91, 0x21adb000 + .word 0xbd4520ba, 0x8e9b8a72 + .word 0xbfd07138, 0x604d5000 + .word 0xbd40c4e6, 0xd8b76a75 + .word 0xbfd066e1, 0x4adf4000 + .word 0xbd47f6bb, 0x351a4a71 + .word 0xbfd05c8b, 0xe0d96000 + .word 0xbd2ad0f1, 0xc77ccb58 + .word 0xbfd05238, 0x21b1a000 + .word 0xbd4ec752, 0xd39776ce + .word 0xbfd047e6, 0x0cde8000 + .word 0xbd2dbdf1, 0x0d397f3c + .word 0xbfd03d95, 0xa1d67000 + .word 0xbd3a1788, 0x0f236109 + .word 0xbfd03346, 0xe0106000 + .word 0xbcf89ff8, 0xa966395c + .word 0xbfd028f9, 0xc7035000 + .word 0xbd483851, 0x858333c0 + .word 0xbfd01eae, 0x5626c000 + .word 0xbd3a43dc, 0xfade85ae + .word 0xbfd01464, 0x8cf23000 + .word 0xbd4d082a, 0x567b45ed + .word 0xbfd00a1c, 0x6adda000 + .word 0xbd31cd8d, 0x688b9e18 + .word 0xbfcfffab, 0xdec23000 + .word 0xbd236a1a, 
0xdb4a75a4 + .word 0xbfcfeb22, 0x33ea0000 + .word 0xbd2f3418, 0xde00938b + .word 0xbfcfd69b, 0xd4240000 + .word 0xbd3641a8, 0xff2ccc45 + .word 0xbfcfc218, 0xbe620000 + .word 0xbd34bba4, 0x6f1cf6a0 + .word 0xbfcfad98, 0xf1965000 + .word 0xbd16ee92, 0x73d7c2de + .word 0xbfcf991c, 0x6cb3b000 + .word 0xbd1bcbec, 0xca0cdf30 + .word 0xbfcf84a3, 0x2ead7000 + .word 0xbd386af1, 0xd33d9e37 + .word 0xbfcf702d, 0x36777000 + .word 0xbd3bdf9a, 0xba663077 + .word 0xbfcf5bba, 0x83060000 + .word 0xbd341b25, 0x4a43da63 + .word 0xbfcf474b, 0x134df000 + .word 0xbd1146d8, 0x38821289 + .word 0xbfcf32de, 0xe6448000 + .word 0xbd2efb83, 0x625f1609 + .word 0xbfcf1e75, 0xfadf9000 + .word 0xbd37bcea, 0x6d13e04a + .word 0xbfcf0a10, 0x50157000 + .word 0xbd3dad5f, 0x7347f55b + .word 0xbfcef5ad, 0xe4dcf000 + .word 0xbd3fcbbd, 0xd53488e4 + .word 0xbfcee14e, 0xb82d6000 + .word 0xbd39d172, 0x6f4de261 + .word 0xbfceccf2, 0xc8fe9000 + .word 0xbd104e71, 0x7062a6fe + .word 0xbfceb89a, 0x1648b000 + .word 0xbd32e26f, 0x74808b80 + .word 0xbfcea444, 0x9f04a000 + .word 0xbd35e916, 0x63732a36 + .word 0xbfce8ff2, 0x622ba000 + .word 0xbd378e13, 0xd33981e5 + .word 0xbfce7ba3, 0x5eb77000 + .word 0xbd3c5422, 0x3b90d937 + .word 0xbfce6757, 0x93a26000 + .word 0xbd01dc8e, 0xc0554762 + .word 0xbfce530e, 0xffe71000 + .word 0xbcc21227, 0x6041f430 + .word 0xbfce3ec9, 0xa280c000 + .word 0xbd14bd96, 0x3fb80bff + .word 0xbfce2a87, 0x7a6b2000 + .word 0xbd382381, 0x7787081a + .word 0xbfce1648, 0x86a27000 + .word 0xbd36ce95, 0xba645527 + .word 0xbfce020c, 0xc6235000 + .word 0xbd356a7f, 0xa92375ee + .word 0xbfcdedd4, 0x37eae000 + .word 0xbd3e0125, 0x53595898 + .word 0xbfcdd99e, 0xdaf6d000 + .word 0xbd2fa273, 0x2c71522a + .word 0xbfcdc56c, 0xae452000 + .word 0xbd3eb37a, 0xa24e1817 + .word 0xbfcdb13d, 0xb0d48000 + .word 0xbd32806a, 0x847527e6 + .word 0xbfcd9d11, 0xe1a3f000 + .word 0xbd19da04, 0xfa9fa4c6 + .word 0xbfcd88e9, 0x3fb2f000 + .word 0xbd2141af, 0xfb96815e + .word 0xbfcd74c3, 0xca018000 + .word 0xbd393e4c, 0xfa17dce1 + 
.word 0xbfcd60a1, 0x7f903000 + .word 0xbd24523f, 0x207be58e + .word 0xbfcd4c82, 0x5f5fd000 + .word 0xbd3e3f04, 0x21df291e + .word 0xbfcd3866, 0x6871f000 + .word 0xbd21935e, 0x98ed9a88 + .word 0xbfcd244d, 0x99c85000 + .word 0xbd29cfb0, 0x0c890770 + .word 0xbfcd1037, 0xf2655000 + .word 0xbd3cf6b0, 0x31492124 + .word 0xbfccfc25, 0x714bd000 + .word 0xbd39fbd3, 0x34e03910 + .word 0xbfcce816, 0x157f1000 + .word 0xbd330faa, 0x2efb3576 + .word 0xbfccd409, 0xde02d000 + .word 0xbd132115, 0x39f1dcc5 + .word 0xbfccc000, 0xc9db3000 + .word 0xbd38a4a9, 0xe8aa1402 + .word 0xbfccabfa, 0xd80d0000 + .word 0xbd11e253, 0x70a10e3e + .word 0xbfcc97f8, 0x079d4000 + .word 0xbd23b161, 0xa8c6e6c5 + .word 0xbfcc83f8, 0x57919000 + .word 0xbd358740, 0x00c94a0f + .word 0xbfcc6ffb, 0xc6f00000 + .word 0xbd3ee138, 0xd3a69d43 + .word 0xbfcc5c02, 0x54bf2000 + .word 0xbd1d2f55, 0x73da163b + .word 0xbfcc480c, 0x0005c000 + .word 0xbd39a294, 0xd5e44e76 + .word 0xbfcc3418, 0xc7cb7000 + .word 0xbd234b5d, 0xe46e0516 + .word 0xbfcc2028, 0xab17f000 + .word 0xbd3368f8, 0x8d51c29d + .word 0xbfcc0c3b, 0xa8f3a000 + .word 0xbd3ac339, 0x48e7f56a + .word 0xbfcbf851, 0xc0675000 + .word 0xbd257be3, 0x67ef56a7 + .word 0xbfcbe46a, 0xf07c2000 + .word 0xbd350591, 0x910f505a + .word 0xbfcbd087, 0x383bd000 + .word 0xbd315a1d, 0xd355f6a5 + .word 0xbfcbbca6, 0x96b07000 + .word 0xbd3d0045, 0xea3f2624 + .word 0xbfcba8c9, 0x0ae4a000 + .word 0xbd3a32e7, 0xf44432da + .word 0xbfcb94ee, 0x93e36000 + .word 0xbd2f2a06, 0xe2db48a3 + .word 0xbfcb8117, 0x30b82000 + .word 0xbd1e9068, 0x3b9cd768 + .word 0xbfcb6d42, 0xe06ec000 + .word 0xbd302afe, 0x254869ba + .word 0xbfcb5971, 0xa213a000 + .word 0xbd39b50e, 0x83aa91df + .word 0xbfcb45a3, 0x74b39000 + .word 0xbd3701df, 0x22138fc3 + .word 0xbfcb31d8, 0x575bc000 + .word 0xbd3c794e, 0x562a63cb + .word 0xbfcb1e10, 0x4919e000 + .word 0xbd3fa006, 0x2597f33a + .word 0xbfcb0a4b, 0x48fc1000 + .word 0xbd368c69, 0x51e3338a + .word 0xbfcaf689, 0x5610d000 + .word 0xbd375beb, 0xba042b64 + .word 
0xbfcae2ca, 0x6f672000 + .word 0xbd37a8d5, 0xae54f550 + .word 0xbfcacf0e, 0x940e7000 + .word 0xbd2800e3, 0xa7e64e07 + .word 0xbfcabb55, 0xc3169000 + .word 0xbd1d6694, 0xd43acc9f + .word 0xbfcaa79f, 0xfb8fc000 + .word 0xbd3a8bf1, 0x1c0d8aaa + .word 0xbfca93ed, 0x3c8ad000 + .word 0xbd33c6de, 0x57d4ef4c + .word 0xbfca803d, 0x8518d000 + .word 0xbd3e09d1, 0x87f293cc + .word 0xbfca6c90, 0xd44b7000 + .word 0xbce38901, 0xf909e74b + .word 0xbfca58e7, 0x29348000 + .word 0xbd3e867d, 0x504551b1 + .word 0xbfca4540, 0x82e6a000 + .word 0xbd360a77, 0xc81f7171 + .word 0xbfca319c, 0xe074a000 + .word 0xbcbd7dba, 0xe650d5b3 + .word 0xbfca1dfc, 0x40f1b000 + .word 0xbd2fc3e1, 0xff6190fe + .word 0xbfca0a5e, 0xa371a000 + .word 0xbd322191, 0x988b2e31 + .word 0xbfc9f6c4, 0x07089000 + .word 0xbd29904d, 0x6865817a + .word 0xbfc9e32c, 0x6acb0000 + .word 0xbd3e5e8d, 0xbc0fb4ac + .word 0xbfc9cf97, 0xcdce0000 + .word 0xbd3d862f, 0x10c414e3 + .word 0xbfc9bc06, 0x2f26f000 + .word 0xbd3874d8, 0x1809e6d5 + .word 0xbfc9a877, 0x8deba000 + .word 0xbd3470fa, 0x3efec390 + .word 0xbfc994eb, 0xe9325000 + .word 0xbd2a9c9d, 0x28bcbe25 + .word 0xbfc98163, 0x4011a000 + .word 0xbd34eadd, 0x9e9045e2 + .word 0xbfc96ddd, 0x91a0b000 + .word 0xbd32ac6b, 0x11cf6f2b + .word 0xbfc95a5a, 0xdcf70000 + .word 0xbd07f228, 0x58a0ff6f + .word 0xbfc946db, 0x212c6000 + .word 0xbd36cf76, 0x74ca02ba + .word 0xbfc9335e, 0x5d594000 + .word 0xbd33115c, 0x3abd47da + .word 0xbfc91fe4, 0x90965000 + .word 0xbd30369c, 0xf30a1c32 + .word 0xbfc90c6d, 0xb9fcb000 + .word 0xbd39b282, 0xa239ca0d + .word 0xbfc8f8f9, 0xd8a60000 + .word 0xbd2af16c, 0x8230ceca + .word 0xbfc8e588, 0xebac2000 + .word 0xbd3b7d5c, 0xab2d1140 + .word 0xbfc8d21a, 0xf2299000 + .word 0xbd14d652, 0x74757226 + .word 0xbfc8beaf, 0xeb38f000 + .word 0xbd3d1855, 0x6aa2da66 + .word 0xbfc8ab47, 0xd5f5a000 + .word 0xbd187eb8, 0x505d468f + .word 0xbfc897e2, 0xb17b1000 + .word 0xbd334a64, 0x63f9a0b1 + .word 0xbfc88480, 0x7ce56000 + .word 0xbd1c77ce, 0xf4a8712c + .word 0xbfc87121, 
0x3750e000 + .word 0xbd3328eb, 0x42f9af75 + .word 0xbfc85dc4, 0xdfda7000 + .word 0xbd3785ab, 0x048301ba + .word 0xbfc84a6b, 0x759f5000 + .word 0xbd02ebfe, 0xa903cfb8 + .word 0xbfc83714, 0xf7bd0000 + .word 0xbd2ed83a, 0xf85a2ced + .word 0xbfc823c1, 0x6551a000 + .word 0xbd1e0ddb, 0x9a631e83 + .word 0xbfc81070, 0xbd7b9000 + .word 0xbcafe80a, 0x6682e646 + .word 0xbfc7fd22, 0xff599000 + .word 0xbd3a9d05, 0x02ea120c + .word 0xbfc7e9d8, 0x2a0b0000 + .word 0xbd116849, 0xfa40e4f0 + .word 0xbfc7d690, 0x3caf5000 + .word 0xbd359fca, 0x741e7f15 + .word 0xbfc7c34b, 0x3666a000 + .word 0xbd3175c9, 0x81b45e10 + .word 0xbfc7b009, 0x16515000 + .word 0xbd146280, 0xd3e606a3 + .word 0xbfc79cc9, 0xdb902000 + .word 0xbd1e00d0, 0x375e70bd + .word 0xbfc7898d, 0x85444000 + .word 0xbd38e67b, 0xe3dbaf3f + .word 0xbfc77654, 0x128f6000 + .word 0xbd0274ba, 0xdf268e7c + .word 0xbfc7631d, 0x82935000 + .word 0xbd350c41, 0x1c1d060f + .word 0xbfc74fe9, 0xd4729000 + .word 0xbd249736, 0xd91da11e + .word 0xbfc73cb9, 0x074fd000 + .word 0xbd04cab7, 0x97ffd2cc + .word 0xbfc7298b, 0x1a4e3000 + .word 0xbd15accc, 0xe43ce383 + .word 0xbfc71660, 0x0c914000 + .word 0xbce51b15, 0x7cec3838 + .word 0xbfc70337, 0xdd3ce000 + .word 0xbd206a17, 0x8a5eab9c + .word 0xbfc6f012, 0x8b756000 + .word 0xbd357739, 0x0d31ef0f + .word 0xbfc6dcf0, 0x165f8000 + .word 0xbd1b9566, 0x9a33e4c6 + .word 0xbfc6c9d0, 0x7d203000 + .word 0xbd3f8e30, 0x14099349 + .word 0xbfc6b6b3, 0xbedd1000 + .word 0xbd1a8f73, 0xa64d3813 + .word 0xbfc6a399, 0xdabbd000 + .word 0xbd1c1b2c, 0x6657a967 + .word 0xbfc69082, 0xcfe2b000 + .word 0xbd2da1e7, 0x20b79662 + .word 0xbfc67d6e, 0x9d785000 + .word 0xbd2dc2ef, 0x9eb1f25a + .word 0xbfc66a5d, 0x42a3a000 + .word 0xbd3a6893, 0x3aa00298 + .word 0xbfc6574e, 0xbe8c1000 + .word 0xbd19cf8b, 0x2c3c2e78 + .word 0xbfc64443, 0x10594000 + .word 0xbd22f605, 0xb0281916 + .word 0xbfc6313a, 0x37335000 + .word 0xbd3aec82, 0xac378565 + .word 0xbfc61e34, 0x3242d000 + .word 0xbd32bb2d, 0x97ecd861 + .word 0xbfc60b31, 0x00b09000 + 
.word 0xbd21d752, 0x6cee0fd8 + .word 0xbfc5f830, 0xa1a5c000 + .word 0xbd352268, 0x98ffc1bc + .word 0xbfc5e533, 0x144c1000 + .word 0xbd2c63e8, 0x189ade2b + .word 0xbfc5d238, 0x57cd7000 + .word 0xbd23530a, 0x5ba6e7ac + .word 0xbfc5bf40, 0x6b543000 + .word 0xbd3b63f7, 0x0525d9f9 + .word 0xbfc5ac4b, 0x4e0b2000 + .word 0xbd351709, 0xd7275f36 + .word 0xbfc59958, 0xff1d5000 + .word 0xbd178be9, 0xa258d7eb + .word 0xbfc58669, 0x7db62000 + .word 0xbd39e26c, 0x65e8cb44 + .word 0xbfc5737c, 0xc9018000 + .word 0xbd39baa7, 0xa6b887f6 + .word 0xbfc56092, 0xe02ba000 + .word 0xbd245850, 0x06899d98 + .word 0xbfc54dab, 0xc2610000 + .word 0xbd2746fe, 0xe5c8d0d8 + .word 0xbfc53ac7, 0x6ece9000 + .word 0xbd39ca8a, 0x2a8725d5 + .word 0xbfc527e5, 0xe4a1b000 + .word 0xbd2633e8, 0xe5697dc7 + .word 0xbfc51507, 0x2307f000 + .word 0xbd306b11, 0xecc0d77b + .word 0xbfc5022b, 0x292f6000 + .word 0xbd348a05, 0xff36a25b + .word 0xbfc4ef51, 0xf6466000 + .word 0xbd3bc83d, 0x21c8cd53 + .word 0xbfc4dc7b, 0x897bc000 + .word 0xbd0c79b6, 0x0ae1ff0f + .word 0xbfc4c9a7, 0xe1fe8000 + .word 0xbcff39f7, 0x50dbbb30 + .word 0xbfc4b6d6, 0xfefe2000 + .word 0xbd1522ec, 0xf56e7952 + .word 0xbfc4a408, 0xdfaa7000 + .word 0xbd33b41f, 0x86e5dd72 + .word 0xbfc4913d, 0x8333b000 + .word 0xbd258379, 0x54fdb678 + .word 0xbfc47e74, 0xe8ca5000 + .word 0xbd3ef836, 0xa48fdfcf + .word 0xbfc46baf, 0x0f9f5000 + .word 0xbd3b6d8c, 0xbe1bdef9 + .word 0xbfc458eb, 0xf6e3f000 + .word 0xbcf5c0fe, 0x1f2b8094 + .word 0xbfc4462b, 0x9dc9b000 + .word 0xbd1ede9d, 0x63b93e7a + .word 0xbfc4336e, 0x03829000 + .word 0xbd3ac363, 0xa859c2af + .word 0xbfc420b3, 0x2740f000 + .word 0xbd3ba75f, 0x4de97ddf + .word 0xbfc40dfb, 0x08378000 + .word 0xbc9bb453, 0xc4f7b685 + .word 0xbfc3fb45, 0xa5992000 + .word 0xbd319713, 0xc0cae559 + .word 0xbfc3e892, 0xfe995000 + .word 0xbd2b6aad, 0x914d5249 + .word 0xbfc3d5e3, 0x126bc000 + .word 0xbd13fb2f, 0x85096c4b + .word 0xbfc3c335, 0xe0447000 + .word 0xbd3ae77d, 0x114a8b5f + .word 0xbfc3b08b, 0x6757f000 + .word 
0xbd15485c, 0x35b37c15 + .word 0xbfc39de3, 0xa6dae000 + .word 0xbd284fc7, 0x32ce95f1 + .word 0xbfc38b3e, 0x9e027000 + .word 0xbd21e21f, 0x5747d00e + .word 0xbfc3789c, 0x4c041000 + .word 0xbd19b4f4, 0x44d31e60 + .word 0xbfc365fc, 0xb0159000 + .word 0xbcc62fa8, 0x234b7289 + .word 0xbfc3535f, 0xc96d1000 + .word 0xbd013f1c, 0x3b1fab68 + .word 0xbfc340c5, 0x97411000 + .word 0xbd20b846, 0x104c58f3 + .word 0xbfc32e2e, 0x18c86000 + .word 0xbd3e6220, 0x6c327115 + .word 0xbfc31b99, 0x4d3a4000 + .word 0xbd3f098e, 0xe3a50810 + .word 0xbfc30907, 0x33ce3000 + .word 0xbd33f323, 0x7c4d853e + .word 0xbfc2f677, 0xcbbc0000 + .word 0xbd352b30, 0x2160f40d + .word 0xbfc2e3eb, 0x143bf000 + .word 0xbd218910, 0x2710016e + .word 0xbfc2d161, 0x0c868000 + .word 0xbd039d6c, 0xcb81b4a1 + .word 0xbfc2bed9, 0xb3d49000 + .word 0xbd095245, 0x4a40d26b + .word 0xbfc2ac55, 0x095f5000 + .word 0xbd38b2e6, 0x4bce4dd6 + .word 0xbfc299d3, 0x0c606000 + .word 0xbd3d4d00, 0x79dc08d9 + .word 0xbfc28753, 0xbc11a000 + .word 0xbd37494e, 0x359302e6 + .word 0xbfc274d7, 0x17ad4000 + .word 0xbd38a65b, 0xa0967592 + .word 0xbfc2625d, 0x1e6dd000 + .word 0xbd3ead69, 0xd0f61c28 + .word 0xbfc24fe5, 0xcf8e4000 + .word 0xbd318f96, 0x26b10d30 + .word 0xbfc23d71, 0x2a49c000 + .word 0xbd100d23, 0x8fd3df5c + .word 0xbfc22aff, 0x2ddbd000 + .word 0xbd32e1ea, 0xca7cb4f0 + .word 0xbfc2188f, 0xd9807000 + .word 0xbd131786, 0x02bce3fb + .word 0xbfc20623, 0x2c73c000 + .word 0xbd2351a5, 0x02bb95f5 + .word 0xbfc1f3b9, 0x25f25000 + .word 0xbd3a822c, 0x593df273 + .word 0xbfc1e151, 0xc5391000 + .word 0xbd38e5f5, 0xf578d80e + .word 0xbfc1ceed, 0x09853000 + .word 0xbd2d47c7, 0x8dcdaa0e + .word 0xbfc1bc8a, 0xf2143000 + .word 0xbd2acd64, 0xfb955458 + .word 0xbfc1aa2b, 0x7e23f000 + .word 0xbd2ca78e, 0x44389934 + .word 0xbfc197ce, 0xacf2a000 + .word 0xbd31ab14, 0x4caf6736 + .word 0xbfc18574, 0x7dbec000 + .word 0xbd3e6744, 0x45bd9b49 + .word 0xbfc1731c, 0xefc74000 + .word 0xbcfde27c, 0xd98317fd + .word 0xbfc160c8, 0x024b2000 + .word 0xbd2ec2d2, 
0xa9009e3d + .word 0xbfc14e75, 0xb489f000 + .word 0xbd3fdf84, 0x66dfe192 + .word 0xbfc13c26, 0x05c39000 + .word 0xbd318501, 0x13584d7c + .word 0xbfc129d8, 0xf5381000 + .word 0xbd1d77cc, 0x415a172e + .word 0xbfc1178e, 0x8227e000 + .word 0xbd21ef78, 0xce2d07f2 + .word 0xbfc10546, 0xabd3d000 + .word 0xbd00189b, 0x51d162e8 + .word 0xbfc0f301, 0x717cf000 + .word 0xbcff64bb, 0xe51793b4 + .word 0xbfc0e0be, 0xd264a000 + .word 0xbd3bafe2, 0x3aeb549c + .word 0xbfc0ce7e, 0xcdccc000 + .word 0xbd14652d, 0xabff5447 + .word 0xbfc0bc41, 0x62f73000 + .word 0xbd36ca04, 0x73bd9c29 + .word 0xbfc0aa06, 0x91267000 + .word 0xbd2755cc, 0x51f9bdae + .word 0xbfc097ce, 0x579d2000 + .word 0xbce33742, 0xda652881 + .word 0xbfc08598, 0xb59e3000 + .word 0xbd340d11, 0x47fb37ea + .word 0xbfc07365, 0xaa6d1000 + .word 0xbd16e172, 0x43f1226a + .word 0xbfc06135, 0x354d4000 + .word 0xbd363046, 0x28340ee9 + .word 0xbfc04f07, 0x5582d000 + .word 0xbd1a3d31, 0x4c780403 + .word 0xbfc03cdc, 0x0a51e000 + .word 0xbd381a9c, 0xf169fc5c + .word 0xbfc02ab3, 0x52ff2000 + .word 0xbd27ce63, 0x5d569b2b + .word 0xbfc0188d, 0x2ecf6000 + .word 0xbd03f965, 0x1cff9dfe + .word 0xbfc00669, 0x9d07c000 + .word 0xbd3b8775, 0x304686e1 + .word 0xbfbfe891, 0x39dbd000 + .word 0xbd159653, 0x60bdea07 + .word 0xbfbfc454, 0x5b8f0000 + .word 0xbd29cba7, 0xd5591204 + .word 0xbfbfa01c, 0x9db57000 + .word 0xbd29c32b, 0x816dd634 + .word 0xbfbf7be9, 0xfedbf000 + .word 0xbd2bcbe8, 0xb535310e + .word 0xbfbf57bc, 0x7d900000 + .word 0xbd176a6c, 0x9ea8b04e + .word 0xbfbf3394, 0x185fa000 + .word 0xbd1ea383, 0x09d097b7 + .word 0xbfbf0f70, 0xcdd99000 + .word 0xbd0718fb, 0x613960ee + .word 0xbfbeeb52, 0x9c8d1000 + .word 0xbd0b6260, 0x903c8f99 + .word 0xbfbec739, 0x830a1000 + .word 0xbcf1fcba, 0x80cdd0fe + .word 0xbfbea325, 0x7fe10000 + .word 0xbd2ef30d, 0x47e4627a + .word 0xbfbe7f16, 0x91a32000 + .word 0xbd2a7c74, 0xc871080d + .word 0xbfbe5b0c, 0xb6e22000 + .word 0xbd109021, 0x3b34d95f + .word 0xbfbe3707, 0xee304000 + .word 0xbd20f684, 0xe6766abd + 
.word 0xbfbe1308, 0x36208000 + .word 0xbd21aeea, 0xf90019f9 + .word 0xbfbdef0d, 0x8d466000 + .word 0xbd2b715f, 0x7da2cb17 + .word 0xbfbdcb17, 0xf2361000 + .word 0xbd226a0a, 0x5ba47956 + .word 0xbfbda727, 0x63844000 + .word 0xbd1a8940, 0x1fa71733 + .word 0xbfbd833b, 0xdfc64000 + .word 0xbd24805c, 0x07408695 + .word 0xbfbd5f55, 0x65921000 + .word 0xbcec4739, 0x830a8d2a + .word 0xbfbd3b73, 0xf37e1000 + .word 0xbd2f3501, 0x33da5007 + .word 0xbfbd1797, 0x88219000 + .word 0xbd0b219d, 0xaf7df76b + .word 0xbfbcf3c0, 0x22142000 + .word 0xbce9d2b6, 0x6ddd996f + .word 0xbfbccfed, 0xbfee1000 + .word 0xbd0d4119, 0x7f3892ad + .word 0xbfbcac20, 0x60484000 + .word 0xbd2d53ed, 0xcc4f420b + .word 0xbfbc8858, 0x01bc4000 + .word 0xbd2646d1, 0xc65aacd3 + .word 0xbfbc6494, 0xa2e41000 + .word 0xbd214bd1, 0x564189cb + .word 0xbfbc40d6, 0x425a5000 + .word 0xbd296224, 0x3a3261b9 + .word 0xbfbc1d1c, 0xdeba5000 + .word 0xbd02f7e7, 0x23a02373 + .word 0xbfbbf968, 0x769fc000 + .word 0xbd24218c, 0x8d824283 + .word 0xbfbbd5b9, 0x08a72000 + .word 0xbd2236aa, 0x3ae84f31 + .word 0xbfbbb20e, 0x936d6000 + .word 0xbd22e8af, 0x9574c8e4 + .word 0xbfbb8e69, 0x15901000 + .word 0xbd22bef7, 0xf208fbd9 + .word 0xbfbb6ac8, 0x8dad5000 + .word 0xbd2637bf, 0xea044b8d + .word 0xbfbb472c, 0xfa63e000 + .word 0xbd1246f5, 0xc7f4588b + .word 0xbfbb2396, 0x5a52f000 + .word 0xbd2e009b, 0x115ec8f8 + .word 0xbfbb0004, 0xac1a8000 + .word 0xbd1aaf97, 0x037f2b35 + .word 0xbfbadc77, 0xee5ae000 + .word 0xbd25189b, 0xec79cdf7 + .word 0xbfbab8f0, 0x1fb52000 + .word 0xbd27f69d, 0xd23d3ac2 + .word 0xbfba956d, 0x3ecad000 + .word 0xbd2cc6f2, 0x9805895f + .word 0xbfba71ef, 0x4a3e2000 + .word 0xbd1bbc94, 0x7b201fbf + .word 0xbfba4e76, 0x40b1b000 + .word 0xbd286f52, 0x51aefe0e + .word 0xbfba2b02, 0x20c8e000 + .word 0xbd17d329, 0x8e6b7dbf + .word 0xbfba0792, 0xe9277000 + .word 0xbd2958c6, 0x4d94ab90 + .word 0xbfb9e428, 0x9871e000 + .word 0xbd22c483, 0xd0942b9c + .word 0xbfb9c0c3, 0x2d4d2000 + .word 0xbd1520fd, 0x85f1e661 + .word 
0xbfb99d62, 0xa65eb000 + .word 0xbd22dd17, 0xd834450a + .word 0xbfb97a07, 0x024cb000 + .word 0xbd2ce867, 0xd19bed86 + .word 0xbfb956b0, 0x3fbdd000 + .word 0xbd286fb6, 0x03fe1b67 + .word 0xbfb9335e, 0x5d594000 + .word 0xbd23115c, 0x3abd47da + .word 0xbfb91011, 0x59c6c000 + .word 0xbd27af17, 0x9df80b59 + .word 0xbfb8ecc9, 0x33aeb000 + .word 0xbd1ba18c, 0x833010ab + .word 0xbfb8c985, 0xe9b9e000 + .word 0xbd290791, 0x0379ff94 + .word 0xbfb8a647, 0x7a91d000 + .word 0xbd285181, 0x5f37adbf + .word 0xbfb8830d, 0xe4e08000 + .word 0xbd05f60b, 0x79c8f66a + .word 0xbfb85fd9, 0x27506000 + .word 0xbd248fcf, 0xccd1e7c7 + .word 0xbfb83ca9, 0x408ca000 + .word 0xbd2326c8, 0xd744c7d1 + .word 0xbfb8197e, 0x2f40e000 + .word 0xbd0f80dc, 0xf96ffdf7 + .word 0xbfb7f657, 0xf2194000 + .word 0xbd21bef9, 0x43faf4d2 + .word 0xbfb7d336, 0x87c29000 + .word 0xbd0e4461, 0xf3833832 + .word 0xbfb7b019, 0xeeea0000 + .word 0xbd275649, 0xaee848d4 + .word 0xbfb78d02, 0x263d8000 + .word 0xbd069b57, 0x94b69fb7 + .word 0xbfb769ef, 0x2c6b5000 + .word 0xbd1a35d8, 0xc73b6a55 + .word 0xbfb746e1, 0x00226000 + .word 0xbd2db25d, 0x23c3bc5b + .word 0xbfb723d7, 0xa0123000 + .word 0xbd2c3cbb, 0x84fef08e + .word 0xbfb700d3, 0x0aeac000 + .word 0xbcec1e8d, 0xa99ded32 + .word 0xbfb6ddd3, 0x3f5c7000 + .word 0xbd2aeb06, 0x82906a06 + .word 0xbfb6bad8, 0x3c188000 + .word 0xbd0daf3c, 0xc08926ae + .word 0xbfb697e1, 0xffd06000 + .word 0xbd296c57, 0x15a12bb6 + .word 0xbfb674f0, 0x89365000 + .word 0xbd24f332, 0x993a6604 + .word 0xbfb65203, 0xd6fcf000 + .word 0xbd1ea006, 0x8199326b + .word 0xbfb62f1b, 0xe7d77000 + .word 0xbd1d0cd5, 0x02538764 + .word 0xbfb60c38, 0xba799000 + .word 0xbd1172c4, 0x3aec1296 + .word 0xbfb5e95a, 0x4d979000 + .word 0xbcfcb7ce, 0x1d171711 + .word 0xbfb5c680, 0x9fe63000 + .word 0xbd23c479, 0x935581b6 + .word 0xbfb5a3ab, 0xb01ad000 + .word 0xbd2c4ae9, 0x3cd5f430 + .word 0xbfb580db, 0x7ceb5000 + .word 0xbd1c07f6, 0xcbe60d53 + .word 0xbfb55e10, 0x050e0000 + .word 0xbd0c1d74, 0x0c53c72e + .word 0xbfb53b49, 
0x4739c000 + .word 0xbd221868, 0x5306aaa5 + .word 0xbfb51887, 0x42261000 + .word 0xbd0850ec, 0xb12c59ec + .word 0xbfb4f5c9, 0xf48ad000 + .word 0xbd0580c1, 0x2c81f8fd + .word 0xbfb4d311, 0x5d207000 + .word 0xbd2d58bb, 0x4fa163c2 + .word 0xbfb4b05d, 0x7aa01000 + .word 0xbd07029c, 0x6ef93715 + .word 0xbfb48dae, 0x4bc31000 + .word 0xbcb85b20, 0x8c200bea + .word 0xbfb46b03, 0xcf437000 + .word 0xbd2787a5, 0x2f0f6296 + .word 0xbfb4485e, 0x03dbd000 + .word 0xbd2f5a8d, 0xd1a4d56e + .word 0xbfb425bc, 0xe8474000 + .word 0xbd2365ac, 0x5219daef + .word 0xbfb40320, 0x7b414000 + .word 0xbd26fd84, 0xaa8157c0 + .word 0xbfb3e088, 0xbb85f000 + .word 0xbd248068, 0xbdc331fa + .word 0xbfb3bdf5, 0xa7d1e000 + .word 0xbd2cc85e, 0xa5db4ed7 + .word 0xbfb39b67, 0x3ee24000 + .word 0xbd0a759b, 0xa99f5667 + .word 0xbfb378dd, 0x7f749000 + .word 0xbd1c5044, 0xa3c7eb28 + .word 0xbfb35658, 0x68470000 + .word 0xbd2464d7, 0x0035b508 + .word 0xbfb333d7, 0xf8183000 + .word 0xbd2e96d4, 0x957e477c + .word 0xbfb3115c, 0x2da75000 + .word 0xbd25bc37, 0x00651448 + .word 0xbfb2eee5, 0x07b40000 + .word 0xbd08081e, 0xdd77c860 + .word 0xbfb2cc72, 0x84fe5000 + .word 0xbd2e38bd, 0x0cb32a28 + .word 0xbfb2aa04, 0xa4471000 + .word 0xbd1e922e, 0xa2c72d06 + .word 0xbfb2879b, 0x644f5000 + .word 0xbd1752b6, 0xf65943ec + .word 0xbfb26536, 0xc3d8c000 + .word 0xbd0b4bac, 0x097c5ba3 + .word 0xbfb242d6, 0xc1a58000 + .word 0xbd24b838, 0xac648481 + .word 0xbfb2207b, 0x5c785000 + .word 0xbd127633, 0xf0431efb + .word 0xbfb1fe24, 0x93144000 + .word 0xbd27a374, 0xe1a7c696 + .word 0xbfb1dbd2, 0x643d1000 + .word 0xbd221649, 0xb2ef8928 + .word 0xbfb1b984, 0xceb6e000 + .word 0xbd121a31, 0x2f307601 + .word 0xbfb1973b, 0xd1465000 + .word 0xbd159b45, 0x53e4c2cb + .word 0xbfb174f7, 0x6ab09000 + .word 0xbcf71031, 0x7ee2e483 + .word 0xbfb152b7, 0x99bb3000 + .word 0xbd299135, 0xbe3f3df6 + .word 0xbfb1307c, 0x5d2c7000 + .word 0xbd2357c9, 0xfa3dbf1f + .word 0xbfb10e45, 0xb3cae000 + .word 0xbd20612d, 0xaf6b9737 + .word 0xbfb0ec13, 0x9c5da000 + 
.word 0xbd180247, 0xe54ebd73 + .word 0xbfb0c9e6, 0x15ac4000 + .word 0xbd2c2da8, 0x0974d976 + .word 0xbfb0a7bd, 0x1e7ef000 + .word 0xbd20f926, 0xcdf8dfb4 + .word 0xbfb08598, 0xb59e3000 + .word 0xbd240d11, 0x47fb37ea + .word 0xbfb06378, 0xd9d32000 + .word 0xbd104990, 0x672b0729 + .word 0xbfb0415d, 0x89e74000 + .word 0xbd1111c0, 0x5cf1d753 + .word 0xbfb01f46, 0xc4a4a000 + .word 0xbd11157c, 0x89ecf845 + .word 0xbfaffa69, 0x11ab9000 + .word 0xbcf80464, 0xc1c0d47a + .word 0xbfafb64d, 0xaa8b6000 + .word 0xbd13830d, 0xaeb373e0 + .word 0xbfaf723b, 0x517fc000 + .word 0xbd048a79, 0x154f796a + .word 0xbfaf2e32, 0x04209000 + .word 0xbcfb9ba8, 0x2f4d6e7f + .word 0xbfaeea31, 0xc006b000 + .word 0xbd10f760, 0xd81b6242 + .word 0xbfaea63a, 0x82cc0000 + .word 0xbd19f144, 0x08e210e7 + .word 0xbfae624c, 0x4a0b5000 + .word 0xbd1c368e, 0x2e6265dd + .word 0xbfae1e67, 0x13606000 + .word 0xbd1a0d3c, 0xb7b141db + .word 0xbfadda8a, 0xdc67e000 + .word 0xbd1c9ca7, 0x364c37a2 + .word 0xbfad96b7, 0xa2bf8000 + .word 0xbd12eb81, 0xf49d3d78 + .word 0xbfad52ed, 0x6405d000 + .word 0xbd10de8b, 0x575910a6 + .word 0xbfad0f2c, 0x1dda6000 + .word 0xbd0c6fc7, 0x04385ddf + .word 0xbfaccb73, 0xcdddb000 + .word 0xbcf65c36, 0xe09f5fe2 + .word 0xbfac87c4, 0x71b12000 + .word 0xbd13799a, 0xf29d923d + .word 0xbfac441e, 0x06f72000 + .word 0xbd153c7d, 0x26143455 + .word 0xbfac0080, 0x8b530000 + .word 0xbd003c05, 0x63baea2e + .word 0xbfabbceb, 0xfc68f000 + .word 0xbd0080f2, 0xe79d07ab + .word 0xbfab7960, 0x57de2000 + .word 0xbd0f5af1, 0xf7b24d0f + .word 0xbfab35dd, 0x9b58b000 + .word 0xbd1559d3, 0x5b3d5639 + .word 0xbfaaf263, 0xc47fb000 + .word 0xbd085458, 0x172a97ad + .word 0xbfaaaef2, 0xd0fb1000 + .word 0xbcdf8346, 0xa77685c1 + .word 0xbfaa6b8a, 0xbe73a000 + .word 0xbd1e988d, 0x46e25c90 + .word 0xbfaa282b, 0x8a936000 + .word 0xbce70a67, 0xf10371d7 + .word 0xbfa9e4d5, 0x3304e000 + .word 0xbcfec4a6, 0x991acef2 + .word 0xbfa9a187, 0xb573d000 + .word 0xbd1cf746, 0xc4ec9bca + .word 0xbfa95e43, 0x0f8ce000 + .word 
0xbd01774c, 0x225e2c8d + .word 0xbfa91b07, 0x3efd7000 + .word 0xbcf8a0eb, 0x0224d5a9 + .word 0xbfa8d7d4, 0x4173f000 + .word 0xbcf24a7b, 0x7a089116 + .word 0xbfa894aa, 0x149fb000 + .word 0xbcfa19a8, 0xbe97660a + .word 0xbfa85188, 0xb630f000 + .word 0xbcca0544, 0x165f80aa + .word 0xbfa80e70, 0x23d8c000 + .word 0xbd1988fa, 0x435d02ec + .word 0xbfa7cb60, 0x5b495000 + .word 0xbcfc8af3, 0x69d6d0f4 + .word 0xbfa78859, 0x5a357000 + .word 0xbd0ee9e5, 0xef898b68 + .word 0xbfa7455b, 0x1e511000 + .word 0xbcfb28ce, 0xb91e296d + .word 0xbfa70265, 0xa550e000 + .word 0xbd0ddc83, 0xb80a8c63 + .word 0xbfa6bf78, 0xecea9000 + .word 0xbd163cc0, 0x0f16f7e9 + .word 0xbfa67c94, 0xf2d4b000 + .word 0xbd16b082, 0x09f3282f + .word 0xbfa639b9, 0xb4c6b000 + .word 0xbd14f37b, 0x6b7f9673 + .word 0xbfa5f6e7, 0x3078e000 + .word 0xbd1f6f4a, 0xffdb6d69 + .word 0xbfa5b41d, 0x63a49000 + .word 0xbd0abcc4, 0x7e8a0c20 + .word 0xbfa5715c, 0x4c03c000 + .word 0xbd1dddc8, 0x80ee2760 + .word 0xbfa52ea3, 0xe7519000 + .word 0xbd16ff79, 0x68012363 + .word 0xbfa4ebf4, 0x3349e000 + .word 0xbcf37578, 0x4620c465 + .word 0xbfa4a94d, 0x2da96000 + .word 0xbd18ace0, 0x8a56ed78 + .word 0xbfa466ae, 0xd42de000 + .word 0xbcff4c64, 0x521016be + .word 0xbfa42419, 0x2495d000 + .word 0xbd05f329, 0x88dd64a6 + .word 0xbfa3e18c, 0x1ca0a000 + .word 0xbd1d23b4, 0xfdb8de39 + .word 0xbfa39f07, 0xba0eb000 + .word 0xbd1ac4a7, 0x590b95de + .word 0xbfa35c8b, 0xfaa13000 + .word 0xbccabeaf, 0x7cf59aac + .word 0xbfa31a18, 0xdc1a1000 + .word 0xbd07dd58, 0xd860ceab + .word 0xbfa2d7ae, 0x5c3c5000 + .word 0xbd175b1a, 0xe989664c + .word 0xbfa2954c, 0x78cbc000 + .word 0xbd1c3526, 0x570c1572 + .word 0xbfa252f3, 0x2f8d1000 + .word 0xbd107d35, 0xc0436cf5 + .word 0xbfa210a2, 0x7e45c000 + .word 0xbcf8ceca, 0x131bef9c + .word 0xbfa1ce5a, 0x62bc3000 + .word 0xbd04e63c, 0x6c6fccc5 + .word 0xbfa18c1a, 0xdab7b000 + .word 0xbcf22af4, 0xd32f2ac0 + .word 0xbfa149e3, 0xe4005000 + .word 0xbd1519d5, 0x96fa5c0c + .word 0xbfa107b5, 0x7c5f2000 + .word 0xbd152b81, 
0xe94af0a6 + .word 0xbfa0c58f, 0xa19df000 + .word 0xbd155317, 0x53a74377 + .word 0xbfa08372, 0x51877000 + .word 0xbd1cc91e, 0xb2004222 + .word 0xbfa0415d, 0x89e74000 + .word 0xbd0111c0, 0x5cf1d753 + .word 0xbf9ffea2, 0x91136000 + .word 0xbd04dd01, 0xd7640dc2 + .word 0xbf9f7a9b, 0x16782000 + .word 0xbd00ab64, 0x9c6f9f5c + .word 0xbf9ef6a4, 0x9f98f000 + .word 0xbd0671e4, 0xe8f151a3 + .word 0xbf9e72bf, 0x2813c000 + .word 0xbd0ca2ba, 0xda22cae5 + .word 0xbf9deeea, 0xab883000 + .word 0xbd0c6e1d, 0x7741b591 + .word 0xbf9d6b27, 0x25979000 + .word 0xbd000425, 0x79723e3d + .word 0xbf9ce774, 0x91e4d000 + .word 0xbd00d7ce, 0xf3d25198 + .word 0xbf9c63d2, 0xec14a000 + .word 0xbd05e318, 0xfe7acbca + .word 0xbf9be042, 0x2fcd6000 + .word 0xbd01ec42, 0x87f2c9ca + .word 0xbf9b5cc2, 0x58b71000 + .word 0xbd01cc23, 0x715f7fd0 + .word 0xbf9ad953, 0x627b6000 + .word 0xbd0ab5a1, 0x1a805efd + .word 0xbf9a55f5, 0x48c5c000 + .word 0xbcf0fc7b, 0x0697e1b5 + .word 0xbf99d2a8, 0x07432000 + .word 0xbcf7cf80, 0x538b441e + .word 0xbf994f6b, 0x99a24000 + .word 0xbcf1d5ef, 0x96cf7f51 + .word 0xbf98cc3f, 0xfb937000 + .word 0xbd050394, 0x323f2c7a + .word 0xbf984925, 0x28c8c000 + .word 0xbd057d17, 0x3697cf30 + .word 0xbf97c61b, 0x1cf5d000 + .word 0xbd0dc0dc, 0x1ed96ee4 + .word 0xbf974321, 0xd3d00000 + .word 0xbcfb4a69, 0x0fe94778 + .word 0xbf96c039, 0x490e3000 + .word 0xbcff7b34, 0x02fd59ca + .word 0xbf963d61, 0x78690000 + .word 0xbd07abf3, 0x89596542 + .word 0xbf95ba9a, 0x5d9ac000 + .word 0xbcacbb84, 0xe08d78ac + .word 0xbf9537e3, 0xf45f3000 + .word 0xbcf592ce, 0x96bf9299 + .word 0xbf94b53e, 0x3873e000 + .word 0xbd0b6ee9, 0xbca265c1 + .word 0xbf9432a9, 0x25980000 + .word 0xbd098139, 0x928637fe + .word 0xbf93b024, 0xb78c5000 + .word 0xbcf9a5e2, 0x3a02f82a + .word 0xbf932db0, 0xea132000 + .word 0xbd0c432c, 0x4c2257ef + .word 0xbf92ab4d, 0xb8f09000 + .word 0xbcf82c84, 0xa532c74c + .word 0xbf9228fb, 0x1fea2000 + .word 0xbd0c4f8c, 0xa12647f9 + .word 0xbf91a6b9, 0x1ac73000 + .word 0xbcec30e9, 0xb54e2dd6 + 
.word 0xbf912487, 0xa5507000 + .word 0xbd0edf2f, 0xf6a59c94 + .word 0xbf90a266, 0xbb508000 + .word 0xbcfa5be1, 0x7c2ec500 + .word 0xbf902056, 0x58935000 + .word 0xbd008e93, 0xe47420b7 + .word 0xbf8f3cac, 0xf1cd3000 + .word 0xbcf64d83, 0xc9a6875d + .word 0xbf8e38ce, 0x30333000 + .word 0xbcc0bbae, 0x12ebf308 + .word 0xbf8d3510, 0x63fa4000 + .word 0xbcea8d92, 0xdf000beb + .word 0xbf8c3173, 0x84c75000 + .word 0xbcfe0cc0, 0x31046026 + .word 0xbf8b2df7, 0x8a428000 + .word 0xbcf4c647, 0xa5d4542f + .word 0xbf8a2a9c, 0x6c170000 + .word 0xbce18876, 0x525971be + .word 0xbf892762, 0x21f33000 + .word 0xbcd456ba, 0x9344a27f + .word 0xbf882448, 0xa388a000 + .word 0xbcd55104, 0xb16137f1 + .word 0xbf87214f, 0xe88c0000 + .word 0xbcf27275, 0xd7338080 + .word 0xbf861e77, 0xe8b53000 + .word 0xbcff8c11, 0x507150cb + .word 0xbf851bc0, 0x9bbf4000 + .word 0xbcdae1ea, 0x5258a3c6 + .word 0xbf841929, 0xf9683000 + .word 0xbcd77c75, 0x5d013688 + .word 0xbf8316b3, 0xf9714000 + .word 0xbcfb8dcc, 0x8ba5563d + .word 0xbf82145e, 0x939ef000 + .word 0xbcce891c, 0x6274ffda + .word 0xbf811229, 0xbfb89000 + .word 0xbcf50ee4, 0x5fd053b1 + .word 0xbf801015, 0x7588d000 + .word 0xbcfce251, 0x998b505f + .word 0xbf7e1c43, 0x59bad000 + .word 0xbce9f504, 0xadbb6021 + .word 0xbf7c189c, 0xbb0e2000 + .word 0xbcdfeabb, 0x69dea7ed + .word 0xbf7a1536, 0xfeb35000 + .word 0xbcecb8e8, 0x91b69c25 + .word 0xbf781212, 0x14586000 + .word 0xbce6a81c, 0x14b9f937 + .word 0xbf760f2d, 0xebb16000 + .word 0xbcbb6835, 0x84891753 + .word 0xbf740c8a, 0x74787000 + .word 0xbce1c38e, 0xf838000c + .word 0xbf720a27, 0x9e6e0000 + .word 0xbce34d96, 0x922727aa + .word 0xbf700805, 0x59588000 + .word 0xbce66afc, 0xb31c67b2 + .word 0xbf6c0c47, 0x2a092000 + .word 0xbc657d36, 0x31cacba0 + .word 0xbf680904, 0x82898000 + .word 0xbcc701a5, 0xa9c30314 + .word 0xbf640642, 0x9be3c000 + .word 0xbcccf0de, 0xc26e96f3 + .word 0xbf600401, 0x55d58000 + .word 0xbcd13bce, 0x0ce3ddd8 + .word 0xbf580481, 0x20511000 + .word 0xbcc0a8ce, 0x7ceb0de6 + .word 
0xbf500200, 0x55655000 + .word 0xbcc11266, 0xaf9afc3f + .word 0xbf400100, 0x15575000 + .word 0xbca62237, 0x79c0dc11 + .word 0x00000000, 0x00000000 + .word 0x00000000, 0x00000000 + .word 0x3f4ffc00, 0xaa8ab000 + .word 0x3c80fbc0, 0x4d051925 + .word 0x3f5ff802, 0xa9ab1000 + .word 0x3c8ccf14, 0xf1d0a9f2 + .word 0x3f67f704, 0x7d798000 + .word 0x3cbed344, 0xeb43240a + .word 0x3f6ff00a, 0xa2b10000 + .word 0x3cd78094, 0x10d6ad37 + .word 0x3f73f38a, 0x60f06000 + .word 0x3cd22569, 0x3c937494 + .word 0x3f77ee11, 0xebd82000 + .word 0x3ced274f, 0x0b48e81d + .word 0x3f7be79c, 0x70058000 + .word 0x3ced91f3, 0x4d808088 + .word 0x3f7fe02a, 0x6b106000 + .word 0x3cde23f0, 0xdda40e47 + .word 0x3f81ebde, 0x2d199000 + .word 0x3cef97c0, 0x0b723c9a + .word 0x3f83e729, 0x5d25a000 + .word 0x3cef63e0, 0x0d65eebc + .word 0x3f85e1f7, 0x03ecb000 + .word 0x3cfca09f, 0x585da1b5 + .word 0x3f87dc47, 0x5f810000 + .word 0x3cf4edba, 0x4a25e0b1 + .word 0x3f89d61a, 0xadc6b000 + .word 0x3cfb1963, 0x27b4256d + .word 0x3f8bcf71, 0x2c743000 + .word 0x3cf09782, 0x5ef65dc3 + .word 0x3f8dc84b, 0x19123000 + .word 0x3cf02950, 0x78e96cc1 + .word 0x3f8fc0a8, 0xb0fc0000 + .word 0x3cdf1e7c, 0xf6d3a69c + .word 0x3f90dc45, 0x18afc000 + .word 0x3d090f43, 0x1ff3b010 + .word 0x3f91d7f7, 0xeb9ee000 + .word 0x3d07cd8a, 0xf80670b5 + .word 0x3f92d36c, 0xefb55000 + .word 0x3cff0bb3, 0x41706c38 + .word 0x3f93cea4, 0x4346a000 + .word 0x3cf5d3bc, 0xd295bf53 + .word 0x3f94c99e, 0x04901000 + .word 0x3d0bd98c, 0xbbebe949 + .word 0x3f95c45a, 0x51b8d000 + .word 0x3cec449d, 0xe927827c + .word 0x3f96bed9, 0x48d1b000 + .word 0x3cff43be, 0x9f5bc086 + .word 0x3f97b91b, 0x07d5b000 + .word 0x3cd1aa92, 0x7f54c717 + .word 0x3f98b31f, 0xaca9b000 + .word 0x3c8c3ab4, 0x8db4decf + .word 0x3f99ace7, 0x551cc000 + .word 0x3cf45134, 0x09c1df81 + .word 0x3f9aa672, 0x1ee83000 + .word 0x3cf6a75a, 0xe2d7a49d + .word 0x3f9b9fc0, 0x27af9000 + .word 0x3cd97fbd, 0x465b7589 + .word 0x3f9c98d1, 0x8d00c000 + .word 0x3d0027ab, 0xe9d883c3 + .word 0x3f9d91a6, 
0x6c543000 + .word 0x3d0987c5, 0x9633ee68 + .word 0x3f9e8a3e, 0xe30cd000 + .word 0x3d095817, 0x086b1c01 + .word 0x3f9f829b, 0x0e783000 + .word 0x3ce80267, 0xc7e09e3e + .word 0x3fa03d5d, 0x85e73000 + .word 0x3d1dde25, 0x83b4a73b + .word 0x3fa0b94f, 0x7c196000 + .word 0x3ce76769, 0x0fdd87d3 + .word 0x3fa13523, 0x78597000 + .word 0x3cef29e2, 0x4702d328 + .word 0x3fa1b0d9, 0x8923d000 + .word 0x3d12ff85, 0x945dd915 + .word 0x3fa22c71, 0xbcea8000 + .word 0x3cfd2818, 0xf87f888f + .word 0x3fa2a7ec, 0x2214e000 + .word 0x3d10e631, 0x0add3804 + .word 0x3fa32348, 0xc7001000 + .word 0x3d0a5b6e, 0x42c7927d + .word 0x3fa39e87, 0xb9feb000 + .word 0x3d1abf52, 0x02b64055 + .word 0x3fa419a9, 0x09593000 + .word 0x3d0ae6e3, 0x3ea4753a + .word 0x3fa494ac, 0xc34d9000 + .word 0x3ce1c78a, 0x56fd2473 + .word 0x3fa50f92, 0xf60f9000 + .word 0x3d12d9f6, 0x1523ffc6 + .word 0x3fa58a5b, 0xafc8e000 + .word 0x3d035231, 0xaa3d4b1d + .word 0x3fa60506, 0xfe98d000 + .word 0x3d1516fd, 0xf9ac7f28 + .word 0x3fa67f94, 0xf094b000 + .word 0x3d1b307c, 0xf9f93b5b + .word 0x3fa6fa05, 0x93c7b000 + .word 0x3d0a0af2, 0x0eb1a504 + .word 0x3fa77458, 0xf632d000 + .word 0x3d19f88c, 0x69e543dd + .word 0x3fa7ee8f, 0x25cd4000 + .word 0x3ce7bd3d, 0xcb47c2e4 + .word 0x3fa868a8, 0x3083f000 + .word 0x3d0b3b8b, 0xd96a72db + .word 0x3fa8e2a4, 0x243a1000 + .word 0x3d173dd6, 0x0284c920 + .word 0x3fa95c83, 0x0ec8e000 + .word 0x3cff5beb, 0x41d00a41 + .word 0x3fa9d644, 0xfdffa000 + .word 0x3cf3c905, 0x39a473b6 + .word 0x3faa4fe9, 0xffa3d000 + .word 0x3cf1a7b5, 0xfbfd6db2 + .word 0x3faac972, 0x21711000 + .word 0x3d1f1a7d, 0xe0264459 + .word 0x3fab42dd, 0x71197000 + .word 0x3cebec28, 0xd14c7d9f + .word 0x3fabbc2b, 0xfc44f000 + .word 0x3d005cf2, 0xdd7d04a2 + .word 0x3fac355d, 0xd0921000 + .word 0x3d1e5999, 0x357f0710 + .word 0x3facae72, 0xfb95c000 + .word 0x3cf0540d, 0xfda4e418 + .word 0x3fad276b, 0x8adb0000 + .word 0x3d16a423, 0xc78a64b0 + .word 0x3fada047, 0x8be39000 + .word 0x3cf2963d, 0x8fb7f02b + .word 0x3fae1907, 0x0c276000 + 
.word 0x3ca5b99b, 0x9d617a09 + .word 0x3fae91aa, 0x1914f000 + .word 0x3d10beaf, 0xf119cac5 + .word 0x3faf0a30, 0xc0116000 + .word 0x3cf5330b, 0xe64b8b77 + .word 0x3faf829b, 0x0e783000 + .word 0x3cf80267, 0xc7e09e3e + .word 0x3faffae9, 0x119b9000 + .word 0x3cf819ba, 0x13162a9c + .word 0x3fb0398d, 0x6b622000 + .word 0x3d153ac8, 0x0d00cc01 + .word 0x3fb07598, 0x3598e000 + .word 0x3d11c4c0, 0x6d2999e2 + .word 0x3fb0b194, 0xee0d1000 + .word 0x3d199ba9, 0x3da7b72e + .word 0x3fb0ed83, 0x9b552000 + .word 0x3d1bf82e, 0x4add5131 + .word 0x3fb12964, 0x4402e000 + .word 0x3d056224, 0x572ac464 + .word 0x3fb16536, 0xeea37000 + .word 0x3d25c1d0, 0xc4b82e7c + .word 0x3fb1a0fb, 0xa1bf8000 + .word 0x3d24a3fc, 0xc319d6dc + .word 0x3fb1dcb2, 0x63db1000 + .word 0x3d22889e, 0xbd3d1303 + .word 0x3fb2185b, 0x3b75a000 + .word 0x3cfce760, 0x70cdcfc5 + .word 0x3fb253f6, 0x2f0a1000 + .word 0x3d105be3, 0xeda69c04 + .word 0x3fb28f83, 0x450ed000 + .word 0x3d251aeb, 0x54232ed1 + .word 0x3fb2cb02, 0x83f5d000 + .word 0x3d2c3dc5, 0x94cae043 + .word 0x3fb30673, 0xf22c8000 + .word 0x3d24c9e2, 0x9dcf0ba5 + .word 0x3fb341d7, 0x961bd000 + .word 0x3cfd0929, 0x98376105 + .word 0x3fb37d2d, 0x76283000 + .word 0x3cfcfaab, 0x2400751e + .word 0x3fb3b875, 0x98b1b000 + .word 0x3d1bb7d4, 0xd6a6b9db + .word 0x3fb3f3b0, 0x04140000 + .word 0x3cee2474, 0xacdfcec5 + .word 0x3fb42edc, 0xbea64000 + .word 0x3d1bc0ee, 0xea7c9acd + .word 0x3fb469fb, 0xcebb5000 + .word 0x3d26cc78, 0x9e4ae327 + .word 0x3fb4a50d, 0x3aa1b000 + .word 0x3cd003d9, 0xeed183bb + .word 0x3fb4e011, 0x08a35000 + .word 0x3d25cb9f, 0xbe58b5c9 + .word 0x3fb51b07, 0x3f061000 + .word 0x3d207ed2, 0x4f1cd0d4 + .word 0x3fb555ef, 0xe40b5000 + .word 0x3ce692f1, 0x90d1c46b + .word 0x3fb590ca, 0xfdf01000 + .word 0x3d28509e, 0xae455754 + .word 0x3fb5cb98, 0x92ed4000 + .word 0x3d17be44, 0xa64fc52f + .word 0x3fb60658, 0xa9375000 + .word 0x3ce8763b, 0xdd389ef2 + .word 0x3fb6410b, 0x46fe7000 + .word 0x3d256038, 0x61a13976 + .word 0x3fb67bb0, 0x726ec000 + .word 
0x3cef724b, 0x69ef5912 + .word 0x3fb6b648, 0x31afe000 + .word 0x3d1033d7, 0xb22085b8 + .word 0x3fb6f0d2, 0x8ae56000 + .word 0x3d269737, 0xc93373da + .word 0x3fb72b4f, 0x842ea000 + .word 0x3d21f666, 0x7fe6c45a + .word 0x3fb765bf, 0x23a6b000 + .word 0x3d2c2687, 0xf9477b53 + .word 0x3fb7a021, 0x6f649000 + .word 0x3d2c2499, 0x430831ff + .word 0x3fb7da76, 0x6d7b1000 + .word 0x3d066422, 0x240644d8 + .word 0x3fb814be, 0x23f8c000 + .word 0x3ccb2381, 0xda82fdfd + .word 0x3fb84ef8, 0x98e82000 + .word 0x3d205465, 0xb72d106e + .word 0x3fb88925, 0xd24fa000 + .word 0x3d2c55f5, 0x76088ff3 + .word 0x3fb8c345, 0xd6319000 + .word 0x3d2641eb, 0x596854cc + .word 0x3fb8fd58, 0xaa8c2000 + .word 0x3cf136fe, 0x4348da4e + .word 0x3fb9375e, 0x55595000 + .word 0x3d2dbb86, 0xe70186c9 + .word 0x3fb97156, 0xdc8f6000 + .word 0x3d0f01f3, 0x28123425 + .word 0x3fb9ab42, 0x46203000 + .word 0x3d0d66df, 0x661e3e7b + .word 0x3fb9e520, 0x97f9c000 + .word 0x3d235fac, 0xb52dd050 + .word 0x3fba1ef1, 0xd8061000 + .word 0x3d29a82e, 0xdbf2f796 + .word 0x3fba58b6, 0x0c2b2000 + .word 0x3d091c65, 0x1d1b06b1 + .word 0x3fba926d, 0x3a4ad000 + .word 0x3d158d94, 0x2f48aa71 + .word 0x3fbacc17, 0x68433000 + .word 0x3d0561f1, 0x7d2016d1 + .word 0x3fbb05b4, 0x9bee4000 + .word 0x3d0ff22c, 0x18f84a5e + .word 0x3fbb3f44, 0xdb221000 + .word 0x3d2fa2a7, 0xb1bc135d + .word 0x3fbb78c8, 0x2bb0e000 + .word 0x3d2b4210, 0x878cf032 + .word 0x3fbbb23e, 0x9368e000 + .word 0x3d22e9cf, 0x954c48ea + .word 0x3fbbeba8, 0x18146000 + .word 0x3d1d921d, 0x248382a6 + .word 0x3fbc2504, 0xbf79d000 + .word 0x3d1c5f13, 0x43bd2b70 + .word 0x3fbc5e54, 0x8f5bc000 + .word 0x3d1d0c57, 0x585fbe06 + .word 0x3fbc9797, 0x8d78e000 + .word 0x3d223fde, 0xd105cef9 + .word 0x3fbcd0cd, 0xbf8c1000 + .word 0x3d0f0a6d, 0xa86eba18 + .word 0x3fbd09f7, 0x2b4c4000 + .word 0x3d2048c0, 0x00354e33 + .word 0x3fbd4313, 0xd66cb000 + .word 0x3d0aeaf2, 0x1bb2a3b2 + .word 0x3fbd7c23, 0xc69cb000 + .word 0x3d0a046c, 0x8b35e23e + .word 0x3fbdb527, 0x0187d000 + .word 0x3d224ef0, 
0xad5c303f + .word 0x3fbdee1d, 0x8cd5e000 + .word 0x3d2ae4bf, 0x1ac200ee + .word 0x3fbe2707, 0x6e2af000 + .word 0x3d072f4f, 0x543fff10 + .word 0x3fbe5fe4, 0xab272000 + .word 0x3d240a2c, 0x11600366 + .word 0x3fbe98b5, 0x49671000 + .word 0x3d119dd2, 0x27143a5b + .word 0x3fbed179, 0x4e837000 + .word 0x3d20175e, 0x45b17dbe + .word 0x3fbf0a30, 0xc0116000 + .word 0x3d05330b, 0xe64b8b77 + .word 0x3fbf42db, 0xa3a22000 + .word 0x3d29da91, 0x9a4127e6 + .word 0x3fbf7b79, 0xfec37000 + .word 0x3d2bbd9e, 0x05da04c0 + .word 0x3fbfb40b, 0xd6ff4000 + .word 0x3d2c0bec, 0xb7b53b5b + .word 0x3fbfec91, 0x31dbe000 + .word 0x3d257554, 0x5ca333f2 + .word 0x3fc01285, 0x0a6df000 + .word 0x3d395e79, 0xadfe901b + .word 0x3fc02ebb, 0x42bf3000 + .word 0x3d3a95c1, 0x68c7fc69 + .word 0x3fc04aeb, 0x449f6000 + .word 0x3d2afa90, 0x65ccd35c + .word 0x3fc06715, 0x12ca5000 + .word 0x3d32dc54, 0x3191fae2 + .word 0x3fc08338, 0xaffa2000 + .word 0x3d30533c, 0xac823e27 + .word 0x3fc09f56, 0x1ee71000 + .word 0x3d33867d, 0x4754172c + .word 0x3fc0bb6d, 0x6247a000 + .word 0x3d35464f, 0x3ccd04b3 + .word 0x3fc0d77e, 0x7cd08000 + .word 0x3d3cb2cd, 0x2ee2f482 + .word 0x3fc0f389, 0x7134b000 + .word 0x3d02e530, 0xbb6149cf + .word 0x3fc10f8e, 0x42253000 + .word 0x3d336263, 0xde634e7c + .word 0x3fc12b8c, 0xf2518000 + .word 0x3d348a4a, 0x13c0a0fc + .word 0x3fc14785, 0x84674000 + .word 0x3d156345, 0x1027c750 + .word 0x3fc16377, 0xfb124000 + .word 0x3d091e1a, 0xbf41763e + .word 0x3fc17f64, 0x58fca000 + .word 0x3d2843fa, 0xd093c8dc + .word 0x3fc19b4a, 0xa0ced000 + .word 0x3d03bedb, 0x4ef663a7 + .word 0x3fc1b72a, 0xd52f6000 + .word 0x3d2e80a4, 0x1811a396 + .word 0x3fc1d304, 0xf8c35000 + .word 0x3d164aec, 0x82ebbef7 + .word 0x3fc1eed9, 0x0e2dc000 + .word 0x3d161563, 0x7097648f + .word 0x3fc20aa7, 0x18102000 + .word 0x3d3f2c94, 0x348552fe + .word 0x3fc2266f, 0x190a5000 + .word 0x3d3596fa, 0xa3df8c05 + .word 0x3fc24231, 0x13ba5000 + .word 0x3cfc5ff8, 0x71162641 + .word 0x3fc25ded, 0x0abc6000 + .word 0x3d35a385, 0x4f176449 + 
.word 0x3fc279a3, 0x00ab4000 + .word 0x3d3ef432, 0xb3235108 + .word 0x3fc29552, 0xf81ff000 + .word 0x3d248d30, 0x1771c408 + .word 0x3fc2b0fc, 0xf3b1a000 + .word 0x3d177ca3, 0xe30a59ea + .word 0x3fc2cca0, 0xf5f5f000 + .word 0x3d128439, 0xb9403b82 + .word 0x3fc2e83f, 0x0180d000 + .word 0x3cee7aa7, 0xaf63c632 + .word 0x3fc303d7, 0x18e47000 + .word 0x3d3fa5fd, 0x28c704d4 + .word 0x3fc31f69, 0x3eb19000 + .word 0x3d32cc6c, 0x8d2e3482 + .word 0x3fc33af5, 0x75770000 + .word 0x3d3c9ecc, 0xa2fe72a5 + .word 0x3fc3567b, 0xbfc22000 + .word 0x3d3250d2, 0x53991a1f + .word 0x3fc371fc, 0x201e8000 + .word 0x3d3ee877, 0x9b2d8abc + .word 0x3fc38d76, 0x99164000 + .word 0x3d1844a5, 0x9e39bb70 + .word 0x3fc3a8eb, 0x2d31a000 + .word 0x3d1bafb7, 0x7d5d503e + .word 0x3fc3c459, 0xdef76000 + .word 0x3d3edc86, 0xf6b70d33 + .word 0x3fc3dfc2, 0xb0ecc000 + .word 0x3d28a72a, 0x62b8c13f + .word 0x3fc3fb25, 0xa5952000 + .word 0x3d3195be, 0x6b358ff7 + .word 0x3fc41682, 0xbf727000 + .word 0x3d377fdc, 0x7bf03db2 + .word 0x3fc431da, 0x01050000 + .word 0x3d304837, 0x836e0391 + .word 0x3fc44d2b, 0x6ccb7000 + .word 0x3d3a3ccf, 0xa7b2a1f1 + .word 0x3fc46877, 0x0542f000 + .word 0x3d03f5d0, 0x3957bc10 + .word 0x3fc483bc, 0xcce6e000 + .word 0x3d1eea52, 0x723f6369 + .word 0x3fc49efc, 0xc6313000 + .word 0x3d3cde14, 0xcc15551b + .word 0x3fc4ba36, 0xf39a5000 + .word 0x3d279568, 0x981bcc36 + .word 0x3fc4d56b, 0x5798e000 + .word 0x3d380580, 0x15a96555 + .word 0x3fc4f099, 0xf4a23000 + .word 0x3cf640d0, 0x50150d92 + .word 0x3fc50bc2, 0xcd29c000 + .word 0x3d1ada57, 0x28db8d4f + .word 0x3fc526e5, 0xe3a1b000 + .word 0x3d20de8b, 0x90075b8f + .word 0x3fc54203, 0x3a7a8000 + .word 0x3d268d68, 0xed855f0e + .word 0x3fc55d1a, 0xd4232000 + .word 0x3d3add94, 0xdda647e8 + .word 0x3fc5782c, 0xb3091000 + .word 0x3d28b739, 0x5d0d777d + .word 0x3fc59338, 0xd9982000 + .word 0x3cf0ba68, 0xb7555d4a + .word 0x3fc5ae3f, 0x4a3aa000 + .word 0x3d21ea25, 0xf012a8b9 + .word 0x3fc5c940, 0x07597000 + .word 0x3d15c9ad, 0xccb7337a + .word 
0x3fc5e43b, 0x135bd000 + .word 0x3d278a96, 0x6224c79e + .word 0x3fc5ff30, 0x70a79000 + .word 0x3d1e9e43, 0x9f105039 + .word 0x3fc61a20, 0x21a0e000 + .word 0x3d3dd9dd, 0x1bdf3cdd + .word 0x3fc6350a, 0x28aaa000 + .word 0x3d2d5ec0, 0xab8163af + .word 0x3fc64fee, 0x8825f000 + .word 0x3d3896fc, 0xa298884b + .word 0x3fc66acd, 0x4272a000 + .word 0x3d3aa1bd, 0xbfc6c785 + .word 0x3fc685a6, 0x59eef000 + .word 0x3d3706ab, 0x49f7e6f6 + .word 0x3fc6a079, 0xd0f7a000 + .word 0x3d35a3f8, 0x448d14f5 + .word 0x3fc6bb47, 0xa9e80000 + .word 0x3d19f64d, 0x23ea3296 + .word 0x3fc6d60f, 0xe719d000 + .word 0x3d10e46a, 0xa3b2e266 + .word 0x3fc6f0d2, 0x8ae56000 + .word 0x3d369737, 0xc93373da + .word 0x3fc70b8f, 0x97a1a000 + .word 0x3d34ea64, 0xf6a95bef + .word 0x3fc72647, 0x0fa3f000 + .word 0x3d211641, 0xe3178b76 + .word 0x3fc740f8, 0xf5403000 + .word 0x3d2e9326, 0xcdfceabe + .word 0x3fc75ba5, 0x4ac8e000 + .word 0x3d3ddca5, 0x8bc4a7c0 + .word 0x3fc7764c, 0x128f2000 + .word 0x3d027490, 0x3479e3d1 + .word 0x3fc790ed, 0x4ee26000 + .word 0x3d199bbd, 0x4e7746f6 + .word 0x3fc7ab89, 0x0210d000 + .word 0x3d321237, 0xc6d65ad4 + .word 0x3fc7c61f, 0x2e673000 + .word 0x3d2b8da4, 0x99c82e40 + .word 0x3fc7e0af, 0xd630c000 + .word 0x3d139e7c, 0x1d8f1034 + .word 0x3fc7fb3a, 0xfbb75000 + .word 0x3d204815, 0xb73ec551 + .word 0x3fc815c0, 0xa1435000 + .word 0x3d2fab5a, 0x0dbfc630 + .word 0x3fc83040, 0xc91bc000 + .word 0x3d3e5b71, 0xc6e66f32 + .word 0x3fc84abb, 0x75865000 + .word 0x3d0392a9, 0x058ea173 + .word 0x3fc86530, 0xa8c70000 + .word 0x3d398bb0, 0xcb4ea3e3 + .word 0x3fc87fa0, 0x6520c000 + .word 0x3d322120, 0x401202fc + .word 0x3fc89a0a, 0xacd4e000 + .word 0x3d2c0bfb, 0xda8f5a72 + .word 0x3fc8b46f, 0x82236000 + .word 0x3d12d9f2, 0x102dd7c9 + .word 0x3fc8cece, 0xe74ad000 + .word 0x3d16917d, 0x56f5912d + .word 0x3fc8e928, 0xde886000 + .word 0x3d3a8154, 0xb13d72d5 + .word 0x3fc9037d, 0x6a180000 + .word 0x3d230dea, 0x57c1c8d9 + .word 0x3fc91dcc, 0x8c340000 + .word 0x3d37bc6a, 0xbddeff46 + .word 0x3fc93816, 
0x47159000 + .word 0x3d267385, 0x2b8b8c4f + .word 0x3fc9525a, 0x9cf45000 + .word 0x3d2ad1d9, 0x04c1d4e3 + .word 0x3fc96c99, 0x9006a000 + .word 0x3d2a88d5, 0x9cbb452c + .word 0x3fc986d3, 0x22818000 + .word 0x3cf93b56, 0x4dd44000 + .word 0x3fc9a107, 0x56988000 + .word 0x3d264aa6, 0x242cd098 + .word 0x3fc9bb36, 0x2e7df000 + .word 0x3d3706ab, 0xaf18f802 + .word 0x3fc9d55f, 0xac62d000 + .word 0x3ce732c0, 0x789487af + .word 0x3fc9ef83, 0xd2769000 + .word 0x3d3467a4, 0x26031900 + .word 0x3fca09a2, 0xa2e79000 + .word 0x3d311331, 0x195f76e6 + .word 0x3fca23bc, 0x1fe2b000 + .word 0x3d258c64, 0xdc46c1ea + .word 0x3fca3dd0, 0x4b938000 + .word 0x3d297da1, 0x366e2c5a + .word 0x3fca57df, 0x28244000 + .word 0x3d3b99c8, 0xca1d9abb + .word 0x3fca71e8, 0xb7bdf000 + .word 0x3d377a9a, 0xc887d66f + .word 0x3fca8bec, 0xfc882000 + .word 0x3d3e3185, 0xcf21b9cf + .word 0x3fcaa5eb, 0xf8a93000 + .word 0x3d2abead, 0x92d5cae2 + .word 0x3fcabfe5, 0xae461000 + .word 0x3d125c2b, 0x1a83b18e + .word 0x3fcad9da, 0x1f827000 + .word 0x3d1df520, 0xdff03ebe + .word 0x3fcaf3c9, 0x4e80b000 + .word 0x3d3fe5b1, 0x9cc03270 + .word 0x3fcb0db3, 0x3d620000 + .word 0x3d3fee14, 0x38eab906 + .word 0x3fcb2797, 0xee463000 + .word 0x3d105dd5, 0xbe4bfd5c + .word 0x3fcb4177, 0x634ba000 + .word 0x3d355d01, 0x5666069f + .word 0x3fcb5b51, 0x9e8fb000 + .word 0x3d2691ba, 0x27fdc19e + .word 0x3fcb7526, 0xa22e4000 + .word 0x3d2c0dbf, 0x2e785490 + .word 0x3fcb8ef6, 0x70420000 + .word 0x3d387533, 0x321788e0 + .word 0x3fcba8c1, 0x0ae46000 + .word 0x3d3a32e2, 0x9eee9d85 + .word 0x3fcbc286, 0x742d8000 + .word 0x3d39ac53, 0xf39d121c + .word 0x3fcbdc46, 0xae344000 + .word 0x3d3625b4, 0x023d6505 + .word 0x3fcbf601, 0xbb0e4000 + .word 0x3d2386a9, 0x47c378b5 + .word 0x3fcc0fb7, 0x9ccfd000 + .word 0x3d272000, 0xcc2eb551 + .word 0x3fcc2968, 0x558c1000 + .word 0x3d318146, 0x108e3ae0 + .word 0x3fcc4313, 0xe754e000 + .word 0x3d3279be, 0x74cad7d6 + .word 0x3fcc5cba, 0x543ae000 + .word 0x3d20929d, 0xecb454fc + .word 0x3fcc765b, 0x9e4d6000 + 
.word 0x3d31ab6b, 0x36976f6c + .word 0x3fcc8ff7, 0xc79a9000 + .word 0x3d344358, 0x4bb03de6 + .word 0x3fcca98e, 0xd22f5000 + .word 0x3d3e9673, 0xe735df63 + .word 0x3fccc320, 0xc0176000 + .word 0x3d240903, 0x9a653794 + .word 0x3fccdcad, 0x935d1000 + .word 0x3d3cbe01, 0xf966cb77 + .word 0x3fccf635, 0x4e09c000 + .word 0x3d277123, 0x9a07d55b + .word 0x3fcd0fb7, 0xf2255000 + .word 0x3d3ca15a, 0x9bf3989b + .word 0x3fcd2935, 0x81b6b000 + .word 0x3d1f363f, 0xb5d55685 + .word 0x3fcd42ad, 0xfec35000 + .word 0x3d3a28ff, 0xc09fef63 + .word 0x3fcd5c21, 0x6b4fb000 + .word 0x3d3722b7, 0x221acbf2 + .word 0x3fcd758f, 0xc95ef000 + .word 0x3d3a97bd, 0x5d2fa755 + .word 0x3fcd8ef9, 0x1af31000 + .word 0x3d3abbe8, 0x0f26ce1f + .word 0x3fcda85d, 0x620ce000 + .word 0x3d240194, 0xc16cc7ec + .word 0x3fcdc1bc, 0xa0abe000 + .word 0x3d38fac1, 0xa628ccc6 + .word 0x3fcddb16, 0xd8ce9000 + .word 0x3d384421, 0xa3bed1d1 + .word 0x3fcdf46c, 0x0c722000 + .word 0x3d3a5e82, 0xb0b79039 + .word 0x3fce0dbc, 0x3d92a000 + .word 0x3d359233, 0xf0529bf1 + .word 0x3fce2707, 0x6e2af000 + .word 0x3d172f4f, 0x543fff10 + .word 0x3fce404d, 0xa034b000 + .word 0x3d2cf022, 0x3ecbb0ce + .word 0x3fce598e, 0xd5a87000 + .word 0x3d3c5d96, 0x861c2cec + .word 0x3fce72cb, 0x107da000 + .word 0x3d1dd48c, 0xcdf5471c + .word 0x3fce8c02, 0x52aa5000 + .word 0x3d34bfd2, 0x3f8b8c80 + .word 0x3fcea534, 0x9e23a000 + .word 0x3d381b93, 0x4c73ccb5 + .word 0x3fcebe61, 0xf4dd7000 + .word 0x3d3615d6, 0x67811ada + .word 0x3fced78a, 0x58ca8000 + .word 0x3d16f1b5, 0x3793387e + .word 0x3fcef0ad, 0xcbdc5000 + .word 0x3d326ca4, 0x31bca86e + .word 0x3fcf09cc, 0x50036000 + .word 0x3d3da094, 0x18d999db + .word 0x3fcf22e5, 0xe72f1000 + .word 0x3ce7561d, 0x7d037c19 + .word 0x3fcf3bfa, 0x934d6000 + .word 0x3d2d9f2a, 0x937b903b + .word 0x3fcf550a, 0x564b7000 + .word 0x3d366e0e, 0x2fb6fe81 + .word 0x3fcf6e15, 0x32153000 + .word 0x3d0b2b44, 0x29d89c5c + .word 0x3fcf871b, 0x28955000 + .word 0x3ce14052, 0xb5b2204b + .word 0x3fcfa01c, 0x3bb57000 + .word 
0x3d397823, 0x81478a1f + .word 0x3fcfb918, 0x6d5e3000 + .word 0x3d3c551a, 0xaa8cd86f + .word 0x3fcfd20f, 0xbf76f000 + .word 0x3d3b8ea9, 0x234e4064 + .word 0x3fcfeb02, 0x33e60000 + .word 0x3d2f316e, 0x32d5e8c7 + .word 0x3fd001f7, 0xe6484000 + .word 0x3d38a957, 0x40c9abbc + .word 0x3fd00e6c, 0x45ad5000 + .word 0x3cdcc68d, 0x52e01203 + .word 0x3fd01ade, 0x39139000 + .word 0x3d4deed9, 0xe6647d5c + .word 0x3fd0274d, 0xc16c2000 + .word 0x3d2979e8, 0x9cf835c2 + .word 0x3fd033ba, 0xdfa74000 + .word 0x3d0c30bc, 0x1485bdff + .word 0x3fd04025, 0x94b4d000 + .word 0x3cf036b8, 0x9ef42d7f + .word 0x3fd04c8d, 0xe1841000 + .word 0x3d4c0328, 0xb5da628f + .word 0x3fd058f3, 0xc703e000 + .word 0x3d478bcc, 0xa196e4a9 + .word 0x3fd06557, 0x46227000 + .word 0x3d0131df, 0xb4868d6a + .word 0x3fd071b8, 0x5fcd5000 + .word 0x3d421a3a, 0x2e0ff2f8 + .word 0x3fd07e17, 0x14f1c000 + .word 0x3d40819c, 0xd863da16 + .word 0x3fd08a73, 0x667c5000 + .word 0x3d3ebc1d, 0x40c5a329 + .word 0x3fd096cd, 0x55591000 + .word 0x3d3f998d, 0x20550a31 + .word 0x3fd0a324, 0xe2739000 + .word 0x3d0c6bee, 0x7ef4030e + .word 0x3fd0af7a, 0x0eb6c000 + .word 0x3d23ccf9, 0x4945adad + .word 0x3fd0bbcc, 0xdb0d2000 + .word 0x3d32f32c, 0xcc5dcdfb + .word 0x3fd0c81d, 0x4860a000 + .word 0x3d40d218, 0x5ff17467 + .word 0x3fd0d46b, 0x579ab000 + .word 0x3d3d2c81, 0xf640e1e6 + .word 0x3fd0e0b7, 0x09a43000 + .word 0x3d32a038, 0xa7862f2a + .word 0x3fd0ed00, 0x5f657000 + .word 0x3d4b48e2, 0xb5e955ff + .word 0x3fd0f947, 0x59c66000 + .word 0x3d4356cf, 0x407bf3a5 + .word 0x3fd1058b, 0xf9ae4000 + .word 0x3d45aa31, 0x3f415699 + .word 0x3fd111ce, 0x4003e000 + .word 0x3d4c99b9, 0x1ed29693 + .word 0x3fd11e0e, 0x2dad9000 + .word 0x3d496e01, 0xdc0cc691 + .word 0x3fd12a4b, 0xc3911000 + .word 0x3d452c57, 0xcf5c66d4 + .word 0x3fd13687, 0x0293a000 + .word 0x3d4160bd, 0xb314c76f + .word 0x3fd142bf, 0xeb9a0000 + .word 0x3d31ce61, 0x85b58a9e + .word 0x3fd14ef6, 0x7f886000 + .word 0x3d40b42c, 0xd101b436 + .word 0x3fd15b2a, 0xbf428000 + .word 0x3d489c71, 
0x2d927594 + .word 0x3fd1675c, 0xababa000 + .word 0x3d38380e, 0x731f55c4 + .word 0x3fd1738c, 0x45a66000 + .word 0x3d431c8b, 0x7fe69f45 + .word 0x3fd17fb9, 0x8e150000 + .word 0x3d42baba, 0x2c5aecbe + .word 0x3fd18be4, 0x85d93000 + .word 0x3d3c167f, 0x6f3604ab + .word 0x3fd1980d, 0x2dd42000 + .word 0x3d2b7b3a, 0x7a361c9a + .word 0x3fd1a433, 0x86e67000 + .word 0x3d4e857a, 0xf9cb1f55 + .word 0x3fd1b057, 0x91f07000 + .word 0x3d46915c, 0xc91d50e9 + .word 0x3fd1bc79, 0x4fd1c000 + .word 0x3d419879, 0xc5c22c21 + .word 0x3fd1c898, 0xc1699000 + .word 0x3d43f5f7, 0x8d1cea80 + .word 0x3fd1d4b5, 0xe796a000 + .word 0x3d222a5b, 0xd197bac2 + .word 0x3fd1e0d0, 0xc3371000 + .word 0x3d3af8f2, 0xa9b0d4a0 + .word 0x3fd1ece9, 0x5528a000 + .word 0x3d4cf630, 0x9ec96b89 + .word 0x3fd1f8ff, 0x9e48a000 + .word 0x3d27946c, 0x040cbe77 + .word 0x3fd20513, 0x9f73b000 + .word 0x3cf6e15e, 0x1609e0a4 + .word 0x3fd21125, 0x59861000 + .word 0x3d382e78, 0xba2950c4 + .word 0x3fd21d34, 0xcd5b9000 + .word 0x3d3b552f, 0xb28badaa + .word 0x3fd22941, 0xfbcf7000 + .word 0x3d42cb44, 0x850a7b4f + .word 0x3fd2354c, 0xe5bc8000 + .word 0x3d414389, 0x7cfeacce + .word 0x3fd24155, 0x8bfd1000 + .word 0x3d300fff, 0x3228fcad + .word 0x3fd24d5b, 0xef6ae000 + .word 0x3d4ff114, 0x3f81b02a + .word 0x3fd25960, 0x10df7000 + .word 0x3d38e7bc, 0x224ea3e3 + .word 0x3fd26561, 0xf1338000 + .word 0x3d38b488, 0x66faa45f + .word 0x3fd27161, 0x913f8000 + .word 0x3d34f4f1, 0xf61564b4 + .word 0x3fd27d5e, 0xf1db5000 + .word 0x3d4e6dc8, 0xb8735361 + .word 0x3fd2895a, 0x13de8000 + .word 0x3d3a8d7a, 0xd24c13f0 + .word 0x3fd29552, 0xf81ff000 + .word 0x3d348d30, 0x1771c408 + .word 0x3fd2a149, 0x9f762000 + .word 0x3d479220, 0x57062a92 + .word 0x3fd2ad3e, 0x0ab73000 + .word 0x3d2b972e, 0x488c359f + .word 0x3fd2b930, 0x3ab89000 + .word 0x3d4a493b, 0x4a5013d7 + .word 0x3fd2c520, 0x304f8000 + .word 0x3d230852, 0x8c342f39 + .word 0x3fd2d10d, 0xec508000 + .word 0x3d360c61, 0xf7088353 + .word 0x3fd2dcf9, 0x6f8fd000 + .word 0x3d20b4a2, 0x8e33c9ce + 
.word 0x3fd2e8e2, 0xbae11000 + .word 0x3d4a6138, 0x5992350a + .word 0x3fd2f4c9, 0xcf17a000 + .word 0x3d371f04, 0x9374b87b + .word 0x3fd300ae, 0xad063000 + .word 0x3d342f56, 0x8b75fcac + .word 0x3fd30c91, 0x557f1000 + .word 0x3d4d7ad4, 0xebd75d15 + .word 0x3fd31871, 0xc9544000 + .word 0x3d184fab, 0x94cecfd9 + .word 0x3fd32450, 0x09570000 + .word 0x3d3d271b, 0x9bdae59d + .word 0x3fd3302c, 0x16586000 + .word 0x3d36217d, 0xc2a3e08b + .word 0x3fd33c05, 0xf128d000 + .word 0x3d4b51be, 0x71fc7961 + .word 0x3fd347dd, 0x9a987000 + .word 0x3d4aa9ac, 0x8ace9fdc + .word 0x3fd353b3, 0x1376d000 + .word 0x3d4d99ca, 0x0327b24d + .word 0x3fd35f86, 0x5c932000 + .word 0x3d427c10, 0xd8af2d5b + .word 0x3fd36b57, 0x76bc1000 + .word 0x3d116978, 0x5a9c223f + .word 0x3fd37726, 0x62bfd000 + .word 0x3d40b5e4, 0xa9d627ef + .word 0x3fd382f3, 0x216c4000 + .word 0x3d4df3c5, 0xbc5cb012 + .word 0x3fd38ebd, 0xb38ed000 + .word 0x3d290582, 0xe67d4ca0 + .word 0x3fd39a86, 0x19f45000 + .word 0x3d18ee51, 0x937354f5 + .word 0x3fd3a64c, 0x55694000 + .word 0x3d37a71c, 0xbcd735d0 + .word 0x3fd3b210, 0x66b9b000 + .word 0x3d461f09, 0x33f754f9 + .word 0x3fd3bdd2, 0x4eb14000 + .word 0x3d46d425, 0xb478c893 + .word 0x3fd3c992, 0x0e1b2000 + .word 0x3d141c28, 0xaa680b76 + .word 0x3fd3d54f, 0xa5c1f000 + .word 0x3d3c3e1c, 0xd9a395e3 + .word 0x3fd3e10b, 0x16701000 + .word 0x3d3f3bcf, 0x145429c7 + .word 0x3fd3ecc4, 0x60ef5000 + .word 0x3d4e9fd7, 0x9d83ecff + .word 0x3fd3f87b, 0x86093000 + .word 0x3d451014, 0x55d3b3bc + .word 0x3fd40430, 0x8686a000 + .word 0x3d3f8ef4, 0x3049f7d3 + .word 0x3fd40fe3, 0x63303000 + .word 0x3d3e5c5f, 0xe79f05c6 + .word 0x3fd41b94, 0x1cce0000 + .word 0x3d47dcb7, 0xf60de01c + .word 0x3fd42742, 0xb427d000 + .word 0x3d433c6c, 0x7ea3ecc5 + .word 0x3fd432ef, 0x2a04e000 + .word 0x3d40276b, 0x3674752a + .word 0x3fd43e99, 0x7f2c1000 + .word 0x3d1c3f72, 0x40c41a04 + .word 0x3fd44a41, 0xb463c000 + .word 0x3d31ee28, 0xf37cf612 + .word 0x3fd455e7, 0xca720000 + .word 0x3d1ad8c6, 0x36629aed + .word 
0x3fd4618b, 0xc21c5000 + .word 0x3d4d84fa, 0x16f66f66 + .word 0x3fd46d2d, 0x9c280000 + .word 0x3d359b27, 0x5f67f75a + .word 0x3fd478cd, 0x5959b000 + .word 0x3d2ec89b, 0xf0c8d098 + .word 0x3fd4846a, 0xfa75b000 + .word 0x3d4a7057, 0x47219c8d + .word 0x3fd49006, 0x80400000 + .word 0x3d43a198, 0x00f2f83a + .word 0x3fd49b9f, 0xeb7c1000 + .word 0x3d3dac1c, 0x58ab60d7 + .word 0x3fd4a737, 0x3cecf000 + .word 0x3d432ee5, 0x8a0655db + .word 0x3fd4b2cc, 0x75555000 + .word 0x3d43f81a, 0x1c3a02db + .word 0x3fd4be5f, 0x95777000 + .word 0x3d4141b6, 0x993293ee + .word 0x3fd4c9f0, 0x9e152000 + .word 0x3d487888, 0x63c7f488 + .word 0x3fd4d57f, 0x8fefe000 + .word 0x3d23f926, 0x7fd06868 + .word 0x3fd4e10c, 0x6bc8a000 + .word 0x3cf8283f, 0x1636f061 + .word 0x3fd4ec97, 0x32600000 + .word 0x3d234d7a, 0xaf04d104 + .word 0x3fd4f81f, 0xe4763000 + .word 0x3d4a00c2, 0x6f2c03dd + .word 0x3fd503a6, 0x82cb1000 + .word 0x3d4965cd, 0xc3a41929 + .word 0x3fd50f2b, 0x0e1e0000 + .word 0x3d3a0940, 0x8c47b8d8 + .word 0x3fd51aad, 0x872df000 + .word 0x3d405a13, 0x927ac19f + .word 0x3fd5262d, 0xeeb98000 + .word 0x3d40f230, 0x47bb5b00 + .word 0x3fd531ac, 0x457ee000 + .word 0x3d3df83b, 0x7d931501 + .word 0x3fd53d28, 0x8c3bd000 + .word 0x3d4ddd8d, 0x029240a7 + .word 0x3fd548a2, 0xc3add000 + .word 0x3d23167e, 0x63081cf7 + .word 0x3fd5541a, 0xec91b000 + .word 0x3d4f3f4a, 0xa91c688a + .word 0x3fd55f91, 0x07a43000 + .word 0x3d4dc337, 0x10e416b4 + .word 0x3fd56b05, 0x15a18000 + .word 0x3d29247b, 0xbc4a23fc + .word 0x3fd57677, 0x17455000 + .word 0x3d44d8a9, 0x356d941b + .word 0x3fd581e7, 0x0d4b2000 + .word 0x3d4c19c3, 0xc9da4e1c + .word 0x3fd58d54, 0xf86e0000 + .word 0x3d2791f3, 0x0a795215 + .word 0x3fd598c0, 0xd9687000 + .word 0x3d43d05b, 0x4793492e + .word 0x3fd5a42a, 0xb0f4c000 + .word 0x3d4fc338, 0xa1a4108b + .word 0x3fd5af92, 0x7fccd000 + .word 0x3d4c7f9a, 0x01400711 + .word 0x3fd5baf8, 0x46aa1000 + .word 0x3d46328b, 0x83c602e0 + .word 0x3fd5c65c, 0x06459000 + .word 0x3d4300fc, 0xff3f88cd + .word 0x3fd5d1bd, 
0xbf580000 + .word 0x3d4394a1, 0x1b1c1ee4 + .word 0x3fd5dd1d, 0x7299b000 + .word 0x3d43a84f, 0x3bf518f5 + .word 0x3fd5e87b, 0x20c29000 + .word 0x3d3527d1, 0x8f7738fa + .word 0x3fd5f3d6, 0xca8a2000 + .word 0x3d37af84, 0x8e19cc75 + .word 0x3fd5ff30, 0x70a79000 + .word 0x3d2e9e43, 0x9f105039 + .word 0x3fd60a88, 0x13d1a000 + .word 0x3d36e9b9, 0xc879af55 + .word 0x3fd615dd, 0xb4bec000 + .word 0x3d13c7ca, 0x90bc04b2 + .word 0x3fd62131, 0x5424e000 + .word 0x3d463e81, 0xdaacbccc + .word 0x3fd62c82, 0xf2b9c000 + .word 0x3d3e54bd, 0xbd7c8a98 + .word 0x3fd637d2, 0x91329000 + .word 0x3d450450, 0x865165ea + .word 0x3fd64320, 0x30444000 + .word 0x3d3efe02, 0x7a01d7df + .word 0x3fd64e6b, 0xd0a35000 + .word 0x3d2afe80, 0x69d61295 + .word 0x3fd659b5, 0x7303e000 + .word 0x3d1f281d, 0xb0af8efc + .word 0x3fd664fd, 0x1819b000 + .word 0x3d418e55, 0xe463b5fe + .word 0x3fd67042, 0xc0983000 + .word 0x3d4c6148, 0xdbdcf10d + .word 0x3fd67b86, 0x6d327000 + .word 0x3d438fd6, 0x3ea11c64 + .word 0x3fd686c8, 0x1e9b1000 + .word 0x3d32bb11, 0x0af84054 + .word 0x3fd69207, 0xd5845000 + .word 0x3d43a44f, 0x4861e4ab + .word 0x3fd69d45, 0x92a03000 + .word 0x3d38b1bd, 0xbf97ffa6 + .word 0x3fd6a881, 0x56a03000 + .word 0x3d420e9b, 0xd9d37351 + .word 0x3fd6b3bb, 0x22359000 + .word 0x3d30f625, 0x7a933268 + .word 0x3fd6bef2, 0xf6111000 + .word 0x3d48f8fc, 0x947d5965 + .word 0x3fd6ca28, 0xd2e34000 + .word 0x3d430ad0, 0xb8c49166 + .word 0x3fd6d55c, 0xb95c3000 + .word 0x3d39b9c8, 0xae9a6ee2 + .word 0x3fd6e08e, 0xaa2ba000 + .word 0x3d1e38c1, 0x39318d71 + .word 0x3fd6ebbe, 0xa600e000 + .word 0x3d4cce14, 0xc7dd17dd + .word 0x3fd6f6ec, 0xad8b2000 + .word 0x3d249058, 0xfdf08376 + .word 0x3fd70218, 0xc178e000 + .word 0x3d42a947, 0x0e225428 + .word 0x3fd70d42, 0xe2789000 + .word 0x3d21aead, 0x337ee287 + .word 0x3fd7186b, 0x11381000 + .word 0x3d1934e2, 0x677d272b + .word 0x3fd72391, 0x4e650000 + .word 0x3d0c1d52, 0xbdc87d8a + .word 0x3fd72eb5, 0x9aac9000 + .word 0x3d4dd010, 0xd08a7a15 +!! TBL - end + +! 
constants: + .align 64 +CONSTANTS: + .word 0x40000000,0x00000000 ! two (offset 0x00) + .word 0x3fe55555,0x555571da ! A1 (offset 0x08) + .word 0x3fd99999,0x8702be3a ! A2 (offset 0x10) + .word 0x3fd24af7,0x3f4569b1 ! A3 (offset 0x18) + .word 0x3ea62e42,0xfee00000 ! ln2hi, scaled by 2**-20 + .word 0x3caa39ef,0x35793c76 ! ln2lo, scaled by 2**-20 + .word 0xfffffc00,0x00000000 ! mask, ELEVENBIT + .word 0x43200000 ! offset 0x38 + .word 0xfff00000 ! offset 0x3c + .word 0xc0190200 ! ELEVENBIT, offset 0x40 (offset macro below is still named oxc0194000) + .word 0x0200 ! ELEVENBIT, offset 0x44 (offset macro below is still named ox4000) + +#define two 0x00 +#define A1 0x08 +#define A2 0x10 +#define A3 0x18 +#define ln2hi 0x20 +#define ln2lo 0x28 +#define mask 0x30 +#define ox43200000 0x38 +#define oxfff00000 0x3c +#define oxc0194000 0x40 +#define ox4000 0x44 + + +! local storage indices + +#define jnk STACK_BIAS-0x8 +#define tmp2 STACK_BIAS-0x10 +#define tmp1 STACK_BIAS-0x18 +#define tmp0 STACK_BIAS-0x20 +#define tmp3 STACK_BIAS-0x28 +#define tmp4 STACK_BIAS-0x30 +#define tmp5 STACK_BIAS-0x38 +#define tmp6 STACK_BIAS-0x40 +! sizeof temp storage - must be a multiple of 16 for V9 +#define tmps 0x40 + +! register use + +! i0 n +! i1 x +! i2 stridex +! i3 y +! i4 stridey +! i5 + +! g1 TBL + +! l0 j0 +! l1 j1 +! l2 j2 +! l3 +! l4 0x90200 (ELEVENBIT) +! l5 CONSTANTS +! l6 0x000fffff +! l7 0x7ff00000 + +! o0 py0 +! o1 py1 +! o2 py2 +! o3 used in primary range bounds check +! o4 used in primary range bounds check +! o5 used in .rangeI check section as temporary +! o7 NOT USED + +! f0 u0,q0 +! f2 v0,(two-v0)-u0,z0 +! f4 n0,f0,q0 +! f6 s0 +! f8 q +! f10 u1,q1 +! f12 v1,(two-v1)-u1,z1 +! f14 n1,f1,q1 +! f16 s1 +! f18 t ! now tmp0 storage +! f20 u2,q2 +! f22 v2,(two-v2)-u2,q2 +! f24 n2,f2,q2 +! f26 s2 +! f28 0xfff00000 +! f29 0x43200000 +! f30 0x0200 (ELEVENBIT) +! f31 0xc0190200 (ELEVENBIT) +! f32 t0 +! f34 h0,f0-(c0-h0) +! f36 c0 +! f38 A1 +! f40 two +! f42 t1 +! f44 h1,f1-(c1-h1) +! f46 c1 +! f48 A2 +! f50 0xffff8000... or 0xfffffc00 for 6 or 11 bit tbl resp +! f52 t2 +! f54 h2,f2-(c2-h2) +! f56 c2 +! f58 A3 now tmp1 storage +! f60 ln2hi +! 
f62 ln2lo +!-------------------------------------------------------------------- +!-------------------------------------------------------------------- +! PREFETCH info +#define PREFETCH_MULT_READS 0 +!-------------------------------------------------------------------- +!-------------------------------------------------------------------- +! define pipes for easier reading + +#define ICNT %i0 + +#define XPTR %i1 +#define XSTR %i2 +#define YPTR %i3 +#define YSTR %i4 + +#define RANGE_LO %l6 +#define RANGE_HI %l7 + +#define P0_X1 %f0 +#define P0_f1 %f1 +#define P0_f2 %f2 +#define P0_f3 %f3 +#define P0_f4 %f4 +#define P0_f5 %f5 +#define P0_f6 %f6 +#define P0_f7 %f7 +!#define P0_f8 %f8 +#define T0_f8 %f8 +#define P0_f9 %f9 + +#define P1_X2 %f10 +#define P1_f11 %f11 +#define P1_f12 %f12 +#define P1_f13 %f13 +#define P1_f14 %f14 +#define P1_f15 %f15 +#define P1_f16 %f16 +#define P1_f17 %f17 + +!#define P1_f18 %f18 +#define T1_f18 %f18 + +#define P1_f19 %f19 + +#define P2_X3 %f20 +#define P2_f21 %f21 +#define P2_f22 %f22 +#define P2_f23 %f23 +#define P2_f24 %f24 +#define P2_f25 %f25 +#define P2_f26 %f26 +#define P2_f27 %f27 +#define INF_f28 %f28 +#define CONSTE432_f29 %f29 + +#define CONST_f30 %f30 + +#define TTOPMSK %f31 + +#define P0_f32 %f32 +#define P0_f34 %f34 +#define P0_f36 %f36 + +#define P1_f42 %f42 +#define P1_f44 %f44 +#define P1_f46 %f46 + +#define P2_f52 %f52 +#define P2_f54 %f54 +#define P2_f56 %f56 + +#define G1_TBL %g1 +#define L5_CONSTANTS %l5 +#define FP40_TWO %f40 +#define FP38_A1 %f38 +#define FP48_A2 %f48 +#define FP50_MASK %f50 +!!!#define FP58_A3 %f58 +#define T2_f58 %f58 +#define FP60_LN2HI %f60 +#define FP62_LN2LO %f62 + + +!-------------------------------------------------------------------- +! __vlog_ultra3 entry. Per the register-use notes the arguments are +! n (%i0), x (%i1), stridex (%i2), y (%i3) and stridey (%i4). +! Both strides are scaled by 8 in the prologue. Three elements, tagged +! X1, X2 and X3 in the comments, are kept in flight at once. + + ENTRY(__vlog_ultra3) + save %sp,-SA(MINFRAME)-tmps,%sp + PIC_SETUP(l7) + PIC_SET(l7,CONSTANTS,l5) + PIC_SET(l7,TBL,o0) + mov %o0,%g1 + wr %g0,0x82,%asi ! set %asi for non-faulting loads + + ld [XPTR],%l0 ! quickly !X1 + + sethi %hi(0x90200),%l4 ! 
ELEVENBIT + or %l4,%lo(0x90200),%l4 ! ELEVENBIT + ldd [XPTR],P0_X1 ! u.l[0] = *x !X1 + sethi %hi(0x000fffff),RANGE_LO + or RANGE_LO,%lo(0x000fffff),RANGE_LO + sethi %hi(0x7ff00000),RANGE_HI + ldd [L5_CONSTANTS+two],FP40_TWO + fzero P1_X2 + fzero P2_X3 + ldd [L5_CONSTANTS+A1],FP38_A1 + ldd [L5_CONSTANTS+A2],FP48_A2 + ldd [L5_CONSTANTS+ln2hi],FP60_LN2HI + ldd [L5_CONSTANTS+ln2lo],FP62_LN2LO + ldd [L5_CONSTANTS+mask],FP50_MASK + ld [L5_CONSTANTS+ox43200000],CONSTE432_f29 + ld [L5_CONSTANTS+oxfff00000],INF_f28 + ld [L5_CONSTANTS+oxc0194000],TTOPMSK ! loads 0xc0190200 (ELEVENBIT) + fpadd32s P0_X1,TTOPMSK,P0_f2 ! X+TTOP !X1 START + ld [L5_CONSTANTS+ox4000],CONST_f30 ! loads 0x0200 (ELEVENBIT) + sll XSTR,3,XSTR ! scale strides + sll YSTR,3,YSTR + add %fp,jnk,%o0 ! precondition loop + fands P0_f2,INF_f28,P0_f2 ! (X+TTOP)&INF->n X1 +! st P0_X1,[%fp+tmp0] !BYPASS in + fzero P0_f4 + fzero P0_f6 +! ld [%fp+tmp0],%l0 !BYPASS out ix X1 + add %fp,jnk,%o1 + add %fp,jnk,%o2 + fzero P0_f32 + fzero P0_f34 + fzero P0_f36 + fzero P1_f12 + sub %l0,RANGE_HI,%o3 ! bounds for X1 + sub RANGE_LO,%l0,%o4 ! bounds for X1 + fzero P1_f14 + fzero P1_f16 + sub YPTR,YSTR,YPTR + fzero P1_f42 + mov %g0,%l1 ! zero out for first pass + mov %g0,%l2 ! zero out for first pass + fzero P1_f44 + fzero P1_f46 + fzero T0_f8 + fzero T1_f18 + fzero T2_f58 + fzero P2_f24 + fzero P2_f26 + fzero P2_f52 + fzero P2_f54 + fzero P2_f56 + ba .loop0 + std P2_f26,[%fp+tmp2] + + .align 16 +! -- 16 byte aligned +.loop0: +!############################# AREA 1 (0-19) ###################################! +!>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 1.1 <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 0 + + fmuld P1_f44,FP48_A2,P1_f46 ! s^2,A2 ! X2-2 + andcc %o3,%o4,%o4 ! X1 + bge,pn %icc,.range0 ! ix<=0x000fffff or >=0x7ff00000 ! X1 +! delay slot + nop + ! x , n , reduction + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 1 + fpsub32s P0_X1,P0_f2,P0_X1 ! X - n -> x ! X1 + add XPTR,XSTR,XPTR ! x += stridex + add YPTR,YSTR,YPTR ! y += stridey ! 
+ +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 2 +.cont0: + ! n*l2lo , lylo + faddd P0_f4,P0_f34,P0_f34 !n*l2lo,lylo ! X1-2 + ! TBL calc + add %l0,%l4,%l0 ! j = ix + 0x90200 (ELEVENBIT) X1 +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 3 + fsubd FP40_TWO,P2_f24,P2_f24 ! two - xT ! X3-2 + + +!>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 1.2 <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 4 + ! round up reduction + fpadd32s P0_X1,CONST_f30,P0_f4 ! x round up X1 +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 5 + ! s ( poly + ( 2-xT-x)), n*l2lo+lylo + faddd P0_f36,P0_f34,P0_f36 ! + n*l2lo+lylo X1-2 + ! n*l2hi + fmuld T0_f8,FP60_LN2HI,T0_f8 ! n*l2hi ! X1-2 + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 6 + fmuld T1_f18,FP62_LN2LO,P1_f12 ! n*l2lo ! X2 + faddd P1_f46,FP38_A1,P1_f46 ! (s^2*A2), A1 X2-2 + ! TBL calc + srl %l0,10,%l0 ! j = j>>10 (masked with 0x3ff, then scaled by 16 below) !ELEVENBIT ! X1 + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 7 + fsubd P2_f24,P2_X3,P2_f24 ! (two - xT) - x ! !X3-2 + +!>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 1.3 <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 8 + ldda [XPTR]%asi,P1_X2 ! X2-nextX START + ! x-roundedup & 0xfffffc00 -> xT i.e 11bit value of x + fand P0_f4,FP50_MASK,P0_f4 ! xT ! X1 + + + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 9 + faddd P0_f36,P0_f32,P0_f36 ! + (x-xT) X1-2 + and %l0,0x3ff,%l0 ! ELEVENBIT ! X1 + st P1_X2,[%fp+tmp0] !BYPASS in ! X2 + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 10 + fmuld P1_f46,P1_f44,P1_f46 ! s^2*A2+A1 , s^2 X2-2 + ldd [G1_TBL+%l1],P1_f44 !lylo ! X2-2 + sub %l1,8,%l1 ! get back ptr to lyhi X2-2 + faddd P1_f12,P1_f44,P1_f44 !n*l2lo,lylo ! X2-2 + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 11 + +!>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 1.4 <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 12 + faddd P0_f36,P0_f6,P0_f36 ! + lyhi X1-2 + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 13 + ! x+xT + faddd P0_X1,P0_f4,P0_f6 ! x + xT ! X1 + ! TBL calc + sll %l0,4,%l0 ! ELEVENBIT ! X1 + + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 14 + faddd P1_f46,P1_f14,P1_f46 ! 
(s^2*A2+A1)s^2 + (2-xT-x) X2-2 + + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 15 + fpadd32s P1_X2,TTOPMSK,P1_f12 ! X + TTOP ! X2 + ld [%fp+tmp0],%l3 !BYPASS out ! X2 + +!>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 1.5 <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 16 + ! x-xT + fsubd P0_X1,P0_f4,P0_f32 ! x-xT ! X1 + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 17 + faddd P0_f36,T0_f8,P0_f36 ! + n*l2hi X1-2 + ! TBL+1 + add %l0,8,%l0 ! X1 + + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 18 + fmuld P1_f16,P1_f46,P1_f46 ! s*(POLY) ! X2-2 + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 19 + fands P1_f12,INF_f28,P1_f12 ! X2 + fmuld P2_f26,P2_f26,P2_f54 ! z = s * s ! !X3-2 + +!############################# AREA 2 (20#39) ###################################! +!>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 2.1 <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 20 + ! (x-xT) / (x+xT) => s + fdivd P0_f32,P0_f6,P0_f6 ! -> s ! X1 + faddd P1_f46,P1_f44,P1_f46 ! + n*l2lo+lylo X2-2 + ldd [G1_TBL+%l1],P1_f44 ! ld lyhi ! X2-2 + mov %l3,%l1 ! BYPASS temp ! X2 + ! wrap !!! done for X0 + std P0_f36,[%o0] ! X1-2 FINI + mov YPTR,%o0 ! X1-2 INC + + addcc ICNT,-1,ICNT ! + ble,pn %icc,.endloop0 ! +! delay slot + nop + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 21 +! -- 16 byte aligned +.loop1: + sub %l1,RANGE_HI,%o3 ! bounds for X2 + sub RANGE_LO,%l1,%o4 ! bounds for X2 + andcc %o3,%o4,%o4 ! X2 + bge,pn %icc,.range1 ! ix<=0x000fffff or >=0x7ff00000 ! X2 +! delay slot + nop + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 22 + fpsub32s P1_X2,P1_f12,P1_X2 ! X - n -> x ! X2 + add XPTR,XSTR,XPTR ! x += stridex + add YPTR,YSTR,YPTR ! y += stridey ! +.cont1: + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 23 + fmuld P2_f54,FP48_A2,P2_f56 ! s^2,A2! X3-2 + +!>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 2.2 <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 24 + ! n to double + fitod P0_f2,T0_f8 ! (double) n ! 
X1 + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 25 + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 26 + faddd P1_f46,P1_f42,P1_f46 ! + (x-xT) X2-2 + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 27 + fpadd32s P1_X2,CONST_f30,P1_f14 ! x round up X2 + faddd P2_f56,FP38_A1,P2_f56 ! (s^2*A2), A1 X3-2 + +!>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 2.3 <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 28 + ! 2 , xT + fsubd FP40_TWO,P0_f4,P0_f4 ! two - xT ! X1 + fmuld T1_f18,FP60_LN2HI,T1_f18 ! n*l2hi ! X2-2 + ldda [XPTR]%asi,P2_X3 ! X3-nextX START + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 29 + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 30 + faddd P1_f46,P1_f44,P1_f46 ! + lyhi X2-2 + st P2_X3,[%fp+tmp0] !BYPASS in ! X3 + + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 31 + fand P1_f14,FP50_MASK,P1_f14 ! xT ! X2 + fmuld P2_f56,P2_f54,P2_f56 ! s^2*A2+A1 , s^2 X3-2 + ldd [G1_TBL+%l2],P2_f54 !lylo ! X3 + sub %l2,8,%l2 ! back to TBL hi ! X3 + add %l1,%l4,%l1 ! j = ix + 0x90200 (ELEVENBIT) X2 + +!>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 2.4 <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 32 + ! 2-xT , x + fsubd P0_f4,P0_X1,P0_f4 ! (two - xT) - x ! !X1 + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 33 + fpadd32s P2_X3,TTOPMSK,P2_f22 ! X + TTOP ! X3 + + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 34 + faddd P1_f46,T1_f18,P1_f46 ! + n*l2hi X2-2 + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 35 + faddd P1_X2,P1_f14,P1_f16 ! x + xT ! X2 + srl %l1,10,%l1 ! j = j>>10 (masked with 0x3ff, then scaled by 16 below) !ELEVENBIT ! X2 + faddd P2_f56,P2_f24,P2_f56 ! + 2-xT-x X3-2 + + +!>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 2.5 <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 36 + fitod P1_f12,T1_f18 ! (double) n ! X2 + fmuld T2_f58,FP62_LN2LO,P2_f24 ! n*l2lo ! X3-2 + + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 37 + fands P2_f22,INF_f28,P2_f22 ! X3 + ld [%fp+tmp0],%l3 !BYPASS out ! X3 + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 38 + std P1_f46,[%o1] ! X2-2 FINI + mov YPTR,%o1 ! X2-2 INC + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 39 + fsubd P1_X2,P1_f14,P1_f42 ! x-xT ! 
X2 + fmuld P2_f26,P2_f56,P2_f56 ! s*(POLY) ! X3-2 + ldd [G1_TBL+%l2],P2_f26 ! ld lyhi ! X3 + mov %l3,%l2 ! BYPASS for X3 ! X3 + and %l1,0x3ff,%l1 ! ELEVENBIT ! X2 + +!############################# AREA 3 (40#59) ###################################! +!>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 3.1 <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 40 + faddd P2_f24,P2_f54,P2_f54 !n*l2lo,lylo ! X3-2 + ! s , s + fmuld P0_f6,P0_f6,P0_f34 ! z = s * s ! !X1 + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 41 + fdivd P1_f42,P1_f16,P1_f16 ! -> s ! X2 +! -- 16 byte aligned + addcc ICNT,-1,ICNT ! + ble,pn %icc,.endloop1 ! + nop +.loop2: + + sub %l2,RANGE_HI,%o3 ! bounds for X3 + sub RANGE_LO,%l2,%o4 ! bounds for X3 + andcc %o3,%o4,%o4 ! X3 + bge,pn %icc,.range2 ! ix<=0x000fffff or >=0x7ff00000 ! X3 +! delay slot + nop +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 42 + fpsub32s P2_X3,P2_f22,P2_X3 ! X - n -> x ! X3 + add XPTR,XSTR,XPTR ! x += stridex + add YPTR,YSTR,YPTR ! y += stridey ! +.cont2: + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 43 + sll %l1,4,%l1 ! ELEVENBIT ! X2 + fmuld T2_f58,FP60_LN2HI,T2_f58 ! n*l2hi ! X3-2 + faddd P2_f56,P2_f54,P2_f56 ! + n*l2lo+lylo X3-2 + + +!>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 3.2 <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 44 + ! s^ , A2 + fmuld P0_f34,FP48_A2,P0_f36 ! s^2,A2 ! X1 + fsubd FP40_TWO,P1_f14,P1_f14 ! two - xT ! X2 + add %l2,%l4,%l2 ! j = ix + 0x90200 (ELEVENBIT) X3 + srl %l2,10,%l2 ! j = j>>10 (masked with 0x3ff, then scaled by 16 below) !ELEVENBIT ! X3 + ldda [XPTR]%asi,P0_X1 ! X1-nextX START + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 45 + st P0_X1,[%fp+tmp0] !BYPASS in ! X1-nextX + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 46 + fpadd32s P2_X3,CONST_f30,P2_f24 ! x round up X3 + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 47 + add %l1,8,%l1 ! X2 + faddd P2_f56,P2_f52,P2_f56 ! + (x-xT) X3-2 + +!>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 3.3 <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 48 + ! s^2*A2 , A1 + faddd P0_f36,FP38_A1,P0_f36 ! 
(s^2*A2), A1 X1 + + and %l2,0x3ff,%l2 ! ELEVENBIT ! X3 + + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 49 + fsubd P1_f14,P1_X2,P1_f14 ! (two - xT) - x ! !X2 + + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 50 + fand P2_f24,FP50_MASK,P2_f24 ! xT ! X3 + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 51 + faddd P2_f56,P2_f26,P2_f56 ! + lyhi X3-2 + +!>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 3.4 <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 52 + ! s^2*A2+A1 , s^2 + fmuld P0_f36,P0_f34,P0_f36 ! s^2*A2+A1 , s^2 X1 + fpadd32s P0_X1,TTOPMSK,P0_f2 ! X + TTOP ! X1-nextX + sll %l2,4,%l2 ! ELEVENBIT ! X3 + + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 53 + ! lylo + ldd [G1_TBL+%l0],P0_f34 !lylo ! X1 + add %l0,-8,%l0 !lyhi pointer ! X1 + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 54 + faddd P2_X3,P2_f24,P2_f26 ! x + xT ! X3 + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 55 + faddd P2_f56,T2_f58,P2_f56 ! + n*l2hi X3-2 + +!>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 3.5 <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 56 + ! s^2(s^2*A2+A1) + (2-xT-x) + faddd P0_f36,P0_f4,P0_f36 ! X1 + add %l2,8,%l2 ! TBL+8 is TBL lo ! X3 + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 57 + ! X+TTOP & INF -> n + fands P0_f2,INF_f28,P0_f2 ! X1-nextX + ! n * l2lo + fmuld T0_f8,FP62_LN2LO,P0_f4 ! n*l2lo ! X1 + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 58 + fsubd P2_X3,P2_f24,P2_f52 ! x-xT ! X3 +!BEST ld [%fp+tmp0],%l3 !BYPASS out ! X1-nextX + ld [%fp+tmp0],%l3 !BYPASS out ! X1-nextX + + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 59 + fitod P2_f22,T2_f58 ! (double) n ! X3 + std P2_f56,[%o2] ! X3 FINI + mov YPTR,%o2 ! X3 INC + +!############################# AREA 4 (OVERFLOW) ###################################! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 60 + ! s * (s^2(s^2*A2+A1) + (2-xT-x)) + fmuld P0_f6,P0_f36,P0_f36 ! s*(POLY) ! X1 + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 61 + fmuld P1_f16,P1_f16,P1_f44 ! z = s * s ! !X2 + ! lyhi + ldd [G1_TBL+%l0],P0_f6 ! ld lyhi ! X1 + mov %l3,%l0 ! BYPASS tmp for X1 ! X1 + sub %l0,RANGE_HI,%o3 ! 
bounds for X1 + sub RANGE_LO,%l0,%o4 ! bounds for X1 + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 62 + addcc ICNT,-1,ICNT ! +! FALL THROUGH if running out of X array here + bg,pt %icc,.loop0 !62 +! delay slot + fdivd P2_f52,P2_f26,P2_f26 ! -> s ! X3 + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 63 +!LOSTC + + + + + + ! Once we get to the last element, we loop three more times to finish + ! the computations in progress. This means we will load past the end + ! of the argument vector, but since we use non-faulting loads and never + ! use the data, the only potential problem is cache miss. (Strictly + ! speaking, since we pad the argument vector with twos, we incorrectly + ! raise inexact if the actual argument vector is all ones.) + .endloop2: + sethi %hi(0x40000000),%l0 ! "next argument" = two + sub %l0,RANGE_HI,%o3 ! bnds chk x1 !54 + sub RANGE_LO,%l0,%o4 ! bounds chk x1 !54 + fmovd FP40_TWO,P0_X1 + cmp ICNT,-3 + bg,a,pt %icc,.loop0 + ! delay slot + fpadd32s P0_X1,TTOPMSK,P0_f2 ! n=(ix+0xc0190200)&0xfff00000 + ret + restore + + .align 16 + .endloop0: + sethi %hi(0x40000000),%l1 ! "next argument" = two + fmovd FP40_TWO,P1_X2 + cmp ICNT,-3 + bg,a,pt %icc,.loop1 + ! delay slot + fpadd32s P1_X2,TTOPMSK,P1_f12 ! n=(ix+0xc0190200)&0xfff00000 + ret + restore + + .align 16 + .endloop1: + sethi %hi(0x40000000),%l2 ! "next argument" = two + fmovd FP40_TWO,P2_X3 + cmp ICNT,-3 + bg,a,pt %icc,.loop2 + ! delay slot + fpadd32s P2_X3,TTOPMSK,P2_f22 ! n=(ix+0xc0190200)&0xfff00000 + ret + restore + + + .align 16 + .range0: + cmp %l0,RANGE_HI + bgeu,pn %icc,2f ! if (unsigned) ix >= 0x7ff00000 + ! delay slot + ld [XPTR+4],%o5 + !THERE + fxtod P0_X1,P0_X1 ! scale by 2**1074 w/o trapping + st P0_X1,[%fp+tmp0] !BYPASS in + add XPTR,XSTR,XPTR ! x += stridex + orcc %l0,%o5,%g0 + be,pn %icc,1f ! if x == 0 + ! delay slot + add YPTR,YSTR,YPTR ! y += stridey + ! HERE + fpadd32s P0_X1,TTOPMSK,P0_f2 ! n = (ix + 0xc0190200) & 0xfff00000 + fands P0_f2,INF_f28,P0_f2 + fpsub32s P0_X1,P0_f2,P0_X1 ! 
u.l[0] -= n + ld [%fp+tmp0],%l0 !BYPASS out + ba,pt %icc,.cont0 + ! delay slot + fpsub32s P0_f2,CONSTE432_f29,P0_f2 ! n -= 0x43200000 + 1: + fdivs CONSTE432_f29,P0_f1,P0_f2 ! raise div-by-zero + ba,pt %icc,3f + ! delay slot + st INF_f28,[YPTR] ! store -inf + 2: + sll %l0,1,%l0 ! lop off sign bit + add XPTR,XSTR,XPTR ! x += stridex + orcc %l0,%o5,%g0 + be,pn %icc,1b ! if x == -0 + ! delay slot + add YPTR,YSTR,YPTR ! y += stridey + fzero P0_f2 ! *y = (x < 0.0? 0.0 : x) * inf + fcmpd %fcc0,P0_X1,P0_f2 + fmovdl %fcc0,P0_f2,P0_X1 + fand INF_f28,FP50_MASK,P0_f2 + fnegd P0_f2,P0_f2 + fmuld P0_X1,P0_f2,P0_X1 + st P0_X1,[YPTR] + 3: + addcc ICNT,-1,ICNT + ble,pn %icc,.endloop2 + ! delay slot + st P0_f1,[YPTR+4] + ld [XPTR],%l0 ! get next argument + sub %l0,RANGE_HI,%o3 ! bnds chk x1 !54 + sub RANGE_LO,%l0,%o4 ! bounds chk x1 !54 + ldd [XPTR],P0_X1 + fpadd32s P0_X1,TTOPMSK,P0_f2 ! n=(ix+0xc0190200)&0xfff00000 + ba,pt %icc,.loop0 + ! delay slot + fands P0_f2,INF_f28,P0_f2 !58 + + + .align 16 + .range1: + cmp %l1,RANGE_HI + bgeu,pn %icc,2f ! if (unsigned) ix >= 0x7ff00000 + ! delay slot + ld [XPTR+4],%o5 + fxtod P1_X2,P1_X2 ! scale by 2**1074 w/o trapping + st P1_X2,[%fp+tmp1] + add XPTR,XSTR,XPTR ! x += stridex + orcc %l1,%o5,%g0 + be,pn %icc,1f ! if x == 0 + ! delay slot + add YPTR,YSTR,YPTR ! y += stridey + fpadd32s P1_X2,TTOPMSK,P1_f12 ! n = (ix + 0xc0190200) & 0xfff00000 + fands P1_f12,INF_f28,P1_f12 + fpsub32s P1_X2,P1_f12,P1_X2 ! u.l[0] -= n + ld [%fp+tmp1],%l1 + ba,pt %icc,.cont1 + ! delay slot + fpsub32s P1_f12,CONSTE432_f29,P1_f12 ! n -= 0x43200000 + 1: + fdivs CONSTE432_f29,P1_f11,P1_f12 ! raise div-by-zero + ba,pt %icc,3f + ! delay slot + st INF_f28,[YPTR] ! store -inf + 2: + sll %l1,1,%l1 ! lop off sign bit + add XPTR,XSTR,XPTR ! x += stridex + orcc %l1,%o5,%g0 + be,pn %icc,1b ! if x == -0 + ! delay slot + add YPTR,YSTR,YPTR ! y += stridey + fzero P1_f12 ! *y = (x < 0.0? 
0.0 : x) * inf + fcmpd %fcc0,P1_X2,P1_f12 + fmovdl %fcc0,P1_f12,P1_X2 + fand INF_f28,FP50_MASK,P1_f12 + fnegd P1_f12,P1_f12 + fmuld P1_X2,P1_f12,P1_X2 + st P1_X2,[YPTR] + 3: + addcc ICNT,-1,ICNT + ble,pn %icc,.endloop0 + ! delay slot + st P1_f11,[YPTR+4] + ld [XPTR],%l1 ! get next argument + ldd [XPTR],P1_X2 + fpadd32s P1_X2,TTOPMSK,P1_f12 ! X + TTOP + ba,pt %icc,.loop1 + ! delay slot + fands P1_f12,INF_f28,P1_f12 ! & INF + + + .align 16 +.range2: + cmp %l2,RANGE_HI + bgeu,pn %icc,2f ! if (unsigned) ix >= 0x7ff00000 +! delay slot + ld [XPTR+4],%o5 + fxtod P2_X3,P2_X3 ! scale by 2**1074 w/o trapping + st P2_X3,[%fp+tmp2] + add XPTR,XSTR,XPTR ! x += stridex + orcc %l2,%o5,%g0 + be,pn %icc,1f ! if x == 0 +! delay slot + add YPTR,YSTR,YPTR ! y += stridey + fpadd32s P2_X3,TTOPMSK,P2_f22 ! n = (ix + 0xc0190200) & 0xfff00000 + fands P2_f22,INF_f28,P2_f22 + fpsub32s P2_X3,P2_f22,P2_X3 ! u.l[0] -= n + ld [%fp+tmp2],%l2 + ba,pt %icc,.cont2 +! delay slot + fpsub32s P2_f22,CONSTE432_f29,P2_f22 ! n -= 0x43200000 +1: + fdivs CONSTE432_f29,P2_f21,P2_f22 ! raise div-by-zero + ba,pt %icc,3f +! delay slot + st INF_f28,[YPTR] ! store -inf +2: + sll %l2,1,%l2 ! lop off sign bit + add XPTR,XSTR,XPTR ! x += stridex + orcc %l2,%o5,%g0 + be,pn %icc,1b ! if x == -0 +! delay slot + add YPTR,YSTR,YPTR ! y += stridey + fzero P2_f22 ! *y = (x < 0.0? 0.0 : x) * inf + fcmpd %fcc0,P2_X3,P2_f22 + fmovdl %fcc0,P2_f22,P2_X3 + fand INF_f28,FP50_MASK,P2_f22 + fnegd P2_f22,P2_f22 + fmuld P2_X3,P2_f22,P2_X3 + st P2_X3,[YPTR] +3: + addcc ICNT,-1,ICNT + ble,pn %icc,.endloop1 +! delay slot + st P2_f21,[YPTR+4] + ld [XPTR],%l2 ! get next argument + ldd [XPTR],P2_X3 + fpadd32s P2_X3,TTOPMSK,P2_f22 ! X + TTOP + ba,pt %icc,.loop2 +! delay slot + fands P2_f22,INF_f28,P2_f22 ! 
X3 + nop !ld [XPTR+4],P2_f21 + + SET_SIZE(__vlog_ultra3) + diff --git a/usr/src/lib/libmvec/common/vis/__vlogf.S b/usr/src/lib/libmvec/common/vis/__vlogf.S new file mode 100644 index 0000000000..a2c66f9697 --- /dev/null +++ b/usr/src/lib/libmvec/common/vis/__vlogf.S @@ -0,0 +1,1277 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .file "__vlogf.S" + +#include "libm.h" + + RO_DATA + .align 64 +!! CONST_TBL[2*i] = 127*log(2) - log(1+i/32), i = [0, 32] +!! 
CONST_TBL[2*i+1] = 2**(-23)/(1+i/32), i = [0, 32] + +.CONST_TBL: + .word 0x405601e6, 0x78fc457b, 0x3e800000, 0x00000000, + .word 0x4055ffee, 0x4f4b5df8, 0x3e7f07c1, 0xf07c1f08, + .word 0x4055fe05, 0x32e4434f, 0x3e7e1e1e, 0x1e1e1e1e, + .word 0x4055fc2a, 0x44598c21, 0x3e7d41d4, 0x1d41d41d, + .word 0x4055fa5c, 0xb720babf, 0x3e7c71c7, 0x1c71c71c, + .word 0x4055f89b, 0xcf803581, 0x3e7bacf9, 0x14c1bad0, + .word 0x4055f6e6, 0xe0c3f1b1, 0x3e7af286, 0xbca1af28, + .word 0x4055f53d, 0x4badcb50, 0x3e7a41a4, 0x1a41a41a, + .word 0x4055f39e, 0x7d18782e, 0x3e799999, 0x9999999a, + .word 0x4055f209, 0xecc5965c, 0x3e78f9c1, 0x8f9c18fa, + .word 0x4055f07f, 0x1c5099d5, 0x3e786186, 0x18618618, + .word 0x4055eefd, 0x9641645e, 0x3e77d05f, 0x417d05f4, + .word 0x4055ed84, 0xed3a291d, 0x3e7745d1, 0x745d1746, + .word 0x4055ec14, 0xbb3ced72, 0x3e76c16c, 0x16c16c17, + .word 0x4055eaac, 0xa10589ab, 0x3e7642c8, 0x590b2164, + .word 0x4055e94c, 0x45758439, 0x3e75c988, 0x2b931057, + .word 0x4055e7f3, 0x550f85e3, 0x3e755555, 0x55555555, + .word 0x4055e6a1, 0x818078ec, 0x3e74e5e0, 0xa72f0539, + .word 0x4055e556, 0x8134aae1, 0x3e747ae1, 0x47ae147b, + .word 0x4055e412, 0x0ef783b7, 0x3e741414, 0x14141414, + .word 0x4055e2d3, 0xe99c9674, 0x3e73b13b, 0x13b13b14, + .word 0x4055e19b, 0xd3b0f9d9, 0x3e73521c, 0xfb2b78c1, + .word 0x4055e069, 0x9333fb26, 0x3e72f684, 0xbda12f68, + .word 0x4055df3c, 0xf1565bd0, 0x3e729e41, 0x29e4129e, + .word 0x4055de15, 0xba3f64fa, 0x3e724924, 0x92492492, + .word 0x4055dcf3, 0xbcd73219, 0x3e71f704, 0x7dc11f70, + .word 0x4055dbd6, 0xca95a75a, 0x3e71a7b9, 0x611a7b96, + .word 0x4055dabe, 0xb7559927, 0x3e715b1e, 0x5f75270d, + .word 0x4055d9ab, 0x592bb896, 0x3e711111, 0x11111111, + .word 0x4055d89c, 0x8840e4fe, 0x3e70c971, 0x4fbcda3b, + .word 0x4055d792, 0x1eaf8df0, 0x3e708421, 0x08421084, + .word 0x4055d68b, 0xf863da3d, 0x3e704104, 0x10410410, + .word 0x4055d589, 0xf2fe5107, 0x3e700000, 0x00000000, + .word 0xbfcffb16, 0xbfa3db6e, ! 
K3 = -2.49850123953105416108e-01 + .word 0x3fd5561b, 0xa4b3110b, ! K2 = 3.33380614127478394992e-01 + .word 0xbfe00000, 0x0b666d0b, ! K1 = -5.00000021234343492201e-01 + .word 0x3fefffff, 0xff3fd118, ! K0 = 9.99999998601683029714e-01 + .word 0x3fe62e42, 0xfefa39ef, ! LN2 = 6.931471805599452862e-01 + .word 0xbf800000, 0x7f800000, ! MONE = -1.0f ; INF + +! local storage indices +#define tmp0 STACK_BIAS-0x8 +#define tmp1 STACK_BIAS-0x10 +#define tmp2 STACK_BIAS-0x18 +#define tmp3 STACK_BIAS-0x20 +#define tmp4 STACK_BIAS-0x28 +#define tmp5 STACK_BIAS-0x30 +! sizeof temp storage - must be a multiple of 16 for V9 +#define tmps 0x30 + +#define ZERO %f28 +#define K3 %f30 +#define K2 %f32 +#define K1 %f34 +#define K0 %f36 +#define LN2 %f38 + +#define stridex %o0 +#define stridex2 %o1 +#define stridey %o2 +#define x0 %o3 +#define x1 %o4 +#define y %o5 + +#define ind0 %i0 +#define ind1 %i1 +#define ind2 %i2 +#define ind3 %i3 +#define MASK_0x007fffff %i4 +#define MASK_0xfffc0000 %i5 +#define CONST_0x20000 %o7 +#define MASK_0x7f800000 %l3 + +#define ival0 %l0 +#define iy0 %l1 +#define ival1 %l2 +#define iy1 %l1 +#define ival2 %l4 +#define iy2 %l5 +#define ival3 %l6 +#define iy3 %l2 +#define counter %l7 + +#define LOGFTBL %g5 +#define LOGFTBL_P8 %g1 + +! register use + +! i0 ind0 +! i1 ind1 +! i2 ind2 +! i3 ind3 +! i4 0x007fffff +! i5 0xfffc0000 + +! l0 ival0 +! l1 iy0, iy1 +! l2 ival1, iy3 +! l3 0x7f800000 +! l4 ival2 +! l5 iy2 +! l6 ival3 +! l7 cycle counter + +! o0 stridex +! o1 stridex * 2 +! o2 stridey +! o3 x0 +! o4 x1 +! o5 y +! o7 0x20000 + +! g1 CONST_TBL + 8 +! g5 CONST_TBL + +! f2 +! f4 +! f6 +! f8 +! f9 +! f10 +! f12 +! f14 +! f16 +! f18 +! f19 +! f20 +! f22 +! f24 +! f26 +! f28 ZERO = 0 +! f30 K3 = -2.49850123953105416108e-01 +! f32 K2 = 3.33380614127478394992e-01 +! f34 K1 = -5.00000021234343492201e-01 +! f36 K0 = 9.99999998601683029714e-01 +! f38 LN2 = 6.931471805599452862e-01 +! f40 +! f42 +! f44 +! f46 +! f48 +! f50 +! f52 +! f54 +! f56 +! f58 +! f60 +! f62 + + +! 
!!!!! Algorithm !!!!! +! +! double exp, ty, yy, ldtmp0, ldtmp1; +! double dtmp0, dtmp1, dtmp2, dtmp3, dtmp4, dtmp5; +! float value; +! int ival, iy, i, ind, iexp; +! double K3 = -2.49850123953105416108e-01; +! double K2 = 3.33380614127478394992e-01; +! double K1 = -5.00000021234343492201e-01; +! double K0 = 9.99999998601683029714e-01; +! double LN2 = 6.931471805599452862e-01; +! double ZERO = 0; +! float INF; +! +! ival = *(int*)(x); +! if (ival >= 0x7f800000) goto spec; +! if (ival <= 0x7fffff) goto spec; +! *(float*)&exp = *(float*)(x); +! exp = vis_fpack32(ZERO, exp); +! iy = ival & 0x007fffff; +! ival = iy + 0x20000; +! ival = ival & 0xfffc0000; +! i = ival >> 14; +! ind = i & (-8); +! iy = iy - ival; +! ty = LN2 * (double)(*(int*)&exp); +! ldtmp0 = *(double*)((char*)CONST_TBL+ind); +! ldtmp1 = *(double*)((char*)CONST_TBL+ind+8); +! ty = ty - ldtmp0; +! yy = (double) iy; +! yy = yy * ldtmp1; +! dtmp0 = K3 * yy; +! dtmp1 = dtmp0 + K2; +! dtmp2 = dtmp1 * yy; +! dtmp3 = dtmp2 + K1; +! dtmp4 = dtmp3 * yy; +! dtmp5 = dtmp4 + K0; +! yy = dtmp5 * yy; +! yy = yy + ty; +! y[0] = (float)(yy); +! return; +! +!spec: +! if ((ival & 0x7fffffff) >= 0x7f800000) { /* X = NaN or Inf */ +! value = *(float*) &ival; +! y[0] = (value < 0.0f? 0.0f : value) * value; +! return; +! } else if (ival <= 0) { +! y[0] = ((ival & 0x7fffffff) == 0) ? +! -1.0f / 0.0f : 0.0f / 0.0f; /* X = +-0 : X < 0 */ +! return; +! } else { /* Denormal number */ +! value = (float) ival; +! ival = *(int*) &value; +! iexp = (ival >> 23) - 149; +! iy = ival & 0x007fffff; +! ival = iy + 0x20000; +! ival = ival & 0xfffc0000; +! i = ival >> 14; +! ind = i & (-8); +! iy = iy - ival; +! ty = LN2 * (double)iexp; +! ldtmp0 = *(double*)((char*)CONST_TBL+ind); +! ldtmp1 = *(double*)((char*)CONST_TBL+ind+8); +! ty = ty - ldtmp0; +! yy = (double) iy; +! yy = yy * ldtmp1; +! dtmp0 = K3 * yy; +! dtmp1 = dtmp0 + K2; +! dtmp2 = dtmp1 * yy; +! dtmp3 = dtmp2 + K1; +! dtmp4 = dtmp3 * yy; +! dtmp5 = dtmp4 + K0; +! 
yy = dtmp5 * yy; +! yy = yy + ty; +! y[0] = (float)(yy); +! return; +! } +!-------------------------------------------------------------------- + + ENTRY(__vlogf) + save %sp,-SA(MINFRAME)-tmps,%sp + PIC_SETUP(l7) + PIC_SET(l7,.CONST_TBL,g5) + wr %g0,0,%gsr + + st %i0,[%fp+tmp0] + stx %i1,[%fp+tmp5] + + sra %i2,0,%l4 + ldd [LOGFTBL+528],K3 + add %i3,0,y + sllx %l4,2,stridex + sllx %l4,3,stridex2 + ldd [LOGFTBL+536],K2 + sra %i4,0,%l3 + ldd [LOGFTBL+544],K1 + sllx %l3,2,stridey + sethi %hi(0x7ffc00),MASK_0x007fffff + add MASK_0x007fffff,1023,MASK_0x007fffff + ldd [LOGFTBL+552],K0 + sethi %hi(0xfffc0000),MASK_0xfffc0000 + ldd [LOGFTBL+560],LN2 + sethi %hi(0x20000),CONST_0x20000 + fzero ZERO + sethi %hi(0x7f800000),MASK_0x7f800000 + sub y,stridey,y + +.begin: + ld [%fp+tmp0],counter + ldx [%fp+tmp5],x0 + st %g0,[%fp+tmp0] +.begin1: + add x0,stridex2,x1! x += 2*stridex + subcc counter,1,counter + bneg,pn %icc,.end + lda [x0]0x82,ival0 ! (Y0_0) ival = *(int*)(x) + + add LOGFTBL,8,LOGFTBL_P8 + lda [stridex+x0]0x82,ival1 ! (Y1_0) ival = *(int*)(x) + + cmp ival0,MASK_0x7f800000 ! (Y0_0) if (ival >= 0x7f800000) + lda [x1]0x82,ival2 ! (Y2_0) ival = *(int*)(x); + + bge,pn %icc,.spec ! (Y0_0) if (ival >= 0x7f800000) + nop + + cmp ival0,MASK_0x007fffff ! (Y0_0) if (ival <= 0x7fffff) + ble,pn %icc,.spec ! (Y0_0) if (ival <= 0x7fffff) + nop + + cmp ival1,MASK_0x7f800000 ! (Y1_0) if (ival >= 0x7f800000) + and ival0,MASK_0x007fffff,iy0 ! (Y0_0) iy = ival & 0x007fffff + + + add iy0,CONST_0x20000,ival0 ! (Y0_0) ival = iy + 0x20000 + + and ival0,MASK_0xfffc0000,ival0 ! (Y0_0) ival = ival & 0xfffc0000 + bge,pn %icc,.update2 ! (Y1_0) if (ival >= 0x7f800000) + nop +.cont2: + sub iy0,ival0,iy0 ! (Y0_0) iy = iy - ival + cmp ival1,MASK_0x007fffff ! (Y1_0) if (ival <= 0x7fffff) + lda [stridex+x1]0x82,ival3 ! (Y3_0) ival = *(int*)(x) + + st iy0,[%fp+tmp1] ! (Y0_0) (double) iy + ble,pn %icc,.update3 ! (Y1_0) if (ival <= 0x7fffff) + nop +.cont3: + cmp ival2,MASK_0x7f800000 ! 
(Y2_0) if (ival >= 0x7f800000) + and ival1,MASK_0x007fffff,iy1 ! (Y1_0) iy = ival & 0x007fffff + bge,pn %icc,.update4 ! (Y2_0) if (ival >= 0x7f800000) + nop +.cont4: + cmp ival2,MASK_0x007fffff ! (Y2_0) if (ival <= 0x7fffff) + ble,pn %icc,.update5 ! (Y2_0) if (ival <= 0x7fffff) + nop +.cont5: + add iy1,CONST_0x20000,ival1 ! (Y1_0) ival = iy + 0x20000 + and ival2,MASK_0x007fffff,iy2 ! (Y2_0) iy = ival & 0x007fffff + + and ival1,MASK_0xfffc0000,ival1 ! (Y1_0) ival = ival & 0xfffc0000 + add iy2,CONST_0x20000,ival2 ! (Y2_0) ival = iy + 0x20000 + + sub iy1,ival1,iy1 ! (Y1_0) iy = iy - ival + and ival2,MASK_0xfffc0000,ival2 ! (Y2_0) ival = ival & 0xfffc0000 + + cmp ival3,MASK_0x7f800000 ! (Y3_0) (ival >= 0x7f800000) + sub iy2,ival2,iy2 ! (Y2_0) iy = iy - ival + st iy1,[%fp+tmp3] ! (Y1_0) (double) iy + + st iy2,[%fp+tmp2] ! (Y2_0) (double) iy + bge,pn %icc,.update6 ! (Y3_0) (ival >= 0x7f800000) + nop +.cont6: + cmp ival3,MASK_0x007fffff ! (Y3_0) if (ival <= 0x7fffff) + ld [%fp+tmp1],%f2 ! (Y0_0) (double) iy + ble,pn %icc,.update7 ! (Y3_0) if (ival <= 0x7fffff) + sra ival0,14,ival0 ! (Y0_0) i = ival >> 14; +.cont7: + sra ival1,14,ind1 ! (Y1_0) i = ival >> 14; + ld [%fp+tmp3],%f4 ! (Y1_0) (double) iy + + sra ival2,14,ival2 ! (Y2_0) i = ival >> 14; + and ival0,-8,ind0 ! (Y0_0) ind = i & (-8) + lda [x0]0x82,%f6 ! (Y0_0) *(float*)&exp = *(float*)(x) + + and ind1,-8,ind1 ! (Y1_0) ind = i & (-8) + ldd [LOGFTBL_P8+ind0],%f14 ! (Y0_0) ldtmp1 = *(double*)((char*)CONST_TBL+ind+8) + fitod %f2,%f48 ! (Y0_0) yy = (double) iy + + and ival3,MASK_0x007fffff,iy3 ! (Y3_0) iy = ival & 0x007fffff + lda [stridex+x0]0x82,%f8 ! (Y1_0) *(float*)&exp = *(float*)(x) + + add iy3,CONST_0x20000,ival3 ! (Y3_0) iy + 0x20000 + ldd [LOGFTBL_P8+ind1],%f16 ! (Y1_0) ldtmp1 = *(double*)((char*)CONST_TBL+ind+8) + fitod %f4,%f26 ! (Y1_0) yy = (double) iy + + sub y,stridey,y ! y += stridey + and ival3,MASK_0xfffc0000,ival3 ! (Y3_0) ival = ival & 0xfffc0000 + lda [x1]0x82,%f10 ! 
(Y2_0) *(float*)&exp = *(float*)(x) + + add x1,stridex2,x0 ! x += 2*stridex + sub iy3,ival3,iy3 ! (Y3_0) iy = iy - ival + ld [%fp+tmp2],%f2 ! (Y2_0) (double) iy + fmuld %f48,%f14,%f46 ! (Y0_0) yy = yy * ldtmp1 + + lda [stridex+x1]0x82,%f12 ! (Y3_0) *(float*)&exp = *(float*)(x) + fmuld %f26,%f16,%f62 ! (Y1_0) yy = yy * ldtmp1 + + sra ival3,14,ival3 ! (Y3_0) i = ival >> 14; + lda [x0]0x82,ival0 ! (Y0_1) ival = *(int*)(x) + + add x0,stridex2,x1 ! x += 2*stridex + st iy3,[%fp+tmp3] ! (Y3_0) (double) iy + fmuld K3,%f46,%f22 ! (Y0_0) dtmp0 = K3 * yy + + and ival2,-8,ind2 ! (Y2_0) ind = i & (-8) + lda [stridex+x0]0x82,ival1 ! (Y1_1) ival = *(int*)(x) + + cmp ival0,MASK_0x7f800000 ! (Y0_1) if (ival >= 0x7f800000) + lda [x1]0x82,ival2 ! (Y2_1) ival = *(int*)(x); + fmuld K3,%f62,%f50 ! (Y1_0) dtmp0 = K3 * yy + + bge,pn %icc,.update8 ! (Y0_1) if (ival >= 0x7f800000) + nop +.cont8: + cmp ival0,MASK_0x007fffff ! (Y0_1) if (ival <= 0x7fffff) + ble,pn %icc,.update9 ! (Y0_1) if (ival <= 0x7fffff) + faddd %f22,K2,%f48 ! (Y0_0) dtmp1 = dtmp0 + K2 + +.cont9: + cmp ival1,MASK_0x7f800000 ! (Y1_1) if (ival >= 0x7f800000) + and ival0,MASK_0x007fffff,iy0 ! (Y0_1) iy = ival & 0x007fffff + + add iy0,CONST_0x20000,ival0 ! (Y0_1) ival = iy + 0x20000 + ldd [LOGFTBL_P8+ind2],%f14 ! (Y2_0) ldtmp1 = *(double*)((char*)CONST_TBL+ind+8); + fpack32 ZERO,%f6,%f6 ! (Y0_0) exp = vis_fpack32(ZERO, exp) + + and ival0,MASK_0xfffc0000,ival0 ! (Y0_1) ival = ival & 0xfffc0000 + faddd %f50,K2,%f26 ! (Y1_0) dtmp1 = dtmp0 + K2 + bge,pn %icc,.update10 ! (Y1_1) if (ival >= 0x7f800000) + nop +.cont10: + sub iy0,ival0,iy0 ! (Y0_1) iy = iy - ival + and ival3,-8,ind3 ! (Y3_0) ind = i & (-8) + ld [%fp+tmp3],%f4 ! (Y3_0) (double) iy + + cmp ival1,MASK_0x007fffff ! (Y1_1) if (ival <= 0x7fffff) + lda [stridex+x1]0x82,ival3 ! (Y3_1) ival = *(int*)(x) + fmuld %f48,%f46,%f50 ! (Y0_0) dtmp2 = dtmp1 * yy + fitod %f2,%f48 ! (Y2_0) yy = (double) iy + + st iy0,[%fp+tmp1] ! (Y0_1) (double) iy + ble,pn %icc,.update11 ! 
(Y1_1) if (ival <= 0x7fffff) + nop +.cont11: + cmp ival2,MASK_0x7f800000 ! (Y2_1) if (ival >= 0x7f800000) + and ival1,MASK_0x007fffff,iy1 ! (Y1_1) iy = ival & 0x007fffff + bge,pn %icc,.update12 ! (Y2_1) if (ival >= 0x7f800000) + fmuld %f26,%f62,%f42 ! (Y1_0) dtmp2 = dtmp1 * yy +.cont12: + cmp ival2,MASK_0x007fffff ! (Y2_1) if (ival <= 0x7fffff) + ldd [LOGFTBL_P8+ind3],%f16 ! (Y3_0) ldtmp1 = *(double*)((char*)CONST_TBL+ind+8) + ble,pn %icc,.update13 ! (Y2_1) if (ival <= 0x7fffff) + fitod %f4,%f26 ! (Y3_0) yy = (double) iy +.cont13: + add iy1,CONST_0x20000,ival1 ! (Y1_1) ival = iy + 0x20000 + and ival2,MASK_0x007fffff,iy2 ! (Y2_1) iy = ival & 0x007fffff + + and ival1,MASK_0xfffc0000,ival1 ! (Y1_1) ival = ival & 0xfffc0000 + add iy2,CONST_0x20000,ival2 ! (Y2_1) ival = iy + 0x20000 + fmuld %f48,%f14,%f44 ! (Y2_0) yy = yy * ldtmp1 + faddd %f50,K1,%f50 ! (Y0_0) dtmp3 = dtmp2 + K1 + + cmp ival3,MASK_0x7f800000 ! (Y3_1) if (ival >= 0x7f800000) + sub iy1,ival1,iy1 ! (Y1_1) iy = iy - ival + and ival2,MASK_0xfffc0000,ival2 ! (Y2_1) ival = ival & 0xfffc0000 + fpack32 ZERO,%f8,%f8 ! (Y1_0) exp = vis_fpack32(ZERO, exp) + + sub iy2,ival2,iy2 ! (Y2_1) iy = iy - ival + st iy1,[%fp+tmp3] ! (Y1_1) (double) iy + fmuld %f26,%f16,%f60 ! (Y3_0) yy = yy * ldtmp1 + faddd %f42,K1,%f54 ! (Y1_0) dtmp3 = dtmp2 + K1 + + st iy2,[%fp+tmp2] ! (Y2_1) (double) iy + fmuld K3,%f44,%f22 ! (Y2_0) dtmp0 = K3 * yy + bge,pn %icc,.update14 ! (Y3_1) if (ival >= 0x7f800000) + fitod %f6,%f40 ! (Y0_0) (double)(*(int*)&exp) +.cont14: + cmp ival3,MASK_0x007fffff ! (Y3_1) if (ival <= 0x7fffff) + ldd [LOGFTBL+ind1],%f58 ! (Y1_0) ldtmp0 = *(double*)((char*)CONST_TBL+ind) + fmuld %f50,%f46,%f52 ! (Y0_0) dtmp4 = dtmp3 * yy + fitod %f8,%f56 ! (Y1_0) (double)(*(int*)&exp) + + ld [%fp+tmp1],%f2 ! (Y0_1) (double) iy + fmuld K3,%f60,%f50 ! (Y3_0) dtmp0 = K3 * yy + ble,pn %icc,.update15 ! (Y3_1) if (ival <= 0x7fffff) + nop +.cont15: + subcc counter,7,counter + fmuld %f54,%f62,%f54 ! 
(Y1_0) dtmp4 = dtmp3 * yy + + sra ival0,14,ival0 ! (Y0_1) i = ival >> 14; + bneg,pn %icc,.tail + faddd %f22,K2,%f48 ! (Y2_0) dtmp1 = dtmp0 + K2 + + ba .main_loop + nop + + .align 16 +.main_loop: + sra ival2,14,ival2 ! (Y2_1) i = ival >> 14; + ldd [LOGFTBL+ind0],%f42 ! (Y0_0) ldtmp0 = *(double*)((char*)CONST_TBL+ind) + fmuld LN2,%f40,%f40 ! (Y0_0) ty = LN2 * (double)(*(int*)&exp) + faddd %f52,K0,%f22 ! (Y0_0) dtmp5 = dtmp4 + K0 + + sra ival1,14,ind1 ! (Y1_1) i = ival >> 14; + ld [%fp+tmp3],%f4 ! (Y1_1) (double) iy + fpack32 ZERO,%f10,%f18 ! (Y2_0) exp = vis_fpack32(ZERO, exp) + faddd %f50,K2,%f26 ! (Y3_0) dtmp1 = dtmp0 + K2 + + and ival0,-8,ind0 ! (Y0_1) ind = i & (-8) + lda [x0]0x82,%f6 ! (Y0_1) *(float*)&exp = *(float*)(x) + fmuld LN2,%f56,%f56 ! (Y1_0) LN2 * (double)(*(int*)&exp) + faddd %f54,K0,%f24 ! (Y1_0) dtmp5 = dtmp4 + K0 + + and ind1,-8,ind1 ! (Y1_1) ind = i & (-8) + ldd [LOGFTBL_P8+ind0],%f14 ! (Y0_1) ldtmp1 = *(double*)((char*)CONST_TBL+ind+8) + fmuld %f48,%f44,%f50 ! (Y2_0) dtmp2 = dtmp1 * yy + fitod %f2,%f48 ! (Y0_1) yy = (double) iy + + and ival3,MASK_0x007fffff,iy3 ! (Y3_1) iy = ival & 0x007fffff + lda [stridex+x0]0x82,%f8 ! (Y1_1) *(float*)&exp = *(float*)(x) + fmuld %f22,%f46,%f22 ! (Y0_0) yy = dtmp5 * yy + fsubd %f40,%f42,%f40 ! (Y0_0) ty = ty - ldtmp0 + + add iy3,CONST_0x20000,ival3 ! (Y3_1) iy + 0x20000 + ldd [LOGFTBL_P8+ind1],%f16 ! (Y1_1) ldtmp1 = *(double*)((char*)CONST_TBL+ind+8) + fmuld %f26,%f60,%f42 ! (Y3_0) dtmp2 = dtmp1 * yy + fitod %f4,%f26 ! (Y1_1) yy = (double) iy + + and ival3,MASK_0xfffc0000,ival3 ! (Y3_1) ival = ival & 0xfffc0000 + lda [x1]0x82,%f10 ! (Y2_1) *(float*)&exp = *(float*)(x) + fmuld %f24,%f62,%f24 ! (Y1_0) yy = dtmp5 * yy + fsubd %f56,%f58,%f58 ! (Y1_0) ty = ty - ldtmp0 + + sub iy3,ival3,iy3 ! (Y3_1) iy = iy - ival + ld [%fp+tmp2],%f2 ! (Y2_1) (double) iy + fmuld %f48,%f14,%f46 ! (Y0_1) yy = yy * ldtmp1 + faddd %f50,K1,%f50 ! (Y2_0) dtmp3 = dtmp2 + K1 + + add x1,stridex2,x0 ! x += 2*stridex + st iy3,[%fp+tmp3] ! 
(Y3_1) (double) iy + fpack32 ZERO,%f12,%f20 ! (Y3_0) exp = vis_fpack32(ZERO, exp) + faddd %f22,%f40,%f48 ! (Y0_0) yy = yy + ty + + add y,stridey,y ! y += stridey + lda [stridex+x1]0x82,%f12 ! (Y3_1) *(float*)&exp = *(float*)(x) + fmuld %f26,%f16,%f62 ! (Y1_1) yy = yy * ldtmp1 + faddd %f42,K1,%f54 ! (Y3_0) dtmp3 = dtmp2 + K1 + + sra ival3,14,ival3 ! (Y3_1) i = ival >> 14; + add y,stridey,y ! y += stridey + lda [x0]0x82,ival0 ! (Y0_2) ival = *(int*)(x) + faddd %f24,%f58,%f24 ! (Y1_0) yy = yy + ty + + add x0,stridex2,x1 ! x += 2*stridex + ldd [LOGFTBL+ind2],%f42 ! (Y2_0) ldtmp0 = *(double*)((char*)CONST_TBL+ind) + fmuld K3,%f46,%f22 ! (Y0_1) dtmp0 = K3 * yy + fitod %f18,%f40 ! (Y2_0) (double)(*(int*)&exp) + + and ival2,-8,ind2 ! (Y2_1) ind = i & (-8) + lda [stridex+x0]0x82,ival1 ! (Y1_2) ival = *(int*)(x) + fmuld %f50,%f44,%f52 ! (Y2_0) dtmp4 = dtmp3 * yy + fitod %f20,%f56 ! (Y3_0) (double)(*(int*)&exp) + + cmp ival0,MASK_0x7f800000 ! (Y0_2) if (ival >= 0x7f800000) + lda [x1]0x82,ival2 ! (Y2_2) ival = *(int*)(x); + fmuld K3,%f62,%f50 ! (Y1_1) dtmp0 = K3 * yy + fdtos %f48,%f4 ! (Y0_0) (float)(yy) + + st %f4,[y] ! (Y0_0) write into memory + fmuld %f54,%f60,%f54 ! (Y3_0) dtmp4 = dtmp3 * yy + bge,pn %icc,.update16 ! (Y0_2) if (ival >= 0x7f800000) + fdtos %f24,%f4 ! (Y1_0) (float)(yy) +.cont16: + cmp ival0,MASK_0x007fffff ! (Y0_2) if (ival <= 0x7fffff + ldd [LOGFTBL+ind3],%f58 ! (Y3_0) ldtmp0 = *(double*)((char*)CONST_TBL+ind) + ble,pn %icc,.update17 ! (Y0_2) if (ival <= 0x7fffff + faddd %f22,K2,%f48 ! (Y0_1) dtmp1 = dtmp0 + K2 +.cont17: + cmp ival1,MASK_0x7f800000 ! (Y1_2) if (ival >= 0x7f800000) + and ival0,MASK_0x007fffff,iy0 ! (Y0_2) iy = ival & 0x007fffff + st %f4,[stridey+y] ! (Y1_0) write into memory + fmuld LN2,%f40,%f40 ! (Y2_0) ty = LN2 * (double)(*(int*)&exp) + + add iy0,CONST_0x20000,ival0 ! (Y0_2) ival = iy + 0x20000 + ldd [LOGFTBL_P8+ind2],%f14 ! (Y2_1) ldtmp1 = *(double*)((char*)CONST_TBL+ind+8); + faddd %f52,K0,%f22 ! 
(Y2_0) dtmp5 = dtmp4 + K0 + fpack32 ZERO,%f6,%f6 ! (Y0_1) exp = vis_fpack32(ZERO, exp) + + and ival0,MASK_0xfffc0000,ival0 ! (Y0_2) ival = ival & 0xfffc0000 + faddd %f50,K2,%f26 ! (Y1_1) dtmp1 = dtmp0 + K2 + bge,pn %icc,.update18 ! (Y1_2) if (ival >= 0x7f800000) + fmuld LN2,%f56,%f56 ! (Y3_0) ty = LN2 * (double)(*(int*)&exp) +.cont18: + sub iy0,ival0,iy0 ! (Y0_2) iy = iy - ival + and ival3,-8,ind3 ! (Y3_1) ind = i & (-8) + ld [%fp+tmp3],%f4 ! (Y3_1) (double) iy + faddd %f54,K0,%f24 ! (Y3_0) dtmp5 = dtmp4 + K0 + + cmp ival1,MASK_0x007fffff ! (Y1_2) if (ival <= 0x7fffff) + lda [stridex+x1]0x82,ival3 ! (Y3_2) ival = *(int*)(x) + fmuld %f48,%f46,%f50 ! (Y0_1) dtmp2 = dtmp1 * yy + fitod %f2,%f48 ! (Y2_1) yy = (double) iy + + st iy0,[%fp+tmp1] ! (Y0_2) (double) iy + fmuld %f22,%f44,%f22 ! (Y2_0) yy = dtmp5 * yy + ble,pn %icc,.update19 ! (Y1_2) if (ival <= 0x7fffff) + fsubd %f40,%f42,%f40 ! (Y2_0) ty = ty - ldtmp0 +.cont19: + cmp ival2,MASK_0x7f800000 ! (Y2_2) if (ival >= 0x7f800000) + and ival1,MASK_0x007fffff,iy1 ! (Y1_2) iy = ival & 0x007fffff + bge,pn %icc,.update20 ! (Y2_2) if (ival >= 0x7f800000) + fmuld %f26,%f62,%f42 ! (Y1_1) dtmp2 = dtmp1 * yy +.cont20: + cmp ival2,MASK_0x007fffff ! (Y2_2) if (ival <= 0x7fffff) + ldd [LOGFTBL_P8+ind3],%f16 ! (Y3_1) ldtmp1 = *(double*)((char*)CONST_TBL+ind+8) + ble,pn %icc,.update21 ! (Y2_2) if (ival <= 0x7fffff) + fitod %f4,%f26 ! (Y3_1) yy = (double) iy +.cont21: + add iy1,CONST_0x20000,ival1 ! (Y1_2) ival = iy + 0x20000 + and ival2,MASK_0x007fffff,iy2 ! (Y2_2) iy = ival & 0x007fffff + fmuld %f24,%f60,%f24 ! (Y3_0) yy = dtmp5 * yy + fsubd %f56,%f58,%f58 ! (Y3_0) ty = ty - ldtmp0 + + and ival1,MASK_0xfffc0000,ival1 ! (Y1_2) ival = ival & 0xfffc0000 + add iy2,CONST_0x20000,ival2 ! (Y2_2) ival = iy + 0x20000 + fmuld %f48,%f14,%f44 ! (Y2_1) yy = yy * ldtmp1 + faddd %f50,K1,%f50 ! (Y0_1) dtmp3 = dtmp2 + K1 + + sub iy1,ival1,iy1 ! (Y1_2) iy = iy - ival + and ival2,MASK_0xfffc0000,ival2 ! 
(Y2_2) ival = ival & 0xfffc0000 + fpack32 ZERO,%f8,%f8 ! (Y1_1) exp = vis_fpack32(ZERO, exp) + faddd %f22,%f40,%f48 ! (Y2_0) yy = yy + ty + + sub iy2,ival2,iy2 ! (Y2_2) iy = iy - ival + st iy1,[%fp+tmp3] ! (Y1_2) (double) iy + fmuld %f26,%f16,%f60 ! (Y3_1) yy = yy * ldtmp1 + faddd %f42,K1,%f54 ! (Y1_1) dtmp3 = dtmp2 + K1 + + cmp ival3,MASK_0x7f800000 ! (Y3_2) if (ival >= 0x7f800000) + add y,stridey,y ! y += stridey + st iy2,[%fp+tmp2] ! (Y2_2) (double) iy + faddd %f24,%f58,%f24 ! (Y3_0) yy = yy + ty + + add y,stridey,y ! y += stridey + fmuld K3,%f44,%f22 ! (Y2_1) dtmp0 = K3 * yy + bge,pn %icc,.update22 ! (Y3_2) if (ival >= 0x7f800000) + fitod %f6,%f40 ! (Y0_1)(double)(*(int*)&exp) +.cont22: + cmp ival3,MASK_0x007fffff ! (Y3_2) if (ival <= 0x7fffff) + ldd [LOGFTBL+ind1],%f58 ! (Y1_1) ldtmp0 = *(double*)((char*)CONST_TBL+ind) + fmuld %f50,%f46,%f52 ! (Y0_1) dtmp4 = dtmp3 * yy + fitod %f8,%f56 ! (Y1_1) (double)(*(int*)&exp) + + ld [%fp+tmp1],%f2 ! (Y0_2) (double) iy + fmuld K3,%f60,%f50 ! (Y3_1) dtmp0 = K3 * yy + ble,pn %icc,.update23 ! (Y3_2) if (ival <= 0x7fffff) + fdtos %f48,%f4 ! (Y2_0) (float)(yy) +.cont23: + subcc counter,4,counter ! update cycle counter + st %f4,[y] ! (Y2_0) write into memory + fmuld %f54,%f62,%f54 ! (Y1_1) dtmp4 = dtmp3 * yy + fdtos %f24,%f4 ! (Y3_0)(float)(yy) + + sra ival0,14,ival0 ! (Y0_2) i = ival >> 14; + st %f4,[stridey+y] ! (Y3_0) write into memory + bpos,pt %icc,.main_loop + faddd %f22,K2,%f48 ! (Y2_1) dtmp1 = dtmp0 + K2 + +.tail: + addcc counter,7,counter + add y,stridey,y ! y += stridey + bneg,pn %icc,.end_loop + + sra ival2,14,ival2 ! (Y2_1) i = ival >> 14; + ldd [LOGFTBL+ind0],%f42 ! (Y0_0) ldtmp0 = *(double*)((char*)CONST_TBL+ind) + fmuld LN2,%f40,%f40 ! (Y0_0) ty = LN2 * (double)(*(int*)&exp) + faddd %f52,K0,%f22 ! (Y0_0) dtmp5 = dtmp4 + K0 + + sra ival1,14,ind1 ! (Y1_1) i = ival >> 14; + ld [%fp+tmp3],%f4 ! (Y1_1) (double) iy + fpack32 ZERO,%f10,%f18 ! (Y2_0) exp = vis_fpack32(ZERO, exp) + faddd %f50,K2,%f26 ! 
(Y3_0) dtmp1 = dtmp0 + K2 + + and ival0,-8,ind0 ! (Y0_1) ind = i & (-8) + lda [x0]0x82,%f6 ! (Y0_1) *(float*)&exp = *(float*)(x) + fmuld LN2,%f56,%f56 ! (Y1_0) LN2 * (double)(*(int*)&exp) + faddd %f54,K0,%f24 ! (Y1_0) dtmp5 = dtmp4 + K0 + + and ind1,-8,ind1 ! (Y1_1) ind = i & (-8) + ldd [LOGFTBL_P8+ind0],%f14 ! (Y0_1) ldtmp1 = *(double*)((char*)CONST_TBL+ind+8) + fmuld %f48,%f44,%f50 ! (Y2_0) dtmp2 = dtmp1 * yy + fitod %f2,%f48 ! (Y0_1) yy = (double) iy + + and ival3,MASK_0x007fffff,ival1 ! (Y3_1) iy = ival & 0x007fffff + lda [stridex+x0]0x82,%f8 ! (Y1_1) *(float*)&exp = *(float*)(x) + fmuld %f22,%f46,%f22 ! (Y0_0) yy = dtmp5 * yy + fsubd %f40,%f42,%f40 ! (Y0_0) ty = ty - ldtmp0 + + add iy3,CONST_0x20000,ival3 ! (Y3_1) iy + 0x20000 + ldd [LOGFTBL_P8+ind1],%f16 ! (Y1_1) ldtmp1 = *(double*)((char*)CONST_TBL+ind+8) + fmuld %f26,%f60,%f42 ! (Y3_0) dtmp2 = dtmp1 * yy + fitod %f4,%f26 ! (Y1_1) yy = (double) iy + + and ival3,MASK_0xfffc0000,ival3 ! (Y3_1) ival = ival & 0xfffc0000 + lda [x1]0x82,%f10 ! (Y2_1) *(float*)&exp = *(float*)(x) + fmuld %f24,%f62,%f24 ! (Y1_0) yy = dtmp5 * yy + fsubd %f56,%f58,%f58 ! (Y1_0) ty = ty - ldtmp0 + + sub iy3,ival3,iy3 ! (Y3_1) iy = iy - ival + ld [%fp+tmp2],%f2 ! (Y2_1) (double) iy + fmuld %f48,%f14,%f46 ! (Y0_1) yy = yy * ldtmp1 + faddd %f50,K1,%f50 ! (Y2_0) dtmp3 = dtmp2 + K1 + + add x1,stridex2,x0 ! x += 2*stridex + st iy3,[%fp+tmp3] ! (Y3_1) (double) iy + fpack32 ZERO,%f12,%f20 ! (Y3_0) exp = vis_fpack32(ZERO, exp) + faddd %f22,%f40,%f48 ! (Y0_0) yy = yy + ty + + lda [stridex+x1]0x82,%f12 ! (Y3_1) *(float*)&exp = *(float*)(x) + fmuld %f26,%f16,%f62 ! (Y1_1) yy = yy * ldtmp1 + faddd %f42,K1,%f54 ! (Y3_0) dtmp3 = dtmp2 + K1 + + sra ival3,14,ival3 ! (Y3_1) i = ival >> 14; + add y,stridey,y ! y += stridey + faddd %f24,%f58,%f24 ! (Y1_0) yy = yy + ty + + subcc counter,1,counter + ldd [LOGFTBL+ind2],%f42 ! (Y2_0) ldtmp0 = *(double*)((char*)CONST_TBL+ind) + fmuld K3,%f46,%f22 ! (Y0_1) dtmp0 = K3 * yy + fitod %f18,%f40 ! 
(Y2_0) (double)(*(int*)&exp) + + and ival2,-8,ind2 ! (Y2_1) ind = i & (-8) + fmuld %f50,%f44,%f52 ! (Y2_0) dtmp4 = dtmp3 * yy + fitod %f20,%f56 ! (Y3_0) (double)(*(int*)&exp) + + fmuld K3,%f62,%f50 ! (Y1_1) dtmp0 = K3 * yy + fdtos %f48,%f4 ! (Y0_0) (float)(yy) + + st %f4,[y] ! (Y0_0) write into memory + fmuld %f54,%f60,%f54 ! (Y3_0) dtmp4 = dtmp3 * yy + bneg,pn %icc,.end_loop + fdtos %f24,%f4 ! (Y1_0) (float)(yy) + + add y,stridey,y ! y += stridey + subcc counter,1,counter + ldd [LOGFTBL+ind3],%f58 ! (Y3_0) ldtmp0 = *(double*)((char*)CONST_TBL+ind) + faddd %f22,K2,%f48 ! (Y0_1) dtmp1 = dtmp0 + K2 + + st %f4,[y] ! (Y1_0) write into memory + bneg,pn %icc,.end_loop + fmuld LN2,%f40,%f40 ! (Y2_0) ty = LN2 * (double)(*(int*)&exp) + + ldd [LOGFTBL_P8+ind2],%f14 ! (Y2_1) ldtmp1 = *(double*)((char*)CONST_TBL+ind+8); + faddd %f52,K0,%f22 ! (Y2_0) dtmp5 = dtmp4 + K0 + fpack32 ZERO,%f6,%f6 ! (Y0_1) exp = vis_fpack32(ZERO, exp) + + faddd %f50,K2,%f26 ! (Y1_1) dtmp1 = dtmp0 + K2 + fmuld LN2,%f56,%f56 ! (Y3_0) ty = LN2 * (double)(*(int*)&exp) + + and ival3,-8,ind3 ! (Y3_1) ind = i & (-8) + ld [%fp+tmp3],%f4 ! (Y3_1) (double) iy + faddd %f54,K0,%f24 ! (Y3_0) dtmp5 = dtmp4 + K0 + + fmuld %f48,%f46,%f50 ! (Y0_1) dtmp2 = dtmp1 * yy + fitod %f2,%f48 ! (Y2_1) yy = (double) iy + + fmuld %f22,%f44,%f22 ! (Y2_0) yy = dtmp5 * yy + fsubd %f40,%f42,%f40 ! (Y2_0) ty = ty - ldtmp0 + + fmuld %f26,%f62,%f42 ! (Y1_1) dtmp2 = dtmp1 * yy + + ldd [LOGFTBL_P8+ind3],%f16 ! (Y3_1) ldtmp1 = *(double*)((char*)CONST_TBL+ind+8) + fitod %f4,%f26 ! (Y3_1) yy = (double) iy + + fmuld %f24,%f60,%f24 ! (Y3_0) yy = dtmp5 * yy + fsubd %f56,%f58,%f58 ! (Y3_0) ty = ty - ldtmp0 + + fmuld %f48,%f14,%f44 ! (Y2_1) yy = yy * ldtmp1 + faddd %f50,K1,%f50 ! (Y0_1) dtmp3 = dtmp2 + K1 + + fpack32 ZERO,%f8,%f8 ! (Y1_1) exp = vis_fpack32(ZERO, exp) + faddd %f22,%f40,%f48 ! (Y2_0) yy = yy + ty + + fmuld %f26,%f16,%f60 ! (Y3_1) yy = yy * ldtmp1 + faddd %f42,K1,%f54 ! (Y1_1) dtmp3 = dtmp2 + K1 + + add y,stridey,y ! 
y += stridey + faddd %f24,%f58,%f24 ! (Y3_0) yy = yy + ty + + subcc counter,1,counter + fmuld K3,%f44,%f22 ! (Y2_1) dtmp0 = K3 * yy + fitod %f6,%f40 ! (Y0_1)(double)(*(int*)&exp) + + ldd [LOGFTBL+ind1],%f58 ! (Y1_1) ldtmp0 = *(double*)((char*)CONST_TBL+ind) + fmuld %f50,%f46,%f52 ! (Y0_1) dtmp4 = dtmp3 * yy + fitod %f8,%f56 ! (Y1_1) (double)(*(int*)&exp) + + fmuld K3,%f60,%f50 ! (Y3_1) dtmp0 = K3 * yy + fdtos %f48,%f4 ! (Y2_0) (float)(yy) + + st %f4,[y] ! (Y2_0) write into memory + fmuld %f54,%f62,%f54 ! (Y1_1) dtmp4 = dtmp3 * yy + bneg,pn %icc,.end_loop + fdtos %f24,%f4 ! (Y3_0)(float)(yy) + + subcc counter,1,counter ! update cycle counter + add y,stridey,y + + st %f4,[y] ! (Y3_0) write into memory + bneg,pn %icc,.end_loop + faddd %f22,K2,%f48 ! (Y2_1) dtmp1 = dtmp0 + K2 + + ldd [LOGFTBL+ind0],%f42 ! (Y0_0) ldtmp0 = *(double*)((char*)CONST_TBL+ind) + fmuld LN2,%f40,%f40 ! (Y0_0) ty = LN2 * (double)(*(int*)&exp) + faddd %f52,K0,%f22 ! (Y0_0) dtmp5 = dtmp4 + K0 + + fpack32 ZERO,%f10,%f18 ! (Y2_0) exp = vis_fpack32(ZERO, exp) + + fmuld LN2,%f56,%f56 ! (Y1_0) LN2 * (double)(*(int*)&exp) + faddd %f54,K0,%f24 ! (Y1_0) dtmp5 = dtmp4 + K0 + + fmuld %f48,%f44,%f50 ! (Y2_0) dtmp2 = dtmp1 * yy + + fmuld %f22,%f46,%f22 ! (Y0_0) yy = dtmp5 * yy + fsubd %f40,%f42,%f40 ! (Y0_0) ty = ty - ldtmp0 + + fmuld %f24,%f62,%f24 ! (Y1_0) yy = dtmp5 * yy + fsubd %f56,%f58,%f58 ! (Y1_0) ty = ty - ldtmp0 + + subcc counter,1,counter + faddd %f50,K1,%f50 ! (Y2_0) dtmp3 = dtmp2 + K1 + + faddd %f22,%f40,%f48 ! (Y0_0) yy = yy + ty + + add y,stridey,y ! y += stridey + faddd %f24,%f58,%f24 ! (Y1_0) yy = yy + ty + + ldd [LOGFTBL+ind2],%f42 ! (Y2_0) ldtmp0 = *(double*)((char*)CONST_TBL+ind) + fitod %f18,%f40 ! (Y2_0) (double)(*(int*)&exp) + + fmuld %f50,%f44,%f52 ! (Y2_0) dtmp4 = dtmp3 * yy + + fdtos %f48,%f4 ! (Y0_0) (float)(yy) + + st %f4,[y] ! (Y0_0) write into memory + bneg,pn %icc,.end_loop + fdtos %f24,%f4 ! (Y1_0) (float)(yy) + + add y,stridey,y ! 
y += stridey + subcc counter,1,counter + st %f4,[y] ! (Y1_0) write into memory + bneg,pn %icc,.end_loop + fmuld LN2,%f40,%f40 ! (Y2_0) ty = LN2 * (double)(*(int*)&exp) + + faddd %f52,K0,%f22 ! (Y2_0) dtmp5 = dtmp4 + K0 + + fmuld %f22,%f44,%f22 ! (Y2_0) yy = dtmp5 * yy + fsubd %f40,%f42,%f40 ! (Y2_0) ty = ty - ldtmp0 + + add y,stridey,y ! y += stridey + faddd %f22,%f40,%f48 ! (Y2_0) yy = yy + ty + + fdtos %f48,%f4 ! (Y2_0) (float)(yy) + + st %f4,[y] ! (Y2_0) write into memory +.end_loop: + ba .begin + nop + +.end: + ret + restore %g0,0,%o0 + + .align 16 +.update2: + cmp counter,0 + ble .cont2 + nop + + add x0,stridex,x0 + stx x0,[%fp+tmp5] + sub x0,stridex,x0 + st counter,[%fp+tmp0] + or %g0,0,counter + ba .cont2 + nop + + .align 16 +.update3: + cmp counter,0 + ble .cont3 + nop + + add x0,stridex,x0 + stx x0,[%fp+tmp5] + sub x0,stridex,x0 + st counter,[%fp+tmp0] + or %g0,0,counter + ba .cont3 + nop + + .align 16 +.update4: + cmp counter,1 + ble .cont4 + nop + + stx x1,[%fp+tmp5] + sub counter,1,counter + st counter,[%fp+tmp0] + or %g0,1,counter + ba .cont4 + nop + + .align 16 +.update5: + cmp counter,1 + ble .cont5 + nop + + stx x1,[%fp+tmp5] + sub counter,1,counter + st counter,[%fp+tmp0] + or %g0,1,counter + ba .cont5 + nop + + .align 16 +.update6: + cmp counter,2 + ble .cont6 + nop + + add x1,stridex,x1 + stx x1,[%fp+tmp5] + sub x1,stridex,x1 + sub counter,2,counter + st counter,[%fp+tmp0] + or %g0,2,counter + ba .cont6 + nop + + .align 16 +.update7: + cmp counter,2 + ble .cont7 + nop + + add x1,stridex,x1 + stx x1,[%fp+tmp5] + sub x1,stridex,x1 + sub counter,2,counter + st counter,[%fp+tmp0] + or %g0,2,counter + ba .cont7 + nop + + .align 16 +.update8: + cmp counter,3 + ble .cont8 + nop + + stx x0,[%fp+tmp5] + sub counter,3,counter + st counter,[%fp+tmp0] + or %g0,3,counter + ba .cont8 + nop + + .align 16 +.update9: + cmp counter,3 + ble .cont9 + nop + + stx x0,[%fp+tmp5] + sub counter,3,counter + st counter,[%fp+tmp0] + or %g0,3,counter + ba .cont9 + nop + + 
.align 16 +.update10: + cmp counter,4 + ble .cont10 + nop + + add x0,stridex,x0 + stx x0,[%fp+tmp5] + sub x0, stridex, x0 + sub counter,4,counter + st counter,[%fp+tmp0] + or %g0,4,counter + ba .cont10 + nop + + .align 16 +.update11: + cmp counter,4 + ble .cont11 + nop + + add x0,stridex,x0 + stx x0,[%fp+tmp5] + sub x0,stridex,x0 + sub counter,4,counter + st counter,[%fp+tmp0] + or %g0,4,counter + ba .cont11 + nop + + .align 16 +.update12: + cmp counter,5 + ble .cont12 + nop + + stx x1,[%fp+tmp5] + sub counter,5,counter + st counter,[%fp+tmp0] + or %g0,5,counter + ba .cont12 + nop + + .align 16 +.update13: + cmp counter,5 + ble .cont13 + nop + + stx x1,[%fp+tmp5] + sub counter,5,counter + st counter,[%fp+tmp0] + or %g0,5,counter + ba .cont13 + nop + + .align 16 +.update14: + cmp counter,6 + ble .cont14 + nop + + add x1,stridex,x1 + stx x1,[%fp+tmp5] + sub x1, stridex, x1 + sub counter,6,counter + st counter,[%fp+tmp0] + or %g0,6,counter + ba .cont14 + nop + + .align 16 +.update15: + cmp counter,6 + ble .cont15 + nop + + add x1,stridex,x1 + stx x1,[%fp+tmp5] + sub x1, stridex, x1 + sub counter,6,counter + st counter,[%fp+tmp0] + or %g0,6,counter + ba .cont15 + nop + + .align 16 +.update16: + cmp counter,0 + ble,pt %icc, .cont16 + nop + + stx x0,[%fp+tmp5] + st counter,[%fp+tmp0] + or %g0,0,counter + ba .cont16 + nop + + .align 16 +.update17: + cmp counter,0 + ble,pt %icc, .cont17 + nop + + stx x0,[%fp+tmp5] + st counter,[%fp+tmp0] + or %g0,0,counter + ba .cont17 + nop + + .align 16 +.update18: + cmp counter,1 + ble,pt %icc, .cont18 + nop + + add x0,stridex,x0 + stx x0,[%fp+tmp5] + sub x0,stridex,x0 + sub counter,1,counter + st counter,[%fp+tmp0] + or %g0,1,counter + ba .cont18 + nop + + .align 16 +.update19: + cmp counter,1 + ble,pt %icc, .cont19 + nop + + add x0,stridex,x0 + sub counter,1,counter + stx x0,[%fp+tmp5] + sub x0, stridex, x0 + st counter,[%fp+tmp0] + or %g0,1,counter + ba .cont19 + nop + + .align 16 +.update20: + cmp counter,2 + ble,pt %icc, .cont20 + 
nop + + stx x1,[%fp+tmp5] + sub counter,2,counter + st counter,[%fp+tmp0] + or %g0,2,counter + ba .cont20 + nop + + .align 16 +.update21: + cmp counter,2 + ble,pt %icc, .cont21 + nop + + stx x1,[%fp+tmp5] + sub counter, 2, counter + st counter,[%fp+tmp0] + or %g0,2,counter + ba .cont21 + nop + + .align 16 +.update22: + cmp counter,3 + ble,pt %icc, .cont22 + nop + + add x1,stridex,x1 + stx x1,[%fp+tmp5] + sub x1,stridex,x1 + sub counter,3,counter + st counter,[%fp+tmp0] + or %g0,3,counter + ba .cont22 + nop + + .align 16 +.update23: + cmp counter,3 + ble,pt %icc, .cont23 + nop + + add x1,stridex,x1 + stx x1,[%fp+tmp5] + sub x1,stridex,x1 + sub counter,3,counter + st counter,[%fp+tmp0] + or %g0,3,counter + ba .cont23 + nop + + .align 16 +.spec: + or %g0,1,ind3 ! ind3 = 1 + sll ind3,31,ind3 ! ind3 = 0x80000000 + add x0,stridex,x0 ! x += stridex + sub ind3,1,ind3 ! ind3 = 0x7fffffff + add y,stridey,y ! y += stridey + and ival0,ind3,iy0 ! ival & 0x7fffffff + cmp iy0,MASK_0x7f800000 ! if ((ival & 0x7fffffff) >= 0x7f800000) + bge,pn %icc, .spec0 ! if ((ival & 0x7fffffff) >= 0x7f800000) + st ival0,[%fp+tmp1] + cmp ival0,0 ! if (ival <= 0) + ble,pn %icc,.spec1 ! if (ival <= 0) + nop + + ld [%fp+tmp1],%f12 + fitos %f12,%f14 ! value = (float) ival + st %f14,[%fp+tmp2] ! ival = *(int*) &value + ld [%fp+tmp2],ival0 ! ival = *(int*) &value + + and ival0,MASK_0x007fffff,iy0 ! iy = ival & 0x007fffff + sra ival0,23,ival2 ! iexp = ival >> 23 + + add iy0,CONST_0x20000,ival0 ! ival = iy + 0x20000 + sub ival2,149,ival2 ! iexp = iexp - 149 + + and ival0,MASK_0xfffc0000,ival0 ! ival = ival & 0xfffc0000 + st ival2,[%fp+tmp2] ! (double) iexp + + sub iy0,ival0,iy0 ! iy = iy - ival + + sra ival0,14,ival0 ! i = ival >> 14; + st iy0,[%fp+tmp1] ! (double) iy + + and ival0,-8,ind0 ! ind = i & (-8) + ld [%fp+tmp1],%f2 ! (double) iy + + ldd [LOGFTBL_P8+ind0],%f14 ! ldtmp1 = *(double*)((char*)CONST_TBL+ind+8) + fitod %f2,%f48 ! yy = (double) iy + + fmuld %f48,%f14,%f46 ! 
yy = yy * ldtmp1 + + ld [%fp+tmp2],%f6 ! (double) iexp + fmuld K3,%f46,%f22 ! dtmp0 = K3 * yy + + ldd [LOGFTBL+ind0],%f42 ! ldtmp0 = *(double*)((char*)CONST_TBL+ind) + faddd %f22,K2,%f48 ! dtmp1 = dtmp0 + K2 + + fmuld %f48,%f46,%f50 ! dtmp2 = dtmp1 * yy + + faddd %f50,K1,%f50 ! dtmp3 = dtmp2 + K1 + + fitod %f6,%f40 ! (double) iexp + fmuld %f50,%f46,%f52 ! dtmp4 = dtmp3 * yy + + fmuld LN2,%f40,%f40 ! ty = LN2 * (double) iexp + faddd %f52,K0,%f22 ! dtmp5 = dtmp4 + K0 + + fmuld %f22,%f46,%f22 ! yy = dtmp5 * yy + fsubd %f40,%f42,%f40 ! ty = ty - ldtmp0 + + faddd %f22,%f40,%f48 ! yy = yy + ty + + fdtos %f48,%f4 ! (float)(yy) + + ba .begin1 + st %f4,[y] ! write into memory + + .align 16 +.spec0: + ld [%fp+tmp1],%f12 ! value = *(float*) &ival + fzeros %f2 ! y[0] = (value < 0.0f? + fcmps %fcc0,%f12,%f2 ! 0.0f : value) * value + fmovsug %fcc0,%f12,%f2 + fmuls %f12,%f2,%f2 + ba .begin1 + st %f2,[y] ! write into memory + + .align 16 +.spec1: + cmp iy0,0 ! if ((ival & 0x7fffffff) == 0) + bne,pn %icc,.spec2 ! if ((ival & 0x7fffffff) == 0) + nop + ld [LOGFTBL+568],%f4 + fdivs %f4,ZERO,%f6 ! y[0] = -1.0f / 0f + ba .begin1 + st %f6,[y] ! write into memory + + .align 16 +.spec2: + fdivs ZERO,ZERO,%f6 ! y[0] = 0f / 0f + ba .begin1 + st %f6,[y] ! write into memory + + SET_SIZE(__vlogf) + diff --git a/usr/src/lib/libmvec/common/vis/__vpow.S b/usr/src/lib/libmvec/common/vis/__vpow.S new file mode 100644 index 0000000000..5ae56b3e9f --- /dev/null +++ b/usr/src/lib/libmvec/common/vis/__vpow.S @@ -0,0 +1,4353 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .file "__vpow.S" + +#include "libm.h" + + RO_DATA + .align 64 + +.CONST_TBL: + +! __mt_constlog2[2*i] = high order rounded 32 bits log2(1+i/256)*256, i = [0, 255] +! __mt_constlog2[2*i+1] = low order least bits log2(1+i/256)*256, i = [0, 255] + + .word 0x00000000, 0x00000000, 0x00000000, 0x00000000, + .word 0x3ff709c4, 0x00000000, 0x3e9b5eab, 0x1dd2b66f, + .word 0x4006fe51, 0x00000000, 0xbea2443d, 0xeba01c72, + .word 0x40113631, 0x00000000, 0x3e97a97b, 0x0c4bb41a, + .word 0x4016e797, 0x00000000, 0xbebe8f4b, 0x759d6476, + .word 0x401c9364, 0x00000000, 0xbeb15ebc, 0x1e666460, + .word 0x40211cd2, 0x00000000, 0xbeb57665, 0xf6893f5d, + .word 0x4023ed31, 0x00000000, 0xbecae5e9, 0x7677f62d, + .word 0x4026bad3, 0x00000000, 0x3ecd63bf, 0x61cc4d82, + .word 0x402985c0, 0x00000000, 0xbebe5b57, 0x35cfaf8e, + .word 0x402c4dfb, 0x00000000, 0xbec1bd55, 0x2842c1c2, + .word 0x402f138a, 0x00000000, 0xbecf336b, 0x18178cbe, + .word 0x4030eb39, 0x00000000, 0xbed81758, 0x19530c23, + .word 0x40324b5b, 0x00000000, 0x3edf84d6, 0x8f2268b4, + .word 0x4033aa30, 0x00000000, 0xbec16c07, 0x1e93fd97, + .word 0x403507b8, 0x00000000, 0x3ecb019d, 0xdb6a796a, + .word 0x403663f7, 0x00000000, 0xbe94dbb3, 0xa60cceb2, + .word 0x4037beef, 0x00000000, 0xbeda51d7, 0x5fb0ef94, + .word 0x403918a1, 0x00000000, 0x3edb918c, 0xd6ab9c8d, + .word 0x403a7112, 0x00000000, 0xbec065bd, 0xb60a5dd4, + .word 0x403bc842, 0x00000000, 0x3ed02b6a, 0xee98ecb1, + .word 
0x403d1e35, 0x00000000, 0xbebca47d, 0x25b2f4c7, + .word 0x403e72ec, 0x00000000, 0x3eb17fa5, 0xb21cbdb6, + .word 0x403fc66a, 0x00000000, 0x3eae1601, 0x49209a69, + .word 0x40408c59, 0x00000000, 0xbeecc961, 0x871a7611, + .word 0x404134e2, 0x00000000, 0xbee2ddbe, 0x74803297, + .word 0x4041dcd2, 0x00000000, 0xbeea2ab5, 0x212856eb, + .word 0x40428429, 0x00000000, 0x3ee2c1e9, 0x8fe35da3, + .word 0x40432aea, 0x00000000, 0xbecd8751, 0xe5e0ae0d, + .word 0x4043d114, 0x00000000, 0x3eeb66a2, 0x98fc02ce, + .word 0x404476aa, 0x00000000, 0xbea9f022, 0xcb3b1c5b, + .word 0x40451bac, 0x00000000, 0xbeebe168, 0xdd6dd3fe, + .word 0x4045c01a, 0x00000000, 0x3edcfdeb, 0x43cfd006, + .word 0x404663f7, 0x00000000, 0xbea4dbb3, 0xa60cceb2, + .word 0x40470743, 0x00000000, 0xbed5887e, 0xc06b1ff2, + .word 0x4047a9ff, 0x00000000, 0xbedc17d1, 0x108740d9, + .word 0x40484c2c, 0x00000000, 0xbed7e87e, 0x268116ee, + .word 0x4048edcb, 0x00000000, 0xbec7cad4, 0x944a32be, + .word 0x40498edd, 0x00000000, 0x3eadf9c3, 0x7c0beb3a, + .word 0x404a2f63, 0x00000000, 0x3ed1905c, 0x35651c43, + .word 0x404acf5e, 0x00000000, 0x3ed6da76, 0x49f7f08f, + .word 0x404b6ecf, 0x00000000, 0x3ec75f95, 0xe96bed8d, + .word 0x404c0db7, 0x00000000, 0xbed91359, 0x08df8ec9, + .word 0x404cac16, 0x00000000, 0x3ede3b86, 0xe44b6265, + .word 0x404d49ee, 0x00000000, 0x3ee30c96, 0x5bf23d2d, + .word 0x404de740, 0x00000000, 0xbecc4eb7, 0xf11e41be, + .word 0x404e840c, 0x00000000, 0xbec8b195, 0xb338360c, + .word 0x404f2053, 0x00000000, 0x3edc9047, 0x93a3ba95, + .word 0x404fbc17, 0x00000000, 0xbee1bf65, 0xfd7715ca, + .word 0x40502bac, 0x00000000, 0xbef76cbe, 0x67113a18, + .word 0x4050790b, 0x00000000, 0xbee227e7, 0xfb487e73, + .word 0x4050c629, 0x00000000, 0x3efd550a, 0xa3a93ec8, + .word 0x40511308, 0x00000000, 0xbee2967a, 0x451a7b48, + .word 0x40515fa6, 0x00000000, 0x3efdaec2, 0x3fd65f8e, + .word 0x4051ac06, 0x00000000, 0xbef35b83, 0xe3eb5ce3, + .word 0x4051f826, 0x00000000, 0xbec24ee3, 0xd9a82f2e, + .word 0x40524408, 0x00000000, 0xbef53c7e, 
0x319f6e92, + .word 0x40528fab, 0x00000000, 0x3eead993, 0x41b181d1, + .word 0x4052db11, 0x00000000, 0xbead932a, 0x8487642e, + .word 0x40532639, 0x00000000, 0x3ef8daca, 0x0d66b8f9, + .word 0x40537125, 0x00000000, 0xbee8ad99, 0x09933766, + .word 0x4053bbd4, 0x00000000, 0xbef7d788, 0xc15a9f3d, + .word 0x40540646, 0x00000000, 0x3eed8d82, 0x24bad97a, + .word 0x4054507d, 0x00000000, 0xbe922b03, 0xc6b2a5f6, + .word 0x40549a78, 0x00000000, 0x3ef2f346, 0xe2bf924b, + .word 0x4054e439, 0x00000000, 0xbeffc5c1, 0x258110a4, + .word 0x40552dbe, 0x00000000, 0xbead9b4a, 0x641184f9, + .word 0x40557709, 0x00000000, 0x3edb3378, 0xcab10782, + .word 0x4055c01a, 0x00000000, 0x3eecfdeb, 0x43cfd006, + .word 0x405608f2, 0x00000000, 0xbef2f5ad, 0xd49a43fc, + .word 0x40565190, 0x00000000, 0xbedb9884, 0x591add87, + .word 0x405699f5, 0x00000000, 0x3ee2466a, 0x5c3462a4, + .word 0x4056e222, 0x00000000, 0xbee93179, 0x90d43957, + .word 0x40572a16, 0x00000000, 0x3eebe5e0, 0xc14a1a6d, + .word 0x405771d3, 0x00000000, 0xbef16041, 0x3106e405, + .word 0x4057b958, 0x00000000, 0xbef4eb95, 0x4eea2724, + .word 0x405800a5, 0x00000000, 0x3ef8c587, 0x150cabae, + .word 0x405847bc, 0x00000000, 0x3ee9ec30, 0xc6e3e04a, + .word 0x40588e9c, 0x00000000, 0x3efcb82c, 0x89692d99, + .word 0x4058d546, 0x00000000, 0x3efced70, 0xdc6acf42, + .word 0x40591bbb, 0x00000000, 0xbefdb83a, 0x3dd2d353, + .word 0x405961f9, 0x00000000, 0x3eb49d02, 0x6e33d676, + .word 0x4059a802, 0x00000000, 0x3eec8f11, 0x979a5db7, + .word 0x4059edd6, 0x00000000, 0x3efd66c9, 0x77e236c7, + .word 0x405a3376, 0x00000000, 0x3ec4fec0, 0xa13af882, + .word 0x405a78e1, 0x00000000, 0x3ef1bdef, 0xbd14a081, + .word 0x405abe18, 0x00000000, 0x3efe5fc7, 0xd238691d, + .word 0x405b031c, 0x00000000, 0xbed01f9b, 0xcb999fe9, + .word 0x405b47ec, 0x00000000, 0xbec18efa, 0xbeb7d722, + .word 0x405b8c89, 0x00000000, 0xbee203bc, 0xc3346511, + .word 0x405bd0f3, 0x00000000, 0xbed6186f, 0xcf54bbd3, + .word 0x405c152a, 0x00000000, 0x3efb0932, 0xb9700973, + .word 0x405c5930, 
0x00000000, 0xbef4b5a9, 0x2a606047, + .word 0x405c9d03, 0x00000000, 0xbec26b70, 0x98590071, + .word 0x405ce0a5, 0x00000000, 0xbefb7169, 0xe0cda8bd, + .word 0x405d2415, 0x00000000, 0xbeebfa06, 0xc156f521, + .word 0x405d6754, 0x00000000, 0xbedfcd15, 0xf101c142, + .word 0x405daa62, 0x00000000, 0x3ee10327, 0xdc8093a5, + .word 0x405ded40, 0x00000000, 0xbee5dee4, 0xd9d8a273, + .word 0x405e2fed, 0x00000000, 0x3eee84b9, 0x4c06f913, + .word 0x405e726b, 0x00000000, 0xbef7862a, 0xcb7ceb98, + .word 0x405eb4b8, 0x00000000, 0x3ef1f456, 0xf394f972, + .word 0x405ef6d6, 0x00000000, 0x3efcca38, 0x881f4780, + .word 0x405f38c5, 0x00000000, 0x3ef9ef31, 0x50343f8e, + .word 0x405f7a85, 0x00000000, 0x3efa32c1, 0xb3b3864c, + .word 0x405fbc17, 0x00000000, 0xbef1bf65, 0xfd7715ca, + .word 0x405ffd7a, 0x00000000, 0xbef95f00, 0x19518ce0, + .word 0x40601f57, 0x00000000, 0x3ef3b932, 0x6ff91960, + .word 0x40603fdb, 0x00000000, 0xbf0d1a19, 0xa0331af3, + .word 0x40606047, 0x00000000, 0x3ee9f24e, 0xb23e991f, + .word 0x4060809d, 0x00000000, 0xbedb011f, 0x855b4988, + .word 0x4060a0dc, 0x00000000, 0x3efa7c70, 0xfde006c7, + .word 0x4060c105, 0x00000000, 0x3e9ac754, 0xcb104aea, + .word 0x4060e117, 0x00000000, 0x3f0d535f, 0x0444ebab, + .word 0x40610114, 0x00000000, 0xbf03ab0d, 0xc56138c9, + .word 0x406120fa, 0x00000000, 0xbef630f3, 0xfc695a97, + .word 0x406140ca, 0x00000000, 0xbec5786a, 0xf187a96b, + .word 0x40616084, 0x00000000, 0x3f012578, 0x0181e2b3, + .word 0x40618029, 0x00000000, 0xbef846b4, 0x4ad8a38b, + .word 0x40619fb8, 0x00000000, 0xbf01c336, 0xf7a3a78f, + .word 0x4061bf31, 0x00000000, 0x3eee95d0, 0x0de3b514, + .word 0x4061de95, 0x00000000, 0x3eed9cbb, 0xa6187a4d, + .word 0x4061fde4, 0x00000000, 0xbef678bf, 0x6cdedf51, + .word 0x40621d1d, 0x00000000, 0x3f06edb5, 0x668c543d, + .word 0x40623c42, 0x00000000, 0xbef5ec6c, 0x1bfbf89a, + .word 0x40625b51, 0x00000000, 0x3f062dcf, 0x4115a1a3, + .word 0x40627a4c, 0x00000000, 0x3ec6172f, 0xe015e13c, + .word 0x40629932, 0x00000000, 0xbed30dd5, 0x3f5c184c, + 
.word 0x4062b803, 0x00000000, 0x3f01cfde, 0xb43cfd00, + .word 0x4062d6c0, 0x00000000, 0x3ee35013, 0x8064a94e, + .word 0x4062f568, 0x00000000, 0x3f0d7acf, 0xc98509e3, + .word 0x406313fd, 0x00000000, 0xbf0d7932, 0x43718371, + .word 0x4063327c, 0x00000000, 0x3f0aad27, 0x29b21ae5, + .word 0x406350e8, 0x00000000, 0x3ef92b83, 0xec743665, + .word 0x40636f40, 0x00000000, 0xbec249ba, 0x76fee235, + .word 0x40638d84, 0x00000000, 0xbeefd0a2, 0xf6d7e41e, + .word 0x4063abb4, 0x00000000, 0xbec57f7a, 0x64ccd537, + .word 0x4063c9d0, 0x00000000, 0x3f09242b, 0x8488b305, + .word 0x4063e7d9, 0x00000000, 0x3efbcfb8, 0x0b357154, + .word 0x406405cf, 0x00000000, 0xbf0cb1c2, 0xd10504b4, + .word 0x406423b0, 0x00000000, 0x3f0fa61a, 0xaa59c1d8, + .word 0x4064417f, 0x00000000, 0x3ef26410, 0xb256d8d7, + .word 0x40645f3b, 0x00000000, 0xbf09d77e, 0x31d6ca00, + .word 0x40647ce3, 0x00000000, 0xbeda5fb4, 0xf23978de, + .word 0x40649a78, 0x00000000, 0x3f02f346, 0xe2bf924b, + .word 0x4064b7fb, 0x00000000, 0xbf0106da, 0x1aa0e9e7, + .word 0x4064d56a, 0x00000000, 0x3f06ccf3, 0xb1129b7c, + .word 0x4064f2c7, 0x00000000, 0x3f006a7c, 0xcf9dd420, + .word 0x40651012, 0x00000000, 0xbf0e3dd5, 0xc1c885ae, + .word 0x40652d49, 0x00000000, 0x3f00b91e, 0x4253bd27, + .word 0x40654a6f, 0x00000000, 0xbf0cd6af, 0x1c9393cd, + .word 0x40656781, 0x00000000, 0x3f0ee1ac, 0x0b1ec5ea, + .word 0x40658482, 0x00000000, 0x3ef34c4e, 0x99e1c6c6, + .word 0x4065a171, 0x00000000, 0xbf06d01c, 0xa8f50e5f, + .word 0x4065be4d, 0x00000000, 0x3ed96a28, 0x6955d67e, + .word 0x4065db17, 0x00000000, 0x3f0d4210, 0x4f127092, + .word 0x4065f7d0, 0x00000000, 0xbed7c3ec, 0xa28e69ca, + .word 0x40661477, 0x00000000, 0xbf07f393, 0xbdd98c47, + .word 0x4066310c, 0x00000000, 0xbf0c2ab3, 0xedefe569, + .word 0x40664d8f, 0x00000000, 0xbef44732, 0x0833c207, + .word 0x40666a01, 0x00000000, 0xbf0c6e1d, 0xcd0cb449, + .word 0x40668661, 0x00000000, 0xbefb4848, 0x3c643a24, + .word 0x4066a2b0, 0x00000000, 0xbf08697c, 0x3d7dfd9b, + .word 0x4066beed, 0x00000000, 
0x3ef12866, 0xd705c554, + .word 0x4066db19, 0x00000000, 0x3f0a9d86, 0x52765f7c, + .word 0x4066f735, 0x00000000, 0xbf0d0e8e, 0x7a165e04, + .word 0x4067133f, 0x00000000, 0xbf093aa4, 0xe106ba60, + .word 0x40672f38, 0x00000000, 0xbf04bace, 0x940d18ba, + .word 0x40674b20, 0x00000000, 0xbef4d8fc, 0x561c8d44, + .word 0x406766f7, 0x00000000, 0x3ef5931e, 0xf6e6f15b, + .word 0x406782be, 0x00000000, 0xbf000896, 0x6a210de0, + .word 0x40679e74, 0x00000000, 0xbf05dbfe, 0x780eccdb, + .word 0x4067ba19, 0x00000000, 0xbecb2bf4, 0x6fd85522, + .word 0x4067d5ae, 0x00000000, 0xbefd2fc3, 0xaddfdee2, + .word 0x4067f132, 0x00000000, 0x3ef0c167, 0x8ae89767, + .word 0x40680ca6, 0x00000000, 0x3ef034a6, 0xfc6488d1, + .word 0x4068280a, 0x00000000, 0xbef520c7, 0xc69211fe, + .word 0x4068435d, 0x00000000, 0x3f05328d, 0xdcedf39e, + .word 0x40685ea1, 0x00000000, 0xbf03d361, 0x367bde41, + .word 0x406879d4, 0x00000000, 0xbebc2624, 0x7a0cdfbb, + .word 0x406894f7, 0x00000000, 0x3f02c1bb, 0xe2d01ba9, + .word 0x4068b00b, 0x00000000, 0xbf043a4a, 0xd5c7a4dd, + .word 0x4068cb0e, 0x00000000, 0x3efda59d, 0xded9b445, + .word 0x4068e602, 0x00000000, 0x3eb11eb3, 0x043f5602, + .word 0x406900e6, 0x00000000, 0x3ee60002, 0xccfe43f5, + .word 0x40691bbb, 0x00000000, 0xbf0db83a, 0x3dd2d353, + .word 0x4069367f, 0x00000000, 0x3f0b682a, 0xcba73219, + .word 0x40695135, 0x00000000, 0xbef53d8e, 0x8e4c59c3, + .word 0x40696bdb, 0x00000000, 0xbef6a9a5, 0x050809db, + .word 0x40698671, 0x00000000, 0x3f0db68e, 0x0ba15359, + .word 0x4069a0f9, 0x00000000, 0xbef6278f, 0xd810b546, + .word 0x4069bb71, 0x00000000, 0xbec528c6, 0xcdef4d8d, + .word 0x4069d5da, 0x00000000, 0xbeb57f7a, 0x64ccd537, + .word 0x4069f034, 0x00000000, 0xbee33716, 0xa9ae332f, + .word 0x406a0a7f, 0x00000000, 0xbef2d9f7, 0x698ce769, + .word 0x406a24bb, 0x00000000, 0xbef48c02, 0x44aa8cfc, + .word 0x406a3ee8, 0x00000000, 0xbed8e3cf, 0xc25f0ce6, + .word 0x406a5906, 0x00000000, 0x3f0044c5, 0x590979a0, + .word 0x406a7316, 0x00000000, 0xbef7e86f, 0x9c2154fb, + .word 
0x406a8d17, 0x00000000, 0xbf03a076, 0x2ed351cd, + .word 0x406aa709, 0x00000000, 0xbed4ffd6, 0x59064390, + .word 0x406ac0ed, 0x00000000, 0xbf04d9bb, 0x3135f0b1, + .word 0x406adac2, 0x00000000, 0xbee8ee37, 0xcd2ea9d3, + .word 0x406af489, 0x00000000, 0xbf02ba1b, 0x4a95229c, + .word 0x406b0e41, 0x00000000, 0x3ef35e64, 0x35ebd377, + .word 0x406b27eb, 0x00000000, 0x3f02fe3c, 0x2291b5ad, + .word 0x406b4187, 0x00000000, 0x3efa5480, 0x45ecbc5d, + .word 0x406b5b15, 0x00000000, 0xbedee0d3, 0x3432f2c3, + .word 0x406b7495, 0x00000000, 0xbf0c2ab3, 0x496d2d24, + .word 0x406b8e06, 0x00000000, 0x3ef04439, 0x848e9d1e, + .word 0x406ba76a, 0x00000000, 0xbf03186d, 0xa6fc41e0, + .word 0x406bc0bf, 0x00000000, 0x3f05fc8d, 0x8164754e, + .word 0x406bda07, 0x00000000, 0x3eecc67e, 0x6db516de, + .word 0x406bf341, 0x00000000, 0x3ee14464, 0xa6bcdf48, + .word 0x406c0c6d, 0x00000000, 0x3f011f17, 0x74d8b66a, + .word 0x406c258c, 0x00000000, 0xbefd4cdb, 0xebaa4121, + .word 0x406c3e9d, 0x00000000, 0xbf074797, 0xeab3259d, + .word 0x406c57a0, 0x00000000, 0xbee44a49, 0xa82ed669, + .word 0x406c7096, 0x00000000, 0xbf045b87, 0x8e27d0d9, + .word 0x406c897e, 0x00000000, 0xbec7c929, 0xc9e33277, + .word 0x406ca259, 0x00000000, 0xbef1ab66, 0x74e5008e, + .word 0x406cbb26, 0x00000000, 0x3f09333f, 0x3d6bb35f, + .word 0x406cd3e7, 0x00000000, 0xbf07cd5d, 0xbe4f6f23, + .word 0x406cec9a, 0x00000000, 0xbf0848eb, 0x7f40a752, + .word 0x406d053f, 0x00000000, 0x3f0b4982, 0x259cc626, + .word 0x406d1dd8, 0x00000000, 0x3ee9b4c3, 0xf0c92723, + .word 0x406d3664, 0x00000000, 0xbf036033, 0x8ab5a1f2, + .word 0x406d4ee2, 0x00000000, 0x3f015971, 0x8aacb6ec, + .word 0x406d6754, 0x00000000, 0xbeefcd15, 0xf101c142, + .word 0x406d7fb9, 0x00000000, 0xbf0bd935, 0x64ee1bf6, + .word 0x406d9810, 0x00000000, 0x3f090f59, 0x8530f102, + .word 0x406db05b, 0x00000000, 0x3f0a28be, 0xd929effb, + .word 0x406dc89a, 0x00000000, 0xbf053002, 0xa4e86631, + .word 0x406de0cb, 0x00000000, 0x3efcb99c, 0x5233429f, + .word 0x406df8f0, 0x00000000, 0x3ef04357, 
0x9625f7a4, + .word 0x406e1108, 0x00000000, 0x3f0b6bdd, 0x258a7b23, + .word 0x406e2914, 0x00000000, 0x3ef70700, 0xa00fdd55, + .word 0x406e4113, 0x00000000, 0x3f0bab95, 0x4f46b93f, + .word 0x406e5906, 0x00000000, 0x3efe4411, 0x672b0c89, + .word 0x406e70ed, 0x00000000, 0xbf06e041, 0xe4467502, + .word 0x406e88c7, 0x00000000, 0xbf032765, 0x63557797, + .word 0x406ea094, 0x00000000, 0x3f0d7b8f, 0x0e7b8e75, + .word 0x406eb856, 0x00000000, 0xbeccd5dc, 0x13cad28e, + .word 0x406ed00b, 0x00000000, 0x3f0222fb, 0x08d5c3f2, + .word 0x406ee7b4, 0x00000000, 0x3f0c6cea, 0x541f5b70, + .word 0x406eff52, 0x00000000, 0xbf0fd40b, 0x070e6c33, + .word 0x406f16e3, 0x00000000, 0xbf0f8922, 0x73f1379b, + .word 0x406f2e68, 0x00000000, 0xbf0fa051, 0xeebd4f74, + .word 0x406f45e1, 0x00000000, 0xbf0d0c3e, 0x6aac6ca9, + .word 0x406f5d4e, 0x00000000, 0xbf04c432, 0x5068bc88, + .word 0x406f74af, 0x00000000, 0xbede20a0, 0xa450bc93, + .word 0x406f8c04, 0x00000000, 0x3f08f3a3, 0x1a23946e, + .word 0x406fa34e, 0x00000000, 0x3ee177c2, 0x3362928c, + .word 0x406fba8c, 0x00000000, 0x3ec71513, 0x7cfebaa0, + .word 0x406fd1be, 0x00000000, 0x3f031fca, 0xbe50ac88, + .word 0x406fe8e5, 0x00000000, 0xbedd485c, 0xbfb44c3b, +! + .word 0x01a56e1f, 0xc2f8f359, ! _TINY = 1.0e-300 + .word 0x7e37e43c, 0x8800759c, ! _HUGE = 1.0e+300 + .word 0x3f6d94ae, 0x0bf85de6, ! KA1_LO = (1.41052154268147309568e-05*256) + .word 0x40871540, 0x00000000, ! KA1_HI = (2.8853759765625e+00*256) + .word 0x3cd5d528, 0x93bc7fec, ! KB5 = 1.21195555854068860923e-15 + .word 0x3e2c6b08, 0xd71f5d1e, ! KB3 = 3.30830268126604677436e-09 + .word 0x3ecebfbd, 0xff82c4ed, ! KB2 = 3.66556559691003767877e-06 + .word 0x3f662e42, 0xfefa39ef, ! KB1 = 2.70760617406228636578e-03 +! +! __mt_constexp2[2*i] = high order bits 2^(i/256), i = [0, 255] +! 
__mt_constexp2[2*i+1] = least bits 2^(i/256), i = [0, 255] + + .word 0x3ff00000, 0x00000000, 0x00000000, 0x00000000, + .word 0x3ff00b1a, 0xfa5abcbf, 0xbc84f6b2, 0xa7609f71, + .word 0x3ff0163d, 0xa9fb3335, 0x3c9b6129, 0x9ab8cdb7, + .word 0x3ff02168, 0x143b0281, 0xbc82bf31, 0x0fc54eb6, + .word 0x3ff02c9a, 0x3e778061, 0xbc719083, 0x535b085d, + .word 0x3ff037d4, 0x2e11bbcc, 0x3c656811, 0xeeade11a, + .word 0x3ff04315, 0xe86e7f85, 0xbc90a31c, 0x1977c96e, + .word 0x3ff04e5f, 0x72f654b1, 0x3c84c379, 0x3aa0d08c, + .word 0x3ff059b0, 0xd3158574, 0x3c8d73e2, 0xa475b465, + .word 0x3ff0650a, 0x0e3c1f89, 0xbc95cb7b, 0x5799c397, + .word 0x3ff0706b, 0x29ddf6de, 0xbc8c91df, 0xe2b13c27, + .word 0x3ff07bd4, 0x2b72a836, 0x3c832334, 0x54458700, + .word 0x3ff08745, 0x18759bc8, 0x3c6186be, 0x4bb284ff, + .word 0x3ff092bd, 0xf66607e0, 0xbc968063, 0x800a3fd1, + .word 0x3ff09e3e, 0xcac6f383, 0x3c914878, 0x18316136, + .word 0x3ff0a9c7, 0x9b1f3919, 0x3c85d16c, 0x873d1d38, + .word 0x3ff0b558, 0x6cf9890f, 0x3c98a62e, 0x4adc610b, + .word 0x3ff0c0f1, 0x45e46c85, 0x3c94f989, 0x06d21cef, + .word 0x3ff0cc92, 0x2b7247f7, 0x3c901edc, 0x16e24f71, + .word 0x3ff0d83b, 0x23395dec, 0xbc9bc14d, 0xe43f316a, + .word 0x3ff0e3ec, 0x32d3d1a2, 0x3c403a17, 0x27c57b52, + .word 0x3ff0efa5, 0x5fdfa9c5, 0xbc949db9, 0xbc54021b, + .word 0x3ff0fb66, 0xaffed31b, 0xbc6b9bed, 0xc44ebd7b, + .word 0x3ff10730, 0x28d7233e, 0x3c8d46eb, 0x1692fdd5, + .word 0x3ff11301, 0xd0125b51, 0xbc96c510, 0x39449b3a, + .word 0x3ff11edb, 0xab5e2ab6, 0xbc9ca454, 0xf703fb72, + .word 0x3ff12abd, 0xc06c31cc, 0xbc51b514, 0xb36ca5c7, + .word 0x3ff136a8, 0x14f204ab, 0xbc67108f, 0xba48dcf0, + .word 0x3ff1429a, 0xaea92de0, 0xbc932fbf, 0x9af1369e, + .word 0x3ff14e95, 0x934f312e, 0xbc8b91e8, 0x39bf44ab, + .word 0x3ff15a98, 0xc8a58e51, 0x3c82406a, 0xb9eeab0a, + .word 0x3ff166a4, 0x5471c3c2, 0x3c58f23b, 0x82ea1a32, + .word 0x3ff172b8, 0x3c7d517b, 0xbc819041, 0xb9d78a76, + .word 0x3ff17ed4, 0x8695bbc0, 0x3c709e3f, 0xe2ac5a64, + .word 0x3ff18af9, 0x388c8dea, 
0xbc911023, 0xd1970f6c, + .word 0x3ff19726, 0x58375d2f, 0x3c94aadd, 0x85f17e08, + .word 0x3ff1a35b, 0xeb6fcb75, 0x3c8e5b4c, 0x7b4968e4, + .word 0x3ff1af99, 0xf8138a1c, 0x3c97bf85, 0xa4b69280, + .word 0x3ff1bbe0, 0x84045cd4, 0xbc995386, 0x352ef607, + .word 0x3ff1c82f, 0x95281c6b, 0x3c900977, 0x8010f8c9, + .word 0x3ff1d487, 0x3168b9aa, 0x3c9e016e, 0x00a2643c, + .word 0x3ff1e0e7, 0x5eb44027, 0xbc96fdd8, 0x088cb6de, + .word 0x3ff1ed50, 0x22fcd91d, 0xbc91df98, 0x027bb78c, + .word 0x3ff1f9c1, 0x8438ce4d, 0xbc9bf524, 0xa097af5c, + .word 0x3ff2063b, 0x88628cd6, 0x3c8dc775, 0x814a8495, + .word 0x3ff212be, 0x3578a819, 0x3c93592d, 0x2cfcaac9, + .word 0x3ff21f49, 0x917ddc96, 0x3c82a97e, 0x9494a5ee, + .word 0x3ff22bdd, 0xa27912d1, 0x3c8d34fb, 0x5577d69f, + .word 0x3ff2387a, 0x6e756238, 0x3c99b07e, 0xb6c70573, + .word 0x3ff2451f, 0xfb82140a, 0x3c8acfcc, 0x911ca996, + .word 0x3ff251ce, 0x4fb2a63f, 0x3c8ac155, 0xbef4f4a4, + .word 0x3ff25e85, 0x711ece75, 0x3c93e1a2, 0x4ac31b2c, + .word 0x3ff26b45, 0x65e27cdd, 0x3c82bd33, 0x9940e9d9, + .word 0x3ff2780e, 0x341ddf29, 0x3c9e067c, 0x05f9e76c, + .word 0x3ff284df, 0xe1f56381, 0xbc9a4c3a, 0x8c3f0d7e, + .word 0x3ff291ba, 0x7591bb70, 0xbc82cc72, 0x28401cbd, + .word 0x3ff29e9d, 0xf51fdee1, 0x3c8612e8, 0xafad1255, + .word 0x3ff2ab8a, 0x66d10f13, 0xbc995743, 0x191690a7, + .word 0x3ff2b87f, 0xd0dad990, 0xbc410adc, 0xd6381aa4, + .word 0x3ff2c57e, 0x39771b2f, 0xbc950145, 0xa6eb5124, + .word 0x3ff2d285, 0xa6e4030b, 0x3c900247, 0x54db41d5, + .word 0x3ff2df96, 0x1f641589, 0x3c9d16cf, 0xfbbce198, + .word 0x3ff2ecaf, 0xa93e2f56, 0x3c71ca0f, 0x45d52383, + .word 0x3ff2f9d2, 0x4abd886b, 0xbc653c55, 0x532bda93, + .word 0x3ff306fe, 0x0a31b715, 0x3c86f46a, 0xd23182e4, + .word 0x3ff31432, 0xedeeb2fd, 0x3c8959a3, 0xf3f3fcd1, + .word 0x3ff32170, 0xfc4cd831, 0x3c8a9ce7, 0x8e18047c, + .word 0x3ff32eb8, 0x3ba8ea32, 0xbc9c45e8, 0x3cb4f318, + .word 0x3ff33c08, 0xb26416ff, 0x3c932721, 0x843659a6, + .word 0x3ff34962, 0x66e3fa2d, 0xbc835a75, 0x930881a4, + .word 
0x3ff356c5, 0x5f929ff1, 0xbc8b5cee, 0x5c4e4628, + .word 0x3ff36431, 0xa2de883b, 0xbc8c3144, 0xa06cb85e, + .word 0x3ff371a7, 0x373aa9cb, 0xbc963aea, 0xbf42eae2, + .word 0x3ff37f26, 0x231e754a, 0xbc99f5ca, 0x9eceb23c, + .word 0x3ff38cae, 0x6d05d866, 0xbc9e958d, 0x3c9904bd, + .word 0x3ff39a40, 0x1b7140ef, 0xbc99a9a5, 0xfc8e2934, + .word 0x3ff3a7db, 0x34e59ff7, 0xbc75e436, 0xd661f5e3, + .word 0x3ff3b57f, 0xbfec6cf4, 0x3c954c66, 0xe26fff18, + .word 0x3ff3c32d, 0xc313a8e5, 0xbc9efff8, 0x375d29c3, + .word 0x3ff3d0e5, 0x44ede173, 0x3c7fe8d0, 0x8c284c71, + .word 0x3ff3dea6, 0x4c123422, 0x3c8ada09, 0x11f09ebc, + .word 0x3ff3ec70, 0xdf1c5175, 0xbc8af663, 0x7b8c9bca, + .word 0x3ff3fa45, 0x04ac801c, 0xbc97d023, 0xf956f9f3, + .word 0x3ff40822, 0xc367a024, 0x3c8bddf8, 0xb6f4d048, + .word 0x3ff4160a, 0x21f72e2a, 0xbc5ef369, 0x1c309278, + .word 0x3ff423fb, 0x2709468a, 0xbc98462d, 0xc0b314dd, + .word 0x3ff431f5, 0xd950a897, 0xbc81c7dd, 0xe35f7999, + .word 0x3ff43ffa, 0x3f84b9d4, 0x3c8880be, 0x9704c003, + .word 0x3ff44e08, 0x6061892d, 0x3c489b7a, 0x04ef80d0, + .word 0x3ff45c20, 0x42a7d232, 0xbc686419, 0x82fb1f8e, + .word 0x3ff46a41, 0xed1d0057, 0x3c9c944b, 0xd1648a76, + .word 0x3ff4786d, 0x668b3237, 0xbc9c20f0, 0xed445733, + .word 0x3ff486a2, 0xb5c13cd0, 0x3c73c1a3, 0xb69062f0, + .word 0x3ff494e1, 0xe192aed2, 0xbc83b289, 0x5e499ea0, + .word 0x3ff4a32a, 0xf0d7d3de, 0x3c99cb62, 0xf3d1be56, + .word 0x3ff4b17d, 0xea6db7d7, 0xbc8125b8, 0x7f2897f0, + .word 0x3ff4bfda, 0xd5362a27, 0x3c7d4397, 0xafec42e2, + .word 0x3ff4ce41, 0xb817c114, 0x3c905e29, 0x690abd5d, + .word 0x3ff4dcb2, 0x99fddd0d, 0x3c98ecdb, 0xbc6a7833, + .word 0x3ff4eb2d, 0x81d8abff, 0xbc95257d, 0x2e5d7a52, + .word 0x3ff4f9b2, 0x769d2ca7, 0xbc94b309, 0xd25957e3, + .word 0x3ff50841, 0x7f4531ee, 0x3c7a249b, 0x49b7465f, + .word 0x3ff516da, 0xa2cf6642, 0xbc8f7685, 0x69bd93ef, + .word 0x3ff5257d, 0xe83f4eef, 0xbc7c998d, 0x43efef71, + .word 0x3ff5342b, 0x569d4f82, 0xbc807abe, 0x1db13cad, + .word 0x3ff542e2, 0xf4f6ad27, 0x3c87926d, 
0x192d5f7e, + .word 0x3ff551a4, 0xca5d920f, 0xbc8d689c, 0xefede59b, + .word 0x3ff56070, 0xdde910d2, 0xbc90fb6e, 0x168eebf0, + .word 0x3ff56f47, 0x36b527da, 0x3c99bb2c, 0x011d93ad, + .word 0x3ff57e27, 0xdbe2c4cf, 0xbc90b98c, 0x8a57b9c4, + .word 0x3ff58d12, 0xd497c7fd, 0x3c8295e1, 0x5b9a1de8, + .word 0x3ff59c08, 0x27ff07cc, 0xbc97e2ce, 0xe467e60f, + .word 0x3ff5ab07, 0xdd485429, 0x3c96324c, 0x054647ad, + .word 0x3ff5ba11, 0xfba87a03, 0xbc9b77a1, 0x4c233e1a, + .word 0x3ff5c926, 0x8a5946b7, 0x3c3c4b1b, 0x816986a2, + .word 0x3ff5d845, 0x90998b93, 0xbc9cd6a7, 0xa8b45643, + .word 0x3ff5e76f, 0x15ad2148, 0x3c9ba6f9, 0x3080e65e, + .word 0x3ff5f6a3, 0x20dceb71, 0xbc89eadd, 0xe3cdcf92, + .word 0x3ff605e1, 0xb976dc09, 0xbc93e242, 0x9b56de47, + .word 0x3ff6152a, 0xe6cdf6f4, 0x3c9e4b3e, 0x4ab84c27, + .word 0x3ff6247e, 0xb03a5585, 0xbc9383c1, 0x7e40b497, + .word 0x3ff633dd, 0x1d1929fd, 0x3c984710, 0xbeb964e5, + .word 0x3ff64346, 0x34ccc320, 0xbc8c483c, 0x759d8933, + .word 0x3ff652b9, 0xfebc8fb7, 0xbc9ae3d5, 0xc9a73e09, + .word 0x3ff66238, 0x82552225, 0xbc9bb609, 0x87591c34, + .word 0x3ff671c1, 0xc70833f6, 0xbc8e8732, 0x586c6134, + .word 0x3ff68155, 0xd44ca973, 0x3c6038ae, 0x44f73e65, + .word 0x3ff690f4, 0xb19e9538, 0x3c8804bd, 0x9aeb445d, + .word 0x3ff6a09e, 0x667f3bcd, 0xbc9bdd34, 0x13b26456, + .word 0x3ff6b052, 0xfa75173e, 0x3c7a38f5, 0x2c9a9d0e, + .word 0x3ff6c012, 0x750bdabf, 0xbc728956, 0x67ff0b0d, + .word 0x3ff6cfdc, 0xddd47645, 0x3c9c7aa9, 0xb6f17309, + .word 0x3ff6dfb2, 0x3c651a2f, 0xbc6bbe3a, 0x683c88ab, + .word 0x3ff6ef92, 0x98593ae5, 0xbc90b974, 0x9e1ac8b2, + .word 0x3ff6ff7d, 0xf9519484, 0xbc883c0f, 0x25860ef6, + .word 0x3ff70f74, 0x66f42e87, 0x3c59d644, 0xd45aa65f, + .word 0x3ff71f75, 0xe8ec5f74, 0xbc816e47, 0x86887a99, + .word 0x3ff72f82, 0x86ead08a, 0xbc920aa0, 0x2cd62c72, + .word 0x3ff73f9a, 0x48a58174, 0xbc90a8d9, 0x6c65d53c, + .word 0x3ff74fbd, 0x35d7cbfd, 0x3c9047fd, 0x618a6e1c, + .word 0x3ff75feb, 0x564267c9, 0xbc902459, 0x57316dd3, + .word 0x3ff77024, 
0xb1ab6e09, 0x3c9b7877, 0x169147f8, + .word 0x3ff78069, 0x4fde5d3f, 0x3c9866b8, 0x0a02162d, + .word 0x3ff790b9, 0x38ac1cf6, 0x3c9349a8, 0x62aadd3e, + .word 0x3ff7a114, 0x73eb0187, 0xbc841577, 0xee04992f, + .word 0x3ff7b17b, 0x0976cfdb, 0xbc9bebb5, 0x8468dc88, + .word 0x3ff7c1ed, 0x0130c132, 0x3c9f124c, 0xd1164dd6, + .word 0x3ff7d26a, 0x62ff86f0, 0x3c91bddb, 0xfb72b8b4, + .word 0x3ff7e2f3, 0x36cf4e62, 0x3c705d02, 0xba15797e, + .word 0x3ff7f387, 0x8491c491, 0xbc807f11, 0xcf9311ae, + .word 0x3ff80427, 0x543e1a12, 0xbc927c86, 0x626d972b, + .word 0x3ff814d2, 0xadd106d9, 0x3c946437, 0x0d151d4d, + .word 0x3ff82589, 0x994cce13, 0xbc9d4c1d, 0xd41532d8, + .word 0x3ff8364c, 0x1eb941f7, 0x3c999b9a, 0x31df2bd5, + .word 0x3ff8471a, 0x4623c7ad, 0xbc88d684, 0xa341cdfb, + .word 0x3ff857f4, 0x179f5b21, 0xbc5ba748, 0xf8b216d0, + .word 0x3ff868d9, 0x9b4492ed, 0xbc9fc6f8, 0x9bd4f6ba, + .word 0x3ff879ca, 0xd931a436, 0x3c85d2d7, 0xd2db47bd, + .word 0x3ff88ac7, 0xd98a6699, 0x3c9994c2, 0xf37cb53a, + .word 0x3ff89bd0, 0xa478580f, 0x3c9d5395, 0x4475202a, + .word 0x3ff8ace5, 0x422aa0db, 0x3c96e9f1, 0x56864b27, + .word 0x3ff8be05, 0xbad61778, 0x3c9ecb5e, 0xfc43446e, + .word 0x3ff8cf32, 0x16b5448c, 0xbc70d55e, 0x32e9e3aa, + .word 0x3ff8e06a, 0x5e0866d9, 0xbc97114a, 0x6fc9b2e6, + .word 0x3ff8f1ae, 0x99157736, 0x3c85cc13, 0xa2e3976c, + .word 0x3ff902fe, 0xd0282c8a, 0x3c9592ca, 0x85fe3fd2, + .word 0x3ff9145b, 0x0b91ffc6, 0xbc9dd679, 0x2e582524, + .word 0x3ff925c3, 0x53aa2fe2, 0xbc83455f, 0xa639db7f, + .word 0x3ff93737, 0xb0cdc5e5, 0xbc675fc7, 0x81b57ebc, + .word 0x3ff948b8, 0x2b5f98e5, 0xbc8dc3d6, 0x797d2d99, + .word 0x3ff95a44, 0xcbc8520f, 0xbc764b7c, 0x96a5f039, + .word 0x3ff96bdd, 0x9a7670b3, 0xbc5ba596, 0x7f19c896, + .word 0x3ff97d82, 0x9fde4e50, 0xbc9d185b, 0x7c1b85d1, + .word 0x3ff98f33, 0xe47a22a2, 0x3c7cabda, 0xa24c78ec, + .word 0x3ff9a0f1, 0x70ca07ba, 0xbc9173bd, 0x91cee632, + .word 0x3ff9b2bb, 0x4d53fe0d, 0xbc9dd84e, 0x4df6d518, + .word 0x3ff9c491, 0x82a3f090, 0x3c7c7c46, 0xb071f2be, + 
.word 0x3ff9d674, 0x194bb8d5, 0xbc9516be, 0xa3dd8233, + .word 0x3ff9e863, 0x19e32323, 0x3c7824ca, 0x78e64c6e, + .word 0x3ff9fa5e, 0x8d07f29e, 0xbc84a9ce, 0xaaf1face, + .word 0x3ffa0c66, 0x7b5de565, 0xbc935949, 0x5d1cd533, + .word 0x3ffa1e7a, 0xed8eb8bb, 0x3c9c6618, 0xee8be70e, + .word 0x3ffa309b, 0xec4a2d33, 0x3c96305c, 0x7ddc36ab, + .word 0x3ffa42c9, 0x80460ad8, 0xbc9aa780, 0x589fb120, + .word 0x3ffa5503, 0xb23e255d, 0xbc9d2f6e, 0xdb8d41e1, + .word 0x3ffa674a, 0x8af46052, 0x3c650f56, 0x30670366, + .word 0x3ffa799e, 0x1330b358, 0x3c9bcb7e, 0xcac563c7, + .word 0x3ffa8bfe, 0x53c12e59, 0xbc94f867, 0xb2ba15a9, + .word 0x3ffa9e6b, 0x5579fdbf, 0x3c90fac9, 0x0ef7fd31, + .word 0x3ffab0e5, 0x21356eba, 0x3c889c31, 0xdae94545, + .word 0x3ffac36b, 0xbfd3f37a, 0xbc8f9234, 0xcae76cd0, + .word 0x3ffad5ff, 0x3a3c2774, 0x3c97ef3b, 0xb6b1b8e5, + .word 0x3ffae89f, 0x995ad3ad, 0x3c97a1cd, 0x345dcc81, + .word 0x3ffafb4c, 0xe622f2ff, 0xbc94b2fc, 0x0f315ecd, + .word 0x3ffb0e07, 0x298db666, 0xbc9bdef5, 0x4c80e425, + .word 0x3ffb20ce, 0x6c9a8952, 0x3c94dd02, 0x4a0756cc, + .word 0x3ffb33a2, 0xb84f15fb, 0xbc62805e, 0x3084d708, + .word 0x3ffb4684, 0x15b749b1, 0xbc7f763d, 0xe9df7c90, + .word 0x3ffb5972, 0x8de5593a, 0xbc9c71df, 0xbbba6de3, + .word 0x3ffb6c6e, 0x29f1c52a, 0x3c92a8f3, 0x52883f6e, + .word 0x3ffb7f76, 0xf2fb5e47, 0xbc75584f, 0x7e54ac3b, + .word 0x3ffb928c, 0xf22749e4, 0xbc9b7216, 0x54cb65c6, + .word 0x3ffba5b0, 0x30a1064a, 0xbc9efcd3, 0x0e54292e, + .word 0x3ffbb8e0, 0xb79a6f1f, 0xbc3f52d1, 0xc9696205, + .word 0x3ffbcc1e, 0x904bc1d2, 0x3c823dd0, 0x7a2d9e84, + .word 0x3ffbdf69, 0xc3f3a207, 0xbc3c2623, 0x60ea5b52, + .word 0x3ffbf2c2, 0x5bd71e09, 0xbc9efdca, 0x3f6b9c73, + .word 0x3ffc0628, 0x6141b33d, 0xbc8d8a5a, 0xa1fbca34, + .word 0x3ffc199b, 0xdd85529c, 0x3c811065, 0x895048dd, + .word 0x3ffc2d1c, 0xd9fa652c, 0xbc96e516, 0x17c8a5d7, + .word 0x3ffc40ab, 0x5fffd07a, 0x3c9b4537, 0xe083c60a, + .word 0x3ffc5447, 0x78fafb22, 0x3c912f07, 0x2493b5af, + .word 0x3ffc67f1, 0x2e57d14b, 
0x3c92884d, 0xff483cad, + .word 0x3ffc7ba8, 0x8988c933, 0xbc8e76bb, 0xbe255559, + .word 0x3ffc8f6d, 0x9406e7b5, 0x3c71acbc, 0x48805c44, + .word 0x3ffca340, 0x5751c4db, 0xbc87f2be, 0xd10d08f5, + .word 0x3ffcb720, 0xdcef9069, 0x3c7503cb, 0xd1e949db, + .word 0x3ffccb0f, 0x2e6d1675, 0xbc7d220f, 0x86009092, + .word 0x3ffcdf0b, 0x555dc3fa, 0xbc8dd83b, 0x53829d72, + .word 0x3ffcf315, 0x5b5bab74, 0xbc9a08e9, 0xb86dff57, + .word 0x3ffd072d, 0x4a07897c, 0xbc9cbc37, 0x43797a9c, + .word 0x3ffd1b53, 0x2b08c968, 0x3c955636, 0x219a36ee, + .word 0x3ffd2f87, 0x080d89f2, 0xbc9d487b, 0x719d8578, + .word 0x3ffd43c8, 0xeacaa1d6, 0x3c93db53, 0xbf5a1614, + .word 0x3ffd5818, 0xdcfba487, 0x3c82ed02, 0xd75b3707, + .word 0x3ffd6c76, 0xe862e6d3, 0x3c5fe87a, 0x4a8165a0, + .word 0x3ffd80e3, 0x16c98398, 0xbc911ec1, 0x8beddfe8, + .word 0x3ffd955d, 0x71ff6075, 0x3c9a052d, 0xbb9af6be, + .word 0x3ffda9e6, 0x03db3285, 0x3c9c2300, 0x696db532, + .word 0x3ffdbe7c, 0xd63a8315, 0xbc9b76f1, 0x926b8be4, + .word 0x3ffdd321, 0xf301b460, 0x3c92da57, 0x78f018c3, + .word 0x3ffde7d5, 0x641c0658, 0xbc9ca552, 0x8e79ba8f, + .word 0x3ffdfc97, 0x337b9b5f, 0xbc91a5cd, 0x4f184b5c, + .word 0x3ffe1167, 0x6b197d17, 0xbc72b529, 0xbd5c7f44, + .word 0x3ffe2646, 0x14f5a129, 0xbc97b627, 0x817a1496, + .word 0x3ffe3b33, 0x3b16ee12, 0xbc99f4a4, 0x31fdc68b, + .word 0x3ffe502e, 0xe78b3ff6, 0x3c839e89, 0x80a9cc8f, + .word 0x3ffe6539, 0x24676d76, 0xbc863ff8, 0x7522b735, + .word 0x3ffe7a51, 0xfbc74c83, 0x3c92d522, 0xca0c8de2, + .word 0x3ffe8f79, 0x77cdb740, 0xbc910894, 0x80b054b1, + .word 0x3ffea4af, 0xa2a490da, 0xbc9e9c23, 0x179c2893, + .word 0x3ffeb9f4, 0x867cca6e, 0x3c94832f, 0x2293e4f2, + .word 0x3ffecf48, 0x2d8e67f1, 0xbc9c93f3, 0xb411ad8c, + .word 0x3ffee4aa, 0xa2188510, 0x3c91c68d, 0xa487568d, + .word 0x3ffefa1b, 0xee615a27, 0x3c9dc7f4, 0x86a4b6b0, + .word 0x3fff0f9c, 0x1cb6412a, 0xbc932200, 0x65181d45, + .word 0x3fff252b, 0x376bba97, 0x3c93a1a5, 0xbf0d8e43, + .word 0x3fff3ac9, 0x48dd7274, 0xbc795a5a, 0x3ed837de, + .word 
0x3fff5076, 0x5b6e4540, 0x3c99d3e1, 0x2dd8a18b, + .word 0x3fff6632, 0x798844f8, 0x3c9fa37b, 0x3539343e, + .word 0x3fff7bfd, 0xad9cbe14, 0xbc9dbb12, 0xd006350a, + .word 0x3fff91d8, 0x02243c89, 0xbc612ea8, 0xa779f689, + .word 0x3fffa7c1, 0x819e90d8, 0x3c874853, 0xf3a5931e, + .word 0x3fffbdba, 0x3692d514, 0xbc796773, 0x15098eb6, + .word 0x3fffd3c2, 0x2b8f71f1, 0x3c62eb74, 0x966579e7, + .word 0x3fffe9d9, 0x6b2a23d9, 0x3c74a603, 0x7442fde3, +! + .word 0x3c900000, 0x00000000, ! 2**(-54) = 5.551115123125782702e-17 + .word 0x3ff00000, 0x00000000, ! DONE = 1.0 + .word 0x43300000, 0x00000000, ! DVAIN52 = 2**52 = 4.503599627370496e15 + .word 0xffffffff, 0x00000000, ! MHI32 = 0xffffffff00000000 + .word 0x4062776d, 0x8ce329bd, ! KA5 = (5.77078604860893737986e-01*256) + .word 0x406ec709, 0xdc39fc99, ! KA3 = (9.61796693925765549423e-01*256) + .word 0x40871547, 0x652b82fe, ! KA1 = (2.885390081777926774e+00*256) + .word 0x41100000, 0x00000000, ! HTHRESH = 262144.0 + .word 0xc110cc00, 0x00000000, ! LTHRESH = -275200.0 + .word 0x3d83b2ab, 0xc07c93d0, ! KB4 = 2.23939573811855104311e-12 + .word 0x000fffff, 0xffffffff, ! MMANT + .word 0x00000800, 0x00000000, ! MROUND + .word 0xfffff000, 0x00000000, ! MHI20 + +! local storage indices +#define tmp0_lo STACK_BIAS-4 +#define tmp0_hi STACK_BIAS-8 +#define tmp1_lo STACK_BIAS-12 +#define tmp1_hi STACK_BIAS-16 +#define tmp2_lo STACK_BIAS-20 +#define tmp2_hi STACK_BIAS-24 +#define tmp3 STACK_BIAS-28 +#define tmp4 STACK_BIAS-32 +#define ind_buf STACK_BIAS-48 +#define tmp_counter STACK_BIAS-56 +#define tmp_px STACK_BIAS-64 +#define tmp_py STACK_BIAS-72 +#define tmp_mant STACK_BIAS-80 +#define tmp5 STACK_BIAS-88 +#define tmp6 STACK_BIAS-96 + +! 
sizeof temp storage - must be a multiple of 16 for V9 +! (tmps is the extra frame space reserved by the save at ENTRY(__vpow)) +#define tmps 96 + +! table base registers: LOGTBL receives the address of .CONST_TBL via PIC_SET; +! EXPTBL is derived from it at entry (LOGTBL + 4095 + 65) +#define LOGTBL %g5 +#define EXPTBL %g1 +#define EXPTBL_P8 %l4 + +! integer masks, materialized in .common_case +#define MASK_0x7fffffff %o4 +#define MASK_0x000fffff %o3 +#define MASK_0x3ff00000 %o1 + +! loop state: element count, the three vector pointers and their strides +! (strides are sign-extended and shifted left by 3 during setup) +#define counter %i0 +#define px %i1 +#define stridex %l5 +#define py %i3 +#define stridey %l6 +#define pz %i5 +#define stridez %l7 + +#define HTHRESH %f0 +#define LTHRESH %f2 + +! KA1_LO and KA1_HI alias the same register; each is reloaded from +! [EXPTBL-ind_LO] / [EXPTBL-ind_HI] right before it is used +#define MHI32 %f38 +#define KA1_LO %f40 +#define KA1_HI %f40 + +! KB1, KB2, KB3 and KB5 share %f42 and are reloaded from [EXPTBL-ind_KBn] +! before each use; KB4 has its own register and is loaded once in .common_case +#define KB1 %f42 +#define KB2 %f42 +#define KB3 %f42 +#define KB4 %f44 +#define KB5 %f42 + +#define KA1 %f46 +#define KA3 %f28 +#define KA5 %f50 + +! DZERO is cleared with fzero at entry; DONE is loaded in .common_case +#define DZERO %f24 +#define DZERO_HI %f24 +#define DZERO_LO %f25 +#define DONE %f18 +#define DONE_HI %f18 +#define DONE_LO %f19 + +! NOTE(review): XKB1-XKB5, s_h and yr match the names in the vpowx algorithm +! description; presumably they are used by the stride-zero path - confirm +#define XKB1 %f42 +#define XKB2 %f40 +#define XKB3 %f32 +#define XKB4 %f36 +#define XKB5 %f34 + +#define s_h %f46 +#define yr %f30 + +! byte offsets below EXPTBL, used as [EXPTBL-ind_xxx] in the loads of +! KA1_HI, KA1_LO and KB1/KB2/KB3/KB5 +#define ind_TINY 64 +#define ind_HUGE 56 +#define ind_LO 48 +#define ind_HI 40 +#define ind_KB5 32 +#define ind_KB3 24 +#define ind_KB2 16 +#define ind_KB1 8 + +!-------------------------------------------------------------------- +! !!!!! vpow algorithm !!!!! +! +! hx = ((unsigned*)px)[0]; +! lx = ((unsigned*)px)[1]; +! hy = ((unsigned*)py)[0]; +! ly = ((unsigned*)py)[1]; +! sx = hx >> 31; +! sy = hy >> 31; +! hx &= 0x7fffffff; +! hy &= 0x7fffffff; +! y0 = *px; +! +! if (hy < 0x3bf00000) { /* |Y| < 2^(-64) */ +! if ((hy | ly) == 0) { /* pow(X,0) */ +! *pz = DONE; +! goto next; +! } +! if (hx > 0x7ff00000 || (hx == 0x7ff00000 && lx != 0)) { /* |X| = Nan */ +! *pz = y0 * y0; +! goto next; +! } +! else if ((hx | lx) == 0 || (hx == 0x7ff00000 && lx == 0)) { /* X = 0 or Inf */ +! ((int*)pz)[0] = hx; +! ((int*)pz)[1] = lx; +! if (sy) *pz = DONE / *pz; +! goto next; +! } +! else *pz = (sx) ? DZERO / DZERO : DONE; +! goto next; +! } +! yisint = 0; /* Y - non-integer */ +! expy = hy >> 20; /* Y exponent */ +! +! if (hx >= 0x7ff00000 || expy >= 0x43e) { /* X=Inf,Nan or |Y|>2^63,Inf,Nan */ +! 
if (hx > 0x7ff00000 || (hx == 0x7ff00000 && lx != 0) || +! hy > 0x7ff00000 || (hy == 0x7ff00000 && ly != 0)) { +! *pz = y0 * *py; /* |X| or |Y| = Nan */ +! goto next; +! } +! if (hy == 0x7ff00000 && (ly == 0)) { /* |Y| = Inf */ +! if (hx == 0x3ff00000 && (lx == 0)) +! *pz = *py - *py; /* +-1 ** +-Inf */ +! else if ((hx < 0x3ff00000) != sy) +! *pz = DZERO; +! else { +! ((int*)pz)[0] = hy; +! ((int*)pz)[1] = ly; +! } +! goto next; +! } +! if (expy < 0x43e) { /* |Y| < 2^63 */ +! if (sx) { /* X = -Inf */ +! if (expy >= 0x434) /* |Y| >= 2^53 */ +! yisint = 2; /* Y - even */ +! else { +! if (expy >= 0x3ff) { /* |Y| >= 1 */ +! if (expy > (20 + 0x3ff)) { +! i0 = ly >> (52 - (expy - 0x3ff)); +! if ((i0 << (52 - (expy - 0x3ff))) == ly) yisint = 2 - (i0 & 1); +! } +! else if (ly == 0) { +! i0 = hy >> (20 - (expy - 0x3ff)); +! if ((i0 << (20 - (expy - 0x3ff))) == hy) yisint = 2 - (i0 & 1); +! } +! } +! } +! } +! if (sy) hx = lx = 0; +! hx += yisint << 31; +! ((int*)pz)[0] = hx; +! ((int*)pz)[1] = lx; +! goto next; +! } +! else { /* |Y| >= 2^63 */ +! if (lx == 0 && /* |X| = 0, 1, Inf */ +! (hx == 0 || hx == 0x3ff00000 || hx == 0x7ff00000)) { +! ((int*)pz)[0] = hx; +! ((int*)pz)[1] = lx; +! if (sy) *pz = DONE / *pz; +! } +! else { +! y0 = ((hx < 0x3ff00000) != sy) ? _TINY : _HUGE; +! *pz = y0 * y0; +! } +! goto next; +! } +! } +! if (sx || (hx | lx) == 0) { /* X <= 0 */ +! if (expy >= 0x434) /* |Y| >= 2^53 */ +! yisint = 2; /* Y - even */ +! else { +! if (expy >= 0x3ff) { /* |Y| >= 1 */ +! if (expy > (20 + 0x3ff)) { +! i0 = ly >> (52 - (expy - 0x3ff)); +! if ((i0 << (52 - (expy - 0x3ff))) == ly) yisint = 2 - (i0 & 1); +! } +! else if (ly == 0) { +! i0 = hy >> (20 - (expy - 0x3ff)); +! if ((i0 << (20 - (expy - 0x3ff))) == hy) yisint = 2 - (i0 & 1); +! } +! } +! } +! if ((hx | lx) == 0) { /* X == 0 */ +! y0 = DZERO; +! if (sy) y0 = DONE / y0; +! if (sx & yisint) y0 = -y0; +! *pz = y0; +! goto next; +! } +! if (yisint == 0) { /* pow(neg,non-integer) */ +! 
*pz = DZERO / DZERO; /* NaN */ +! goto next; +! } +! } +! +! *((int*)&x + 1) = ((unsigned*)px)[1]; +! *((int*)&ax + 1) = 0; +! exp = hx; +! hx &= 0xfffff; +! hx |= 0x3ff00000; +! *(int*)&x = hx; +! hx += 0x800; +! hx &= 0xfffff000; +! *(int*)&ax = hx; +! if (exp <= 0xfffff) { +! y0 = vis_fand(x, MMANT); +! ax = (double) ((long long *) & y0)[0]; +! x = vis_fand(ax, MMANT); +! x = vis_for(x, DONE); +! exp = ((unsigned int*) & ax)[0]; +! exp -= (1023 + 51) << 20; +! hx = exp & 0xfffff; +! hx |= 0x3ff00000; +! hx += 0x800; +! *(int*)&ax = hx; +! } +! exp = (exp >> 20); +! exp = exp - 2046; +! ux = x + ax; +! yd = DONE / ux; +! u = x - ax; +! s = u * yd; +! ux = vis_fand(ux, MHI32); +! y = s * s; +! s_h = vis_fand(s, MHI32); +! dtmp8 = KA5 * y; +! dtmp8 = dtmp8 + KA3; +! dtmp8 = dtmp8 * y; +! s = dtmp8 * s; +! dtmp0 = (ux - ax); +! s_l = (x - dtmp0); +! dtmp0 = s_h * ux; +! dtmp1 = s_h * s_l; +! s_l = u - dtmp0; +! s_l -= dtmp1; +! dtmp0 = KA1 * yd; +! s_l = dtmp0 * s_l; +! i = (hx >> 8); +! i = i & 0xff0; +! itmp0 = (hx >> 20); +! exp += itmp0; +! yd = KA1_HI * s_h; +! y = *(double *)((char*)__mt_constlog2 + i); +! itmp0 = exp << 8; +! y += (double)itmp0; +! m_h = y + yd; +! dtmp2 = m_h - y; +! dtmp2 -= yd; +! dtmp2 -= s_l; +! y = s - dtmp2; +! dtmp0 = *(double *)((char*)__mt_constlog2 + i + 8); +! dtmp1 = KA1_LO * s_h; +! dtmp0 += dtmp1; +! y += dtmp0; +! dtmp0 = y + m_h; +! s_h = vis_fand(dtmp0, MHI32); +! dtmp0 = (s_h - m_h); +! y = y - dtmp0; +! yd = *py; +! s = vis_fand(yd, MHI32); +! dtmp0 = (yd - s); +! dtmp1 = yd * y; +! dtmp0 *= s_h; +! yd = dtmp0 + dtmp1; +! s = s_h * s; +! if (s > HTHRESH) {s = HTHRESH; yd = DZERO;} +! if (s < LTHRESH) {s = LTHRESH; yd = DZERO;} +! dtmp0 = (s + yd); +! ind = (int)dtmp0; +! i = ind & 0xff; +! i = i << 4; +! u = (double)(int)dtmp0; +! ind >>= 8; +! y = s - u; +! y = y + yd; +! u = *(double*)((char*)__mt_constexp2 + i); +! dtmp0 = KB5 * y; +! dtmp1 = dtmp0 + KB4; +! dtmp2 = dtmp1 * y; +! dtmp3 = dtmp2 + KB3; +! 
dtmp4 = dtmp3 * y; +! dtmp5 = dtmp4 + KB2; +! dtmp6 = dtmp5 * y; +! dtmp7 = dtmp6 + KB1; +! y = dtmp7 * y; +! eflag = (ind + 1021); +! eflag = eflag >> 31; +! gflag = (1022 - ind); +! gflag = gflag >> 31; +! dtmp0 = *(double*)((char*)__mt_constexp2 + i + 8); +! dtmp1 = u * y; +! dtmp2 = dtmp0 + dtmp1; +! u = dtmp2 + u; +! ind = yisint + ind; +! itmp0 = 54 & eflag; +! itmp1 = 52 & gflag; +! ind = ind + itmp0; +! ind = ind - itmp1; +! ind <<= 20; +! *(int*)&dtmp0 = ind; +! *((int*)&dtmp0 + 1) = 0; +! u = vis_fpadd32(u, dtmp0); +! ind = eflag - gflag; +! ind += 1; +! ind *= 8; +! dtmp1 = *(double*)((char*)lconst + ind); +! dtmp1 = u * dtmp1; +! *pz = dtmp1; +!-------------------------------------------------------------------- +! !!!!! vpowx algorithm !!!!! (x > 0 and x != Inf, NaN) +! +! /* perform s_h + yr = 256*log2(x) */ +! +! exp = ((unsigned*)px)[0]; +! y0 = px[0]; +! if (exp <= 0xfffff) { +! y0 = (double) ((long long *) & y0)[0]; +! exp = ((unsigned int*) & y0)[0]; +! exp -= (1023 + 51) << 20; +! } +! x = vis_fand(y0, MMANT); +! x = vis_for(x, DONE); +! ax = vis_fpadd32(x, MROUND); +! ax = vis_fand(ax, MHI20); +! hx = *(int*)&ax; +! exp = (exp >> 20); +! exp = exp - 2046; +! ux = x + ax; +! yd = DONE / ux; +! u = x - ax; +! s = u * yd; +! ux = vis_fand(ux, MHI32); +! y = s * s; +! s_h = vis_fand(s, MHI32); +! dtmp8 = KA5 * y; +! dtmp8 = dtmp8 + KA3; +! dtmp8 = dtmp8 * y; +! s = dtmp8 * s; +! dtmp0 = (ux - ax); +! s_l = (x - dtmp0); +! dtmp0 = s_h * ux; +! dtmp1 = s_h * s_l; +! s_l = u - dtmp0; +! s_l -= dtmp1; +! dtmp0 = KA1 * yd; +! s_l = dtmp0 * s_l; +! i = (hx >> 8); +! i = i & 0xff0; +! itmp0 = (hx >> 20); +! exp += itmp0; +! yd = KA1_HI * s_h; +! y = *(double *)((char*)__mt_constlog2 + i); +! itmp0 = exp << 8; +! y += (double)itmp0; +! m_h = y + yd; +! dtmp2 = m_h - y; +! dtmp2 -= yd; +! dtmp2 -= s_l; +! y = s - dtmp2; +! dtmp0 = *(double *)((char*)__mt_constlog2 + i + 8); +! dtmp1 = KA1_LO * s_h; +! dtmp0 += dtmp1; +! y += dtmp0; +! dtmp0 = y + m_h; +! 
s_h = vis_fand(dtmp0, MHI32); +! dtmp0 = (s_h - m_h); +! yr = y - dtmp0; +! +! hy = ((unsigned*)py)[0]; +! ly = ((unsigned*)py)[1]; +! hx = ((unsigned*)px)[0]; +! lx = ((unsigned*)px)[1]; +! sy = hy >> 31; +! hy &= 0x7fffffff; +! +! if (hy < 0x3bf00000) {/* |Y| < 2^(-64) */ +! *pz = DONE; +! goto next; +! } +! +! if (hy >= 0x43e00000) { /* |Y|>2^63,Inf,Nan */ +! if (hy == 0x7ff00000 && (ly == 0)) { /* |Y| = Inf */ +! if (hx == 0x3ff00000 && (lx == 0)) +! *pz = *py - *py; /* 1 ** +-Inf */ +! else if ((hx < 0x3ff00000) != sy) +! *pz = DZERO; +! else { +! ((int*)pz)[0] = hy; +! ((int*)pz)[1] = ly; +! } +! goto next; +! } +! if (hy >= 0x7ff00000) { +! *pz = *px + *py; /* |Y| = Nan */ +! goto next; +! } +! /* |Y| >= 2^63 */ +! if (lx == 0 && (hx == 0x3ff00000)) { /* X = 1 */ +! *pz = DONE; +! } +! else { +! y0 = ((hx < 0x3ff00000) != sy) ? _TINY : _HUGE; +! *pz = y0 * y0; +! } +! goto next; +! } +! +! yd = *py; +! s = vis_fand(yd, MHI32); +! dtmp0 = (yd - s); +! dtmp1 = yd * yr; +! dtmp0 *= s_h; +! yd = dtmp0 + dtmp1; +! s = s_h * s; +! if (s > HTHRESH) {s = HTHRESH; yd = DZERO;} +! if (s < LTHRESH) {s = LTHRESH; yd = DZERO;} +! dtmp0 = (s + yd); +! ind = (int)dtmp0; +! i = ind & 0xff; +! i = i << 4; +! u = (double)(int)dtmp0; +! ind >>= 8; +! y = s - u; +! y = y + yd; +! u = *(double*)((char*)__mt_constexp2 + i); +! dtmp0 = XKB5 * y; +! dtmp1 = dtmp0 + XKB4; +! dtmp2 = dtmp1 * y; +! dtmp3 = dtmp2 + XKB3; +! dtmp4 = dtmp3 * y; +! dtmp5 = dtmp4 + XKB2; +! dtmp6 = dtmp5 * y; +! dtmp7 = dtmp6 + XKB1; +! y = dtmp7 * y; +! eflag = (ind + 1021); +! eflag = eflag >> 31; +! gflag = (1022 - ind); +! gflag = gflag >> 31; +! dtmp0 = *(double*)((char*)__mt_constexp2 + i + 8); +! dtmp1 = u * y; +! dtmp2 = dtmp0 + dtmp1; +! u = dtmp2 + u; +! itmp0 = 54 & eflag; +! itmp1 = 52 & gflag; +! ind = ind + itmp0; +! ind = ind - itmp1; +! ind <<= 20; +! *(int*)&dtmp0 = ind; +! *((int*)&dtmp0 + 1) = 0; +! u = vis_fpadd32(u, dtmp0); +! ind = eflag - gflag; +! ind += 1; +! ind *= 8; +! 
dtmp1 = (*(double*)((char*)__mt_constexp2 + ind); +! dtmp1 = u * dtmp1; +! *pz = dtmp1; +!-------------------------------------------------------------------- + + ENTRY(__vpow) + save %sp,-SA(MINFRAME)-tmps,%sp + PIC_SETUP(l7) + PIC_SET(l7,.CONST_TBL,g5) + wr %g0,0x82,%asi ! set %asi for non-faulting loads + + cmp counter,0 + ble,pn %icc,.end + +#ifdef __sparcv9 + ldx [%fp+STACK_BIAS+176],stridez +#else + ld [%fp+STACK_BIAS+92],stridez +#endif + + ld [px],%o0 + add LOGTBL,4095,EXPTBL + st counter,[%fp+tmp_counter] + add EXPTBL,65,EXPTBL + sra %i2,0,stridex + stx px,[%fp+tmp_px] + add EXPTBL,4095,%l0 + fzero DZERO + stx py,[%fp+tmp_py] + + cmp stridex,0 + bne,pt %icc,.common_case + add %l0,1,%l0 + + cmp %o0,0 + ble,pt %icc,.common_case + sethi %hi(0x7f800000),%o1 + + cmp %o0,%o1 + bl,pn %icc,.stridex_zero + nop + +.common_case: + sra stridez,0,stridez + ldd [%l0+8],DONE + ldd [%l0+24],MHI32 + sra %i4,0,stridey + ldd [%l0+32],KA5 + sethi %hi(0x7ffffc00),MASK_0x7fffffff + ldd [%l0+40],KA3 + sethi %hi(0xffc00),MASK_0x000fffff + ldd [%l0+48],KA1 + sethi %hi(0x3ff00000),MASK_0x3ff00000 + ldd [%l0+56],HTHRESH + sllx stridex,3,stridex + add MASK_0x7fffffff,0x3ff,MASK_0x7fffffff + ldd [%l0+64],LTHRESH + sllx stridey,3,stridey + add MASK_0x000fffff,0x3ff,MASK_0x000fffff + ldd [%l0+72],KB4 + sllx stridez,3,stridez + st %g0,[%fp+tmp1_lo] ! *((int*)&ax + 1) = 0; + sub %g0,1,%o2 + st %g0,[%fp+tmp2_lo] ! (Y0_0) *((int*)&dtmp0 + 1) = 0; + st MASK_0x000fffff,[%fp+tmp_mant] + sub pz,stridez,pz + st %o2,[%fp+tmp_mant+4] + +.begin: + ld [%fp+tmp_counter],counter + ldx [%fp+tmp_px],px + ldx [%fp+tmp_py],py + st %g0,[%fp+tmp_counter] +.begin1: + subcc counter,1,counter + bneg,pn %icc,.end + or %g0,ind_buf,%o7 + + lda [py]%asi,%o2 ! (Y0_1) hy = *py; + + and %o2,MASK_0x7fffffff,%l1 ! (Y0_3) hy &= 0x7fffffff; + lda [px]%asi,%l0 ! (Y0_3) hx = ((unsigned*)px)[0]; + + sra %l1,20,%o0 ! (Y0_3) expy = hy >> 20; + lda [px+4]%asi,%i2 ! 
(Y0_3) *((int*)&x + 1) = ((unsigned*)px)[1]; + + and MASK_0x000fffff,%l0,%o5 ! (Y0_3) hx &= 0xfffff; + + or MASK_0x3ff00000,%o5,%o5 ! (Y0_3) hx |= 0x3ff00000; + + st %o5,[%fp+tmp0_hi] ! (Y0_3) *(int*)&x = hx; + + add %o5,2048,%o5 ! (Y0_3) hx += 0x800; + + st %i2,[%fp+tmp0_lo] ! (Y0_3) *((int*)&x + 1) = ((unsigned*)px)[1]; + and %o5,-4096,%l4 ! (Y0_3) hx &= 0xfffff000; + + add pz,stridez,pz + st %l4,[%fp+tmp1_hi] ! (Y0_3) *(int*)&ax = hx; + + and %l0,MASK_0x7fffffff,%l3 ! (Y0_3) hx &= 0x7fffffff; + + sra %l3,20,%l2 ! (Y0_3) exp = (exp >> 20); + + cmp %o0,959 ! (Y0_3) if (expy < 0x3fb); + bl,pn %icc,.spec0 ! (Y0_3) if (expy < 0x3fb); + st %g0,[%fp+%o7] ! (Y0_3) yisint = 0; + + cmp %o0,1086 ! (Y0_3) if (expy >= 0x43e); + bge,pn %icc,.spec1 ! (Y0_3) if (expy >= 0x43e); + nop + + cmp %l2,2047 ! (Y0_2) if (exp >= 0x7ff) + bge,pn %icc,.spec1 ! (Y0_2) if (exp >= 0x7ff) + nop + + cmp %l0,MASK_0x000fffff ! (Y0_2) if (hx <= 0xfffff) + + ldd [%fp+tmp0_hi],%f32 ! (Y0_2) *(int*)&x = hx; + ble,pn %icc,.update0 ! (Y0_2) if (hx <= 0xfffff) + nop +.cont0: + sub %o7,ind_buf,%o7 ! stack buffer pointer update + sub pz,stridez,pz + ldd [%fp+tmp1_hi],%f54 ! (Y0_2) *(int*)&ax = hx; + + add %o7,4,%o7 ! stack buffer pointer update + faddd %f32,%f54,%f12 ! (Y0_2) ux = x + ax; + + and %o7,15,%o7 ! stack buffer pointer update + + add %o7,ind_buf,%o7 ! stack buffer pointer update + add px,stridex,px ! px += stridex; + + lda [px]%asi,%l0 ! (Y1_2) hx = ((unsigned*)px)[0]; + + lda [px+4]%asi,%i2 ! (Y1_2) *((int*)&x + 1) = ((unsigned*)px)[1]; + and MASK_0x000fffff,%l0,%i4 ! (Y1_2) hx &= 0xfffff; + + st %g0,[%fp+%o7] ! (Y1_2) yisint = 0; + or MASK_0x3ff00000,%i4,%i4 ! (Y1_2) hx |= 0x3ff00000; + + st %i4,[%fp+tmp0_hi] ! (Y1_2) *(int*)&x = hx; + add %i4,2048,%i4 ! (Y1_2) hx += 0x800; + + st %i2,[%fp+tmp0_lo] ! (Y1_2) *((int*)&x + 1) = ((unsigned*)px)[1]; + and %i4,-4096,%i4 ! (Y1_2) hx &= 0xfffff000; + + st %i4,[%fp+tmp1_hi] ! (Y1_2) *(int*)&ax = hx; + and %l0,MASK_0x7fffffff,%l2 ! 
(Y1_2) hx &= 0x7fffffff; + cmp %l0,MASK_0x000fffff ! (Y1_2) if (hx <= 0xfffff) + + ble,pn %icc,.update1 ! (Y1_2) if (hx <= 0xfffff) + nop +.cont1: + sub %o7,ind_buf,%o7 ! stack buffer pointer update + + add %o7,4,%o7 ! stack buffer pointer update + fdivd DONE,%f12,%f20 ! (Y0_2) yd = DONE / ux; + + and %o7,15,%o7 ! stack buffer pointer update + + sra %l3,20,%l3 ! (Y0_2) exp = (exp >> 20); + add %o7,ind_buf,%o7 ! stack buffer pointer update + ldd [%fp+tmp0_hi],%f8 ! (Y1_2) *(int*)&x = hx; + + ldd [%fp+tmp1_hi],%f14 ! (Y1_2) *(int*)&ax = hx; + sra %l4,20,%l0 ! (Y0_2) itmp0 = (hx >> 20); + sub %l3,2046,%o5 ! (Y0_2) exp = exp - 2046; + + add %o5,%l0,%o5 ! (Y0_2) exp += itmp0; + + sll %o5,8,%l0 ! (Y0_2) itmp0 = exp << 8; + st %l0,[%fp+tmp3] ! (Y0_2) (double)itmp0; + faddd %f8,%f14,%f26 ! (Y1_2) ux = x + ax; + + fand %f12,MHI32,%f12 ! (Y0_2) ux = vis_fand(ux, MHI32); + add px,stridex,px ! px += stridex; + + ldd [EXPTBL-ind_HI],KA1_HI ! (Y0_2) load KA1_HI; + fsubd %f12,%f54,%f10 ! (Y0_2) dtmp0 = (ux - ax); + + ld [%fp+tmp3],%f16 ! (Y0_2) (double)itmp0; + fsubd %f32,%f54,%f58 ! (Y0_2) u = x - ax; + + sra %l4,8,%l4 ! (Y0_2) i = (hx >> 8); + + and %l4,4080,%l4 ! (Y0_2) i = i & 0xff0; + + ldd [LOGTBL+%l4],%f62 ! (Y0_2) y = *(double *)((char*)__mt_constlog2 + i); + fmuld %f58,%f20,%f52 ! (Y0_2) s = u * yd; + fsubd %f32,%f10,%f10 ! (Y0_2) s_l = (x - dtmp0); + + fitod %f16,%f54 ! (Y0_2) (double)itmp0; + add %l4,8,%o0 ! (Y0_2) i += 8; + + lda [px]%asi,%l0 ! (Y0_3) hx = ((unsigned*)px)[0]; + fand %f52,MHI32,%f4 ! (Y0_2) s_h = vis_fand(s, MHI32); + + faddd %f62,%f54,%f54 ! (Y0_2) y += (double)itmp0; + lda [px+4]%asi,%i2 ! (Y0_3) *((int*)&x + 1) = ((unsigned*)px)[1]; + fmuld %f4,%f12,%f32 ! (Y0_2) dtmp0 = s_h * ux; + + and MASK_0x000fffff,%l0,%o5 ! (Y0_3) hx &= 0xfffff; + fmuld %f52,%f52,%f12 ! (Y0_2) y = s * s; + + or MASK_0x3ff00000,%o5,%o5 ! (Y0_3) hx |= 0x3ff00000; + + st %o5,[%fp+tmp0_hi] ! (Y0_3) *(int*)&x = hx; + fsubd %f58,%f32,%f32 ! 
(Y0_2) s_l = u - dtmp0; + + add %o5,2048,%o5 ! (Y0_3) hx += 0x800; + + st %i2,[%fp+tmp0_lo] ! (Y0_3) *((int*)&x + 1) = ((unsigned*)px)[1]; + and %o5,-4096,%l4 ! (Y0_3) hx &= 0xfffff000; + fmuld KA5,%f12,%f36 ! (Y0_2) dtmp8 = KA5 * y; + + st %l4,[%fp+tmp1_hi] ! (Y0_3) *(int*)&ax = hx; + fmuld KA1_HI,%f4,%f48 ! (Y0_2) yd = KA1_HI * s_h; + + fmuld %f4,%f10,%f10 ! (Y0_2) dtmp1 = s_h * s_l; + ldd [EXPTBL-ind_LO],KA1_LO ! (y0_2) load KA1_LO; + and %l0,MASK_0x7fffffff,%l3 ! (Y0_3) hx &= 0x7fffffff; + faddd %f36,KA3,%f62 ! (Y0_2) dtmp8 = dtmp8 + KA3; + + st %g0,[%fp+%o7] ! (Y0_3) yisint = 0; + faddd %f54,%f48,%f36 ! (Y0_2) m_h = y + yd; + + fdivd DONE,%f26,%f22 ! (Y1_2) yd = DONE / ux; + fsubd %f32,%f10,%f10 ! (Y0_2) s_l -= dtmp1; + + cmp %l0,MASK_0x000fffff ! (Y0_2) if (hx <= 0xfffff) + + sra %l2,20,%l2 ! (Y1_1) exp = (exp >> 20); + ldd [%fp+tmp0_hi],%f32 ! (Y0_2) *(int*)&x = hx; + ble,pn %icc,.update2 ! (Y0_2) if (hx <= 0xfffff) + fsubd %f36,%f54,%f30 ! (Y0_1) dtmp2 = m_h - y; +.cont2: + cmp %l2,2047 ! (Y1_1) if (exp >= 0x7ff) + sub %o7,ind_buf,%o7 ! stack buffer pointer update + ldd [%fp+tmp1_hi],%f54 ! (Y0_2) *(int*)&ax = hx; + + sra %i4,20,%l0 ! (Y1_1) itmp0 = (hx >> 20); + sub %l2,2046,%o5 ! (Y1_1) exp = exp - 2046; + fmuld KA1,%f20,%f20 ! (Y0_1) dtmp0 = KA1 * yd; + + add %o5,%l0,%o5 ! (Y1_1) exp += itmp0; + fmuld %f62,%f12,%f62 ! (Y0_1) dtmp8 = dtmp8 * y; + + sll %o5,8,%l0 ! (Y1_1) itmp0 = exp << 8; + add %o7,4,%o7 ! stack buffer pointer update + st %l0,[%fp+tmp3] ! (Y1_1) (double)itmp0; + faddd %f32,%f54,%f12 ! (Y0_2) ux = x + ax; + + bge,pn %icc,.update3 ! (Y1_1) if (exp >= 0x7ff) + fsubd %f30,%f48,%f48 ! (Y0_1) dtmp2 -= yd; +.cont3: + and %o7,15,%o7 ! stack buffer pointer update + fmuld %f20,%f10,%f10 ! (Y0_1) s_l = dtmp0 * s_l; + + add %o7,ind_buf,%o7 ! stack buffer pointer update + fmuld KA1_LO,%f4,%f4 ! (Y0_1) dtmp1 = KA1_LO * s_h; + fand %f26,MHI32,%f26 ! (Y1_1) ux = vis_fand(ux, MHI32); + + fmuld %f62,%f52,%f62 ! 
(Y0_1) s = dtmp8 * s; + ldd [LOGTBL+%o0],%f52 ! (Y0_1) dtmp0 = *(double *)((char*)__mt_constlog2 + i + 8); + fsubd %f48,%f10,%f20 ! (Y0_1) dtmp2 -= s_l; + + add px,stridex,px ! px += stridex; + fsubd %f26,%f14,%f10 ! (Y1_1) dtmp0 = (ux - ax); + + faddd %f52,%f4,%f52 ! (Y0_1) dtmp0 += dtmp1; + + ldd [EXPTBL-ind_HI],KA1_HI ! (Y1_1) load KA1_HI; + fsubd %f62,%f20,%f4 ! (Y0_1) y = s - dtmp2; + + ld [%fp+tmp3],%f16 ! (Y1_1) (double)itmp0; + fsubd %f8,%f14,%f58 ! (Y1_1) u = x - ax; + + sra %i4,8,%o0 ! (Y1_1) i = (hx >> 8); + + faddd %f4,%f52,%f48 ! (Y0_1) y += dtmp0; + and %o0,4080,%o0 ! (Y1_1) i = i & 0xff0; + + ldd [LOGTBL+%o0],%f62 ! (Y1_1) y = *(double *)((char*)__mt_constlog2 + i); + fmuld %f58,%f22,%f52 ! (Y1_1) s = u * yd; + fsubd %f8,%f10,%f10 ! (Y1_1) s_l = (x - dtmp0); + + lda [py]%asi,%f30 ! (Y0_1) yd = *py; + fitod %f16,%f14 ! (Y1_1) (double)itmp0; + + lda [py+4]%asi,%f31 ! (Y0_1) yd = *py; + faddd %f48,%f36,%f8 ! (Y0_1) dtmp0 = y + m_h; + + add %o0,8,%o0 ! (Y1_1) i += 8; + lda [px]%asi,%l0 ! (Y1_2) hx = ((unsigned*)px)[0]; + fand %f52,MHI32,%f4 ! (Y1_1) s_h = vis_fand(s, MHI32); + + faddd %f62,%f14,%f14 ! (Y1_1) y += (double)itmp0; + + lda [px+4]%asi,%i2 ! (Y1_2) *((int*)&x + 1) = ((unsigned*)px)[1]; + fand %f8,MHI32,%f20 ! (Y0_1) s_h = vis_fand(dtmp0, MHI32); + fmuld %f4,%f26,%f8 ! (Y1_1) dtmp0 = s_h * ux; + + fand %f30,MHI32,%f6 ! (Y0_1) s = vis_fand(yd, MHI32); + and MASK_0x000fffff,%l0,%i4 ! (Y1_2) hx &= 0xfffff; + fmuld %f52,%f52,%f26 ! (Y1_1) y = s * s; + + st %g0,[%fp+%o7] ! (Y1_2) yisint = 0; + or MASK_0x3ff00000,%i4,%i4 ! (Y1_2) hx |= 0x3ff00000; + fsubd %f20,%f36,%f62 ! (Y0_1) dtmp0 = (s_h - m_h); + + st %i4,[%fp+tmp0_hi] ! (Y1_2) *(int*)&x = hx; + fsubd %f58,%f8,%f8 ! (Y1_1) s_l = u - dtmp0; + + add %i4,2048,%i4 ! (Y1_2) hx += 0x800; + fmuld %f20,%f6,%f34 ! (Y0_1) s = s_h * s; + fsubd %f30,%f6,%f6 ! (Y0_1) dtmp0 = (yd - s); + + st %i2,[%fp+tmp0_lo] ! (Y1_2) *((int*)&x + 1) = ((unsigned*)px)[1]; + and %i4,-4096,%i4 ! 
(Y1_2) hx &= 0xfffff000; + fmuld KA5,%f26,%f36 ! (Y1_1) dtmp8 = KA5 * y; + + st %i4,[%fp+tmp1_hi] ! (Y1_2) *(int*)&ax = hx; + fsubd %f48,%f62,%f62 ! (Y0_1) y = y - dtmp0; + fmuld KA1_HI,%f4,%f48 ! (Y1_1) yd = KA1_HI * s_h; + + fmuld %f4,%f10,%f10 ! (Y1_1) dtmp1 = s_h * s_l; + + ldd [EXPTBL-ind_LO],KA1_LO ! (Y1_1) load KA1_LO; + and %l0,MASK_0x7fffffff,%l2 ! (Y1_2) hx &= 0x7fffffff; + fmuld %f6,%f20,%f6 ! (Y0_1) dtmp0 *= s_h; + fcmped %fcc0,%f34,HTHRESH ! (Y0_1) s > HTHRESH + + cmp %l0,MASK_0x000fffff ! (Y1_2) if (hx <= 0xfffff) + fmuld %f30,%f62,%f30 ! (Y0_1) dtmp1 = yd * y; + faddd %f36,KA3,%f62 ! (Y1_1) dtmp8 = dtmp8 + KA3; + + ble,pn %icc,.update4 ! (Y1_2) if (hx <= 0xfffff) + faddd %f14,%f48,%f36 ! (Y1_1) m_h = y + yd; +.cont4: + sub %o7,ind_buf,%o7 ! stack buffer pointer update + fmovdg %fcc0,HTHRESH,%f34 ! (Y0_1) s = HTHRESH + + add %o7,4,%o7 ! stack buffer pointer update + fdivd DONE,%f12,%f20 ! (Y0_2) yd = DONE / ux; + fsubd %f8,%f10,%f10 ! (Y1_1) s_l -= dtmp1; + + and %o7,15,%o7 ! stack buffer pointer update + faddd %f6,%f30,%f6 ! (Y0_1) yd = dtmp0 + dtmp1; + + sra %l3,20,%l3 ! (Y0_2) exp = (exp >> 20); + add %o7,ind_buf,%o7 ! stack buffer pointer update + ldd [%fp+tmp0_hi],%f8 ! (Y1_2) *(int*)&x = hx; + fsubd %f36,%f14,%f30 ! (Y1_1) dtmp2 = m_h - y; + + cmp %l3,2047 ! (Y0_2) if (exp >= 0x7ff) + ldd [%fp+tmp1_hi],%f14 ! (Y1_2) *(int*)&ax = hx; + fmuld KA1,%f22,%f22 ! (Y1_1) dtmp0 = KA1 * yd; + + sra %l4,20,%l0 ! (Y0_2) itmp0 = (hx >> 20); + sub %l3,2046,%o5 ! (Y0_2) exp = exp - 2046; + fcmped %fcc1,%f34,LTHRESH ! (Y0_1) s < LTHRESH + + add %o5,%l0,%o5 ! (Y0_2) exp += itmp0; + add py,stridey,py ! py += stridey; + fmuld %f62,%f26,%f62 ! (Y1_1) dtmp8 = dtmp8 * y; + fmovdg %fcc0,DZERO,%f6 ! (Y0_1) yd = DZERO + + sll %o5,8,%l0 ! (Y0_2) itmp0 = exp << 8; + st %l0,[%fp+tmp3] ! (Y0_2) (double)itmp0; + faddd %f8,%f14,%f26 ! (Y1_2) ux = x + ax; + + bge,pn %icc,.update5 ! (Y0_2) if (exp >= 0x7ff) + fsubd %f30,%f48,%f48 ! 
(Y1_1) dtmp2 -= yd; +.cont5: + lda [py]%asi,%l1 ! (Y1_1) hy = *py; + fmuld %f22,%f10,%f10 ! (Y1_1) s_l = dtmp0 * s_l; + fmovdl %fcc1,LTHRESH,%f34 ! (Y0_1) s = LTHRESH + + fmovdl %fcc1,DZERO,%f6 ! (Y0_1) yd = DZERO + + fand %f12,MHI32,%f12 ! (Y0_2) ux = vis_fand(ux, MHI32); + fmuld KA1_LO,%f4,%f4 ! (Y1_1) dtmp1 = KA1_LO * s_h; + + fmuld %f62,%f52,%f62 ! (Y1_1) s = dtmp8 * s; + ldd [LOGTBL+%o0],%f52 ! (Y1_1) dtmp0 = *(double *)((char*)__mt_constlog2 + i + 8); + fsubd %f48,%f10,%f22 ! (Y1_1) dtmp2 -= s_l; + + add px,stridex,px ! px += stridex; + faddd %f34,%f6,%f58 ! (Y0_1) dtmp0 = (s + yd); + + and %l1,MASK_0x7fffffff,%l1 ! (Y1_1) hy &= 0x7fffffff; + ldd [EXPTBL-ind_HI],KA1_HI ! (Y0_2) load KA1_HI; + fsubd %f12,%f54,%f10 ! (Y0_2) dtmp0 = (ux - ax); + + faddd %f52,%f4,%f52 ! (Y1_1) dtmp0 += dtmp1; + + fsubd %f62,%f22,%f4 ! (Y1_1) y = s - dtmp2; + + fdtoi %f58,%f17 ! (Y0_1) (int)dtmp0; + + ld [%fp+tmp3],%f16 ! (Y0_2) (double)itmp0; + fsubd %f32,%f54,%f58 ! (Y0_2) u = x - ax; + sra %l4,8,%l4 ! (Y0_2) i = (hx >> 8); + + sra %l1,20,%l1 ! (Y1_1) expy = hy >> 20; + ldd [EXPTBL-ind_KB5],KB5 ! (Y0_1) load KB5; + faddd %f4,%f52,%f48 ! (Y1_1) y += dtmp0; + + and %l4,4080,%l4 ! (Y0_2) i = i & 0xff0; + st %f17,[%fp+tmp4] ! (Y0_1) ind = (int)dtmp0; + fitod %f17,%f4 ! (Y0_1) u = (double)(int)dtmp0; + + ldd [LOGTBL+%l4],%f62 ! (Y0_2) y = *(double *)((char*)__mt_constlog2 + i); + fmuld %f58,%f20,%f52 ! (Y0_2) s = u * yd; + fsubd %f32,%f10,%f10 ! (Y0_2) s_l = (x - dtmp0); + + lda [py]%asi,%f30 ! (Y1_1) yd = *py; + fitod %f16,%f54 ! (Y0_2) (double)itmp0; + + lda [py+4]%asi,%f31 ! (Y1_1) yd = *py; + faddd %f48,%f36,%f32 ! (Y1_1) dtmp0 = y + m_h; + + add %l4,8,%o0 ! (Y0_2) i += 8; + fsubd %f34,%f4,%f60 ! (Y0_1) y = s - u; + + cmp %l1,959 ! (Y1_1) if (expy < 0x3fb); + lda [px]%asi,%l0 ! (Y0_3) hx = ((unsigned*)px)[0]; + fand %f52,MHI32,%f4 ! (Y0_2) s_h = vis_fand(s, MHI32); + + bl,pn %icc,.update6 ! (Y1_1) if (expy < 0x3fb); + faddd %f62,%f54,%f54 ! 
(Y0_2) y += (double)itmp0; +.cont6: + cmp %l1,1086 ! (Y1_1) if (expy >= 0x43e); + lda [px+4]%asi,%i2 ! (Y0_3) *((int*)&x + 1) = ((unsigned*)px)[1]; + fand %f32,MHI32,%f22 ! (Y1_1) s_h = vis_fand(dtmp0, MHI32); + + fmuld %f4,%f12,%f32 ! (Y0_2) dtmp0 = s_h * ux; + bge,pn %icc,.update7 ! (Y1_1) if (expy >= 0x43e); + faddd %f60,%f6,%f60 ! (Y0_1) y = y + yd; +.cont7: + ld [%fp+%o7],%o2 ! (Y0_1) load yisint + fand %f30,MHI32,%f6 ! (Y1_1) s = vis_fand(yd, MHI32); + + and MASK_0x000fffff,%l0,%o5 ! (Y0_3) hx &= 0xfffff; + fmuld %f52,%f52,%f12 ! (Y0_2) y = s * s; + + or MASK_0x3ff00000,%o5,%o5 ! (Y0_3) hx |= 0x3ff00000; + fsubd %f22,%f36,%f62 ! (Y1_1) dtmp0 = (s_h - m_h); + + st %o5,[%fp+tmp0_hi] ! (Y0_3) *(int*)&x = hx; + fsubd %f58,%f32,%f32 ! (Y0_2) s_l = u - dtmp0; + fmuld KB5,%f60,%f58 ! (Y0_1) dtmp0 = KB5 * y; + + ldd [EXPTBL-ind_KB3],KB3 ! (Y0_1) load KB3; + add %o5,2048,%o5 ! (Y0_3) hx += 0x800; + fmuld %f22,%f6,%f34 ! (Y1_1) s = s_h * s; + fsubd %f30,%f6,%f6 ! (Y1_1) dtmp0 = (yd - s); + + st %i2,[%fp+tmp0_lo] ! (Y0_3) *((int*)&x + 1) = ((unsigned*)px)[1]; + and %o5,-4096,%l4 ! (Y0_3) hx &= 0xfffff000; + fmuld KA5,%f12,%f36 ! (Y0_2) dtmp8 = KA5 * y; + + st %l4,[%fp+tmp1_hi] ! (Y0_3) *(int*)&ax = hx; + fsubd %f48,%f62,%f62 ! (Y1_1) y = y - dtmp0; + fmuld KA1_HI,%f4,%f48 ! (Y0_2) yd = KA1_HI * s_h; + + subcc counter,1,counter + fmuld %f4,%f10,%f10 ! (Y0_2) dtmp1 = s_h * s_l; + faddd %f58,KB4,%f58 ! (Y0_1) dtmp1 = dtmp0 + KB4; + + ldd [EXPTBL-ind_LO],KA1_LO ! (y0_2) load KA1_LO; + and %l0,MASK_0x7fffffff,%l3 ! (Y0_3) hx &= 0x7fffffff; + fmuld %f6,%f22,%f6 ! (Y1_1) dtmp0 *= s_h; + fcmped %fcc0,%f34,HTHRESH ! (Y1_1) s > HTHRESH; + + fmuld %f30,%f62,%f30 ! (Y1_1) dtmp1 = yd * y; + ba 1f + faddd %f36,KA3,%f62 ! (Y0_2) dtmp8 = dtmp8 + KA3; + + .align 16 +1: + st %g0,[%fp+%o7] ! (Y0_3) yisint = 0; + fmuld %f58,%f60,%f58 ! (Y0_1) dtmp2 = dtmp1 * y; + bneg,pn %icc,.tail + faddd %f54,%f48,%f36 ! (Y0_2) m_h = y + yd; + + nop + fmovdg %fcc0,HTHRESH,%f34 ! 
(Y1_1) s = HTHRESH; + + fdivd DONE,%f26,%f22 ! (Y1_2) yd = DONE / ux; + fsubd %f32,%f10,%f10 ! (Y0_2) s_l -= dtmp1; + +.main_loop: + cmp %l0,MASK_0x000fffff ! (Y0_2) if (hx <= 0xfffff) + add py,stridey,py ! py += stridey; + faddd %f6,%f30,%f6 ! (Y1_0) yd = dtmp0 + dtmp1; + + sra %l2,20,%l2 ! (Y1_1) exp = (exp >> 20); + ldd [%fp+tmp0_hi],%f32 ! (Y0_2) *(int*)&x = hx; + ble,pn %icc,.update8 ! (Y0_2) if (hx <= 0xfffff) + fsubd %f36,%f54,%f30 ! (Y0_1) dtmp2 = m_h - y; +.cont8: + cmp %l2,2047 ! (Y1_1) if (exp >= 0x7ff) + sub %o7,ind_buf,%o7 ! stack buffer pointer update + ldd [%fp+tmp1_hi],%f54 ! (Y0_2) *(int*)&ax = hx; + faddd %f58,KB3,%f58 ! (Y0_0) dtmp3 = dtmp2 + KB3; + + sra %i4,20,%l0 ! (Y1_1) itmp0 = (hx >> 20); + sub %l2,2046,%o5 ! (Y1_1) exp = exp - 2046; + fmuld KA1,%f20,%f20 ! (Y0_1) dtmp0 = KA1 * yd; + fcmped %fcc1,%f34,LTHRESH ! (Y1_0) s < LTHRESH; + + ldd [EXPTBL-ind_KB2],KB2 ! (Y0_0) load KB2; + add %o5,%l0,%o5 ! (Y1_1) exp += itmp0; + fmuld %f62,%f12,%f62 ! (Y0_1) dtmp8 = dtmp8 * y; + fmovdg %fcc0,DZERO,%f6 ! (Y1_0) yd = DZERO + + sll %o5,8,%l0 ! (Y1_1) itmp0 = exp << 8; + add %o7,4,%o7 ! stack buffer pointer update + st %l0,[%fp+tmp3] ! (Y1_1) (double)itmp0; + faddd %f32,%f54,%f12 ! (Y0_2) ux = x + ax; + + ld [%fp+tmp4],%i2 ! (Y0_0) ind = (int)dtmp0; + fsubd %f30,%f48,%f48 ! (Y0_1) dtmp2 -= yd; + bge,pn %icc,.update9 ! (Y1_1) if (exp >= 0x7ff) + fmuld %f58,%f60,%f58 ! (Y0_0) dtmp4 = dtmp3 * y; +.cont9: + lda [py]%asi,%l1 ! (Y0_1) hy = *py; + and %o7,15,%o7 ! stack buffer pointer update + fmuld %f20,%f10,%f10 ! (Y0_1) s_l = dtmp0 * s_l; + fmovdl %fcc1,LTHRESH,%f34 ! (Y1_0) s = LTHRESH; + + add %o7,ind_buf,%o7 ! stack buffer pointer update + fmovdl %fcc1,DZERO,%f6 ! (Y1_0) yd = DZERO + + fmuld KA1_LO,%f4,%f4 ! (Y0_1) dtmp1 = KA1_LO * s_h; + fand %f26,MHI32,%f26 ! (Y1_1) ux = vis_fand(ux, MHI32); + + fmuld %f62,%f52,%f62 ! (Y0_1) s = dtmp8 * s; + nop + faddd %f58,KB2,%f30 ! (Y0_0) dtmp5 = dtmp4 + KB2; + + nop + add pz,stridez,pz ! 
pz += stridez; + ldd [LOGTBL+%o0],%f52 ! (Y0_1) dtmp0 = *(double *)((char*)__mt_constlog2 + i + 8); + fsubd %f48,%f10,%f20 ! (Y0_1) dtmp2 -= s_l; + + sra %i2,8,%l0 ! (Y0_0) ind >>= 8; + ldd [EXPTBL-ind_KB1],KB1 ! (Y0_0) load KB1; + add px,stridex,px ! px += stridex; + faddd %f34,%f6,%f58 ! (Y1_0) dtmp0 = (s + yd); + + add %l0,1021,%l2 ! (Y0_0) eflag = (ind + 1021); + sub %g0,%l0,%o5 ! (Y0_0) gflag = (1022 - ind); + fsubd %f26,%f14,%f10 ! (Y1_1) dtmp0 = (ux - ax); + + sra %l2,31,%l2 ! (Y0_0) eflag = eflag >> 31; + add %o5,1022,%o5 ! (Y0_0) gflag = (1022 - ind); + fmuld %f30,%f60,%f48 ! (Y0_0) dtmp6 = dtmp5 * y; + faddd %f52,%f4,%f52 ! (Y0_1) dtmp0 += dtmp1; + + sra %o5,31,%o5 ! (Y0_0) gflag = gflag >> 31; + and %l2,54,%o0 ! (Y0_0) itmp0 = 54 & eflag; + ldd [EXPTBL-ind_HI],KA1_HI ! (Y1_1) load KA1_HI; + fsubd %f62,%f20,%f4 ! (Y0_1) y = s - dtmp2; + + lda [py]%asi,%f30 ! (Y0_1) yd = *py; + sub %l2,%o5,%l2 ! (Y0_0) ind = eflag - gflag; + add %l0,%o0,%l0 ! (Y0_0) ind = ind + itmp0; + fdtoi %f58,%f20 ! (Y1_0) u = (double)(int)dtmp0; + + sra %i4,8,%o0 ! (Y1_1) i = (hx >> 8); + and %o5,52,%o5 ! (Y0_0) itmp1 = 52 & gflag; + ld [%fp+tmp3],%f16 ! (Y1_1) (double)itmp0; + fsubd %f8,%f14,%f58 ! (Y1_1) u = x - ax; + + and %o0,4080,%o0 ! (Y1_1) i = i & 0xff0; + sub %l0,%o5,%i4 ! (Y0_0) ind = ind - itmp1; + st %f20,[%fp+tmp4] ! (Y1_0) ind = (int)dtmp0; + faddd %f48,KB1,%f14 ! (Y0_0) dtmp7 = dtmp6 + KB1; + + add %o2,%i4,%i4 ! (Y0_0) ind = yisint + ind; + and %i2,255,%o5 ! (Y0_0) i = ind & 0xff; + lda [px]%asi,%l0 ! (Y1_2) hx = ((unsigned*)px)[0]; + faddd %f4,%f52,%f48 ! (Y0_1) y += dtmp0; + + sll %i4,20,%i4 ! (Y0_0) ind <<= 20; + ldd [LOGTBL+%o0],%f62 ! (Y1_1) y = *(double *)((char*)__mt_constlog2 + i); + and %l1,MASK_0x7fffffff,%l1 ! (Y0_1) hy &= 0x7fffffff; + fitod %f20,%f4 ! (Y1_0) u = (double)(int)dtmp0; + + lda [px+4]%asi,%i2 ! (Y1_2) *((int*)&x + 1) = ((unsigned*)px)[1]; + nop + fmuld %f58,%f22,%f52 ! (Y1_1) s = u * yd; + fsubd %f8,%f10,%f10 ! 
(Y1_1) s_l = (x - dtmp0); + + sll %o5,4,%o5 ! (Y0_0) i = i << 4; + st %i4,[%fp+tmp2_hi] ! (Y0_0) *(int*)&dtmp0 = ind; + fmuld %f14,%f60,%f20 ! (Y0_0) y = dtmp7 * y; + fitod %f16,%f14 ! (Y1_1) (double)itmp0; + + sra %l1,20,%l1 ! (Y0_1) expy = hy >> 20; + nop + ldd [EXPTBL+%o5],%f56 ! (Y0_0) u = *(double*)((char*)__mt_constexp2 + i); + faddd %f48,%f36,%f8 ! (Y0_1) dtmp0 = y + m_h; + + add %o5,8,%o5 ! (Y0_0) i += 8; + add %o0,8,%o0 ! (Y1_1) i += 8; + lda [py+4]%asi,%f31 ! (Y0_1) yd = *py; + fsubd %f34,%f4,%f60 ! (Y1_0) y = s - u; + + cmp %l1,959 ! (Y0_1) if (expy < 0x3fb); + and MASK_0x000fffff,%l0,%i4 ! (Y1_2) hx &= 0xfffff; + ldd [EXPTBL-ind_KB5],KB5 ! (Y1_0) load KB5; + fand %f52,MHI32,%f4 ! (Y1_1) s_h = vis_fand(s, MHI32); + + ldd [EXPTBL+%o5],%f16 ! (Y0_0) dtmp0 = *(double*)((char*)__mt_constexp2 + i + 8); + fmuld %f56,%f20,%f34 ! (Y0_0) dtmp1 = u * y; + bl,pn %icc,.update10 ! (Y0_1) if (expy < 0x3fb); + faddd %f62,%f14,%f14 ! (Y1_1) y += (double)itmp0; +.cont10: + or MASK_0x3ff00000,%i4,%i4 ! (Y1_2) hx |= 0x3ff00000; + cmp %l1,1086 ! (Y0_1) if (expy >= 0x43e); + fand %f8,MHI32,%f20 ! (Y0_1) s_h = vis_fand(dtmp0, MHI32); + + fmuld %f4,%f26,%f8 ! (Y1_1) dtmp0 = s_h * ux; + st %i4,[%fp+tmp0_hi] ! (Y1_2) *(int*)&x = hx; + bge,pn %icc,.update11 ! (Y0_1) if (expy >= 0x43e); + faddd %f60,%f6,%f60 ! (Y1_0) y = y + yd; +.cont11: + add %i4,2048,%i4 ! (Y1_2) hx += 0x800; + ld [%fp+%o7],%o2 ! (Y1_0) load yisint + fand %f30,MHI32,%f6 ! (Y0_1) s = vis_fand(yd, MHI32); + + st %i2,[%fp+tmp0_lo] ! (Y1_2) *((int*)&x + 1) = ((unsigned*)px)[1]; + and %i4,-4096,%i4 ! (Y1_2) hx &= 0xfffff000; + fmuld %f52,%f52,%f26 ! (Y1_1) y = s * s; + faddd %f16,%f34,%f16 ! (Y0_0) dtmp2 = dtmp0 + dtmp1; + + st %i4,[%fp+tmp1_hi] ! (Y1_2) *(int*)&ax = hx; + fsubd %f20,%f36,%f62 ! (Y0_1) dtmp0 = (s_h - m_h); + + fsubd %f58,%f8,%f8 ! (Y1_1) s_l = u - dtmp0; + fmuld KB5,%f60,%f58 ! (Y1_0) dtmp0 = KB5 * y; + + ldd [EXPTBL-ind_KB3],KB3 ! (Y1_0) load KB3; + fmuld %f20,%f6,%f34 ! 
(Y0_1) s = s_h * s; + fsubd %f30,%f6,%f6 ! (Y0_1) dtmp0 = (yd - s); + + faddd %f16,%f56,%f56 ! (Y0_0) u = dtmp2 + u; + nop + fmuld KA5,%f26,%f36 ! (Y1_1) dtmp8 = KA5 * y; + + nop + add %l2,513,%l2 ! (Y0_0) ind += 513; + fsubd %f48,%f62,%f62 ! (Y0_1) y = y - dtmp0; + fmuld KA1_HI,%f4,%f48 ! (Y1_1) yd = KA1_HI * s_h; + + sll %l2,3,%o5 ! (Y0_0) ind *= 8; + ldd [%fp+tmp2_hi],%f16 ! (Y0_0) ld dtmp0; + fmuld %f4,%f10,%f10 ! (Y1_1) dtmp1 = s_h * s_l; + faddd %f58,KB4,%f58 ! (Y1_0) dtmp1 = dtmp0 + KB4; + + ldd [EXPTBL-ind_LO],KA1_LO ! (Y1_1) load KA1_LO; + and %l0,MASK_0x7fffffff,%l2 ! (Y1_2) hx &= 0x7fffffff; + fmuld %f6,%f20,%f6 ! (Y0_1) dtmp0 *= s_h; + fcmped %fcc0,%f34,HTHRESH ! (Y0_1) s > HTHRESH + + ldd [EXPTBL+%o5],%f20 ! (Y0_0) dtmp1 = (*(double*)((char*)__mt_constexp2 + ind); + nop + nop + fpadd32 %f56,%f16,%f56 ! (Y0_0) u = vis_fpadd32(u, dtmp0); + + nop + cmp %l0,MASK_0x000fffff ! (Y1_2) if (hx <= 0xfffff) + fmuld %f30,%f62,%f30 ! (Y0_1) dtmp1 = yd * y; + faddd %f36,KA3,%f62 ! (Y1_1) dtmp8 = dtmp8 + KA3; + + fmuld %f58,%f60,%f58 ! (Y1_0) dtmp2 = dtmp1 * y; + st %g0,[%fp+%o7] ! (Y1_2) yisint = 0; + ble,pn %icc,.update12 ! (Y1_2) if (hx <= 0xfffff) + faddd %f14,%f48,%f36 ! (Y1_1) m_h = y + yd; +.cont12: + sra %l3,20,%l3 ! (Y0_2) exp = (exp >> 20); + sub %o7,ind_buf,%o7 ! stack buffer pointer update + fmuld %f56,%f20,%f16 ! (Y0_0) dtmp1 = u * dtmp1; + fmovdg %fcc0,HTHRESH,%f34 ! (Y0_1) s = HTHRESH + + cmp %l3,2047 ! (Y0_2) if (exp >= 0x7ff) + st %f16,[pz] ! (Y0_0) write into memory + fdivd DONE,%f12,%f20 ! (Y0_2) yd = DONE / ux; + fsubd %f8,%f10,%f10 ! (Y1_1) s_l -= dtmp1; + + sra %l4,20,%l0 ! (Y0_2) itmp0 = (hx >> 20); + sub %l3,2046,%o5 ! (Y0_2) exp = exp - 2046; + st %f17,[pz+4] ! (Y0_0) write into memory + faddd %f6,%f30,%f6 ! (Y0_1) yd = dtmp0 + dtmp1; + + add %o5,%l0,%o5 ! (Y0_2) exp += itmp0; + add py,stridey,py ! py += stridey; + ldd [%fp+tmp0_hi],%f8 ! (Y1_2) *(int*)&x = hx; + fsubd %f36,%f14,%f30 ! (Y1_1) dtmp2 = m_h - y; + + sll %o5,8,%l0 ! 
(Y0_2) itmp0 = exp << 8; + ldd [%fp+tmp1_hi],%f14 ! (Y1_2) *(int*)&ax = hx; + fmuld KA1,%f22,%f22 ! (Y1_1) dtmp0 = KA1 * yd; + faddd %f58,KB3,%f58 ! (Y1_0) dtmp3 = dtmp2 + KB3; + + add %o7,4,%o7 ! stack buffer pointer update + st %l0,[%fp+tmp3] ! (Y0_2) (double)itmp0; + fcmped %fcc1,%f34,LTHRESH ! (Y0_1) s < LTHRESH + + and %o7,15,%o7 ! stack buffer pointer update + ld [%fp+tmp4],%l0 ! (Y1_0) ind = (int)dtmp0; + fmuld %f62,%f26,%f62 ! (Y1_1) dtmp8 = dtmp8 * y; + fmovdg %fcc0,DZERO,%f6 ! (Y0_1) yd = DZERO + + nop + add %o7,ind_buf,%o7 ! stack buffer pointer update + ldd [EXPTBL-ind_KB2],KB2 ! (Y1_0) load KB2; + faddd %f8,%f14,%f26 ! (Y1_2) ux = x + ax; + + fmuld %f58,%f60,%f58 ! (Y1_0) dtmp4 = dtmp3 * y; + nop + bge,pn %icc,.update13 ! (Y0_2) if (exp >= 0x7ff) + fsubd %f30,%f48,%f48 ! (Y1_1) dtmp2 -= yd; +.cont13: + lda [py]%asi,%l1 ! (Y1_1) hy = *py; + nop + fmuld %f22,%f10,%f10 ! (Y1_1) s_l = dtmp0 * s_l; + fmovdl %fcc1,LTHRESH,%f34 ! (Y0_1) s = LTHRESH + + nop + nop + fmovdl %fcc1,DZERO,%f6 ! (Y0_1) yd = DZERO + + fand %f12,MHI32,%f12 ! (Y0_2) ux = vis_fand(ux, MHI32); + nop + nop + fmuld KA1_LO,%f4,%f4 ! (Y1_1) dtmp1 = KA1_LO * s_h; + + nop + add px,stridex,px ! px += stridex; + faddd %f58,KB2,%f30 ! (Y1_0) dtmp5 = dtmp4 + KB2; + fmuld %f62,%f52,%f62 ! (Y1_1) s = dtmp8 * s; + + sra %l0,8,%i2 ! (Y1_0) ind >>= 8; + add pz,stridez,pz ! pz += stridez; + ldd [LOGTBL+%o0],%f52 ! (Y1_1) dtmp0 = *(double *)((char*)__mt_constlog2 + i + 8); + fsubd %f48,%f10,%f22 ! (Y1_1) dtmp2 -= s_l; + + add %i2,1021,%l3 ! (Y1_0) eflag = (ind + 1021); + sub %g0,%i2,%o5 ! (Y1_0) gflag = (1022 - ind); + ldd [EXPTBL-ind_KB1],KB1 ! (Y1_0) load KB1; + faddd %f34,%f6,%f58 ! (Y0_1) dtmp0 = (s + yd); + + sra %l3,31,%l3 ! (Y1_0) eflag = eflag >> 31; + add %o5,1022,%o5 ! (Y1_0) gflag = (1022 - ind); + ldd [EXPTBL-ind_HI],KA1_HI ! (Y0_2) load KA1_HI; + fsubd %f12,%f54,%f10 ! (Y0_2) dtmp0 = (ux - ax); + + sra %o5,31,%o5 ! (Y1_0) gflag = gflag >> 31; + and %l3,54,%o0 ! 
(Y1_0) itmp0 = 54 & eflag; + fmuld %f30,%f60,%f48 ! (Y1_0) dtmp6 = dtmp5 * y; + faddd %f52,%f4,%f52 ! (Y1_1) dtmp0 += dtmp1; + + sra %l4,8,%l4 ! (Y0_2) i = (hx >> 8); + add %i2,%o0,%i2 ! (Y1_0) ind = ind + itmp0; + fsubd %f62,%f22,%f4 ! (Y1_1) y = s - dtmp2; + + lda [py]%asi,%f30 ! (Y1_1) yd = *py; + and %l4,4080,%l4 ! (Y0_2) i = i & 0xff0; + and %o5,52,%o0 ! (Y1_0) itmp1 = 52 & gflag; + fdtoi %f58,%f22 ! (Y0_1) (int)dtmp0; + + sub %l3,%o5,%l3 ! (Y1_0) ind = eflag - gflag; + sub %i2,%o0,%i2 ! (Y1_0) ind = ind - itmp1; + ld [%fp+tmp3],%f16 ! (Y0_2) (double)itmp0; + fsubd %f32,%f54,%f58 ! (Y0_2) u = x - ax; + + add %o2,%i2,%i2 ! (Y1_0) ind = yisint + ind; + and %l0,255,%o5 ! (Y1_0) i = ind & 0xff; + st %f22,[%fp+tmp4] ! (Y0_1) ind = (int)dtmp0; + faddd %f48,KB1,%f54 ! (Y1_0) dtmp7 = dtmp6 + KB1; + + sll %i2,20,%o0 ! (Y1_0) ind <<= 20; + nop + lda [px]%asi,%l0 ! (Y0_3) hx = ((unsigned*)px)[0]; + faddd %f4,%f52,%f48 ! (Y1_1) y += dtmp0; + + and %l1,MASK_0x7fffffff,%l1 ! (Y1_1) hy &= 0x7fffffff; + nop + st %o0,[%fp+tmp2_hi] ! (Y1_0) *(int*)&dtmp0 = ind; + fitod %f22,%f4 ! (Y0_1) u = (double)(int)dtmp0; + + lda [px+4]%asi,%i2 ! (Y0_3) *((int*)&x + 1) = ((unsigned*)px)[1]; + nop + fmuld %f58,%f20,%f52 ! (Y0_2) s = u * yd; + fsubd %f32,%f10,%f10 ! (Y0_2) s_l = (x - dtmp0); + + sll %o5,4,%o5 ! (Y1_0) i = i << 4; + ldd [LOGTBL+%l4],%f62 ! (Y0_2) y = *(double *)((char*)__mt_constlog2 + i); + fmuld %f54,%f60,%f22 ! (Y1_0) y = dtmp7 * y; + fitod %f16,%f54 ! (Y0_2) (double)itmp0; + + sra %l1,20,%l1 ! (Y1_1) expy = hy >> 20; + nop + ldd [EXPTBL+%o5],%f56 ! (Y1_0) u = *(double*)((char*)__mt_constexp2 + i); + faddd %f48,%f36,%f32 ! (Y1_1) dtmp0 = y + m_h; + + add %o5,8,%o5 ! (Y1_0) i += 8; + add %l4,8,%o0 ! (Y0_2) i += 8; + lda [py+4]%asi,%f31 ! (Y1_1) yd = *py; + fsubd %f34,%f4,%f60 ! (Y0_1) y = s - u; + + cmp %l1,959 ! (Y1_1) if (expy < 0x3fb); + and MASK_0x000fffff,%l0,%l4 ! (Y0_3) hx &= 0xfffff; + fand %f52,MHI32,%f4 ! 
(Y0_2) s_h = vis_fand(s, MHI32); + + ldd [EXPTBL+%o5],%f16 ! (Y1_0) dtmp0 = *(double*)((char*)__mt_constexp2 + i + 8); + fmuld %f56,%f22,%f34 ! (Y1_0) dtmp1 = u * y; + bl,pn %icc,.update14 ! (Y1_1) if (expy < 0x3fb); + faddd %f62,%f54,%f54 ! (Y0_2) y += (double)itmp0; +.cont14: + ldd [EXPTBL-ind_KB5],KB5 ! (Y0_1) load KB5; + or MASK_0x3ff00000,%l4,%o5 ! (Y0_3) hx |= 0x3ff00000; + cmp %l1,1086 ! (Y1_1) if (expy >= 0x43e); + fand %f32,MHI32,%f22 ! (Y1_1) s_h = vis_fand(dtmp0, MHI32); + + fmuld %f4,%f12,%f32 ! (Y0_2) dtmp0 = s_h * ux; + st %o5,[%fp+tmp0_hi] ! (Y0_3) *(int*)&x = hx; + bge,pn %icc,.update15 ! (Y1_1) if (expy >= 0x43e); + faddd %f60,%f6,%f60 ! (Y0_1) y = y + yd; +.cont15: + add %o5,2048,%o5 ! (Y0_3) hx += 0x800; + nop + ld [%fp+%o7],%o2 ! (Y0_1) load yisint + fand %f30,MHI32,%f6 ! (Y1_1) s = vis_fand(yd, MHI32); + + and %o5,-4096,%l4 ! (Y0_3) hx &= 0xfffff000; + st %i2,[%fp+tmp0_lo] ! (Y0_3) *((int*)&x + 1) = ((unsigned*)px)[1]; + fmuld %f52,%f52,%f12 ! (Y0_2) y = s * s; + faddd %f16,%f34,%f16 ! (Y1_0) dtmp2 = dtmp0 + dtmp1; + + nop + nop + st %l4,[%fp+tmp1_hi] ! (Y0_3) *(int*)&ax = hx; + fsubd %f22,%f36,%f62 ! (Y1_1) dtmp0 = (s_h - m_h); + + fsubd %f58,%f32,%f32 ! (Y0_2) s_l = u - dtmp0; + nop + nop + fmuld KB5,%f60,%f58 ! (Y0_1) dtmp0 = KB5 * y; + + ldd [EXPTBL-ind_KB3],KB3 ! (Y0_1) load KB3; + nop + fmuld %f22,%f6,%f34 ! (Y1_1) s = s_h * s; + fsubd %f30,%f6,%f6 ! (Y1_1) dtmp0 = (yd - s); + + fmuld KA5,%f12,%f36 ! (Y0_2) dtmp8 = KA5 * y; + nop + faddd %f16,%f56,%f56 ! (Y1_0) u = dtmp2 + u; + + add %l3,513,%l3 ! (Y1_0) ind += 513; + fsubd %f48,%f62,%f62 ! (Y1_1) y = y - dtmp0; + fmuld KA1_HI,%f4,%f48 ! (Y0_2) yd = KA1_HI * s_h; + + sll %l3,3,%o5 ! (Y1_0) ind *= 8; + ldd [%fp+tmp2_hi],%f16 ! (Y1_0) ld dtmp0; + fmuld %f4,%f10,%f10 ! (Y0_2) dtmp1 = s_h * s_l; + faddd %f58,KB4,%f58 ! (Y0_1) dtmp1 = dtmp0 + KB4; + + ldd [EXPTBL-ind_LO],KA1_LO ! (Y0_2) load KA1_LO; + and %l0,MASK_0x7fffffff,%l3 ! (Y0_3) hx &= 0x7fffffff; + fmuld %f6,%f22,%f6 ! 
(Y1_1) dtmp0 *= s_h; + fcmped %fcc0,%f34,HTHRESH ! (Y1_1) s > HTHRESH; + + nop + subcc counter,2,counter ! update cycle counter + ldd [EXPTBL+%o5],%f22 ! (Y1_0) dtmp1 = (*(double*)((char*)__mt_constexp2 + ind); + fpadd32 %f56,%f16,%f56 ! (Y1_0) u = vis_fpadd32(u, dtmp0); + + fmuld %f30,%f62,%f30 ! (Y1_1) dtmp1 = yd * y; + nop + nop + faddd %f36,KA3,%f62 ! (Y0_2) dtmp8 = dtmp8 + KA3; + + nop + st %g0,[%fp+%o7] ! (Y0_3) yisint = 0; + fmuld %f58,%f60,%f58 ! (Y0_1) dtmp2 = dtmp1 * y; + faddd %f54,%f48,%f36 ! (Y0_2) m_h = y + yd; + + fmuld %f56,%f22,%f16 ! (Y1_0) dtmp1 = u * dtmp1; + nop + st %f16,[pz] ! (Y1_0) write into memory + fmovdg %fcc0,HTHRESH,%f34 ! (Y1_1) s = HTHRESH; + + fdivd DONE,%f26,%f22 ! (Y1_2) yd = DONE / ux; + st %f17,[pz+4] ! (Y1_0) write into memory + bpos,pt %icc,.main_loop + fsubd %f32,%f10,%f10 ! (Y0_2) s_l -= dtmp1; + +.tail: + addcc counter,1,counter + bneg,pn %icc,.end_loop + + faddd %f58,KB3,%f58 ! (Y0_0) dtmp3 = dtmp2 + KB3; + ldd [EXPTBL-ind_KB2],KB2 ! (Y0_0) load KB2; + + ld [%fp+tmp4],%i2 ! (Y0_0) ind = (int)dtmp0; + fmuld %f58,%f60,%f58 ! (Y0_0) dtmp4 = dtmp3 * y; + faddd %f58,KB2,%f30 ! (Y0_0) dtmp5 = dtmp4 + KB2; + + add pz,stridez,pz ! pz += stridez; + ldd [EXPTBL-ind_KB1],KB1 ! (Y0_0) load KB1; + sra %i2,8,%l0 ! (Y0_0) ind >>= 8; + + add %l0,1021,%l2 ! (Y0_0) eflag = (ind + 1021); + sub %g0,%l0,%o5 ! (Y0_0) gflag = (1022 - ind); + fmuld %f30,%f60,%f48 ! (Y0_0) dtmp6 = dtmp5 * y; + + sra %l2,31,%l2 ! (Y0_0) eflag = eflag >> 31; + add %o5,1022,%o5 ! (Y0_0) gflag = (1022 - ind); + + sra %o5,31,%o5 ! (Y0_0) gflag = gflag >> 31; + and %l2,54,%o0 ! (Y0_0) itmp0 = 54 & eflag; + + sub %l2,%o5,%l2 ! (Y0_0) ind = eflag - gflag; + add %l0,%o0,%l0 ! (Y0_0) ind = ind + itmp0; + + and %o5,52,%o5 ! (Y0_0) itmp1 = 52 & gflag; + faddd %f48,KB1,%f14 ! (Y0_0) dtmp7 = dtmp6 + KB1; + + sub %l0,%o5,%l0 ! (Y0_0) ind = ind - itmp1; + and %i2,255,%i4 ! (Y0_0) i = ind & 0xff; + + sll %i4,4,%o5 ! (Y0_0) i = i << 4; + + ldd [EXPTBL+%o5],%f56 ! 
(Y0_0) u = *(double*)((char*)__mt_constexp2 + i); + add %o2,%l0,%l0 ! (Y0_0) ind = yisint + ind; + fmuld %f14,%f60,%f20 ! (Y0_0) y = dtmp7 * y; + + sll %l0,20,%i2 ! (Y0_0) ind <<= 20; + + add %o5,8,%o5 ! (Y0_0) i += 8; + st %i2,[%fp+tmp2_hi] ! (Y0_0) *(int*)&dtmp0 = ind; + + ldd [EXPTBL+%o5],%f16 ! (Y0_0) dtmp0 = *(double*)((char*)__mt_constexp2 + i + 8); + fmuld %f56,%f20,%f34 ! (Y0_0) dtmp1 = u * y; + + faddd %f16,%f34,%f16 ! (Y0_0) dtmp2 = dtmp0 + dtmp1; + + faddd %f16,%f56,%f56 ! (Y0_0) u = dtmp2 + u; + add %l2,513,%l2 ! (Y0_0) ind += 513; + + sll %l2,3,%o5 ! (Y0_0) ind *= 8; + ldd [%fp+tmp2_hi],%f16 ! (Y0_0) ld dtmp0; + + ldd [EXPTBL+%o5],%f20 ! (Y0_0) dtmp1 = (*(double*)((char*)__mt_constexp2 + ind); + fpadd32 %f56,%f16,%f56 ! (Y0_0) u = vis_fpadd32(u, dtmp0); + + fmuld %f56,%f20,%f16 ! (Y0_0) dtmp1 = u * dtmp1; + st %f16,[pz] ! (Y0_0) write into memory + st %f17,[pz+4] ! (Y0_0) write into memory + +.end_loop: + ba .begin + nop +.end: + ret + restore %g0,0,%o0 + + .align 16 +.update0: + cmp %l0,%g0 ! if (x >= 0); + fzero %f30 + + lda [py+4]%asi,%l0 ! ld ly + bge,pt %icc,.pos0 ! if (x >= 0); + or %g0,%g0,%o5 ! yisint = 0; + + cmp %o0,1076 ! if (expy >= 0x434); + bge .neg0 ! if (expy >= 0x434); + or %g0,2,%o5 ! yisint = 2; + + cmp %o0,1023 ! if (expy < 0x3ff); + bl .neg0 ! if (expy < 0x3ff); + or %g0,0,%o5 ! yisint = 0; + + cmp %o0,1043 ! if (expy <= (20 + 0x3ff)); + ble .small0 ! if (expy <= (20 + 0x3ff)); + sub %o0,1023,%o0 ! expy - 0x3ff; + + sub %g0,%o0,%o0 + add %o0,52,%o0 ! sh = (52 - (expy - 0x3ff); + srl %l0,%o0,%i4 ! i0 = (ly >> sh); + + sll %i4,%o0,%i4 ! (i0 << sh); + + srl %l0,%o0,%o0 ! i0 = (ly >> sh); + cmp %i4,%l0 ! if ((i0 << sh) == ly); + + and %o0,1,%o0 ! i0 &= 1; + + sub %g0,%o0,%o0 + add %o0,2,%o0 ! i0 = 2 - i0; + + move %icc,%o0,%o5 ! yisint = i0; + + ba .neg0 + nop +.small0: + sub %g0,%o0,%o0 + cmp %l0,%g0 ! if (ly != 0); + + add %o0,20,%o0 ! sh = (20 - (expy - 0x3ff); + bne .neg0 ! if (ly != 0); + or %g0,0,%o5 ! 
yisint = 0; + + srl %l1,%o0,%i4 ! i0 = (hy >> sh); + + sll %i4,%o0,%i4 ! (i0 << sh); + + srl %l1,%o0,%o0 ! i0 = (hy >> sh); + cmp %i4,%l1 ! if ((i0 << sh) == hy); + + and %o0,1,%o0 ! i0 &= 1; + + sub %g0,%o0,%o0 + add %o0,2,%o0 ! i0 = 2 - i0; + + move %icc,%o0,%o5 ! yisint = i0; +.neg0: + orcc %l3,%i2,%g0 ! if (x != 0); + + sra %o2,31,%i4 ! sy = (*((unsigned*)py)[0]) >> 31; + bne,pt %icc,3f ! if (x != 0); + nop + + cmp %i4,%g0 ! if (sy == 0); + be 1f ! if (sy == 0); + and %o5,1,%i4 ! yisint &= 1; + + fdivd DONE,%f30,%f30 ! y0 = DONE / y0; +1: + cmp %i4,%g0 ! if ((yisint & 1) == 0); + be 2f ! if ((yisint & 1) == 0); + nop + + fnegd %f30,%f30 ! y0 = -y0; +2: + st %f30,[pz] + ba .update_point + st %f31,[pz+4] +3: + cmp %o5,%g0 ! if (yisint != 0); + bne .pos0 ! if (yisint != 0); + nop + + fdivd DZERO,DZERO,%f30 ! y0 = DZERO / DZERO; + st %f30,[pz] + ba .update_point + st %f31,[pz+4] +.pos0: + orcc %l3,%i2,%g0 ! if (x != 0); + + sra %o2,31,%i4 ! sy = (*((unsigned*)py)[0]) >> 31; + bne,pt %icc,.nzero0 ! if (x != 0); + nop + + cmp %i4,%g0 ! if (sy == 0); + be 1f ! if (sy == 0); + nop + + fdivd DONE,%f30,%f30 ! y0 = DONE / y0; +1: + st %f30,[pz] + ba .update_point + st %f31,[pz+4] +.nzero0: + sll %o5,11,%o5 + cmp %l3,MASK_0x000fffff ! if (exp > 0xfffff); + + bg,pt %icc,.cont0 ! if (exp > 0xfffff); + st %o5,[%fp+%o7] + + ldd [%fp+tmp_mant],%f54 + + or %g0,1074,%o5 + fand %f32,%f54,%f32 ! y0 = vis_fand(x, MMANT); + + sll %o5,20,%o5 + fxtod %f32,%f32 ! ax = (double) ((long long *) & y0)[0]; + + std %f32,[%fp+tmp0_hi] ! exp = ((unsigned int*) & ax)[0]; + fand %f32,%f54,%f32 ! x = vis_fand(ax, MMANT); + + ld [%fp+tmp0_hi],%i2 ! exp = ((unsigned int*) & ax)[0]; + for %f32,DONE,%f32 ! x = vis_for(x, DONE); + + sub %i2,%o5,%l3 ! exp -= (1023 + 51) << 20; + and MASK_0x000fffff,%i2,%l4 ! hx = exp & 0xfffff; + or MASK_0x3ff00000,%l4,%l4 ! hx |= 0x3ff00000; + add %l4,2048,%l4 ! hx += 0x800; + and %l4,-4096,%l4 ! hx &= 0xfffff000; + + ba .cont0 + st %l4,[%fp+tmp1_hi] ! 
*(int*)&ax = hx; + + .align 16 +.update1: + cmp counter,0 + ble,pt %icc,.cont1 + add py,stridey,%o5 + + stx px,[%fp+tmp_px] + + orcc %l2,%i2,%g0 ! if (x == 0); + bne,pt %icc,.nzero1 ! if (x == 0); + stx %o5,[%fp+tmp_py] +.u1: + st counter,[%fp+tmp_counter] + ba .cont1 + or %g0,0,counter +.nzero1: + lda [%o5]%asi,%l1 ! ld hy; + cmp %l0,%g0 ! if (x >= 0); + + lda [%o5+4]%asi,%l0 ! ld ly + bge,pt %icc,.pos1 ! if (x >= 0); + or %g0,%g0,%o5 ! yisint = 0; + + and %l1,MASK_0x7fffffff,%i2 ! hy &= 0x7fffffff; + + sra %i2,20,%i2 ! expy = hy >> 20; + + cmp %i2,1076 ! if (expy >= 0x434); + bge .neg1 ! if (expy >= 0x434); + or %g0,2,%o5 ! yisint = 2; + + cmp %i2,1023 ! if (expy < 0x3ff); + bl .neg1 ! if (expy < 0x3ff); + or %g0,0,%o5 ! yisint = 0; + + cmp %i2,1043 ! if (expy <= (20 + 0x3ff)); + ble .small1 ! if (expy <= (20 + 0x3ff)); + sub %i2,1023,%i2 ! expy - 0x3ff; + + sub %g0,%i2,%i2 + add %i2,52,%i2 ! sh = (52 - (expy - 0x3ff); + srl %l0,%i2,%l1 ! i0 = (ly >> sh); + + sll %l1,%i2,%l1 ! (i0 << sh); + + srl %l0,%i2,%i2 ! i0 = (ly >> sh); + cmp %l1,%l0 ! if ((i0 << sh) == ly); + + and %i2,1,%i2 ! i0 &= 1; + + sub %g0,%i2,%i2 + add %i2,2,%i2 ! i0 = 2 - i0; + + move %icc,%i2,%o5 ! yisint = i0; + + ba .neg1 + nop +.small1: + sub %g0,%i2,%i2 + cmp %l0,%g0 ! if (ly != 0); + + add %i2,20,%i2 ! sh = (20 - (expy - 0x3ff); + bne .neg1 ! if (ly != 0); + or %g0,0,%o5 ! yisint = 0; + + srl %l1,%i2,%l0 ! i0 = (hy >> sh); + + sll %l0,%i2,%l0 ! (i0 << sh); + + srl %l1,%i2,%i2 ! i0 = (hy >> sh); + cmp %l0,%l1 ! if ((i0 << sh) == hy); + + and %i2,1,%i2 ! i0 &= 1; + + sub %g0,%i2,%i2 + add %i2,2,%i2 ! i0 = 2 - i0; + + move %icc,%i2,%o5 ! yisint = i0; +.neg1: + cmp %o5,%g0 + be .u1 + nop +.pos1: + sll %o5,11,%o5 + cmp %l2,MASK_0x000fffff ! if (exp > 0xfffff); + + bg,pt %icc,.cont1 ! if (exp > 0xfffff); + st %o5,[%fp+%o7] + + std %f32,[%fp+tmp5]; + std %f54,[%fp+tmp6]; + ldd [%fp+tmp0_hi],%f32 + ldd [%fp+tmp_mant],%f54 + + or %g0,1074,%o5 + fand %f32,%f54,%f32 ! 
y0 = vis_fand(x, MMANT); + + sll %o5,20,%o5 + fxtod %f32,%f32 ! ax = (double) ((long long *) & y0)[0]; + + std %f32,[%fp+tmp0_hi] ! exp = ((unsigned int*) & ax)[0]; + fand %f32,%f54,%f32 ! x = vis_fand(ax, MMANT); + + ld [%fp+tmp0_hi],%i2 ! exp = ((unsigned int*) & ax)[0]; + for %f32,DONE,%f32 ! x = vis_for(x, DONE); + + std %f32,[%fp+tmp0_hi]; + sub %i2,%o5,%l2 ! exp -= (1023 + 51) << 20; + and MASK_0x000fffff,%i2,%i4 ! hx = exp & 0xfffff; + ldd [%fp+tmp5],%f32 + or MASK_0x3ff00000,%i4,%i4 ! hx |= 0x3ff00000; + add %i4,2048,%i4 ! hx += 0x800; + ldd [%fp+tmp6],%f54 + and %i4,-4096,%i4 ! hx &= 0xfffff000; + + ba .cont1 + st %i4,[%fp+tmp1_hi] ! *(int*)&ax = hx; + + .align 16 +.update2: + cmp counter,1 + ble,pt %icc,.cont2 + add py,stridey,%o5 + + add %o5,stridey,%o5 + stx px,[%fp+tmp_px] + + orcc %l3,%i2,%g0 ! if (x == 0); + bne,pt %icc,.nzero2 ! if (x == 0); + stx %o5,[%fp+tmp_py] +.u2: + sub counter,1,counter + st counter,[%fp+tmp_counter] + ba .cont2 + or %g0,1,counter +.nzero2: + lda [%o5]%asi,%l1 ! ld hy; + cmp %l0,%g0 ! if (x >= 0); + + lda [%o5+4]%asi,%l0 ! ld ly + bge,pt %icc,.pos2 ! if (x >= 0); + or %g0,%g0,%o5 ! yisint = 0; + + and %l1,MASK_0x7fffffff,%i2 ! hy &= 0x7fffffff; + + sra %i2,20,%i2 ! expy = hy >> 20; + + cmp %i2,1076 ! if (expy >= 0x434); + bge .neg2 ! if (expy >= 0x434); + or %g0,2,%o5 ! yisint = 2; + + cmp %i2,1023 ! if (expy < 0x3ff); + bl .neg2 ! if (expy < 0x3ff); + or %g0,0,%o5 ! yisint = 0; + + cmp %i2,1043 ! if (expy <= (20 + 0x3ff)); + ble .small2 ! if (expy <= (20 + 0x3ff)); + sub %i2,1023,%i2 ! expy - 0x3ff; + + sub %g0,%i2,%i2 + add %i2,52,%i2 ! sh = (52 - (expy - 0x3ff); + srl %l0,%i2,%l1 ! i0 = (ly >> sh); + + sll %l1,%i2,%l1 ! (i0 << sh); + + srl %l0,%i2,%i2 ! i0 = (ly >> sh); + cmp %l1,%l0 ! if ((i0 << sh) == ly); + + and %i2,1,%i2 ! i0 &= 1; + + sub %g0,%i2,%i2 + add %i2,2,%i2 ! i0 = 2 - i0; + + move %icc,%i2,%o5 ! yisint = i0; + + ba .neg2 + nop +.small2: + sub %g0,%i2,%i2 + cmp %l0,%g0 ! if (ly != 0); + + add %i2,20,%i2 ! 
sh = (20 - (expy - 0x3ff); + bne .neg2 ! if (ly != 0); + or %g0,0,%o5 ! yisint = 0; + + srl %l1,%i2,%l0 ! i0 = (hy >> sh); + + sll %l0,%i2,%l0 ! (i0 << sh); + + srl %l1,%i2,%i2 ! i0 = (hy >> sh); + cmp %l0,%l1 ! if ((i0 << sh) == hy); + + and %i2,1,%i2 ! i0 &= 1; + + sub %g0,%i2,%i2 + add %i2,2,%i2 ! i0 = 2 - i0; + + move %icc,%i2,%o5 ! yisint = i0; +.neg2: + cmp %o5,%g0 + be .u2 + nop +.pos2: + sll %o5,11,%o5 + cmp %l3,MASK_0x000fffff ! if (exp > 0xfffff); + + bg,pt %icc,.cont2 ! if (exp > 0xfffff); + st %o5,[%fp+%o7] + + ldd [%fp+tmp_mant],%f54 + + or %g0,1074,%o5 + fand %f32,%f54,%f32 ! y0 = vis_fand(x, MMANT); + + sll %o5,20,%o5 + fxtod %f32,%f32 ! ax = (double) ((long long *) & y0)[0] + + std %f32,[%fp+tmp0_hi] ! exp = ((unsigned int*) & ax)[0]; + fand %f32,%f54,%f32 ! x = vis_fand(ax, MMANT); + + ld [%fp+tmp0_hi],%i2 ! exp = ((unsigned int*) & ax)[0]; + for %f32,DONE,%f32 ! x = vis_for(x, DONE); + + sub %i2,%o5,%l3 ! exp -= (1023 + 51) << 20; + and MASK_0x000fffff,%i2,%l4 ! hx = exp & 0xfffff; + or MASK_0x3ff00000,%l4,%l4 ! hx |= 0x3ff00000; + add %l4,2048,%l4 ! hx += 0x800; + and %l4,-4096,%l4 ! hx &= 0xfffff000; + + ba .cont2 + st %l4,[%fp+tmp1_hi] ! *(int*)&ax = hx; + + .align 16 +.update3: + cmp counter,0 + ble,pt %icc,.cont3 + sub px,stridex,%o5 + + ld [%fp+tmp_counter],%l1 + + stx %o5,[%fp+tmp_px] + add py,stridey,%o5 + + add %l1,counter,counter + stx %o5,[%fp+tmp_py] + + st counter,[%fp+tmp_counter] + ba .cont3 + or %g0,0,counter + + .align 16 +.update4: + cmp counter,2 + ble,pt %icc,.cont4 + add py,stridey,%o5 + + add %o5,stridey,%o5 + add %o5,stridey,%o5 + stx px,[%fp+tmp_px] + + orcc %l2,%i2,%g0 ! if (x == 0); + bne,pt %icc,.nzero4 ! if (x == 0); + stx %o5,[%fp+tmp_py] +.u4: + sub counter,2,counter + st counter,[%fp+tmp_counter] + ba .cont4 + or %g0,2,counter +.nzero4: + lda [%o5]%asi,%l1 ! ld hy; + cmp %l0,%g0 ! if (x >= 0); + + lda [%o5+4]%asi,%l0 ! ld ly + bge,pt %icc,.pos4 ! if (x >= 0); + or %g0,%g0,%o5 ! 
yisint = 0; + + and %l1,MASK_0x7fffffff,%i2 ! hy &= 0x7fffffff; + + sra %i2,20,%i2 ! expy = hy >> 20; + + cmp %i2,1076 ! if (expy >= 0x434); + bge .neg4 ! if (expy >= 0x434); + or %g0,2,%o5 ! yisint = 2; + + cmp %i2,1023 ! if (expy < 0x3ff); + bl .neg4 ! if (expy < 0x3ff); + or %g0,0,%o5 ! yisint = 0; + + cmp %i2,1043 ! if (expy <= (20 + 0x3ff)); + ble .small4 ! if (expy <= (20 + 0x3ff)); + sub %i2,1023,%i2 ! expy - 0x3ff; + + sub %g0,%i2,%i2 + add %i2,52,%i2 ! sh = (52 - (expy - 0x3ff); + srl %l0,%i2,%l1 ! i0 = (ly >> sh); + + sll %l1,%i2,%l1 ! (i0 << sh); + + srl %l0,%i2,%i2 ! i0 = (ly >> sh); + cmp %l1,%l0 ! if ((i0 << sh) == ly); + + and %i2,1,%i2 ! i0 &= 1; + + sub %g0,%i2,%i2 + add %i2,2,%i2 ! i0 = 2 - i0; + + move %icc,%i2,%o5 ! yisint = i0; + + ba .neg4 + nop +.small4: + sub %g0,%i2,%i2 + cmp %l0,%g0 ! if (ly != 0); + + add %i2,20,%i2 ! sh = (20 - (expy - 0x3ff); + bne .neg4 ! if (ly != 0); + or %g0,0,%o5 ! yisint = 0; + + srl %l1,%i2,%l0 ! i0 = (hy >> sh); + + sll %l0,%i2,%l0 ! (i0 << sh); + + srl %l1,%i2,%i2 ! i0 = (hy >> sh); + cmp %l0,%l1 ! if ((i0 << sh) == hy); + + and %i2,1,%i2 ! i0 &= 1; + + sub %g0,%i2,%i2 + add %i2,2,%i2 ! i0 = 2 - i0; + + move %icc,%i2,%o5 ! yisint = i0; +.neg4: + cmp %o5,%g0 + be .u4 + nop +.pos4: + sll %o5,11,%o5 + cmp %l2,MASK_0x000fffff ! if (exp > 0xfffff); + + bg,pt %icc,.cont4 ! if (exp > 0xfffff); + st %o5,[%fp+%o7] + + std %f32,[%fp+tmp5]; + std %f54,[%fp+tmp6]; + ldd [%fp+tmp0_hi],%f32 + ldd [%fp+tmp_mant],%f54 + + or %g0,1074,%o5 + fand %f32,%f54,%f32 ! y0 = vis_fand(x, MMANT); + + sll %o5,20,%o5 + fxtod %f32,%f32 ! ax = (double) ((long long *) & y0)[0] + + std %f32,[%fp+tmp0_hi] ! exp = ((unsigned int*) & ax)[0]; + fand %f32,%f54,%f32 ! x = vis_fand(ax, MMANT); + + ld [%fp+tmp0_hi],%i2 ! exp = ((unsigned int*) & ax)[0]; + for %f32,DONE,%f32 ! x = vis_for(x, DONE); + + std %f32,[%fp+tmp0_hi]; + sub %i2,%o5,%l2 ! exp -= (1023 + 51) << 20; + and MASK_0x000fffff,%i2,%i4 ! 
hx = exp & 0xfffff; + ldd [%fp+tmp5],%f32 + or MASK_0x3ff00000,%i4,%i4 ! hx |= 0x3ff00000; + add %i4,2048,%i4 ! hx += 0x800; + ldd [%fp+tmp6],%f54 + and %i4,-4096,%i4 ! hx &= 0xfffff000; + + ba .cont4 + st %i4,[%fp+tmp1_hi] ! *(int*)&ax = hx; + + .align 16 +.update5: + cmp counter,1 + ble,pt %icc,.cont5 + sub px,stridex,%o5 + + ld [%fp+tmp_counter],%l1 + + stx %o5,[%fp+tmp_px] + add py,stridey,%o5 + + add %l1,counter,counter + stx %o5,[%fp+tmp_py] + + sub counter,1,counter + st counter,[%fp+tmp_counter] + ba .cont5 + or %g0,1,counter + + .align 16 +.update6: + cmp counter,0 + ble,pt %icc,.cont6 + fmovd DONE,%f30 + + ld [%fp+tmp_counter],%o2 + sub px,stridex,%o5 + + sub %o5,stridex,%o5 + stx py,[%fp+tmp_py] + + add %o2,counter,counter + sub %o5,stridex,%o5 + stx %o5,[%fp+tmp_px] + + st counter,[%fp+tmp_counter] + ba .cont6 + or %g0,0,counter + + .align 16 +.update7: + cmp counter,0 + ble,pt %icc,.cont7 + fmovd DONE,%f30 + sub px,stridex,%o5 + + ld [%fp+tmp_counter],%o2 + + sub %o5,stridex,%o5 + stx py,[%fp+tmp_py] + + add %o2,counter,counter + sub %o5,stridex,%o5 + stx %o5,[%fp+tmp_px] + + st counter,[%fp+tmp_counter] + ba .cont7 + or %g0,0,counter + + .align 16 +.update8: + cmp counter,2 + ble,pt %icc,.cont8 + add py,stridey,%o5 + + add %o5,stridey,%o5 + stx px,[%fp+tmp_px] + + orcc %l3,%i2,%g0 ! if (x == 0); + bne,pt %icc,.nzero8 ! if (x == 0); + stx %o5,[%fp+tmp_py] +.u8: + sub counter,2,counter + st counter,[%fp+tmp_counter] + ba .cont8 + or %g0,2,counter +.nzero8: + lda [%o5]%asi,%l1 ! ld hy; + cmp %l0,%g0 ! if (x >= 0); + + lda [%o5+4]%asi,%l0 ! ld ly + bge,pt %icc,.pos8 ! if (x >= 0); + or %g0,%g0,%o5 ! yisint = 0; + + and %l1,MASK_0x7fffffff,%i2 ! hy &= 0x7fffffff; + + sra %i2,20,%i2 ! expy = hy >> 20; + + cmp %i2,1076 ! if (expy >= 0x434); + bge .pos8 ! if (expy >= 0x434); + or %g0,2,%o5 ! yisint = 2; + + cmp %i2,1023 ! if (expy < 0x3ff); + bl .neg8 ! if (expy < 0x3ff); + or %g0,0,%o5 ! yisint = 0; + + cmp %i2,1043 ! 
if (expy <= (20 + 0x3ff)); + ble .small8 ! if (expy <= (20 + 0x3ff)); + sub %i2,1023,%i2 ! expy - 0x3ff; + + sub %g0,%i2,%i2 + add %i2,52,%i2 ! sh = (52 - (expy - 0x3ff); + srl %l0,%i2,%l1 ! i0 = (ly >> sh); + + sll %l1,%i2,%l1 ! (i0 << sh); + + srl %l0,%i2,%i2 ! i0 = (ly >> sh); + cmp %l1,%l0 ! if ((i0 << sh) == ly); + + and %i2,1,%i2 ! i0 &= 1; + + sub %g0,%i2,%i2 + add %i2,2,%i2 ! i0 = 2 - i0; + + move %icc,%i2,%o5 ! yisint = i0; + + ba .neg8 + nop +.small8: + sub %g0,%i2,%i2 + cmp %l0,%g0 ! if (ly != 0); + + add %i2,20,%i2 ! sh = (20 - (expy - 0x3ff); + bne .neg8 ! if (ly != 0); + or %g0,0,%o5 ! yisint = 0; + + srl %l1,%i2,%l0 ! i0 = (hy >> sh); + + sll %l0,%i2,%l0 ! (i0 << sh); + + srl %l1,%i2,%i2 ! i0 = (hy >> sh); + cmp %l0,%l1 ! if ((i0 << sh) == hy); + + and %i2,1,%i2 ! i0 &= 1; + + sub %g0,%i2,%i2 + add %i2,2,%i2 ! i0 = 2 - i0; + + move %icc,%i2,%o5 ! yisint = i0; +.neg8: + cmp %o5,%g0 + be .u8 + nop +.pos8: + sll %o5,11,%o5 + cmp %l3,MASK_0x000fffff ! if (exp > 0xfffff); + + bg,pt %icc,.cont8 ! if (exp > 0xfffff); + st %o5,[%fp+%o7] + + ldd [%fp+tmp_mant],%f54 + + or %g0,1074,%o5 + fand %f32,%f54,%f32 ! y0 = vis_fand(x, MMANT); + + sll %o5,20,%o5 + fxtod %f32,%f32 ! ax = (double) ((long long *) & y0)[0] + + std %f32,[%fp+tmp0_hi] ! exp = ((unsigned int*) & ax)[0]; + fand %f32,%f54,%f32 ! x = vis_fand(ax, MMANT); + + ld [%fp+tmp0_hi],%i2 ! exp = ((unsigned int*) & ax)[0]; + for %f32,DONE,%f32 ! x = vis_for(x, DONE); + + sub %i2,%o5,%l3 ! exp -= (1023 + 51) << 20; + and MASK_0x000fffff,%i2,%l4 ! hx &= 0xfffff; + or MASK_0x3ff00000,%l4,%l4 ! hx |= 0x3ff00000; + add %l4,2048,%l4 ! hx += 0x800; + and %l4,-4096,%l4 ! hx &= 0xfffff000; + + ba .cont8 + st %l4,[%fp+tmp1_hi] ! 
*(int*)&ax = hx; + + .align 16 +.update9: + cmp counter,1 + ble,pt %icc,.cont9 + sub px,stridex,%o5 + + ld [%fp+tmp_counter],%l1 + + stx %o5,[%fp+tmp_px] + add py,stridey,%o5 + + add %l1,counter,counter + stx %o5,[%fp+tmp_py] + + sub counter,1,counter + st counter,[%fp+tmp_counter] + ba .cont9 + or %g0,1,counter + + .align 16 +.update10: + cmp counter,0 + ble,pt %icc,.cont10 + fmovd DONE,%f30 + + ld [%fp+tmp_counter],%o2 + sub px,stridex,%o5 + + sub %o5,stridex,%o5 + stx py,[%fp+tmp_py] + + add %o2,counter,counter + sub %o5,stridex,%o5 + stx %o5,[%fp+tmp_px] + + st counter,[%fp+tmp_counter] + ba .cont10 + or %g0,0,counter + + .align 16 +.update11: + cmp counter,0 + ble,pt %icc,.cont11 + fmovd DONE,%f30 + + ld [%fp+tmp_counter],%o2 + sub px,stridex,%o5 + + sub %o5,stridex,%o5 + stx py,[%fp+tmp_py] + + add %o2,counter,counter + sub %o5,stridex,%o5 + stx %o5,[%fp+tmp_px] + + st counter,[%fp+tmp_counter] + ba .cont11 + or %g0,0,counter + + .align 16 +.update12: + cmp counter,3 + ble,pt %icc,.cont12 + add py,stridey,%o5 + + add %o5,stridey,%o5 + stx px,[%fp+tmp_px] + + add %o5,stridey,%o5 + orcc %l2,%i2,%g0 ! if (x == 0); + + bne,pt %icc,.nzero12 ! if (x == 0); + stx %o5,[%fp+tmp_py] +.u12: + sub counter,3,counter + st counter,[%fp+tmp_counter] + ba .cont12 + or %g0,3,counter +.nzero12: + lda [%o5]%asi,%l1 ! ld hy; + cmp %l0,%g0 ! if (x >= 0); + + lda [%o5+4]%asi,%l0 ! ld ly + bge,pt %icc,.pos12 ! if (x >= 0); + or %g0,%g0,%o5 ! yisint = 0; + + and %l1,MASK_0x7fffffff,%i2 ! hy &= 0x7fffffff; + + sra %i2,20,%i2 ! expy = hy >> 20; + + cmp %i2,1076 ! if (expy >= 0x434); + bge .neg12 ! if (expy >= 0x434); + or %g0,2,%o5 ! yisint = 2; + + cmp %i2,1023 ! if (expy < 0x3ff); + bl .neg12 ! if (expy < 0x3ff); + or %g0,0,%o5 ! yisint = 0; + + cmp %i2,1043 ! if (expy <= (20 + 0x3ff)); + ble .small12 ! if (expy <= (20 + 0x3ff)); + sub %i2,1023,%i2 ! expy - 0x3ff; + + sub %g0,%i2,%i2 + add %i2,52,%i2 ! sh = (52 - (expy - 0x3ff); + srl %l0,%i2,%l1 ! 
i0 = (ly >> sh); + + sll %l1,%i2,%l1 ! (i0 << sh); + + srl %l0,%i2,%i2 ! i0 = (ly >> sh); + cmp %l1,%l0 ! if ((i0 << sh) == ly); + + and %i2,1,%i2 ! i0 &= 1; + + sub %g0,%i2,%i2 + add %i2,2,%i2 ! i0 = 2 - i0; + + move %icc,%i2,%o5 ! yisint = i0; + + ba .neg12 + nop +.small12: + sub %g0,%i2,%i2 + cmp %l0,%g0 ! if (ly != 0); + + add %i2,20,%i2 ! sh = (20 - (expy - 0x3ff); + bne .neg12 ! if (ly != 0); + or %g0,0,%o5 ! yisint = 0; + + srl %l1,%i2,%l0 ! i0 = (hy >> sh); + + sll %l0,%i2,%l0 ! (i0 << sh); + + srl %l1,%i2,%i2 ! i0 = (hy >> sh); + cmp %l0,%l1 ! if ((i0 << sh) == hy); + + and %i2,1,%i2 ! i0 &= 1; + + sub %g0,%i2,%i2 + add %i2,2,%i2 ! i0 = 2 - i0; + + move %icc,%i2,%o5 ! yisint = i0; +.neg12: + cmp %o5,%g0 + be .u12 + nop +.pos12: + sll %o5,11,%o5 + cmp %l2,MASK_0x000fffff ! if (exp > 0xfffff); + + bg,pt %icc,.cont12 ! if (exp > 0xfffff); + st %o5,[%fp+%o7] + + std %f32,[%fp+tmp5]; + std %f54,[%fp+tmp6]; + ldd [%fp+tmp0_hi],%f32 + ldd [%fp+tmp_mant],%f54 + + or %g0,1074,%o5 + fand %f32,%f54,%f32 ! y0 = vis_fand(x, MMANT); + + sll %o5,20,%o5 + fxtod %f32,%f32 ! ax = (double) ((long long *) & y0)[0] + + std %f32,[%fp+tmp0_hi] ! exp = ((unsigned int*) & ax)[0]; + fand %f32,%f54,%f32 ! x = vis_fand(ax, MMANT); + + ld [%fp+tmp0_hi],%i2 ! exp = ((unsigned int*) & ax)[0]; + for %f32,DONE,%f32 ! x = vis_for(x, DONE); + + std %f32,[%fp+tmp0_hi]; + sub %i2,%o5,%l2 ! exp -= (1023 + 51) << 20; + and MASK_0x000fffff,%i2,%i4 ! hx &= 0xfffff; + ldd [%fp+tmp5],%f32 + or MASK_0x3ff00000,%i4,%i4 ! hx |= 0x3ff00000; + add %i4,2048,%i4 ! hx += 0x800; + ldd [%fp+tmp6],%f54 + and %i4,-4096,%i4 ! hx &= 0xfffff000; + + ba .cont12 + st %i4,[%fp+tmp1_hi] ! 
*(int*)&ax = hx; + + .align 16 +.update13: + cmp counter,2 + ble,pt %icc,.cont13 + sub px,stridex,%o5 + + ld [%fp+tmp_counter],%l1 + + stx %o5,[%fp+tmp_px] + add py,stridey,%o5 + + add %l1,counter,counter + stx %o5,[%fp+tmp_py] + + sub counter,2,counter + st counter,[%fp+tmp_counter] + ba .cont13 + or %g0,2,counter + + .align 16 +.update14: + cmp counter,1 + ble,pt %icc,.cont14 + fmovd DONE,%f30 + + ld [%fp+tmp_counter],%o2 + sub px,stridex,%o5 + + sub %o5,stridex,%o5 + stx py,[%fp+tmp_py] + + add %o2,counter,counter + sub %o5,stridex,%o5 + stx %o5,[%fp+tmp_px] + + sub counter,1,counter + st counter,[%fp+tmp_counter] + ba .cont14 + or %g0,1,counter + + .align 16 +.update15: + cmp counter,1 + ble,pt %icc,.cont15 + fmovd DONE,%f30 + + sub px,stridex,%o5 + + ld [%fp+tmp_counter],%o2 + sub %o5,stridex,%o5 + stx py,[%fp+tmp_py] + + add %o2,counter,counter + sub %o5,stridex,%o5 + stx %o5,[%fp+tmp_px] + + sub counter,1,counter + st counter,[%fp+tmp_counter] + ba .cont15 + or %g0,1,counter + + .align 16 +.spec0: + lda [py+4]%asi,%o5 ! ld ly; + lda [px]%asi,%f16 ! y0 = *px; + lda [px+4]%asi,%f17 ! y0 = *px; + orcc %l1,%o5,%g0 ! if (hy | ly) != 0; + + bne,pn %icc,1f + sethi %hi(0x7ff00000),%o5 + + st DONE_HI,[pz] + ba .update_point + st DONE_LO,[pz+4] +1: + cmp %l3,%o5 ! if (hx > 0x7ff00000); + bgu,a,pn %icc,6f ! if (hx > 0x7ff00000); + fmuld %f16,%f16,%f16 ! *pz = y0 * y0; + + bne,pt %icc,2f ! if (hx != 0x7ff00000); + orcc %l3,%i2,%g0 ! if (hx | lx) != 0; + + cmp %i2,0 ! if (lx) != 0; + bne,pn %icc,5f ! if (lx) != 0; + srl %o2,31,%o5 ! sy; + + st %l3,[pz] ! ((int*)pz)[0] = hx; + ba 3f + cmp %o5,0 ! if (sy == 0); +2: + bne,pt %icc,4f ! if (hx | lx) != 0; + srl %l0,31,%o5 ! sx; + + st %l3,[pz] ! ((int*)pz)[0] = hx; + srl %o2,31,%o5 ! sy; + cmp %o5,0 ! if (sy == 0); +3: + be,pt %icc,.update_point ! if (sy == 0); + st %i2,[pz+4] ! ((int*)pz)[1] = lx; + + ld [pz],%f16 ! *pz; + ld [pz+4],%f17 ! *pz; + fdivd DONE,%f16,%f16 ! 
*pz = DONE / *pz; + + st %f16,[pz] + ba .update_point + st %f17,[pz+4] +4: + cmp %o5,0 ! if (sx == 0); + bne,a,pt %icc,1f + nop + + st DONE_HI,[pz] ! *pz = DONE; + ba .update_point + st DONE_LO,[pz+4] ! *pz = DONE; +1: + fdivd DZERO,DZERO,%f16 ! *pz = DZERO / DZERO; + st %f16,[pz] + ba .update_point + st %f17,[pz+4] +5: + fmuld %f16,%f16,%f16 ! *pz = y0 * y0; +6: + st %f16,[pz] + ba .update_point + st %f17,[pz+4] + + .align 16 +.spec1: + lda [px]%asi,%f14 ! y0 = *px; + lda [px+4]%asi,%f15 ! y0 = *px; + sethi %hi(0x7ff00000),%o5 + lda [py+4]%asi,%i4 ! ld ly; + srl %o2,31,%o2 ! sy + cmp %l3,%o5 ! if (hx >= 0x7ff00000); + bcc,pn %icc,3f + nop + + cmp %l1,%o5 ! if (hy > 0x7ff00000); + bgu,a,pt %icc,.spec1_nan_inf ! if (hy > 0x7ff00000); + lda [py]%asi,%f16 ! ld y + + bne,a,pt %icc,1f ! if (hy != 0x7ff00000); + cmp %i2,0 ! if (lx != 0); + + ba 2f ! if (hy == 0x7ff00000); + cmp %i4,0 ! if (ly != 0); +1: + bne,pt %icc,7f ! if (lx != 0); + nop + + cmp %l3,0 ! if (hx == 0); + be,a,pt %icc,6f ! if (hx == 0); + st %l3,[pz] ! ((int*)pz)[0] = hx; + + cmp %l3,MASK_0x3ff00000 ! if (hx == 0x3ff00000); + be,a,pn %icc,6f ! if (hx == 0x3ff00000); + st %l3,[pz] ! ((int*)pz)[0] = hx; + + ba 5f + cmp %l3,%o5 ! if (hx != 0x7ff00000); +3: + bgu,a,pt %icc,.spec1_nan_inf ! if (hx > 0x7ff00000); + lda [py]%asi,%f16 ! ld y + + bne,a,pn %icc,1f ! if (hx != 0x7ff00000); + cmp %l1,%o5 ! if (hy > 0x7ff00000); + + cmp %i2,0 ! if (lx != 0); + bne,a,pt %icc,.spec1_nan_inf ! if (lx != 0); + lda [py]%asi,%f16 ! ld y + + cmp %l1,%o5 ! if (hy > 0x7ff00000); +1: + bgu,a,pt %icc,.spec1_nan_inf ! if (hy > 0x7ff00000); + lda [py]%asi,%f16 ! ld y + + bne,pn %icc,3f ! if (hy != 0x7ff00000); + nop + + cmp %i4,0 ! if (ly != 0); +2: + bne,a,pn %icc,.spec1_nan_inf ! if (ly != 0); + lda [py]%asi,%f16 ! ld y + + cmp %l3,MASK_0x3ff00000 ! if (hx != 0x3ff00000); + bne,pn %icc,1f ! if (hx != 0x3ff00000); + cmp %i2,0 ! if (lx != 0); + + bne,pn %icc,1f ! if (lx != 0); + nop + + ld [py],%f16 ! ld y + ld [py+4],%f17 ! 
ld y + fzero %f14 + fmuld %f16,%f14,%f14 ! *pz = *py * 0.0; + st %f14,[pz] + ba .update_point + st %f15,[pz+4] +1: + sub %l3,MASK_0x3ff00000,%o7 ! (hx - 0x3ff00000); + srlx %o7,63,%l2 ! (hx - 0x3ff00000) >> 63; + + cmp %l2,%o2 ! if ((hx < 0x3ff00000) == sy) + be,a,pn %icc,1f ! if ((hx < 0x3ff00000) == sy) + st %l1,[pz] ! ((int*)pz)[0] = hy; + + st DZERO_HI,[pz] ! *pz = DZERO; + ba .update_point + st DZERO_LO,[pz+4] ! *pz = DZERO; +1: + ba .update_point + st %i4,[pz+4] ! ((int*)pz)[0] = ly; +3: + cmp %o0,1086 ! if (expy >= 0x43e); + bge,pn %icc,4f ! if (expy >= 0x43e) + nop + + srl %l0,31,%l0 ! sx; + cmp %l0,0 ! if (sx == 0); + be,pn %icc,2f + or %g0,0,%l4 + + cmp %o0,1076 ! if (expy >= 0x434); + + bge,pn %icc,2f ! if (expy >= 0x434); + or %g0,2,%l4 ! yisint = 2; + + cmp %o0,1023 ! if (expy < 0x3ff); + bl,a,pn %icc,2f ! if (expy < 0x3ff); + or %g0,0,%l4 ! yisint = 0; + + cmp %o0,1043 ! if (expy <= (20 + 0x3ff)); + ble,pn %icc,1f + sub %o0,1023,%l2 ! (expy - 0x3ff); + + sub %g0,%l2,%l2 ! 0 - (expy - 0x3ff); + add %l2,52,%l2 ! sh = 52 - (expy - 0x3ff); + srl %i4,%l2,%o0 ! i0 = ly >> sh; + sll %o0,%l2,%l2 ! i0 << sh; + cmp %l2,%i4 ! if ((i0 << sh) != ly); + bne,a,pn %icc,2f ! if ((i0 << sh) != ly); + or %g0,0,%l4 ! yisint = 0; + + and %o0,1,%o0 ! i0 &= 1; + sub %g0,%o0,%o0 + + ba 2f + add %o0,2,%l4 ! yisint = 2 - (i0 & 1); +1: + cmp %i4,0 ! if (ly != 0) + bne,a,pn %icc,2f ! if (ly != 0) + or %g0,0,%l4 ! yisint = 0; + + sub %o0,1023,%l2 ! (expy - 0x3ff); + sub %g0,%l2,%l2 ! 0 - (expy - 0x3ff); + add %l2,20,%l2 ! sh = 20 - (expy - 0x3ff); + srl %l1,%l2,%o0 ! i0 = hy >> sh; + sll %o0,%l2,%l2 ! i0 << sh; + cmp %l2,%l1 ! if ((i0 << sh) != hy); + bne,a,pn %icc,2f ! if ((i0 << sh) != hy); + or %g0,0,%l4 ! yisint = 0; + + and %o0,1,%o0 ! i0 &= 1; + sub %g0,%o0,%o0 + add %o0,2,%l4 ! yisint = 2 - (i0 & 1); +2: + cmp %o2,0 ! if (sy == 0); + sll %l4,31,%l4 ! yisint << 31; + be,pt %icc,1f ! if (sy == 0); + add %l3,%l4,%l3 ! hx += yisint << 31; + + or %g0,%l4,%l3 ! 
hx = yisint << 31; + or %g0,0,%i2 ! lx = 0; +1: + st %l3,[pz] ! ((int*)pz)[0] = hx; + ba .update_point + st %i2,[pz+4] ! ((int*)pz)[1] = lx; +4: + cmp %i2,0 ! if (lx != 0); + bne,pn %icc,7f ! if (lx != 0); + nop + + cmp %l3,%o5 ! if (hx != 0x7ff00000); +5: + bne,pn %icc,7f ! if (hx != 0x7ff00000); + nop + + st %l3,[pz] ! ((int*)pz)[0] = hx; +6: + cmp %o2,0 ! if (sy == 0); + be,pt %icc,.update_point + st %i2,[pz+4] ! ((int*)pz)[1] = lx; + + ld [pz],%f14 ! ld *pz; + ld [pz+4],%f15 ! ld *pz; + fdivd DONE,%f14,%f14 ! *pz = DONE / *pz; + st %f14,[pz] + ba .update_point + st %f15,[pz+4] +7: + sub %l3,MASK_0x3ff00000,%o7 ! hx - 0x3ff00000; + srlx %o7,63,%l2 ! (hx - 0x3ff00000) >> 63; + cmp %l2,%o2 ! if (hx < 0x3ff00000) == sy); + be,a,pn %icc,1f ! if (hx < 0x3ff00000) == sy); + ldd [EXPTBL-ind_HUGE],%f14 ! y0 = _HUGE; + + ldd [EXPTBL-ind_TINY],%f14 ! y0 = _TINY; +1: + fmuld %f14,%f14,%f14 ! *pz = y0 * y0 + + st %f14,[pz] + ba .update_point + st %f15,[pz+4] + + .align 16 +.spec1_nan_inf: + lda [py+4]%asi,%f17 ! ld y + fmuld %f14,%f16,%f16 ! *pz = *px * *py + st %f16,[pz] + ba .update_point + st %f17,[pz+4] + + + .align 16 +.update_point: + add px,stridex,px + ba .begin1 + add py,stridey,py + + .align 64 +.stridex_zero: + + sra stridez,0,stridez + ld [%i1],%f18 ! y0 = px[0]; + ld [%i1+4],%f19 ! y0 = px[0]; + + sra %i4,0,stridey + sethi %hi(0xffc00),MASK_0x000fffff + ldd [%l0+80],%f12 ! ld MMANT + + sllx stridez,3,stridez + add MASK_0x000fffff,0x3ff,MASK_0x000fffff + ldd [%l0+8],%f56 ! ld DONE + + sllx stridey,3,stridey + ldd [%l0+88],%f14 ! ld MROUND + + ldd [%l0+96],%f16 ! ld MHI20 + cmp %o0,MASK_0x000fffff ! if (exp <= 0xfffff) + + bg,pt %icc,1f + srl %o0,20,%o0 ! exp = (exp >> 20); + + fxtod %f18,%f18 ! y0 = (double) ((long long *) & y0)[0]; + std %f18,[%fp+tmp0_hi] ! exp = ((unsigned int*) & y0)[0]; + or %g0,1074,%i2 + ld [%fp+tmp0_hi],%o0 ! exp = ((unsigned int*) & y0)[0]; + srl %o0,20,%o0 ! exp = (exp >> 20); + sub %o0,%i2,%o0 ! 
exp -= (1023 + 51) << 20; +1: + ldd [%l0+24],MHI32 + sub %o0,2046,%l5 ! exp = exp - 2046; + fand %f18,%f12,%f18 ! x = vis_fand(y0, MMANT); + + ldd [%l0+48],%f10 ! ld KA1 + for %f18,%f56,%f18 ! x = vis_for(x, DONE); + + ldd [EXPTBL-ind_HI],%f28 ! ld KA1_HI + fpadd32 %f18,%f14,%f44 ! ax = vis_fpadd32(x, MROUND); + + ldd [%l0+32],%f46 ! ld KA5 + fand %f44,%f16,%f60 ! ax = vis_fand(ax, MHI20); + + std %f60,[%fp+tmp0_hi] ! itmp0 = (hx >> 20); + faddd %f18,%f60,%f50 ! ux = x + ax; + + ldd [EXPTBL-ind_LO],%f52 ! ld KA1_LO + fsubd %f18,%f60,%f30 ! u = x - ax; + + ld [%fp+tmp0_hi],%i2 ! itmp0 = (hx >> 20); + fdivd %f56,%f50,%f56 ! yd = DONE / ux; + fand %f50,MHI32,%f50 ! ux = vis_fand(ux, MHI32); + + srl %i2,20,%l3 ! itmp0 = (hx >> 20); + ldd [%l0+40],%f26 ! ld KA3 + + srl %i2,8,%i2 ! i = (hx >> 8); + add %l5,%l3,%l5 ! exp += itmp0; + + and %i2,4080,%o3 ! i = i & 0xff0; + sll %l5,8,%l3 ! itmp0 = exp << 8; + st %l3,[%fp+tmp1_hi] ! (double)itmp0; + fsubd %f50,%f60,%f60 ! dtmp0 = (ux - ax); + + add %o3,8,%i2 + ldd [%o3+LOGTBL],%f58 ! y = *(double *)((char*)__mt_constlog2 + i); + + ldd [%i2+LOGTBL],%f20 ! dtmp0 = *(double *)((char*)__mt_constlog2 + i + 8); + + ld [%fp+tmp1_hi],%f8 ! (double)itmp0; + + fitod %f8,%f62 ! (double)itmp0; + + faddd %f58,%f62,%f22 ! y += (double)itmp0; + + fsubd %f18,%f60,%f62 ! s_l = (x - dtmp0); + fmuld %f30,%f56,%f16 ! s = u * yd; + + fmuld %f10,%f56,%f8 ! dtmp0 = KA1 * yd; + fand %f16,MHI32,%f58 ! s_h = vis_fand(s, MHI32); + + ldd [%l0+56],HTHRESH + fmuld %f16,%f16,%f18 ! y = s * s; + + ldd [%l0+64],LTHRESH + fmuld %f58,%f50,%f60 ! dtmp0 = s_h * ux; + + ldd [%l0+72],XKB4 + fmuld %f28,%f58,%f50 ! yd = KA1_HI * s_h; + + ldd [EXPTBL-ind_KB1],XKB1 + fmuld %f46,%f18,%f56 ! dtmp8 = KA5 * y; + + ldd [EXPTBL-ind_KB2],XKB2 + fmuld %f58,%f62,%f46 ! dtmp1 = s_h * s_l; + fsubd %f30,%f60,%f62 ! s_l = u - dtmp0; + + ldd [EXPTBL-ind_KB3],XKB3 + fmuld %f52,%f58,%f10 ! dtmp1 = KA1_LO * s_h; + faddd %f22,%f50,%f28 ! 
m_h = y + yd; + + ldd [EXPTBL-ind_KB5],XKB5 + faddd %f56,%f26,%f58 ! dtmp8 = dtmp8 + KA3; + + add EXPTBL,8,EXPTBL_P8 + fsubd %f62,%f46,%f46 ! s_l -= dtmp1; + + fsubd %f28,%f22,%f60 ! dtmp2 = m_h - y; + + st %g0,[%fp+tmp0_lo] ! *((int*)&dtmp0 + 1) = 0; + faddd %f20,%f10,%f56 ! dtmp0 += dtmp1; + + st %g0,[%fp+tmp1_lo] ! *((int*)&dtmp0 + 1) = 0; + fmuld %f58,%f18,%f18 ! dtmp8 = dtmp8 * y; + + st %g0,[%fp+tmp2_lo] ! *((int*)&dtmp0 + 1) = 0; + fmuld %f8,%f46,%f62 ! s_l = dtmp0 * s_l; + + fsubd %f60,%f50,%f10 ! dtmp2 -= yd; + + fmuld %f18,%f16,%f58 ! s = dtmp8 * s; + + fsubd %f10,%f62,%f46 ! dtmp2 -= s_l; + + fsubd %f58,%f46,%f50 ! y = s - dtmp2; + + faddd %f50,%f56,%f60 ! y += dtmp0; + + faddd %f60,%f28,%f18 ! dtmp0 = y + m_h; + + fand %f18,MHI32,s_h ! s_h = vis_fand(dtmp0, MHI32); + + fsubd s_h,%f28,%f62 ! dtmp0 = (s_h - m_h); + + fsubd %f60,%f62,yr ! yr = y - dtmp0; + +.xbegin: + ld [%fp+tmp_counter],counter + ldx [%fp+tmp_py],py + st %g0,[%fp+tmp_counter] +.xbegin1: + subcc counter,1,counter + bneg,pn %icc,.end + nop + + lda [py]0x82,%l2 ! (Y0_3) hy = *py; + + lda [py]0x82,%f18 ! (Y0_3) yd = *py; + lda [py+4]%asi,%f19 ! (Y0_3) yd = *py; + + sra %l2,20,%l5 ! (Y0_3) expy = hy >> 20; + + and %l5,0x7ff,%l5 ! (Y0_3) expy &= 0x7ff; + + cmp %l5,959 ! (Y0_3) if (expy < 0x3fb); + + bl,pn %icc,.xspec0 ! (Y0_3) if (expy < 0x3fb); + nop + + cmp %l5,1086 ! (Y0_2) if (expy >= 0x43e); + + bge,pn %icc,.xspec1 ! (Y0_2) if (expy >= 0x43e); + nop + + add py,stridey,py ! y += stridey; + fand %f18,MHI32,%f12 ! (Y0_2) s = vis_fand(yd, MHI32); + + lda [py]0x82,%l5 ! (Y1_2) hy = *py; + + lda [py]0x82,%f10 ! (Y1_2) yd = *py; + lda [py+4]%asi,%f11 ! (Y1_2) yd = *py; + + sra %l5,20,%l5 ! (Y1_2) expy = hy >> 20; + + and %l5,0x7ff,%l5 ! (Y1_2) expy &= 0x7ff; + + cmp %l5,959 ! (Y1_2) if (expy < 0x3fb); + add py,stridey,py ! y += stridey; + fmuld s_h,%f12,%f50 ! (Y0_2) s = s_h * s; + fsubd %f18,%f12,%f56 ! (Y0_2) dtmp0 = (yd - s); + + fmuld %f18,yr,%f26 ! 
(Y0_2) dtmp1 = yd * yr; + bl,pn %icc,.xupdate0 ! (Y1_2) if (expy < 0x3fb); + nop +.xcont0: + cmp %l5,1086 ! (Y1_2) if (expy >= 0x43e); + bge,pn %icc,.xupdate1 ! (Y0_2) if (expy >= 0x43e); + nop +.xcont1: + fmuld %f56,s_h,%f58 ! (Y0_2) dtmp0 *= s_h; + fand %f10,MHI32,%f12 ! (Y1_2) s = vis_fand(yd, MHI32); + + fcmped %fcc0,%f50,HTHRESH ! (Y0_2) if (s > HTHRESH); + + faddd %f58,%f26,%f48 ! (Y0_2) yd = dtmp0 + dtmp1; + + lda [py]0x82,%l5 ! (Y2_2) hy = *py; + fmovdg %fcc0,HTHRESH,%f50 ! (Y0_2) s = HTHRESH; + + fmovdg %fcc0,DZERO,%f48 ! (Y0_2) yd = DZERO; + + fcmped %fcc1,%f50,LTHRESH ! (Y0_2) if (s < LTHRESH); + + lda [py]0x82,%f14 ! (Y2_2) yd = *py; + lda [py+4]%asi,%f15 ! (Y2_2) yd = *py; + + sra %l5,20,%l5 ! (Y2_2) expy = hy >> 20; + + fmovdl %fcc1,DZERO,%f48 ! (Y0_2) yd = DZERO; + + add py,stridey,py ! y += stridey; + and %l5,0x7ff,%l5 ! (Y2_2) expy &= 0x7ff; + fmovdl %fcc1,LTHRESH,%f50 ! (Y0_2) s = LTHRESH; + + cmp %l5,959 ! (Y2_2) if (expy < 0x3fb); + + fmuld s_h,%f12,%f16 ! (Y1_2) s = s_h * s; + bl,pn %icc,.xupdate2 ! (Y2_2) if (expy < 0x3fb); + fsubd %f10,%f12,%f56 ! (Y1_2) dtmp0 = (yd - s); +.xcont2: + cmp %l5,1086 ! (Y2_2) if (expy >= 0x43e); + fmuld %f10,yr,%f8 ! (Y1_2) dtmp1 = yd * yr; + faddd %f50,%f48,%f28 ! (Y0_2) dtmp0 = (s + yd); + + lda [py]0x82,%l5 ! (Y0_3) hy = *py; + bge,pn %icc,.xupdate3 ! (Y2_2) if (expy >= 0x43e); + nop +.xcont3: + fmuld %f56,s_h,%f58 ! (Y1_2) dtmp0 *= s_h; + fand %f14,MHI32,%f44 ! (Y2_2) s = vis_fand(yd, MHI32); + + fcmped %fcc0,%f16,HTHRESH ! (Y1_2) if (s > HTHRESH); + + fdtoi %f28,%f3 ! (Y0_2) u = (double)(int)dtmp0; + + st %f3,[%fp+tmp3] ! (Y0_2) ind = (int)dtmp0; + + faddd %f58,%f8,%f10 ! (Y1_2) yd = dtmp0 + dtmp1; + + lda [py]0x82,%f18 ! (Y0_3) yd = *py; + lda [py+4]%asi,%f19 ! (Y0_3) yd = *py; + fmovdg %fcc0,HTHRESH,%f16 ! (Y1_2) s = HTHRESH; + + fitod %f3,%f58 ! (Y0_2) u = (double)(int)dtmp0; + + fmovdg %fcc0,DZERO,%f10 ! (Y1_2) yd = DZERO; + + sra %l5,20,%l5 ! (Y0_3) expy = hy >> 20; + fcmped %fcc1,%f16,LTHRESH ! 
(Y1_2) if (s < LTHRESH); + + and %l5,0x7ff,%l5 ! (Y0_3) expy &= 0x7ff; + fsubd %f50,%f58,%f54 ! (Y0_2) y = s - u; + + cmp %l5,959 ! (Y0_3) if (expy < 0x3fb); + + bl,pn %icc,.xupdate4 ! (Y0_3) if (expy < 0x3fb); + nop +.xcont4: + fmovdl %fcc1,DZERO,%f10 ! (Y1_2) yd = DZERO; + + fmovdl %fcc1,LTHRESH,%f16 ! (Y1_2) s = LTHRESH; + + faddd %f54,%f48,%f54 ! (Y0_2) y = y + yd; + + ld [%fp+tmp3],%o2 ! (Y0_2) ind = (int)dtmp0; + + + fsubd %f14,%f44,%f50 ! (Y2_1) dtmp0 = (yd - s); + + cmp %l5,1086 ! (Y0_2) if (expy >= 0x43e); + + fmuld s_h,%f44,%f44 ! (Y2_1) s = s_h * s; + bge,pn %icc,.xupdate5 ! (Y0_2) if (expy >= 0x43e); + faddd %f16,%f10,%f22 ! (Y1_1) dtmp0 = (s + yd); +.xcont5: + sra %o2,8,%o0 ! (Y0_1) ind >>= 8; + add py,stridey,py ! y += stridey; + fmuld %f14,yr,%f20 ! (Y2_1) dtmp1 = yd * yr; + + add %o0,1021,%i1 ! (Y0_1) eflag = (ind + 1021); + fmuld XKB5,%f54,%f48 ! (Y0_1) dtmp0 = XKB5 * y; + + sub %g0,%o0,%o3 ! (Y0_1) gflag = (1022 - ind); + fmuld %f50,s_h,%f52 ! (Y2_1) dtmp0 *= s_h; + fand %f18,MHI32,%f12 ! (Y0_2) s = vis_fand(yd, MHI32); + + sra %i1,31,%o1 ! (Y0_1) eflag = eflag >> 31; + add %o3,1022,%l0 ! (Y0_1) gflag = (1022 - ind); + fcmped %fcc0,%f44,HTHRESH ! (Y2_1) if (s > HTHRESH); + + sra %l0,31,%o4 ! (Y0_1) gflag = gflag >> 31; + and %o1,54,%i4 ! (Y0_1) itmp0 = 54 & eflag; + fdtoi %f22,%f4 ! (Y1_1) u = (double)(int)dtmp0; + + add %o0,%i4,%i2 ! (Y0_1) ind = ind + itmp0; + and %o4,52,%l3 ! (Y0_1) itmp1 = 52 & gflag; + st %f4,[%fp+tmp4] ! (Y1_1) ind = (int)dtmp0; + faddd %f48,XKB4,%f60 ! (Y0_1) dtmp1 = dtmp0 + XKB4; + + sub %i2,%l3,%l2 ! (Y0_1) ind = ind - itmp1; + sub %o1,%o4,%o4 ! (Y0_1) ind = eflag - gflag; + faddd %f52,%f20,%f62 ! (Y2_1) yd = dtmp0 + dtmp1; + + sll %l2,20,%o3 ! (Y0_1) ind <<= 20; + lda [py]0x82,%l5 ! (Y1_2) hy = *py; + fmovdg %fcc0,HTHRESH,%f44 ! (Y2_1) s = HTHRESH; + + st %o3,[%fp+tmp0_hi] ! (Y0_1) *(int*)&dtmp0 = ind; + fitod %f4,%f48 ! (Y1_1) u = (double)(int)dtmp0; + + fmuld %f60,%f54,%f60 ! 
(Y0_1) dtmp2 = dtmp1 * y; + + lda [py]0x82,%f20 ! (Y1_2) yd = *py; + lda [py+4]%asi,%f21 ! (Y1_2) yd = *py; + fmovdg %fcc0,DZERO,%f62 ! (Y2_1) yd = DZERO; + + fcmped %fcc1,%f44,LTHRESH ! (Y2_1) if (s < LTHRESH); + + fsubd %f16,%f48,%f50 ! (Y1_1) y = s - u; + + faddd %f60,XKB3,%f60 ! (Y0_1) dtmp3 = dtmp2 + XKB3; + + sra %l5,20,%l5 ! (Y1_2) expy = hy >> 20; + + fmovdl %fcc1,DZERO,%f62 ! (Y2_1) yd = DZERO; + + and %l5,0x7ff,%l5 ! (Y1_2) expy &= 0x7ff; + fmovdl %fcc1,LTHRESH,%f44 ! (Y2_1) s = LTHRESH; + + cmp %l5,959 ! (Y1_2) if (expy < 0x3fb); + fmuld %f60,%f54,%f48 ! (Y0_1) dtmp4 = dtmp3 * y; + faddd %f50,%f10,%f52 ! (Y1_1) y = y + yd; + + ld [%fp+tmp4],%o1 ! (Y1_1) ind = (int)dtmp0; + + add py,stridey,py ! y += stridey; + fmuld s_h,%f12,%f50 ! (Y0_2) s = s_h * s; + fsubd %f18,%f12,%f56 ! (Y0_2) dtmp0 = (yd - s); + + fmuld %f18,yr,%f26 ! (Y0_2) dtmp1 = yd * yr; + bl,pn %icc,.xupdate6 ! (Y1_2) if (expy < 0x3fb); + faddd %f44,%f62,%f28 ! (Y2_1) dtmp0 = (s + yd); +.xcont6: + sra %o1,8,%o3 ! (Y1_1) ind >>= 8; + cmp %l5,1086 ! (Y1_2) if (expy >= 0x43e); + fmuld XKB5,%f52,%f22 ! (Y1_1) dtmp0 = XKB5 * y; + faddd %f48,XKB2,%f14 ! (Y0_1) dtmp5 = dtmp4 + XKB2; + + add %o3,1021,%o0 ! (Y1_1) eflag = (ind + 1021); + bge,pn %icc,.xupdate7 ! (Y0_2) if (expy >= 0x43e); + nop +.xcont7: + sub %g0,%o3,%i2 ! (Y1_1) gflag = (1022 - ind); + fmuld %f56,s_h,%f58 ! (Y0_2) dtmp0 *= s_h; + fand %f20,MHI32,%f12 ! (Y1_2) s = vis_fand(yd, MHI32); + + sra %o0,31,%l3 ! (Y1_1) eflag = eflag >> 31; + add %i2,1022,%l2 ! (Y1_1) gflag = (1022 - ind); + fcmped %fcc0,%f50,HTHRESH ! (Y0_2) if (s > HTHRESH); + + sra %l2,31,%o7 ! (Y1_1) gflag = gflag >> 31; + and %l3,54,%i1 ! (Y1_1) itmp0 = 54 & eflag; + fdtoi %f28,%f3 ! (Y2_1) u = (double)(int)dtmp0; + + add %o3,%i1,%l0 ! (Y1_1) ind = ind + itmp0; + and %o7,52,%l1 ! (Y1_1) itmp1 = 52 & gflag; + st %f3,[%fp+ind_buf] ! (Y2_1) ind = (int)dtmp0; + faddd %f22,XKB4,%f60 ! (Y1_1) dtmp1 = dtmp0 + XKB4; + + sub %l0,%l1,%i4 ! 
(Y1_1) ind = ind - itmp1; + sub %l3,%o7,%o7 ! (Y1_1) ind = eflag - gflag; + faddd %f58,%f26,%f48 ! (Y0_2) yd = dtmp0 + dtmp1; + + sll %i4,20,%i2 ! (Y1_1) ind <<= 20; + lda [py]0x82,%l5 ! (Y2_2) hy = *py; + fmovdg %fcc0,HTHRESH,%f50 ! (Y0_2) s = HTHRESH; + + st %i2,[%fp+tmp1_hi] ! (Y1_1) *(int*)&dtmp0 = ind; + fitod %f3,%f18 ! (Y2_1) u = (double)(int)dtmp0; + + fmuld %f60,%f52,%f60 ! (Y1_1) dtmp2 = dtmp1 * y; + + fmuld %f14,%f54,%f56 ! (Y0_1) dtmp6 = dtmp5 * y; + fmovdg %fcc0,DZERO,%f48 ! (Y0_2) yd = DZERO; + + fcmped %fcc1,%f50,LTHRESH ! (Y0_2) if (s < LTHRESH); + + lda [py]0x82,%f26 ! (Y2_2) yd = *py; + lda [py+4]%asi,%f27 ! (Y2_2) yd = *py; + fsubd %f44,%f18,%f18 ! (Y2_1) y = s - u; + + faddd %f60,XKB3,%f44 ! (Y1_1) dtmp3 = dtmp2 + XKB3; + + sra %l5,20,%l5 ! (Y2_2) expy = hy >> 20; + and %o2,255,%o2 ! (Y0_1) i = ind & 0xff; + faddd %f56,XKB1,%f58 ! (Y0_1) dtmp7 = dtmp6 + XKB1; + + sll %o2,4,%l2 ! (Y0_1) i = i << 4; + fmovdl %fcc1,DZERO,%f48 ! (Y0_2) yd = DZERO; + + add py,stridey,py ! y += stridey; + and %l5,0x7ff,%l5 ! (Y2_2) expy &= 0x7ff; + fmovdl %fcc1,LTHRESH,%f50 ! (Y0_2) s = LTHRESH; + + cmp %l5,959 ! (Y2_2) if (expy < 0x3fb); + ldd [EXPTBL+%l2],%f22 ! (Y0_1) u = *(double*)((char*)__mt_constexp2 + i); + faddd %f18,%f62,%f18 ! (Y2_1) y = y + yd; + fmuld %f44,%f52,%f62 ! (Y1_1) dtmp4 = dtmp3 * y; + + ld [%fp+ind_buf],%l1 ! (Y2_1) ind = (int)dtmp0; + fmuld %f58,%f54,%f54 ! (Y0_1) y = dtmp7 * y; + + fmuld s_h,%f12,%f16 ! (Y1_2) s = s_h * s; + bl,pn %icc,.xupdate8 ! (Y2_2) if (expy < 0x3fb); + fsubd %f20,%f12,%f56 ! (Y1_2) dtmp0 = (yd - s); +.xcont8: + cmp %l5,1086 ! (Y2_2) if (expy >= 0x43e); + fmuld %f20,yr,%f8 ! (Y1_2) dtmp1 = yd * yr; + faddd %f50,%f48,%f28 ! (Y0_2) dtmp0 = (s + yd); + + sra %l1,8,%o2 ! (Y2_1) ind >>= 8; + lda [py]0x82,%l5 ! (Y0_3) hy = *py; + fmuld XKB5,%f18,%f20 ! (Y2_1) dtmp0 = XKB5 * y; + faddd %f62,XKB2,%f12 ! (Y1_1) dtmp5 = dtmp4 + XKB2; + + add %o2,1021,%l0 ! (Y2_1) eflag = (ind + 1021); + bge,pn %icc,.xupdate9 ! 
(Y2_2) if (expy >= 0x43e); + nop +.xcont9: + sub %g0,%o2,%l3 ! (Y2_1) gflag = (1022 - ind); + ldd [EXPTBL_P8+%l2],%f14 ! (Y0_1) dtmp0 = *(double*)((char*)__mt_constexp2 + i + 8); + fmuld %f56,s_h,%f58 ! (Y1_2) dtmp0 *= s_h; + fand %f26,MHI32,%f44 ! (Y2_2) s = vis_fand(yd, MHI32); + + sra %l0,31,%o0 ! (Y2_1) eflag = eflag >> 31; + add %l3,1022,%i4 ! (Y2_1) gflag = (1022 - ind); + fmuld %f22,%f54,%f56 ! (Y0_1) dtmp1 = u * y; + fcmped %fcc0,%f16,HTHRESH ! (Y1_2) if (s > HTHRESH); + + sra %i4,31,%o5 ! (Y2_1) gflag = gflag >> 31; + and %o0,54,%i2 ! (Y2_1) itmp0 = 54 & eflag; + fdtoi %f28,%f3 ! (Y0_2) u = (double)(int)dtmp0; + + add %o2,%i2,%i1 ! (Y2_1) ind = ind + itmp0; + and %o5,52,%l2 ! (Y2_1) itmp1 = 52 & gflag; + st %f3,[%fp+tmp3] ! (Y0_2) ind = (int)dtmp0; + faddd %f20,XKB4,%f60 ! (Y2_1) dtmp1 = dtmp0 + XKB4; + + sub %i1,%l2,%o3 ! (Y2_1) ind = ind - itmp1; + sub %o0,%o5,%o5 ! (Y2_1) ind = eflag - gflag; + faddd %f58,%f8,%f10 ! (Y1_2) yd = dtmp0 + dtmp1; + + sll %o3,20,%l3 ! (Y2_1) ind <<= 20; + lda [py]0x82,%f28 ! (Y0_3) yd = *py; + lda [py+4]%asi,%f29 ! (Y0_3) yd = *py; + fmovdg %fcc0,HTHRESH,%f16 ! (Y1_2) s = HTHRESH; + + st %l3,[%fp+tmp2_hi] ! (Y2_1) *(int*)&dtmp0 = ind; + fitod %f3,%f58 ! (Y0_2) u = (double)(int)dtmp0; + + fmuld %f60,%f18,%f60 ! (Y2_1) dtmp2 = dtmp1 * y; + faddd %f14,%f56,%f20 ! (Y0_1) dtmp2 = dtmp0 + dtmp1; + + fmuld %f12,%f52,%f56 ! (Y1_1) dtmp6 = dtmp5 * y; + fmovdg %fcc0,DZERO,%f10 ! (Y1_2) yd = DZERO; + + sra %l5,20,%l5 ! (Y0_3) expy = hy >> 20; + fcmped %fcc1,%f16,LTHRESH ! (Y1_2) if (s < LTHRESH); + + and %l5,0x7ff,%l5 ! (Y0_3) expy &= 0x7ff; + fsubd %f50,%f58,%f54 ! (Y0_2) y = s - u; + + cmp %l5,959 ! (Y0_3) if (expy < 0x3fb); + faddd %f60,XKB3,%f60 ! (Y2_1) dtmp3 = dtmp2 + XKB3; + + and %o1,255,%o1 ! (Y1_1) i = ind & 0xff; + bl,pn %icc,.xupdate10 ! (Y0_3) if (expy < 0x3fb); + faddd %f56,XKB1,%f8 ! (Y1_1) dtmp7 = dtmp6 + XKB1; +.xcont10: + sll %o1,4,%l0 ! (Y1_1) i = i << 4; + fmovdl %fcc1,DZERO,%f10 ! 
(Y1_2) yd = DZERO; + + nop + ba 1f + fmovdl %fcc1,LTHRESH,%f16 ! (Y1_2) s = LTHRESH; + + .align 16 +1: + subcc counter,2,counter + ldd [EXPTBL+%l0],%f56 ! (Y1_1) u = *(double*)((char*)__mt_constexp2 + i); + fmuld %f60,%f18,%f58 ! (Y2_1) dtmp4 = dtmp3 * y; + faddd %f54,%f48,%f54 ! (Y0_2) y = y + yd; + + fmuld %f8,%f52,%f60 ! (Y1_1) y = dtmp7 * y; + ld [%fp+tmp3],%o2 ! (Y0_2) ind = (int)dtmp0; + bneg,pn %icc,.xtail + faddd %f20,%f22,%f12 ! (Y0_1) u = dtmp2 + u; + +.xmain_loop: + cmp %l5,1086 ! (Y0_2) if (expy >= 0x43e); + add %o4,513,%o4 ! (Y0_0) ind += 513; + ldd [%fp+tmp0_hi],%f52 ! (Y0_0) *(int*)&dtmp0 = ind; + fsubd %f26,%f44,%f50 ! (Y2_1) dtmp0 = (yd - s); + + fmuld s_h,%f44,%f44 ! (Y2_1) s = s_h * s; + sra %o2,8,%o0 ! (Y0_1) ind >>= 8; + bge,pn %icc,.xupdate11 ! (Y0_2) if (expy >= 0x43e); + faddd %f16,%f10,%f22 ! (Y1_1) dtmp0 = (s + yd); +.xcont11: + sll %o4,3,%l2 ! (Y0_0) ind *= 8; + add py,stridey,py ! y += stridey; + fmuld %f26,yr,%f20 ! (Y2_1) dtmp1 = yd * yr; + faddd %f58,XKB2,%f14 ! (Y2_0) dtmp5 = dtmp4 + XKB2; + + add %o0,1021,%i1 ! (Y0_1) eflag = (ind + 1021); + ldd [%l2+EXPTBL],%f62 ! (Y0_0) dtmp1 = (*(double*)((char*)__mt_constexp2 + ind); + fmuld XKB5,%f54,%f48 ! (Y0_1) dtmp0 = XKB5 * y; + fpadd32 %f12,%f52,%f58 ! (Y0_0) u = vis_fpadd32(u, dtmp0); + + sub %g0,%o0,%o3 ! (Y0_1) gflag = (1022 - ind); + ldd [EXPTBL_P8+%l0],%f8 ! (Y1_0) dtmp0 = *(double*)((char*)__mt_constexp2 + i + 8); + fand %f28,MHI32,%f12 ! (Y0_2) s = vis_fand(yd, MHI32); + fmuld %f50,s_h,%f52 ! (Y2_1) dtmp0 *= s_h; + + sra %i1,31,%o1 ! (Y0_1) eflag = eflag >> 31; + add %o3,1022,%l0 ! (Y0_1) gflag = (1022 - ind); + fmuld %f56,%f60,%f26 ! (Y1_0) dtmp1 = u * y; + fcmped %fcc0,%f44,HTHRESH ! (Y2_1) if (s > HTHRESH); + + sra %l0,31,%o4 ! (Y0_1) gflag = gflag >> 31; + and %o1,54,%i4 ! (Y0_1) itmp0 = 54 & eflag; + fmuld %f58,%f62,%f6 ! (Y0_0) dtmp1 = u * dtmp1; + fdtoi %f22,%f4 ! (Y1_1) u = (double)(int)dtmp0; + + add %o0,%i4,%i2 ! (Y0_1) ind = ind + itmp0; + and %o4,52,%l3 ! 
(Y0_1) itmp1 = 52 & gflag; + st %f4,[%fp+tmp4] ! (Y1_1) ind = (int)dtmp0; + faddd %f48,XKB4,%f60 ! (Y0_1) dtmp1 = dtmp0 + XKB4; + + sub %i2,%l3,%l2 ! (Y0_1) ind = ind - itmp1; + sub %o1,%o4,%o4 ! (Y0_1) ind = eflag - gflag; + st %f6,[pz] ! (Y0_0) write into memory + faddd %f52,%f20,%f62 ! (Y2_1) yd = dtmp0 + dtmp1; + + sll %l2,20,%o3 ! (Y0_1) ind <<= 20; + nop + st %o3,[%fp+tmp0_hi] ! (Y0_1) *(int*)&dtmp0 = ind; + fmovdg %fcc0,HTHRESH,%f44 ! (Y2_1) s = HTHRESH; + + lda [py]0x82,%l5 ! (Y1_2) hy = *py; + nop + fitod %f4,%f48 ! (Y1_1) u = (double)(int)dtmp0; + + fmuld %f60,%f54,%f60 ! (Y0_1) dtmp2 = dtmp1 * y; + nop + st %f7,[pz+4] ! (Y0_0) write into memory + faddd %f8,%f26,%f26 ! (Y1_0) dtmp2 = dtmp0 + dtmp1; + + lda [py]0x82,%f8 ! (Y1_2) yd = *py; + nop + fmuld %f14,%f18,%f52 ! (Y2_0) dtmp6 = dtmp5 * y; + fmovdg %fcc0,DZERO,%f62 ! (Y2_1) yd = DZERO; + + lda [py+4]%asi,%f9 ! (Y1_2) yd = *py; + add pz,stridez,pz ! z += stridez; + fcmped %fcc1,%f44,LTHRESH ! (Y2_1) if (s < LTHRESH); + + fsubd %f16,%f48,%f50 ! (Y1_1) y = s - u; + + faddd %f60,XKB3,%f60 ! (Y0_1) dtmp3 = dtmp2 + XKB3; + + sra %l5,20,%l5 ! (Y1_2) expy = hy >> 20; + and %l1,255,%l1 ! (Y2_0) i = ind & 0xff; + faddd %f52,XKB1,%f58 ! (Y2_0) dtmp7 = dtmp6 + XKB1; + + sll %l1,4,%l0 ! (Y2_0) i = i << 4; + fmovdl %fcc1,DZERO,%f62 ! (Y2_1) yd = DZERO; + + and %l5,0x7ff,%l5 ! (Y1_2) expy &= 0x7ff; + nop + fmovdl %fcc1,LTHRESH,%f44 ! (Y2_1) s = LTHRESH; + + cmp %l5,959 ! (Y1_2) if (expy < 0x3fb); + ldd [EXPTBL+%l0],%f20 ! (Y2_0) u = *(double*)((char*)__mt_constexp2 + i); + fmuld %f60,%f54,%f48 ! (Y0_1) dtmp4 = dtmp3 * y; + faddd %f50,%f10,%f52 ! (Y1_1) y = y + yd; + + add %o7,513,%o7 ! (Y1_0) ind += 513; + ld [%fp+tmp4],%o1 ! (Y1_1) ind = (int)dtmp0; + fmuld %f58,%f18,%f18 ! (Y2_0) y = dtmp7 * y; + faddd %f26,%f56,%f58 ! (Y1_0) u = dtmp2 + u; + + add py,stridey,py ! y += stridey; + ldd [%fp+tmp1_hi],%f60 ! (Y1_0) *(int*)&dtmp0 = ind; + fmuld s_h,%f12,%f50 ! (Y0_2) s = s_h * s; + fsubd %f28,%f12,%f56 ! 
(Y0_2) dtmp0 = (yd - s); + + sll %o7,3,%l3 ! (Y1_0) ind *= 8; + fmuld %f28,yr,%f26 ! (Y0_2) dtmp1 = yd * yr; + bl,pn %icc,.xupdate12 ! (Y1_2) if (expy < 0x3fb); + faddd %f44,%f62,%f28 ! (Y2_1) dtmp0 = (s + yd); +.xcont12: + sra %o1,8,%o3 ! (Y1_1) ind >>= 8; + cmp %l5,1086 ! (Y1_2) if (expy >= 0x43e); + fmuld XKB5,%f52,%f22 ! (Y1_1) dtmp0 = XKB5 * y; + faddd %f48,XKB2,%f14 ! (Y0_1) dtmp5 = dtmp4 + XKB2; + + add %o3,1021,%o0 ! (Y1_1) eflag = (ind + 1021); + ldd [%l3+EXPTBL],%f48 ! (Y1_0) dtmp1 = (*(double*)((char*)__mt_constexp2 + ind); + bge,pn %icc,.xupdate13 ! (Y1_2) if (expy >= 0x43e); + fpadd32 %f58,%f60,%f60 ! (Y1_0) u = vis_fpadd32(u, dtmp0); +.xcont13: + sub %g0,%o3,%i2 ! (Y1_1) gflag = (1022 - ind); + ldd [EXPTBL_P8+%l0],%f16 ! (Y2_0) dtmp0 = *(double*)((char*)__mt_constexp2 + i + 8); + fmuld %f56,s_h,%f58 ! (Y0_2) dtmp0 *= s_h; + fand %f8,MHI32,%f12 ! (Y1_2) s = vis_fand(yd, MHI32); + + sra %o0,31,%l3 ! (Y1_1) eflag = eflag >> 31; + add %i2,1022,%l2 ! (Y1_1) gflag = (1022 - ind); + fmuld %f20,%f18,%f56 ! (Y2_0) dtmp1 = u * y; + fcmped %fcc0,%f50,HTHRESH ! (Y0_2) if (s > HTHRESH); + + sra %l2,31,%o7 ! (Y1_1) gflag = gflag >> 31; + and %l3,54,%i1 ! (Y1_1) itmp0 = 54 & eflag; + fmuld %f60,%f48,%f18 ! (Y1_0) dtmp1 = u * dtmp1; + fdtoi %f28,%f3 ! (Y2_1) u = (double)(int)dtmp0; + + add %o3,%i1,%l0 ! (Y1_1) ind = ind + itmp0; + and %o7,52,%l1 ! (Y1_1) itmp1 = 52 & gflag; + st %f3,[%fp+ind_buf] ! (Y2_1) ind = (int)dtmp0; + faddd %f22,XKB4,%f60 ! (Y1_1) dtmp1 = dtmp0 + XKB4; + + sub %l0,%l1,%i4 ! (Y1_1) ind = ind - itmp1; + sub %l3,%o7,%o7 ! (Y1_1) ind = eflag - gflag; + st %f18,[pz] ! (Y1_0) write into memory + faddd %f58,%f26,%f48 ! (Y0_2) yd = dtmp0 + dtmp1; + + sll %i4,20,%i2 ! (Y1_1) ind <<= 20; + lda [py]0x82,%l5 ! (Y2_2) hy = *py; + fmovdg %fcc0,HTHRESH,%f50 ! (Y0_2) s = HTHRESH; + + st %i2,[%fp+tmp1_hi] ! (Y1_1) *(int*)&dtmp0 = ind; + fitod %f3,%f10 ! (Y2_1) u = (double)(int)dtmp0; + + fmuld %f60,%f52,%f60 ! (Y1_1) dtmp2 = dtmp1 * y; + st %f19,[pz+4] ! 
(Y1_0) write into memory + faddd %f16,%f56,%f28 ! (Y2_0) dtmp2 = dtmp0 + dtmp1; + + fmuld %f14,%f54,%f56 ! (Y0_1) dtmp6 = dtmp5 * y; + fmovdg %fcc0,DZERO,%f48 ! (Y0_2) yd = DZERO; + + add pz,stridez,pz ! z += stridez; + fcmped %fcc1,%f50,LTHRESH ! (Y0_2) if (s < LTHRESH); + + lda [py]0x82,%f26 ! (Y2_2) yd = *py; + fsubd %f44,%f10,%f18 ! (Y2_1) y = s - u; + + lda [py+4]%asi,%f27 ! (Y2_2) yd = *py; + faddd %f60,XKB3,%f44 ! (Y1_1) dtmp3 = dtmp2 + XKB3; + + sra %l5,20,%l5 ! (Y2_2) expy = hy >> 20; + and %o2,255,%o2 ! (Y0_1) i = ind & 0xff; + faddd %f56,XKB1,%f58 ! (Y0_1) dtmp7 = dtmp6 + XKB1; + + sll %o2,4,%l2 ! (Y0_1) i = i << 4; + fmovdl %fcc1,DZERO,%f48 ! (Y0_2) yd = DZERO; + + add py,stridey,py ! y += stridey; + and %l5,0x7ff,%l5 ! (Y2_2) expy &= 0x7ff; + fmovdl %fcc1,LTHRESH,%f50 ! (Y0_2) s = LTHRESH; + + cmp %l5,959 ! (Y2_2) if (expy < 0x3fb); + ldd [EXPTBL+%l2],%f22 ! (Y0_1) u = *(double*)((char*)__mt_constexp2 + i); + faddd %f18,%f62,%f18 ! (Y2_1) y = y + yd; + fmuld %f44,%f52,%f62 ! (Y1_1) dtmp4 = dtmp3 * y; + + add %o5,513,%o5 ! (Y2_0) ind += 513; + ld [%fp+ind_buf],%l1 ! (Y2_1) ind = (int)dtmp0; + fmuld %f58,%f54,%f54 ! (Y0_1) y = dtmp7 * y; + faddd %f28,%f20,%f58 ! (Y2_0) u = dtmp2 + u; + + ldd [%fp+tmp2_hi],%f60 ! (Y2_0) *(int*)&dtmp0 = ind; + fmuld s_h,%f12,%f16 ! (Y1_2) s = s_h * s; + bl,pn %icc,.xupdate14 ! (Y2_2) if (expy < 0x3fb); + fsubd %f8,%f12,%f56 ! (Y1_2) dtmp0 = (yd - s); +.xcont14: + sll %o5,3,%i1 ! (Y2_0) ind *= 8; + cmp %l5,1086 ! (Y2_2) if (expy >= 0x43e); + fmuld %f8,yr,%f8 ! (Y1_2) dtmp1 = yd * yr; + faddd %f50,%f48,%f28 ! (Y0_2) dtmp0 = (s + yd); + + sra %l1,8,%o2 ! (Y2_1) ind >>= 8; + lda [py]0x82,%l5 ! (Y0_3) hy = *py; + fmuld XKB5,%f18,%f20 ! (Y2_1) dtmp0 = XKB5 * y; + faddd %f62,XKB2,%f12 ! (Y1_1) dtmp5 = dtmp4 + XKB2; + + add %o2,1021,%l0 ! (Y2_1) eflag = (ind + 1021); + ldd [%i1+EXPTBL],%f62 ! (Y2_0) dtmp1 = (*(double*)((char*)__mt_constexp2 + ind); + bge,pn %icc,.xupdate15 ! (Y2_2) if (expy >= 0x43e); + fpadd32 %f58,%f60,%f60 ! 
(Y2_0) u = vis_fpadd32(u, dtmp0); +.xcont15: + sub %g0,%o2,%l3 ! (Y2_1) gflag = (1022 - ind); + ldd [EXPTBL_P8+%l2],%f14 ! (Y0_1) dtmp0 = *(double*)((char*)__mt_constexp2 + i + 8); + fmuld %f56,s_h,%f58 ! (Y1_2) dtmp0 *= s_h; + fand %f26,MHI32,%f44 ! (Y2_2) s = vis_fand(yd, MHI32); + + sra %l0,31,%o0 ! (Y2_1) eflag = eflag >> 31; + add %l3,1022,%i4 ! (Y2_1) gflag = (1022 - ind); + fmuld %f22,%f54,%f56 ! (Y0_1) dtmp1 = u * y; + fcmped %fcc0,%f16,HTHRESH ! (Y1_2) if (s > HTHRESH); + + sra %i4,31,%o5 ! (Y2_1) gflag = gflag >> 31; + and %o0,54,%i2 ! (Y2_1) itmp0 = 54 & eflag; + fmuld %f60,%f62,%f6 ! (Y2_0) dtmp1 = u * dtmp1; + fdtoi %f28,%f3 ! (Y0_2) u = (double)(int)dtmp0; + + add %o2,%i2,%i1 ! (Y2_1) ind = ind + itmp0; + and %o5,52,%l2 ! (Y2_1) itmp1 = 52 & gflag; + st %f3,[%fp+tmp3] ! (Y0_2) ind = (int)dtmp0; + faddd %f20,XKB4,%f60 ! (Y2_1) dtmp1 = dtmp0 + XKB4; + + sub %i1,%l2,%o3 ! (Y2_1) ind = ind - itmp1; + sub %o0,%o5,%o5 ! (Y2_1) ind = eflag - gflag; + st %f6,[pz] ! (Y2_0) write into memory + faddd %f58,%f8,%f10 ! (Y1_2) yd = dtmp0 + dtmp1; + + sll %o3,20,%l3 ! (Y2_1) ind <<= 20; + lda [py]0x82,%f28 ! (Y0_3) yd = *py; + fmovdg %fcc0,HTHRESH,%f16 ! (Y1_2) s = HTHRESH; + + lda [py+4]%asi,%f29 ! (Y0_3) yd = *py; + fitod %f3,%f58 ! (Y0_2) u = (double)(int)dtmp0; + + fmuld %f60,%f18,%f60 ! (Y2_1) dtmp2 = dtmp1 * y; + st %l3,[%fp+tmp2_hi] ! (Y2_1) *(int*)&dtmp0 = ind; + faddd %f14,%f56,%f20 ! (Y0_1) dtmp2 = dtmp0 + dtmp1; + + fmuld %f12,%f52,%f56 ! (Y1_1) dtmp6 = dtmp5 * y; + st %f7,[pz+4] ! (Y2_0) write into memory + fmovdg %fcc0,DZERO,%f10 ! (Y1_2) yd = DZERO; + + sra %l5,20,%l5 ! (Y0_3) expy = hy >> 20; + add pz,stridez,pz ! z += stridez; + fcmped %fcc1,%f16,LTHRESH ! (Y1_2) if (s < LTHRESH); + + and %l5,0x7ff,%l5 ! (Y0_3) expy &= 0x7ff; + fsubd %f50,%f58,%f54 ! (Y0_2) y = s - u; + + cmp %l5,959 ! (Y0_3) if (expy < 0x3fb); + faddd %f60,XKB3,%f60 ! (Y2_1) dtmp3 = dtmp2 + XKB3; + + and %o1,255,%o1 ! (Y1_1) i = ind & 0xff; + bl,pn %icc,.xupdate16 ! 
(Y0_3) if (expy < 0x3fb); + faddd %f56,XKB1,%f8 ! (Y1_1) dtmp7 = dtmp6 + XKB1; +.xcont16: + sll %o1,4,%l0 ! (Y1_1) i = i << 4; + fmovdl %fcc1,DZERO,%f10 ! (Y1_2) yd = DZERO; + + subcc counter,3,counter ! update cycle counter + fmovdl %fcc1,LTHRESH,%f16 ! (Y1_2) s = LTHRESH; + + ldd [EXPTBL+%l0],%f56 ! (Y1_1) u = *(double*)((char*)__mt_constexp2 + i); + fmuld %f60,%f18,%f58 ! (Y2_1) dtmp4 = dtmp3 * y; + faddd %f54,%f48,%f54 ! (Y0_2) y = y + yd; + + fmuld %f8,%f52,%f60 ! (Y1_1) y = dtmp7 * y; + ld [%fp+tmp3],%o2 ! (Y0_2) ind = (int)dtmp0; + bpos,pt %icc,.xmain_loop + faddd %f20,%f22,%f12 ! (Y0_1) u = dtmp2 + u; + +.xtail: + addcc counter,2,counter + ldd [%fp+tmp0_hi],%f52 ! (Y0_0) *(int*)&dtmp0 = ind; + + add %o4,513,%o4 ! (Y0_0) ind += 513; + bneg,pn %icc,.xend_loop + nop + + sll %o4,3,%l2 ! (Y0_0) ind *= 8; + + subcc counter,1,counter + ldd [%l2+EXPTBL],%f62 ! (Y0_0) dtmp1 = (*(double*)((char*)__mt_constexp2 + ind); + fpadd32 %f12,%f52,%f58 ! (Y0_0) u = vis_fpadd32(u, dtmp0); + + ldd [EXPTBL_P8+%l0],%f8 ! (Y1_0) dtmp0 = *(double*)((char*)__mt_constexp2 + i + 8); + + fmuld %f56,%f60,%f26 ! (Y1_0) dtmp1 = u * y; + + fmuld %f58,%f62,%f6 ! (Y0_0) dtmp1 = u * dtmp1; + + st %f6,[pz] ! (Y0_0) write into memory + st %f7,[pz+4] ! (Y0_0) write into memory + bneg,pn %icc,.xend_loop + add pz,stridez,pz ! z += stridez; + + faddd %f8,%f26,%f26 ! (Y1_0) dtmp2 = dtmp0 + dtmp1; + + add %o7,513,%o7 ! (Y1_0) ind += 513; + faddd %f26,%f56,%f58 ! (Y1_0) u = dtmp2 + u; + + ldd [%fp+tmp1_hi],%f60 ! (Y1_0) *(int*)&dtmp0 = ind; + + sll %o7,3,%l3 ! (Y1_0) ind *= 8; + + ldd [%l3+EXPTBL],%f48 ! (Y1_0) dtmp1 = (*(double*)((char*)__mt_constexp2 + ind); + fpadd32 %f58,%f60,%f60 ! (Y1_0) u = vis_fpadd32(u, dtmp0); + + fmuld %f60,%f48,%f18 ! (Y1_0) dtmp1 = u * dtmp1; + + st %f18,[pz] ! (Y1_0) write into memory + st %f19,[pz+4] ! (Y1_0) write into memory + add pz,stridez,pz ! 
z += stridez; + +.xend_loop: + ba .xbegin + nop + + .align 16 +.xupdate0: + cmp counter,0 + sub py,stridey,%i2 + ble,pt %icc,.xcont0 + fmovd DZERO,%f10 + + stx %i2,[%fp+tmp_py] + + st counter,[%fp+tmp_counter] + ba .xcont0 + or %g0,0,counter + + .align 16 +.xupdate1: + cmp counter,0 + sub py,stridey,%i2 + ble,pt %icc,.xcont1 + fmovd DZERO,%f10 + + stx %i2,[%fp+tmp_py] + + st counter,[%fp+tmp_counter] + ba .xcont1 + or %g0,0,counter + + .align 16 +.xupdate2: + cmp counter,1 + sub py,stridey,%l3 + ble,pt %icc,.xcont2 + fmovd DZERO,%f14 + + stx %l3,[%fp+tmp_py] + sub counter,1,counter + + st counter,[%fp+tmp_counter] + ba .xcont2 + or %g0,1,counter + + .align 16 +.xupdate3: + cmp counter,1 + sub py,stridey,%l3 + ble,pt %icc,.xcont3 + fmovd DZERO,%f14 + + stx %l3,[%fp+tmp_py] + sub counter,1,counter + + st counter,[%fp+tmp_counter] + ba .xcont3 + or %g0,1,counter + + .align 16 +.xupdate4: + cmp counter,2 + ble,pt %icc,.xcont4 + fmovd DZERO,%f18 + + stx py,[%fp+tmp_py] + sub counter,2,counter + + st counter,[%fp+tmp_counter] + ba .xcont4 + or %g0,2,counter + + .align 16 +.xupdate5: + cmp counter,2 + ble,pt %icc,.xcont5 + fmovd DZERO,%f18 + + stx py,[%fp+tmp_py] + sub counter,2,counter + + st counter,[%fp+tmp_counter] + ba .xcont5 + or %g0,2,counter + + .align 16 +.xupdate6: + cmp counter,3 + sub py,stridey,%i2 + ble,pt %icc,.xcont6 + fmovd DZERO,%f20 + + stx %i2,[%fp+tmp_py] + sub counter,3,counter + + st counter,[%fp+tmp_counter] + ba .xcont6 + or %g0,3,counter + + .align 16 +.xupdate7: + cmp counter,3 + sub py,stridey,%i2 + ble,pt %icc,.xcont7 + fmovd DZERO,%f20 + + stx %i2,[%fp+tmp_py] + sub counter,3,counter + + st counter,[%fp+tmp_counter] + ba .xcont7 + or %g0,3,counter + + .align 16 +.xupdate8: + cmp counter,4 + sub py,stridey,%l3 + ble,pt %icc,.xcont8 + fmovd DZERO,%f26 + + stx %l3,[%fp+tmp_py] + sub counter,4,counter + + st counter,[%fp+tmp_counter] + ba .xcont8 + or %g0,4,counter + + .align 16 +.xupdate9: + cmp counter,4 + sub py,stridey,%l3 + ble,pt 
%icc,.xcont9 + fmovd DZERO,%f26 + + stx %l3,[%fp+tmp_py] + sub counter,4,counter + + st counter,[%fp+tmp_counter] + ba .xcont9 + or %g0,4,counter + + .align 16 +.xupdate10: + cmp counter,5 + ble,pt %icc,.xcont10 + fmovd DZERO,%f28 + + stx py,[%fp+tmp_py] + sub counter,5,counter + + st counter,[%fp+tmp_counter] + ba .xcont10 + or %g0,5,counter + + .align 16 +.xupdate11: + cmp counter,3 + ble,pt %icc,.xcont11 + fmovd DZERO,%f28 + + stx py,[%fp+tmp_py] + sub counter,3,counter + + st counter,[%fp+tmp_counter] + ba .xcont11 + or %g0,3,counter + + .align 16 +.xupdate12: + cmp counter,4 + sub py,stridey,%i2 + ble,pt %icc,.xcont12 + fmovd DZERO,%f8 + + stx %i2,[%fp+tmp_py] + sub counter,4,counter + + st counter,[%fp+tmp_counter] + ba .xcont12 + or %g0,4,counter + + .align 16 +.xupdate13: + cmp counter,4 + sub py,stridey,%i2 + ble,pt %icc,.xcont13 + fmovd DZERO,%f8 + + stx %i2,[%fp+tmp_py] + sub counter,4,counter + + st counter,[%fp+tmp_counter] + ba .xcont13 + or %g0,4,counter + + .align 16 +.xupdate14: + cmp counter,5 + sub py,stridey,%l3 + ble,pt %icc,.xcont14 + fmovd DZERO,%f26 + + stx %l3,[%fp+tmp_py] + sub counter,5,counter + + st counter,[%fp+tmp_counter] + ba .xcont14 + or %g0,5,counter + + .align 16 +.xupdate15: + cmp counter,5 + sub py,stridey,%l3 + ble,pt %icc,.xcont15 + fmovd DZERO,%f26 + + stx %l3,[%fp+tmp_py] + sub counter,5,counter + + st counter,[%fp+tmp_counter] + ba .xcont15 + or %g0,5,counter + + .align 16 +.xupdate16: + cmp counter,6 + ble,pt %icc,.xcont16 + fmovd DZERO,%f28 + + stx py,[%fp+tmp_py] + sub counter,6,counter + + st counter,[%fp+tmp_counter] + ba .xcont16 + or %g0,6,counter + + .align 16 +.xspec0: + add EXPTBL,4095,%l0 + add %l0,1,%l0 + ldd [%l0+8],%f20 ! ld DONE + st %f20,[pz] ! *pz = DONE; + ba .xupdate_point + st %f21,[pz+4] ! *pz = DONE; + + .align 16 +.xspec1: + ldx [%fp+tmp_px],%l1 + sethi %hi(0x7ffffc00),MASK_0x7fffffff + + sethi %hi(0x7ff00000),%o3 + add MASK_0x7fffffff,0x3ff,MASK_0x7fffffff + + and %l2,MASK_0x7fffffff,%o2 ! 
if (hy &= 0x7fffffff); + sethi %hi(0x3ff00000),MASK_0x3ff00000 + + cmp %o2,%o3 ! if (hy != 0x7ff00000); + bne,pn %icc,2f ! if (hy != 0x7ff00000); + nop + + ld [py+4],%l3 ! ld ly; + cmp %l3,0 ! if (ly != 0); + bne,a,pt %icc,3f ! if (ly != 0); + nop + + ld [%l1],%i1 ! ld hx; + cmp %i1,MASK_0x3ff00000 ! if (hx != 0x3ff00000); + bne,a,pn %icc,1f ! if (hx != 0x3ff00000); + srl %l2,31,%o7 ! sy = hy >> 31; + + ld [%l1+4],%i2 ! ld lx; + cmp %i2,0 ! if (lx != 0); + bne,pn %icc,1f ! if (lx != 0); + srl %l2,31,%o7 ! sy = hy >> 31; + + fzero %f28 + fmuld %f18,%f28,%f28 ! *pz = *py * 0.0; + st %f28,[pz] + ba .xupdate_point + st %f29,[pz+4] +1: + sub %i1,MASK_0x3ff00000,%o0 ! hx - 0x3ff00000; + srlx %o0,63,%o0 ! (hx - 0x3ff00000) >> 63; + + cmp %o0,%o7 ! if ((hx < 0x3ff00000) == sy); + be,pn %icc,1f ! if ((hx < 0x3ff00000) == sy); + + st DZERO_HI,[pz] + ba .xupdate_point + st DZERO_LO,[pz+4] +1: + st %o2,[pz] ! ((int*)pz)[0] = hy; + ba .xupdate_point + st %l3,[pz+4] ! ((int*)pz)[1] = ly; +2: + bl,a,pn %icc,1f ! if (hy < 0x7ff00000); + ld [%l1+4],%i2 ! ld lx; +3: + ld [%l1],%f20 ! x = *px; + ld [%l1+4],%f21 ! x = *px; + fmuld %f20,%f18,%f28 ! *pz = *px * *py; + st %f28,[pz] + ba .xupdate_point + st %f29,[pz+4] +1: + ld [%l1],%i1 ! ld hx; + cmp %i2,0 ! if (lx != 0); + bne,pn %icc,1f ! if (lx != 0); + nop + + cmp %i1,MASK_0x3ff00000 ! if (hx != 0x3ff00000); + add EXPTBL,4095,%l0 + bne,pn %icc,1f ! if (hx != 0x3ff00000); + add %l0,1,%l0 + + ldd [%l0+8],%f20 ! ld DONE + st %f20,[pz] ! *pz = DONE; + ba .xupdate_point + st %f21,[pz+4] ! *pz = DONE; +1: + srl %l2,31,%o7 ! sy = hy >> 31; + sub %i1,MASK_0x3ff00000,%o0 ! hx - 0x3ff00000; + + srlx %o0,63,%o0 ! (hx - 0x3ff00000) >> 63; + + cmp %o0,%o7 ! if (hx < 0x3ff00000) == sy); + be,a,pn %icc,1f ! if (hx < 0x3ff00000) == sy); + ldd [EXPTBL-ind_HUGE],%f20 ! y0 = _HUGE; + + ldd [EXPTBL-ind_TINY],%f20 ! y0 = _TINY; +1: + fmuld %f20,%f20,%f20 ! 
*pz = y0 * y0 + st %f20,[pz] + ba .xupdate_point + st %f21,[pz+4] + +.xupdate_point: + add py,stridey,py + ba .xbegin1 + add pz,stridez,pz + + SET_SIZE(__vpow) + diff --git a/usr/src/lib/libmvec/common/vis/__vpowf.S b/usr/src/lib/libmvec/common/vis/__vpowf.S new file mode 100644 index 0000000000..cddb99ef99 --- /dev/null +++ b/usr/src/lib/libmvec/common/vis/__vpowf.S @@ -0,0 +1,3139 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .file "__vpowf.S" + +#include "libm.h" + + RO_DATA + .align 64 + +! 
__mt_constexp2fa: + .word 0x3ff00000, 0x00000000, 0x3ff00b1a, 0xfa5abcbf + .word 0x3ff0163d, 0xa9fb3335, 0x3ff02168, 0x143b0281 + .word 0x3ff02c9a, 0x3e778061, 0x3ff037d4, 0x2e11bbcc + .word 0x3ff04315, 0xe86e7f85, 0x3ff04e5f, 0x72f654b1 + .word 0x3ff059b0, 0xd3158574, 0x3ff0650a, 0x0e3c1f89 + .word 0x3ff0706b, 0x29ddf6de, 0x3ff07bd4, 0x2b72a836 + .word 0x3ff08745, 0x18759bc8, 0x3ff092bd, 0xf66607e0 + .word 0x3ff09e3e, 0xcac6f383, 0x3ff0a9c7, 0x9b1f3919 + .word 0x3ff0b558, 0x6cf9890f, 0x3ff0c0f1, 0x45e46c85 + .word 0x3ff0cc92, 0x2b7247f7, 0x3ff0d83b, 0x23395dec + .word 0x3ff0e3ec, 0x32d3d1a2, 0x3ff0efa5, 0x5fdfa9c5 + .word 0x3ff0fb66, 0xaffed31b, 0x3ff10730, 0x28d7233e + .word 0x3ff11301, 0xd0125b51, 0x3ff11edb, 0xab5e2ab6 + .word 0x3ff12abd, 0xc06c31cc, 0x3ff136a8, 0x14f204ab + .word 0x3ff1429a, 0xaea92de0, 0x3ff14e95, 0x934f312e + .word 0x3ff15a98, 0xc8a58e51, 0x3ff166a4, 0x5471c3c2 + .word 0x3ff172b8, 0x3c7d517b, 0x3ff17ed4, 0x8695bbc0 + .word 0x3ff18af9, 0x388c8dea, 0x3ff19726, 0x58375d2f + .word 0x3ff1a35b, 0xeb6fcb75, 0x3ff1af99, 0xf8138a1c + .word 0x3ff1bbe0, 0x84045cd4, 0x3ff1c82f, 0x95281c6b + .word 0x3ff1d487, 0x3168b9aa, 0x3ff1e0e7, 0x5eb44027 + .word 0x3ff1ed50, 0x22fcd91d, 0x3ff1f9c1, 0x8438ce4d + .word 0x3ff2063b, 0x88628cd6, 0x3ff212be, 0x3578a819 + .word 0x3ff21f49, 0x917ddc96, 0x3ff22bdd, 0xa27912d1 + .word 0x3ff2387a, 0x6e756238, 0x3ff2451f, 0xfb82140a + .word 0x3ff251ce, 0x4fb2a63f, 0x3ff25e85, 0x711ece75 + .word 0x3ff26b45, 0x65e27cdd, 0x3ff2780e, 0x341ddf29 + .word 0x3ff284df, 0xe1f56381, 0x3ff291ba, 0x7591bb70 + .word 0x3ff29e9d, 0xf51fdee1, 0x3ff2ab8a, 0x66d10f13 + .word 0x3ff2b87f, 0xd0dad990, 0x3ff2c57e, 0x39771b2f + .word 0x3ff2d285, 0xa6e4030b, 0x3ff2df96, 0x1f641589 + .word 0x3ff2ecaf, 0xa93e2f56, 0x3ff2f9d2, 0x4abd886b + .word 0x3ff306fe, 0x0a31b715, 0x3ff31432, 0xedeeb2fd + .word 0x3ff32170, 0xfc4cd831, 0x3ff32eb8, 0x3ba8ea32 + .word 0x3ff33c08, 0xb26416ff, 0x3ff34962, 0x66e3fa2d + .word 0x3ff356c5, 0x5f929ff1, 0x3ff36431, 0xa2de883b + 
.word 0x3ff371a7, 0x373aa9cb, 0x3ff37f26, 0x231e754a + .word 0x3ff38cae, 0x6d05d866, 0x3ff39a40, 0x1b7140ef + .word 0x3ff3a7db, 0x34e59ff7, 0x3ff3b57f, 0xbfec6cf4 + .word 0x3ff3c32d, 0xc313a8e5, 0x3ff3d0e5, 0x44ede173 + .word 0x3ff3dea6, 0x4c123422, 0x3ff3ec70, 0xdf1c5175 + .word 0x3ff3fa45, 0x04ac801c, 0x3ff40822, 0xc367a024 + .word 0x3ff4160a, 0x21f72e2a, 0x3ff423fb, 0x2709468a + .word 0x3ff431f5, 0xd950a897, 0x3ff43ffa, 0x3f84b9d4 + .word 0x3ff44e08, 0x6061892d, 0x3ff45c20, 0x42a7d232 + .word 0x3ff46a41, 0xed1d0057, 0x3ff4786d, 0x668b3237 + .word 0x3ff486a2, 0xb5c13cd0, 0x3ff494e1, 0xe192aed2 + .word 0x3ff4a32a, 0xf0d7d3de, 0x3ff4b17d, 0xea6db7d7 + .word 0x3ff4bfda, 0xd5362a27, 0x3ff4ce41, 0xb817c114 + .word 0x3ff4dcb2, 0x99fddd0d, 0x3ff4eb2d, 0x81d8abff + .word 0x3ff4f9b2, 0x769d2ca7, 0x3ff50841, 0x7f4531ee + .word 0x3ff516da, 0xa2cf6642, 0x3ff5257d, 0xe83f4eef + .word 0x3ff5342b, 0x569d4f82, 0x3ff542e2, 0xf4f6ad27 + .word 0x3ff551a4, 0xca5d920f, 0x3ff56070, 0xdde910d2 + .word 0x3ff56f47, 0x36b527da, 0x3ff57e27, 0xdbe2c4cf + .word 0x3ff58d12, 0xd497c7fd, 0x3ff59c08, 0x27ff07cc + .word 0x3ff5ab07, 0xdd485429, 0x3ff5ba11, 0xfba87a03 + .word 0x3ff5c926, 0x8a5946b7, 0x3ff5d845, 0x90998b93 + .word 0x3ff5e76f, 0x15ad2148, 0x3ff5f6a3, 0x20dceb71 + .word 0x3ff605e1, 0xb976dc09, 0x3ff6152a, 0xe6cdf6f4 + .word 0x3ff6247e, 0xb03a5585, 0x3ff633dd, 0x1d1929fd + .word 0x3ff64346, 0x34ccc320, 0x3ff652b9, 0xfebc8fb7 + .word 0x3ff66238, 0x82552225, 0x3ff671c1, 0xc70833f6 + .word 0x3ff68155, 0xd44ca973, 0x3ff690f4, 0xb19e9538 + .word 0x3ff6a09e, 0x667f3bcd, 0x3ff6b052, 0xfa75173e + .word 0x3ff6c012, 0x750bdabf, 0x3ff6cfdc, 0xddd47645 + .word 0x3ff6dfb2, 0x3c651a2f, 0x3ff6ef92, 0x98593ae5 + .word 0x3ff6ff7d, 0xf9519484, 0x3ff70f74, 0x66f42e87 + .word 0x3ff71f75, 0xe8ec5f74, 0x3ff72f82, 0x86ead08a + .word 0x3ff73f9a, 0x48a58174, 0x3ff74fbd, 0x35d7cbfd + .word 0x3ff75feb, 0x564267c9, 0x3ff77024, 0xb1ab6e09 + .word 0x3ff78069, 0x4fde5d3f, 0x3ff790b9, 0x38ac1cf6 + .word 0x3ff7a114, 
0x73eb0187, 0x3ff7b17b, 0x0976cfdb + .word 0x3ff7c1ed, 0x0130c132, 0x3ff7d26a, 0x62ff86f0 + .word 0x3ff7e2f3, 0x36cf4e62, 0x3ff7f387, 0x8491c491 + .word 0x3ff80427, 0x543e1a12, 0x3ff814d2, 0xadd106d9 + .word 0x3ff82589, 0x994cce13, 0x3ff8364c, 0x1eb941f7 + .word 0x3ff8471a, 0x4623c7ad, 0x3ff857f4, 0x179f5b21 + .word 0x3ff868d9, 0x9b4492ed, 0x3ff879ca, 0xd931a436 + .word 0x3ff88ac7, 0xd98a6699, 0x3ff89bd0, 0xa478580f + .word 0x3ff8ace5, 0x422aa0db, 0x3ff8be05, 0xbad61778 + .word 0x3ff8cf32, 0x16b5448c, 0x3ff8e06a, 0x5e0866d9 + .word 0x3ff8f1ae, 0x99157736, 0x3ff902fe, 0xd0282c8a + .word 0x3ff9145b, 0x0b91ffc6, 0x3ff925c3, 0x53aa2fe2 + .word 0x3ff93737, 0xb0cdc5e5, 0x3ff948b8, 0x2b5f98e5 + .word 0x3ff95a44, 0xcbc8520f, 0x3ff96bdd, 0x9a7670b3 + .word 0x3ff97d82, 0x9fde4e50, 0x3ff98f33, 0xe47a22a2 + .word 0x3ff9a0f1, 0x70ca07ba, 0x3ff9b2bb, 0x4d53fe0d + .word 0x3ff9c491, 0x82a3f090, 0x3ff9d674, 0x194bb8d5 + .word 0x3ff9e863, 0x19e32323, 0x3ff9fa5e, 0x8d07f29e + .word 0x3ffa0c66, 0x7b5de565, 0x3ffa1e7a, 0xed8eb8bb + .word 0x3ffa309b, 0xec4a2d33, 0x3ffa42c9, 0x80460ad8 + .word 0x3ffa5503, 0xb23e255d, 0x3ffa674a, 0x8af46052 + .word 0x3ffa799e, 0x1330b358, 0x3ffa8bfe, 0x53c12e59 + .word 0x3ffa9e6b, 0x5579fdbf, 0x3ffab0e5, 0x21356eba + .word 0x3ffac36b, 0xbfd3f37a, 0x3ffad5ff, 0x3a3c2774 + .word 0x3ffae89f, 0x995ad3ad, 0x3ffafb4c, 0xe622f2ff + .word 0x3ffb0e07, 0x298db666, 0x3ffb20ce, 0x6c9a8952 + .word 0x3ffb33a2, 0xb84f15fb, 0x3ffb4684, 0x15b749b1 + .word 0x3ffb5972, 0x8de5593a, 0x3ffb6c6e, 0x29f1c52a + .word 0x3ffb7f76, 0xf2fb5e47, 0x3ffb928c, 0xf22749e4 + .word 0x3ffba5b0, 0x30a1064a, 0x3ffbb8e0, 0xb79a6f1f + .word 0x3ffbcc1e, 0x904bc1d2, 0x3ffbdf69, 0xc3f3a207 + .word 0x3ffbf2c2, 0x5bd71e09, 0x3ffc0628, 0x6141b33d + .word 0x3ffc199b, 0xdd85529c, 0x3ffc2d1c, 0xd9fa652c + .word 0x3ffc40ab, 0x5fffd07a, 0x3ffc5447, 0x78fafb22 + .word 0x3ffc67f1, 0x2e57d14b, 0x3ffc7ba8, 0x8988c933 + .word 0x3ffc8f6d, 0x9406e7b5, 0x3ffca340, 0x5751c4db + .word 0x3ffcb720, 0xdcef9069, 
0x3ffccb0f, 0x2e6d1675 + .word 0x3ffcdf0b, 0x555dc3fa, 0x3ffcf315, 0x5b5bab74 + .word 0x3ffd072d, 0x4a07897c, 0x3ffd1b53, 0x2b08c968 + .word 0x3ffd2f87, 0x080d89f2, 0x3ffd43c8, 0xeacaa1d6 + .word 0x3ffd5818, 0xdcfba487, 0x3ffd6c76, 0xe862e6d3 + .word 0x3ffd80e3, 0x16c98398, 0x3ffd955d, 0x71ff6075 + .word 0x3ffda9e6, 0x03db3285, 0x3ffdbe7c, 0xd63a8315 + .word 0x3ffdd321, 0xf301b460, 0x3ffde7d5, 0x641c0658 + .word 0x3ffdfc97, 0x337b9b5f, 0x3ffe1167, 0x6b197d17 + .word 0x3ffe2646, 0x14f5a129, 0x3ffe3b33, 0x3b16ee12 + .word 0x3ffe502e, 0xe78b3ff6, 0x3ffe6539, 0x24676d76 + .word 0x3ffe7a51, 0xfbc74c83, 0x3ffe8f79, 0x77cdb740 + .word 0x3ffea4af, 0xa2a490da, 0x3ffeb9f4, 0x867cca6e + .word 0x3ffecf48, 0x2d8e67f1, 0x3ffee4aa, 0xa2188510 + .word 0x3ffefa1b, 0xee615a27, 0x3fff0f9c, 0x1cb6412a + .word 0x3fff252b, 0x376bba97, 0x3fff3ac9, 0x48dd7274 + .word 0x3fff5076, 0x5b6e4540, 0x3fff6632, 0x798844f8 + .word 0x3fff7bfd, 0xad9cbe14, 0x3fff91d8, 0x02243c89 + .word 0x3fffa7c1, 0x819e90d8, 0x3fffbdba, 0x3692d514 + .word 0x3fffd3c2, 0x2b8f71f1, 0x3fffe9d9, 0x6b2a23d9 + +! 
__mt_constexp2fb: + .word 0x36900000, 0x36a00000, 0x36b00000, 0x36c00000 + .word 0x36d00000, 0x36e00000, 0x36f00000, 0x37000000 + .word 0x37100000, 0x37200000, 0x37300000, 0x37400000 + .word 0x37500000, 0x37600000, 0x37700000, 0x37800000 + .word 0x37900000, 0x37a00000, 0x37b00000, 0x37c00000 + .word 0x37d00000, 0x37e00000, 0x37f00000, 0x38000000 + .word 0x38100000, 0x38200000, 0x38300000, 0x38400000 + .word 0x38500000, 0x38600000, 0x38700000, 0x38800000 + .word 0x38900000, 0x38a00000, 0x38b00000, 0x38c00000 + .word 0x38d00000, 0x38e00000, 0x38f00000, 0x39000000 + .word 0x39100000, 0x39200000, 0x39300000, 0x39400000 + .word 0x39500000, 0x39600000, 0x39700000, 0x39800000 + .word 0x39900000, 0x39a00000, 0x39b00000, 0x39c00000 + .word 0x39d00000, 0x39e00000, 0x39f00000, 0x3a000000 + .word 0x3a100000, 0x3a200000, 0x3a300000, 0x3a400000 + .word 0x3a500000, 0x3a600000, 0x3a700000, 0x3a800000 + .word 0x3a900000, 0x3aa00000, 0x3ab00000, 0x3ac00000 + .word 0x3ad00000, 0x3ae00000, 0x3af00000, 0x3b000000 + .word 0x3b100000, 0x3b200000, 0x3b300000, 0x3b400000 + .word 0x3b500000, 0x3b600000, 0x3b700000, 0x3b800000 + .word 0x3b900000, 0x3ba00000, 0x3bb00000, 0x3bc00000 + .word 0x3bd00000, 0x3be00000, 0x3bf00000, 0x3c000000 + .word 0x3c100000, 0x3c200000, 0x3c300000, 0x3c400000 + .word 0x3c500000, 0x3c600000, 0x3c700000, 0x3c800000 + .word 0x3c900000, 0x3ca00000, 0x3cb00000, 0x3cc00000 + .word 0x3cd00000, 0x3ce00000, 0x3cf00000, 0x3d000000 + .word 0x3d100000, 0x3d200000, 0x3d300000, 0x3d400000 + .word 0x3d500000, 0x3d600000, 0x3d700000, 0x3d800000 + .word 0x3d900000, 0x3da00000, 0x3db00000, 0x3dc00000 + .word 0x3dd00000, 0x3de00000, 0x3df00000, 0x3e000000 + .word 0x3e100000, 0x3e200000, 0x3e300000, 0x3e400000 + .word 0x3e500000, 0x3e600000, 0x3e700000, 0x3e800000 + .word 0x3e900000, 0x3ea00000, 0x3eb00000, 0x3ec00000 + .word 0x3ed00000, 0x3ee00000, 0x3ef00000, 0x3f000000 + .word 0x3f100000, 0x3f200000, 0x3f300000, 0x3f400000 + .word 0x3f500000, 0x3f600000, 0x3f700000, 0x3f800000 + 
.word 0x3f900000, 0x3fa00000, 0x3fb00000, 0x3fc00000 + .word 0x3fd00000, 0x3fe00000, 0x3ff00000, 0x40000000 + .word 0x40100000, 0x40200000, 0x40300000, 0x40400000 + .word 0x40500000, 0x40600000, 0x40700000, 0x40800000 + .word 0x40900000, 0x40a00000, 0x40b00000, 0x40c00000 + .word 0x40d00000, 0x40e00000, 0x40f00000, 0x41000000 + .word 0x41100000, 0x41200000, 0x41300000, 0x41400000 + .word 0x41500000, 0x41600000, 0x41700000, 0x41800000 + .word 0x41900000, 0x41a00000, 0x41b00000, 0x41c00000 + .word 0x41d00000, 0x41e00000, 0x41f00000, 0x42000000 + .word 0x42100000, 0x42200000, 0x42300000, 0x42400000 + .word 0x42500000, 0x42600000, 0x42700000, 0x42800000 + .word 0x42900000, 0x42a00000, 0x42b00000, 0x42c00000 + .word 0x42d00000, 0x42e00000, 0x42f00000, 0x43000000 + .word 0x43100000, 0x43200000, 0x43300000, 0x43400000 + .word 0x43500000, 0x43600000, 0x43700000, 0x43800000 + .word 0x43900000, 0x43a00000, 0x43b00000, 0x43c00000 + .word 0x43d00000, 0x43e00000, 0x43f00000, 0x44000000 + .word 0x44100000, 0x44200000, 0x44300000, 0x44400000 + .word 0x44500000, 0x44600000, 0x44700000, 0x44800000 + .word 0x44900000, 0x44a00000, 0x44b00000, 0x44c00000 + .word 0x44d00000, 0x44e00000, 0x44f00000, 0x45000000 + .word 0x45100000, 0x45200000, 0x45300000, 0x45400000 + .word 0x45500000, 0x45600000, 0x45700000, 0x45800000 + .word 0x45900000, 0x45a00000, 0x45b00000, 0x45c00000 + .word 0x45d00000, 0x45e00000, 0x45f00000, 0x46000000 + .word 0x46100000, 0x46200000, 0x46300000, 0x46400000 + .word 0x46500000, 0x46600000, 0x46700000, 0x46800000 + .word 0x46900000, 0x46a00000, 0x46b00000, 0x46c00000 + .word 0x46d00000, 0x46e00000, 0x46f00000, 0x47000000 + .word 0x47100000, 0x47200000, 0x47300000, 0x47400000 + .word 0x47500000, 0x47600000, 0x47700000, 0x47800000 + .word 0x47900000, 0x47a00000, 0x47b00000, 0x47c00000 + .word 0x47d00000, 0x47e00000, 0x47f00000, 0x00000000 + + .word 0,0,0,0 + .word 0,0,0,0 + +.CONST_TBL: +! 
__mt_constlog4f: + .word 0x00000000, 0x00000000, 0x3e800000, 0x00000000 + .word 0x4006fe50, 0xb6ef0851, 0x3e7fc07f, 0x01fc07f0 + .word 0x4016e796, 0x85c2d22a, 0x3e7f81f8, 0x1f81f820 + .word 0x40211cd1, 0xd5133413, 0x3e7f4465, 0x9e4a4271 + .word 0x4026bad3, 0x758efd87, 0x3e7f07c1, 0xf07c1f08 + .word 0x402c4dfa, 0xb90aab5f, 0x3e7ecc07, 0xb301ecc0 + .word 0x4030eb38, 0x9fa29f9b, 0x3e7e9131, 0xabf0b767 + .word 0x4033aa2f, 0xdd27f1c3, 0x3e7e573a, 0xc901e574 + .word 0x403663f6, 0xfac91316, 0x3e7e1e1e, 0x1e1e1e1e + .word 0x403918a1, 0x6e46335b, 0x3e7de5d6, 0xe3f8868a + .word 0x403bc842, 0x40adabba, 0x3e7dae60, 0x76b981db + .word 0x403e72ec, 0x117fa5b2, 0x3e7d77b6, 0x54b82c34 + .word 0x40408c58, 0x8cda79e4, 0x3e7d41d4, 0x1d41d41d + .word 0x4041dcd1, 0x97552b7b, 0x3e7d0cb5, 0x8f6ec074 + .word 0x40432ae9, 0xe278ae1a, 0x3e7cd856, 0x89039b0b + .word 0x404476a9, 0xf983f74d, 0x3e7ca4b3, 0x055ee191 + .word 0x4045c01a, 0x39fbd688, 0x3e7c71c7, 0x1c71c71c + .word 0x40470742, 0xd4ef027f, 0x3e7c3f8f, 0x01c3f8f0 + .word 0x40484c2b, 0xd02f03b3, 0x3e7c0e07, 0x0381c0e0 + .word 0x40498edd, 0x077e70df, 0x3e7bdd2b, 0x899406f7 + .word 0x404acf5e, 0x2db4ec94, 0x3e7bacf9, 0x14c1bad0 + .word 0x404c0db6, 0xcdd94dee, 0x3e7b7d6c, 0x3dda338b + .word 0x404d49ee, 0x4c325970, 0x3e7b4e81, 0xb4e81b4f + .word 0x404e840b, 0xe74e6a4d, 0x3e7b2036, 0x406c80d9 + .word 0x404fbc16, 0xb902680a, 0x3e7af286, 0xbca1af28 + .word 0x4050790a, 0xdbb03009, 0x3e7ac570, 0x1ac5701b + .word 0x40511307, 0xdad30b76, 0x3e7a98ef, 0x606a63be + .word 0x4051ac05, 0xb291f070, 0x3e7a6d01, 0xa6d01a6d + .word 0x40524407, 0xab0e073a, 0x3e7a41a4, 0x1a41a41a + .word 0x4052db10, 0xfc4d9aaf, 0x3e7a16d3, 0xf97a4b02 + .word 0x40537124, 0xcea4cded, 0x3e79ec8e, 0x951033d9 + .word 0x40540646, 0x3b1b0449, 0x3e79c2d1, 0x4ee4a102 + .word 0x40549a78, 0x4bcd1b8b, 0x3e799999, 0x9999999a + .word 0x40552dbd, 0xfc4c96b3, 0x3e7970e4, 0xf80cb872 + .word 0x4055c01a, 0x39fbd688, 0x3e7948b0, 0xfcd6e9e0 + .word 0x4056518f, 0xe4677ba7, 0x3e7920fb, 0x49d0e229 + 
.word 0x4056e221, 0xcd9d0cde, 0x3e78f9c1, 0x8f9c18fa + .word 0x405771d2, 0xba7efb3c, 0x3e78d301, 0x8d3018d3 + .word 0x405800a5, 0x63161c54, 0x3e78acb9, 0x0f6bf3aa + .word 0x40588e9c, 0x72e0b226, 0x3e7886e5, 0xf0abb04a + .word 0x40591bba, 0x891f1709, 0x3e786186, 0x18618618 + .word 0x4059a802, 0x391e232f, 0x3e783c97, 0x7ab2bedd + .word 0x405a3376, 0x0a7f6051, 0x3e781818, 0x18181818 + .word 0x405abe18, 0x797f1f49, 0x3e77f405, 0xfd017f40 + .word 0x405b47eb, 0xf73882a1, 0x3e77d05f, 0x417d05f4 + .word 0x405bd0f2, 0xe9e79031, 0x3e77ad22, 0x08e0ecc3 + .word 0x405c592f, 0xad295b56, 0x3e778a4c, 0x8178a4c8 + .word 0x405ce0a4, 0x923a587d, 0x3e7767dc, 0xe434a9b1 + .word 0x405d6753, 0xe032ea0f, 0x3e7745d1, 0x745d1746 + .word 0x405ded3f, 0xd442364c, 0x3e772428, 0x7f46debc + .word 0x405e726a, 0xa1e754d2, 0x3e7702e0, 0x5c0b8170 + .word 0x405ef6d6, 0x7328e220, 0x3e76e1f7, 0x6b4337c7 + .word 0x405f7a85, 0x68cb06cf, 0x3e76c16c, 0x16c16c17 + .word 0x405ffd79, 0x9a83ff9b, 0x3e76a13c, 0xd1537290 + .word 0x40603fda, 0x8b97997f, 0x3e768168, 0x16816817 + .word 0x4060809c, 0xf27f703d, 0x3e7661ec, 0x6a5122f9 + .word 0x4060c105, 0x00d63aa6, 0x3e7642c8, 0x590b2164 + .word 0x40610113, 0xb153c8ea, 0x3e7623fa, 0x77016240 + .word 0x406140c9, 0xfaa1e544, 0x3e760581, 0x60581606 + .word 0x40618028, 0xcf72976a, 0x3e75e75b, 0xb8d015e7 + .word 0x4061bf31, 0x1e95d00e, 0x3e75c988, 0x2b931057 + .word 0x4061fde3, 0xd30e8126, 0x3e75ac05, 0x6b015ac0 + .word 0x40623c41, 0xd42727c8, 0x3e758ed2, 0x308158ed + .word 0x40627a4c, 0x0585cbf8, 0x3e7571ed, 0x3c506b3a + .word 0x4062b803, 0x473f7ad1, 0x3e755555, 0x55555555 + .word 0x4062f568, 0x75eb3f26, 0x3e753909, 0x48f40feb + .word 0x4063327c, 0x6ab49ca7, 0x3e751d07, 0xeae2f815 + .word 0x40636f3f, 0xfb6d9162, 0x3e750150, 0x15015015 + .word 0x4063abb3, 0xfaa02167, 0x3e74e5e0, 0xa72f0539 + .word 0x4063e7d9, 0x379f7016, 0x3e74cab8, 0x8725af6e + .word 0x406423b0, 0x7e986aa9, 0x3e74afd6, 0xa052bf5b + .word 0x40645f3a, 0x98a20739, 0x3e749539, 0xe3b2d067 + .word 0x40649a78, 
0x4bcd1b8b, 0x3e747ae1, 0x47ae147b + .word 0x4064d56a, 0x5b33cec4, 0x3e7460cb, 0xc7f5cf9a + .word 0x40651011, 0x8708a8f9, 0x3e7446f8, 0x6562d9fb + .word 0x40654a6e, 0x8ca5438e, 0x3e742d66, 0x25d51f87 + .word 0x40658482, 0x26989d34, 0x3e741414, 0x14141414 + .word 0x4065be4d, 0x0cb51435, 0x3e73fb01, 0x3fb013fb + .word 0x4065f7cf, 0xf41e09af, 0x3e73e22c, 0xbce4a902 + .word 0x4066310b, 0x8f553048, 0x3e73c995, 0xa47babe7 + .word 0x40666a00, 0x8e4788cc, 0x3e73b13b, 0x13b13b14 + .word 0x4066a2af, 0x9e5a0f0a, 0x3e73991c, 0x2c187f63 + .word 0x4066db19, 0x6a76194a, 0x3e738138, 0x13813814 + .word 0x4067133e, 0x9b156c7c, 0x3e73698d, 0xf3de0748 + .word 0x40674b1f, 0xd64e0754, 0x3e73521c, 0xfb2b78c1 + .word 0x406782bd, 0xbfdda657, 0x3e733ae4, 0x5b57bcb2 + .word 0x4067ba18, 0xf93502e4, 0x3e7323e3, 0x4a2b10bf + .word 0x4067f132, 0x2182cf16, 0x3e730d19, 0x0130d190 + .word 0x40682809, 0xd5be7073, 0x3e72f684, 0xbda12f68 + .word 0x40685ea0, 0xb0b27b26, 0x3e72e025, 0xc04b8097 + .word 0x406894f7, 0x4b06ef8b, 0x3e72c9fb, 0x4d812ca0 + .word 0x4068cb0e, 0x3b4b3bbe, 0x3e72b404, 0xad012b40 + .word 0x406900e6, 0x160002cd, 0x3e729e41, 0x29e4129e + .word 0x4069367f, 0x6da0ab2f, 0x3e7288b0, 0x1288b013 + .word 0x40696bda, 0xd2acb5f6, 0x3e727350, 0xb8812735 + .word 0x4069a0f8, 0xd3b0e050, 0x3e725e22, 0x708092f1 + .word 0x4069d5d9, 0xfd5010b3, 0x3e724924, 0x92492492 + .word 0x406a0a7e, 0xda4c112d, 0x3e723456, 0x789abcdf + .word 0x406a3ee7, 0xf38e181f, 0x3e721fb7, 0x8121fb78 + .word 0x406a7315, 0xd02f20c8, 0x3e720b47, 0x0c67c0d9 + .word 0x406aa708, 0xf58014d3, 0x3e71f704, 0x7dc11f70 + .word 0x406adac1, 0xe711c833, 0x3e71e2ef, 0x3b3fb874 + .word 0x406b0e41, 0x26bcc86c, 0x3e71cf06, 0xada2811d + .word 0x406b4187, 0x34a9008c, 0x3e71bb4a, 0x4046ed29 + .word 0x406b7494, 0x8f5532da, 0x3e71a7b9, 0x611a7b96 + .word 0x406ba769, 0xb39e4964, 0x3e719453, 0x808ca29c + .word 0x406bda07, 0x1cc67e6e, 0x3e718118, 0x11811812 + .word 0x406c0c6d, 0x447c5dd3, 0x3e716e06, 0x89427379 + .word 0x406c3e9c, 0xa2e1a055, 
0x3e715b1e, 0x5f75270d + .word 0x406c7095, 0xae91e1c7, 0x3e71485f, 0x0e0acd3b + .word 0x406ca258, 0xdca93316, 0x3e7135c8, 0x1135c811 + .word 0x406cd3e6, 0xa0ca8907, 0x3e712358, 0xe75d3033 + .word 0x406d053f, 0x6d260896, 0x3e711111, 0x11111111 + .word 0x406d3663, 0xb27f31d5, 0x3e70fef0, 0x10fef011 + .word 0x406d6753, 0xe032ea0f, 0x3e70ecf5, 0x6be69c90 + .word 0x406d9810, 0x643d6615, 0x3e70db20, 0xa88f4696 + .word 0x406dc899, 0xab3ff56c, 0x3e70c971, 0x4fbcda3b + .word 0x406df8f0, 0x2086af2c, 0x3e70b7e6, 0xec259dc8 + .word 0x406e2914, 0x2e0e0140, 0x3e70a681, 0x0a6810a7 + .word 0x406e5906, 0x3c8822ce, 0x3e70953f, 0x39010954 + .word 0x406e88c6, 0xb3626a73, 0x3e708421, 0x08421084 + .word 0x406eb855, 0xf8ca88fb, 0x3e707326, 0x0a47f7c6 + .word 0x406ee7b4, 0x71b3a950, 0x3e70624d, 0xd2f1a9fc + .word 0x406f16e2, 0x81db7630, 0x3e705197, 0xf7d73404 + .word 0x406f45e0, 0x8bcf0655, 0x3e704104, 0x10410410 + .word 0x406f74ae, 0xf0efafae, 0x3e703091, 0xb51f5e1a + .word 0x406fa34e, 0x1177c233, 0x3e702040, 0x81020408 + .word 0x406fd1be, 0x4c7f2af9, 0x3e701010, 0x10101010 + .word 0x40700000, 0x00000000, 0x3e700000, 0x00000000 + +! 
__mt_constexp2f: + .word 0x3ff00000, 0x00000000, 0x3ff00b1a, 0xfa5abcbf + .word 0x3ff0163d, 0xa9fb3335, 0x3ff02168, 0x143b0281 + .word 0x3ff02c9a, 0x3e778061, 0x3ff037d4, 0x2e11bbcc + .word 0x3ff04315, 0xe86e7f85, 0x3ff04e5f, 0x72f654b1 + .word 0x3ff059b0, 0xd3158574, 0x3ff0650a, 0x0e3c1f89 + .word 0x3ff0706b, 0x29ddf6de, 0x3ff07bd4, 0x2b72a836 + .word 0x3ff08745, 0x18759bc8, 0x3ff092bd, 0xf66607e0 + .word 0x3ff09e3e, 0xcac6f383, 0x3ff0a9c7, 0x9b1f3919 + .word 0x3fefb558, 0x6cf9890f, 0x3fefc0f1, 0x45e46c85 + .word 0x3fefcc92, 0x2b7247f7, 0x3fefd83b, 0x23395dec + .word 0x3fefe3ec, 0x32d3d1a2, 0x3fefefa5, 0x5fdfa9c5 + .word 0x3feffb66, 0xaffed31b, 0x3ff00730, 0x28d7233e + .word 0x3ff01301, 0xd0125b51, 0x3ff01edb, 0xab5e2ab6 + .word 0x3ff02abd, 0xc06c31cc, 0x3ff036a8, 0x14f204ab + .word 0x3ff0429a, 0xaea92de0, 0x3ff04e95, 0x934f312e + .word 0x3ff05a98, 0xc8a58e51, 0x3ff066a4, 0x5471c3c2 + .word 0x3fef72b8, 0x3c7d517b, 0x3fef7ed4, 0x8695bbc0 + .word 0x3fef8af9, 0x388c8dea, 0x3fef9726, 0x58375d2f + .word 0x3fefa35b, 0xeb6fcb75, 0x3fefaf99, 0xf8138a1c + .word 0x3fefbbe0, 0x84045cd4, 0x3fefc82f, 0x95281c6b + .word 0x3fefd487, 0x3168b9aa, 0x3fefe0e7, 0x5eb44027 + .word 0x3fefed50, 0x22fcd91d, 0x3feff9c1, 0x8438ce4d + .word 0x3ff0063b, 0x88628cd6, 0x3ff012be, 0x3578a819 + .word 0x3ff01f49, 0x917ddc96, 0x3ff02bdd, 0xa27912d1 + .word 0x3fef387a, 0x6e756238, 0x3fef451f, 0xfb82140a + .word 0x3fef51ce, 0x4fb2a63f, 0x3fef5e85, 0x711ece75 + .word 0x3fef6b45, 0x65e27cdd, 0x3fef780e, 0x341ddf29 + .word 0x3fef84df, 0xe1f56381, 0x3fef91ba, 0x7591bb70 + .word 0x3fef9e9d, 0xf51fdee1, 0x3fefab8a, 0x66d10f13 + .word 0x3fefb87f, 0xd0dad990, 0x3fefc57e, 0x39771b2f + .word 0x3fefd285, 0xa6e4030b, 0x3fefdf96, 0x1f641589 + .word 0x3fefecaf, 0xa93e2f56, 0x3feff9d2, 0x4abd886b + .word 0x3fef06fe, 0x0a31b715, 0x3fef1432, 0xedeeb2fd + .word 0x3fef2170, 0xfc4cd831, 0x3fef2eb8, 0x3ba8ea32 + .word 0x3fef3c08, 0xb26416ff, 0x3fef4962, 0x66e3fa2d + .word 0x3fef56c5, 0x5f929ff1, 0x3fef6431, 0xa2de883b + 
.word 0x3fef71a7, 0x373aa9cb, 0x3fef7f26, 0x231e754a + .word 0x3fef8cae, 0x6d05d866, 0x3fef9a40, 0x1b7140ef + .word 0x3fefa7db, 0x34e59ff7, 0x3fefb57f, 0xbfec6cf4 + .word 0x3fefc32d, 0xc313a8e5, 0x3fefd0e5, 0x44ede173 + .word 0x3feedea6, 0x4c123422, 0x3feeec70, 0xdf1c5175 + .word 0x3feefa45, 0x04ac801c, 0x3fef0822, 0xc367a024 + .word 0x3fef160a, 0x21f72e2a, 0x3fef23fb, 0x2709468a + .word 0x3fef31f5, 0xd950a897, 0x3fef3ffa, 0x3f84b9d4 + .word 0x3fef4e08, 0x6061892d, 0x3fef5c20, 0x42a7d232 + .word 0x3fef6a41, 0xed1d0057, 0x3fef786d, 0x668b3237 + .word 0x3fef86a2, 0xb5c13cd0, 0x3fef94e1, 0xe192aed2 + .word 0x3fefa32a, 0xf0d7d3de, 0x3fefb17d, 0xea6db7d7 + .word 0x3feebfda, 0xd5362a27, 0x3feece41, 0xb817c114 + .word 0x3feedcb2, 0x99fddd0d, 0x3feeeb2d, 0x81d8abff + .word 0x3feef9b2, 0x769d2ca7, 0x3fef0841, 0x7f4531ee + .word 0x3fef16da, 0xa2cf6642, 0x3fef257d, 0xe83f4eef + .word 0x3fef342b, 0x569d4f82, 0x3fef42e2, 0xf4f6ad27 + .word 0x3fef51a4, 0xca5d920f, 0x3fef6070, 0xdde910d2 + .word 0x3fef6f47, 0x36b527da, 0x3fef7e27, 0xdbe2c4cf + .word 0x3fef8d12, 0xd497c7fd, 0x3fef9c08, 0x27ff07cc + .word 0x3feeab07, 0xdd485429, 0x3feeba11, 0xfba87a03 + .word 0x3feec926, 0x8a5946b7, 0x3feed845, 0x90998b93 + .word 0x3feee76f, 0x15ad2148, 0x3feef6a3, 0x20dceb71 + .word 0x3fef05e1, 0xb976dc09, 0x3fef152a, 0xe6cdf6f4 + .word 0x3fef247e, 0xb03a5585, 0x3fef33dd, 0x1d1929fd + .word 0x3fef4346, 0x34ccc320, 0x3fef52b9, 0xfebc8fb7 + .word 0x3fef6238, 0x82552225, 0x3fef71c1, 0xc70833f6 + .word 0x3fef8155, 0xd44ca973, 0x3fef90f4, 0xb19e9538 + .word 0x3feea09e, 0x667f3bcd, 0x3feeb052, 0xfa75173e + .word 0x3feec012, 0x750bdabf, 0x3feecfdc, 0xddd47645 + .word 0x3feedfb2, 0x3c651a2f, 0x3feeef92, 0x98593ae5 + .word 0x3feeff7d, 0xf9519484, 0x3fef0f74, 0x66f42e87 + .word 0x3fef1f75, 0xe8ec5f74, 0x3fef2f82, 0x86ead08a + .word 0x3fef3f9a, 0x48a58174, 0x3fef4fbd, 0x35d7cbfd + .word 0x3fef5feb, 0x564267c9, 0x3fef7024, 0xb1ab6e09 + .word 0x3fef8069, 0x4fde5d3f, 0x3fef90b9, 0x38ac1cf6 + .word 0x3feea114, 
0x73eb0187, 0x3feeb17b, 0x0976cfdb + .word 0x3feec1ed, 0x0130c132, 0x3feed26a, 0x62ff86f0 + .word 0x3feee2f3, 0x36cf4e62, 0x3feef387, 0x8491c491 + .word 0x3fef0427, 0x543e1a12, 0x3fef14d2, 0xadd106d9 + .word 0x3fef2589, 0x994cce13, 0x3fef364c, 0x1eb941f7 + .word 0x3fef471a, 0x4623c7ad, 0x3fef57f4, 0x179f5b21 + .word 0x3fef68d9, 0x9b4492ed, 0x3fef79ca, 0xd931a436 + .word 0x3fef8ac7, 0xd98a6699, 0x3fef9bd0, 0xa478580f + .word 0x3feeace5, 0x422aa0db, 0x3feebe05, 0xbad61778 + .word 0x3feecf32, 0x16b5448c, 0x3feee06a, 0x5e0866d9 + .word 0x3feef1ae, 0x99157736, 0x3fef02fe, 0xd0282c8a + .word 0x3fef145b, 0x0b91ffc6, 0x3fef25c3, 0x53aa2fe2 + .word 0x3fef3737, 0xb0cdc5e5, 0x3fef48b8, 0x2b5f98e5 + .word 0x3fef5a44, 0xcbc8520f, 0x3fef6bdd, 0x9a7670b3 + .word 0x3fef7d82, 0x9fde4e50, 0x3fef8f33, 0xe47a22a2 + .word 0x3fefa0f1, 0x70ca07ba, 0x3fefb2bb, 0x4d53fe0d + .word 0x3feec491, 0x82a3f090, 0x3feed674, 0x194bb8d5 + .word 0x3feee863, 0x19e32323, 0x3feefa5e, 0x8d07f29e + .word 0x3fef0c66, 0x7b5de565, 0x3fef1e7a, 0xed8eb8bb + .word 0x3fef309b, 0xec4a2d33, 0x3fef42c9, 0x80460ad8 + .word 0x3fef5503, 0xb23e255d, 0x3fef674a, 0x8af46052 + .word 0x3fef799e, 0x1330b358, 0x3fef8bfe, 0x53c12e59 + .word 0x3fef9e6b, 0x5579fdbf, 0x3fefb0e5, 0x21356eba + .word 0x3fefc36b, 0xbfd3f37a, 0x3fefd5ff, 0x3a3c2774 + .word 0x3feee89f, 0x995ad3ad, 0x3feefb4c, 0xe622f2ff + .word 0x3fef0e07, 0x298db666, 0x3fef20ce, 0x6c9a8952 + .word 0x3fef33a2, 0xb84f15fb, 0x3fef4684, 0x15b749b1 + .word 0x3fef5972, 0x8de5593a, 0x3fef6c6e, 0x29f1c52a + .word 0x3fef7f76, 0xf2fb5e47, 0x3fef928c, 0xf22749e4 + .word 0x3fefa5b0, 0x30a1064a, 0x3fefb8e0, 0xb79a6f1f + .word 0x3fefcc1e, 0x904bc1d2, 0x3fefdf69, 0xc3f3a207 + .word 0x3feff2c2, 0x5bd71e09, 0x3ff00628, 0x6141b33d + .word 0x3fef199b, 0xdd85529c, 0x3fef2d1c, 0xd9fa652c + .word 0x3fef40ab, 0x5fffd07a, 0x3fef5447, 0x78fafb22 + .word 0x3fef67f1, 0x2e57d14b, 0x3fef7ba8, 0x8988c933 + .word 0x3fef8f6d, 0x9406e7b5, 0x3fefa340, 0x5751c4db + .word 0x3fefb720, 0xdcef9069, 
0x3fefcb0f, 0x2e6d1675 + .word 0x3fefdf0b, 0x555dc3fa, 0x3feff315, 0x5b5bab74 + .word 0x3ff0072d, 0x4a07897c, 0x3ff01b53, 0x2b08c968 + .word 0x3ff02f87, 0x080d89f2, 0x3ff043c8, 0xeacaa1d6 + .word 0x3fef5818, 0xdcfba487, 0x3fef6c76, 0xe862e6d3 + .word 0x3fef80e3, 0x16c98398, 0x3fef955d, 0x71ff6075 + .word 0x3fefa9e6, 0x03db3285, 0x3fefbe7c, 0xd63a8315 + .word 0x3fefd321, 0xf301b460, 0x3fefe7d5, 0x641c0658 + .word 0x3feffc97, 0x337b9b5f, 0x3ff01167, 0x6b197d17 + .word 0x3ff02646, 0x14f5a129, 0x3ff03b33, 0x3b16ee12 + .word 0x3ff0502e, 0xe78b3ff6, 0x3ff06539, 0x24676d76 + .word 0x3ff07a51, 0xfbc74c83, 0x3ff08f79, 0x77cdb740 + .word 0x3fefa4af, 0xa2a490da, 0x3fefb9f4, 0x867cca6e + .word 0x3fefcf48, 0x2d8e67f1, 0x3fefe4aa, 0xa2188510 + .word 0x3feffa1b, 0xee615a27, 0x3ff00f9c, 0x1cb6412a + .word 0x3ff0252b, 0x376bba97, 0x3ff03ac9, 0x48dd7274 + .word 0x3ff05076, 0x5b6e4540, 0x3ff06632, 0x798844f8 + .word 0x3ff07bfd, 0xad9cbe14, 0x3ff091d8, 0x02243c89 + .word 0x3ff0a7c1, 0x819e90d8, 0x3ff0bdba, 0x3692d514 + .word 0x3ff0d3c2, 0x2b8f71f1, 0x3ff0e9d9, 0x6b2a23d9 + + .word 0xc057150d, 0x5f6e1c54 ! KA3 = -3.60659926599003171364e-01*256.0 + .word 0x405ec71c, 0x2e92efda ! KA2 = 4.80902715189356683026e-01*256.0 + .word 0xc0671547, 0x653cbec4 ! KA1 = -7.21347520569871841065e-01*256.0 + .word 0x40771547, 0x652af190 ! KA0 = 1.44269504088069658645e+00*256.0 + .word 0x3ecebfbe, 0x9d182250 ! KB2 = 3.66556671660783833261e-06 + .word 0x3f662e43, 0xe2528362 ! KB1 = 2.70760782821392980564e-03 + .word 0x40e00000, 0x00000000 ! HTHRESH = 32768.0 + .word 0xc0e2c000, 0x00000000 ! LTHRESH = -38400.0 ; 0.0f + .word 0x3f800000, 0x00000000 ! 
1.0f ; free + +#define tmp_px STACK_BIAS-48 +#define tmp_py STACK_BIAS-40 +#define tmp_counter STACK_BIAS-32 +#define tmp0 STACK_BIAS-28 +#define tmp1 STACK_BIAS-24 +#define tmp2 STACK_BIAS-20 +#define tmp3 STACK_BIAS-16 +#define tmp4 STACK_BIAS-12 +#define tmp5 STACK_BIAS-8 +#define tmp6 STACK_BIAS-4 + + +#define KA3 %f34 +#define KA2 %f36 +#define KA1 %f38 +#define KA0 %f40 +#define KB2 %f42 +#define KB1 %f44 +#define HTHRESHOLD %f30 +#define LTHRESHOLD %f32 + +#define counter %o7 +#define stridex %i0 +#define stridey %i4 +#define stridez %l3 + +#define CONST_0x8000 %l1 +#define MASK_0x007fffff %l4 +#define MASK_0x7fffffff %l5 + +! size of temp storage (0x30 = 48 bytes, matching the lowest slot offset STACK_BIAS-48) - must be a multiple of 16 for V9 +#define tmps 0x30 + +!-------------------------------------------------------------------- +! !!!!! vpowf algorithm !!!!! +! uy = *(unsigned int*)py; +! ux = *(unsigned int*)px; +! ay = uy & 0x7fffffff; +! ax0 = ux & 0x7fffffff; +! sx = ux >> 31; +! yisint0 = 0; /* Y - non-integer */ +! if (ax0 >= 0x7f800000 || ay >= 0x7f800000) { /* |X| or |Y| = Inf or NaN */ +! if (ax0 > 0x7f800000 || ay > 0x7f800000) { /* |X| or |Y| = NaN */ +! pz[0] = *px * *py; +! goto next; +! } +! if (ay == 0x7f800000) { /* |Y| = Inf */ +! float fy; +! if (ax0 == 0x3f800000) fy = *py - *py; /* +-1 ** +-Inf = NaN */ +! else fy = ((ax0 < 0x3f800000) != (uy >> 31)) ? ZERO : *(float*) &ay; +! pz[0] = fy; +! goto next; +! } +! if (sx) { /* X = -Inf */ +! exp = ay >> 23; +! if (exp >= 0x97) /* |Y| >= 2^24 */ +! yisint0 = 2; /* Y - even */ +! else { +! if (exp >= 0x7f) { /* |Y| >= 1 */ +! i0 = ay >> ((0x7f + 23) - exp); +! if ((i0 << ((0x7f + 23) - exp)) == ay) yisint0 = 2 - (i0 & 1); +! } +! } +! } +! if (uy >> 31) ax0 = 0; +! ax0 += yisint0 << 31; +! pz[0] = *(float*)&ax0; +! goto next; +! } +! exp0 = (ax0 >> 23) - 127; +! if ((int)ux < 0x00800000) { /* X = denormal or negative */ +! if ((int)ax0 < 0x00800000) { /* X = denormal */ +! *((float*) &ax0) = (float) (int)ax0; +! exp0 = (ax0 >> 23) - (127 + 149); +! } +! 
if ((int)ux <= 0) { /* X <= 0 */ +! exp = ay >> 23; +! if (exp >= 0x97) /* |Y| >= 2^24 */ +! yisint0 = 2; /* Y - even */ +! else { +! if (exp >= 0x7f) { /* |Y| >= 1 */ +! i0 = ay >> ((0x7f + 23) - exp); +! if ((i0 << ((0x7f + 23) - exp)) == ay) yisint0 = 2 - (i0 & 1); +! } +! } +! if (ax0 == 0) { /* pow(0,Y) */ +! float fy; +! fy = (uy >> 31) ? ONE / ZERO : ZERO; +! if (sx & yisint0) fy = -fy; +! pz[0] = fy; +! goto next; +! } +! if (yisint0 == 0) { /* pow(neg,non-integer) */ +! pz[0] = ZERO / ZERO; /* NaN */ +! goto next; +! } +! } +! } +! +! ax0 = *px; +! exp0 = ax0 & 0x7fffffff; +! exp0 >>= 23; +! exp0 -= 127; +! exp0 <<= 8; +! ax0 &= 0x007fffff; +! i0 = ax0 + 0x8000; +! i0 &= 0xffff0000; +! ind0 = i0 >> 12; +! ind0 &= -8; +! i0 = ax0 - i0; +! dtmp0 = (double) i0; +! dtmp1 = *(double *)((char*)__mt_constlog4f + ind0 + 8); +! y0 = dtmp0 * dtmp1; +! dtmp0 = *(double *)((char*)__mt_constlog4f + ind0); +! dtmp1 = (double) exp0; +! yy0 = dtmp0 + dtmp1; +! dtmp0 = KA3 * y0; +! dtmp0 += KA2; +! dtmp0 *= y0; +! dtmp0 += KA1; +! dtmp0 *= y0; +! dtmp0 += KA0; +! dtmp0 *= y0; +! yy0 += dtmp0; +! ftmp0 = *py0; +! dtmp0 = (double)ftmp0; +! yy0 *= dtmp0; +! if (yy0 >= HTHRESH) +! yy0 = HTHRESH; +! if (yy0 <= LTHRESH) +! yy0 = LTHRESH; +! ind0 = (int) yy0; +! ((int*)&dtmp1)[0] = ind0; +! ((int*)&dtmp1)[1] = 0; +! dtmp1 = vis_fpackfix(dtmp1); +! dtmp0 = (double)ind0; +! y0 = yy0 - dtmp0; +! dtmp0 = KB2 * y0; +! dtmp0 += KB1; +! yy0 = dtmp0 * y0; +! ind0 &= 255; +! ind0 <<= 3; +! di0 = *(double*)((char*)__mt_constexp2f + ind0); +! di0 = vis_fpadd32(di0,dtmp1); +! yy0 *= di0; +! yy0 += di0; +! ftmp0 = (float)yy0; +! *pz0 = ftmp0; +!-------------------------------------------------------------------- +! !!!!! vpowf algorithm,stridex=0 !!!!! +! +! ax = ax0 = *px; +! exp0 = ax0 & 0x7fffffff; +! exp0 >>= 23; +! exp0 -= 127; +! exp0 <<= 8; +! ax0 &= 0x007fffff; +! i0 = ax0 + 0x8000; +! i0 &= 0xffff0000; +! ind0 = i0 >> 12; +! ind0 &= -8; +! i0 = ax0 - i0; +! dtmp0 = (double) i0; +! 
dtmp1 = *(double *)((char*)__mt_constlog4f + ind0 + 8); +! y0 = dtmp0 * dtmp1; +! dtmp0 = *(double *)((char*)__mt_constlog4f + ind0); +! dtmp1 = (double) exp0; +! yy0 = dtmp0 + dtmp1; +! dtmp0 = KA3 * y0; +! dtmp0 += KA2; +! dtmp0 *= y0; +! dtmp0 += KA1; +! dtmp0 *= y0; +! dtmp0 += KA0; +! dtmp0 *= y0; +! yy = yy0 + dtmp0; +! +! uy = ((int*)py)[0]; +! ay = uy & 0x7fffffff; +! if (ay >= 0x7f800000) { /* |Y| = Inf or Nan */ +! float fy; +! if (ay > 0x7f800000) fy = *py + *py; /* |Y| = Nan */ +! else fy = ((ax < 0x3f800000) != (uy >> 31)) ? ZERO : *(float*)&ay; +! pz[0] = fy; +! goto next; +! } +! +! +! ftmp0 = py[0]; +! dtmp0 = (double)ftmp0; +! yy0 = dtmp0 * yy; +! if (yy0 >= HTHRESH) +! yy0 = HTHRESH; +! if (yy0 <= LTHRESH) +! yy0 = LTHRESH; +! ii0 = (int) yy0; +! dtmp0 = (double)ii0; +! i0 = ii0 >> 5; +! i0 &= -8; +! di0 = ((double*)((char*)(__mt_constexp2fb + 150) + i0))[0]; +! y0 = yy0 - dtmp0; +! dtmp0 = KB2 * y0; +! dtmp0 += KB1; +! yy0 = dtmp0 * y0; +! ii0 &= 255; +! ii0 <<= 3; +! dtmp0 = ((double*)((char*)__mt_constexp2fa + ii0))[0]; +! di0 *= dtmp0; +! dtmp0 = yy0 * di0; +! dtmp0 += di0; +! ftmp0 = (float)dtmp0; +! 
pz[0] = ftmp0; +!-------------------------------------------------------------------- + ENTRY(__vpowf) + save %sp,-SA(MINFRAME)-tmps,%sp + PIC_SETUP(l7) + PIC_SET(l7,.CONST_TBL,l2) + wr %g0,0x60,%gsr + +#ifdef __sparcv9 + ldx [%fp+STACK_BIAS+176],stridez +#else + ld [%fp+STACK_BIAS+92],stridez +#endif + + ld [%i1],%o3 + add %l2,2064,%l0 + st %i0,[%fp+tmp_counter] + add %l0,2048,%l6 + ldd [%l6],KA3 + ldd [%l6+8],KA2 + sll stridey,2,stridey + ldd [%l6+16],KA1 + sll stridez,2,stridez + ldd [%l6+24],KA0 + sll %i2,2,stridex + ldd [%l6+32],KB2 + sethi %hi(0x7ffffc00),MASK_0x7fffffff + fzero %f2 + ldd [%l6+40],KB1 + add MASK_0x7fffffff,1023,MASK_0x7fffffff + fzero %f10 + ldd [%l6+48],HTHRESHOLD + sethi %hi(0x7ffc00),MASK_0x007fffff + fzero %f20 + ldd [%l6+56],LTHRESHOLD + sethi %hi(0x8000),CONST_0x8000 + add MASK_0x007fffff,1023,MASK_0x007fffff + + cmp stridex,0 + bne,pt %icc,.common_case + sethi %hi(0x00800000),%l6 + + cmp %o3,%l6 + bl,pn %icc,.common_case + sethi %hi(0x7f800000),%o1 + + cmp %o3,%o1 + bge,pn %icc,.common_case + sethi %hi(0x3f800000),%l6 + + cmp %o3,%l6 + bne,pt %icc,.stridex_zero + nop + +.common_case: + stx %i1,[%fp+tmp_px] + stx %i3,[%fp+tmp_py] +.begin: + ld [%fp+tmp_counter],counter + ldx [%fp+tmp_px],%o2 + ldx [%fp+tmp_py],%i2 + st %g0,[%fp+tmp_counter] +.begin1: + cmp counter,0 + ble,pn %icc,.exit + lda [%o2]0x82,%i1 ! (Y0_2) ax0 = *px; + + lda [%i2]0x82,%l7 + sethi %hi(0xffff0000),%l6 + sethi %hi(0x7f800000),%o5 + + and %i1,MASK_0x7fffffff,%i3 ! (Y0_2) exp0 = ax0 & 0x7fffffff; + and %i1,MASK_0x007fffff,%g5 ! (Y0_2) ax0 &= 0x007fffff; + + cmp %i3,%o5 ! (Y0_2) ax0 ? 0x7f800000 + bge,pn %icc,.spec1 ! (Y0_2) if( ax0 >= 0x7f800000 ) + and %l7,MASK_0x7fffffff,%o4 + + cmp %o4,%o5 ! (Y0_2) ay0 ? 0x7f800000 + bge,pn %icc,.spec1 ! (Y0_2) if( ay0 >= 0x7f800000 ) + nop + + cmp %i1,MASK_0x007fffff ! (Y0_2) ux0 ? 0x800000 + ble,pn %icc,.spec2 ! (Y0_2) if(ux0 < 0x800000) + srl %i3,23,%o3 ! (Y0_2) exp0 >>= 23; + + sub %o3,127,%o3 ! 
(Y0_2) exp0 -= 127; + + add %g5,CONST_0x8000,%i3 ! (Y0_2) i0 = ax0 + 0x8000; + + sll %o3,8,%o4 ! (Y0_2) exp0 <<= 8; + and %i3,%l6,%i3 ! (Y0_2) i0 &= 0xffff0000; + st %o4,[%fp+tmp3] ! (Y0_2) STORE exp0 + + sub %g5,%i3,%o4 ! (Y0_2) i0 = ax0 - i0; + st %o4,[%fp+tmp2] ! (Y0_2) STORE i0 + add %o2,stridex,%o2 ! px += stridex + + sra %i3,12,%o0 ! (Y0_2) ind0 = i0 >> 12; + lda [%o2]0x82,%o3 ! (Y1_2) ax0 = *px; + + and %o0,-8,%g5 ! (Y0_2) ind0 &= -8; + ld [%fp+tmp2],%f14 ! (Y0_2) dtmp0 = (double) i0; + + and %o3,MASK_0x7fffffff,%i3 ! (Y1_2) exp0 = ax0 & 0x7fffffff; + and %o3,MASK_0x007fffff,%o0 ! (Y1_2) ax0 &= 0x007fffff; + + cmp %i3,%o5 ! (Y1_2) ax0 ? 0x7f800000 + add %l2,%g5,%g1 ! (Y0_2) (char*)__mt_constlog4f + ind0 + + srl %i3,23,%i3 ! (Y1_2) exp0 >>= 23; + add %o0,CONST_0x8000,%i1 ! (Y1_2) i0 = ax0 + 0x8000; + + ldd [%g1+8],%f48 ! (Y0_2) dtmp1 = *(double *)((char*)__mt_constlog4f + ind0 + 8); + sub %i3,127,%i3 ! (Y1_2) exp0 -= 127; + fitod %f14,%f60 ! (Y0_2) dtmp0 = (double) i0; + + sll %i3,8,%i3 ! (Y1_2) exp0 <<= 8; + and %i1,%l6,%i1 ! (Y1_2) i0 &= 0xffff0000; + st %i3,[%fp+tmp4] ! (Y1_2) STORE exp0 + + sub %o0,%i1,%o0 ! (Y1_2) i0 = ax0 - i0; + st %o0,[%fp+tmp5] ! (Y1_2) STORE i0 + bge,pn %icc,.update0 ! (Y1_2) if(ax0 >= 0x7f800000) + nop +.cont0: + cmp %o3,MASK_0x007fffff ! (Y1_2) ux0 ? 0x800000 + + fmuld %f60,%f48,%f48 ! (Y0_2) y0 = dtmp0 * dtmp1; + ble,pn %icc,.update1 ! (Y1_2) if(ux0 < 0x800000) + nop +.cont1: + fmuld KA3,%f48,%f62 ! (Y0_2) dtmp0 = KA3 * y0; + + faddd %f62,KA2,%f22 ! (Y0_2) dtmp0 += KA2; + + sra %i1,12,%o1 ! (Y1_2) ind0 = i0 >> 12; + add %o2,stridex,%i3 ! px += stridex + lda [stridex+%o2]0x82,%g1 ! (Y2_2) ax0 = *px; + + and %o1,-8,%o0 ! (Y1_2) ind0 &= -8; + ld [%fp+tmp5],%f12 ! (Y1_2) LOAD i0 + + and %g1,MASK_0x7fffffff,%i1 ! (Y2_2) exp0 = ax0 & 0x7fffffff; + and %g1,MASK_0x007fffff,%o2 ! (Y2_2) ax0 &= 0x007fffff; + lda [%i2]0x82,%f0 ! (Y0_2) ftmp0 = *py0; + + srl %i1,23,%o3 ! (Y2_2) exp0 >>= 23; + cmp %i1,%o5 ! (Y2_2) ax0 ? 
0x7f800000 + + fmuld %f22,%f48,%f26 ! (Y0_2) dtmp0 *= y0; + add %l2,%o0,%i1 ! (Y1_2) (char*)__mt_constlog4f + ind0 + sub %o3,127,%l7 ! (Y2_2) exp0 -= 127; + + add %o2,CONST_0x8000,%o1 ! (Y2_2) i0 = ax0 + 0x8000; + ldd [%i1+8],%f50 ! (Y1_2) dtmp1 = *(double *)((char*)__mt_constlog4f + ind0 + 8); + fitod %f12,%f28 ! (Y1_2) dtmp0 = (double) i0; + + sll %l7,8,%l7 ! (Y2_2) exp0 <<= 8; + and %o1,%l6,%o1 ! (Y2_2) i0 &= 0xffff0000; + st %l7,[%fp+tmp6] ! (Y2_2) STORE exp0 + + sub %o2,%o1,%i1 ! (Y2_2) i0 = ax0 - i0; + st %i1,[%fp+tmp2] ! (Y2_2) STORE i0 + bge,pn %icc,.update2 ! (Y2_2) if(ax0 >= 0x7f800000) + nop +.cont2: + cmp %g1,MASK_0x007fffff ! (Y2_2) ux0 ? 0x800000 + + fmuld %f28,%f50,%f46 ! (Y1_2) y0 = dtmp0 * dtmp1; + ble,pn %icc,.update3 ! (Y2_2) if(ux0 < 0x800000) + faddd %f26,KA1,%f50 ! (Y0_2) dtmp0 += KA1; +.cont3: + ld [%fp+tmp3],%f4 ! (Y0_2) dtmp1 = (double) exp0; + + fstod %f0,%f24 ! (Y0_2) dtmp0 = (double)ftmp0; + + fmuld KA3,%f46,%f28 ! (Y1_1) dtmp0 = KA3 * y0; + + fitod %f4,%f26 ! (Y0_1) dtmp1 = (double) exp0; + + fmuld %f50,%f48,%f50 ! (Y0_1) dtmp0 *= y0; + + faddd %f28,KA2,%f28 ! (Y1_1) dtmp0 += KA2; + + ldd [%l2+%g5],%f60 ! (Y0_1) dtmp0 = *(double *)((char*)__mt_constlog4f + ind0); + add %i3,stridex,%o2 ! px += stridex + + lda [%o2]0x82,%i1 ! (Y0_2) ax0 = *px; + sra %o1,12,%g5 ! (Y2_1) ind0 = i0 >> 12; + + faddd %f50,KA0,%f58 ! (Y0_1) dtmp0 += KA0; + and %g5,-8,%o1 ! (Y2_1) ind0 &= -8; + ld [%fp+tmp2],%f6 ! (Y2_1) dtmp0 = (double) i0; + + and %i1,MASK_0x7fffffff,%i3 ! (Y0_2) exp0 = ax0 & 0x7fffffff; + and %i1,MASK_0x007fffff,%g5 ! (Y0_2) ax0 &= 0x007fffff; + + srl %i3,23,%o3 ! (Y0_2) exp0 >>= 23; + add %l2,%o1,%g1 ! (Y2_1) (char*)__mt_constlog4f + ind0 + faddd %f60,%f26,%f26 ! (Y0_1) yy0 = dtmp0 + dtmp1; + + fmuld %f28,%f46,%f50 ! (Y1_1) dtmp0 *= y0; + sub %o3,127,%o3 ! (Y0_2) exp0 -= 127; + cmp %i3,%o5 ! (Y0_2) ax0 ? 0x7f800000 + + fmuld %f58,%f48,%f48 ! (Y0_1) dtmp0 *= y0; + add %g5,CONST_0x8000,%i3 ! (Y0_2) i0 = ax0 + 0x8000; + ldd [%g1+8],%f58 ! 
(Y2_1) dtmp1 = *(double *)((char*)__mt_constlog4f + ind0 + 8); + fitod %f6,%f54 ! (Y2_1) dtmp0 = (double) i0; + + sll %o3,8,%o4 ! (Y0_2) exp0 <<= 8; + and %i3,%l6,%i3 ! (Y0_2) i0 &= 0xffff0000; + st %o4,[%fp+tmp3] ! (Y0_2) STORE exp0 + + sub %g5,%i3,%o4 ! (Y0_2) i0 = ax0 - i0; + st %o4,[%fp+tmp2] ! (Y0_2) STORE i0 + bge,pn %icc,.update4 ! (Y0_2) if( ax0 >= 0x7f800000 ) + nop +.cont4: + lda [stridey+%i2]0x82,%g1 ! (Y1_1) ay0 = *(unsigned*)py0 + add %i2,stridey,%o4 ! py += stridey + cmp %i1,MASK_0x007fffff ! (Y0_2) ux0 ? 0x800000 + + fmuld %f54,%f58,%f28 ! (Y2_1) y0 = dtmp0 * dtmp1; + lda [stridey+%i2]0x82,%f2 ! (Y1_1) ftmp0 = *py0; + ble,pn %icc,.update5 ! (Y0_2) if(ux0 < 0x800000) + faddd %f50,KA1,%f54 ! (Y1_1) dtmp0 += KA1; +.cont5: + and %g1,MASK_0x7fffffff,%g1 ! (Y1_1) ay0 &= 0x7fffffff; + ld [%fp+tmp4],%f1 ! (Y1_1) LOAD exp0 + faddd %f26,%f48,%f58 ! (Y0_1) yy0 += dtmp0; + + cmp %g1,%o5 ! (Y1_1) ay0 ? 0x7f800000 + bge,pn %icc,.update6 ! (Y1_1) if(ay0 >= 0x7f800000) + nop +.cont6: + fmuld KA3,%f28,%f62 ! (Y2_1) dtmp0 = KA3 * y0; + fstod %f2,%f22 ! (Y1_1) dtmp0 = (double)ftmp0; + + fmuld %f24,%f58,%f58 ! (Y0_1) yy0 *= dtmp0; + + fitod %f1,%f48 ! (Y1_1) dtmp1 = (double) exp0; + + fmuld %f54,%f46,%f54 ! (Y1_1) dtmp0 *= y0; + + faddd %f62,KA2,%f26 ! (Y2_1) dtmp0 += KA2; + + add %o2,stridex,%o2 ! px += stridex + ldd [%l2+%o0],%f60 ! (Y1_1) dtmp0 = *(double *)((char*)__mt_constlog4f + ind0); + fcmped %fcc0,HTHRESHOLD,%f58 ! (Y0_1) if (yy0 >= HTHRESH) + + sra %i3,12,%o0 ! (Y0_2) ind0 = i0 >> 12; + lda [%o2]0x82,%o3 ! (Y1_2) ax0 = *px; + + faddd %f54,KA0,%f56 ! (Y1_1) dtmp0 += KA0; + and %o0,-8,%g5 ! (Y0_2) ind0 &= -8; + ld [%fp+tmp2],%f14 ! (Y0_2) dtmp0 = (double) i0; + + and %o3,MASK_0x7fffffff,%i3 ! (Y1_2) exp0 = ax0 & 0x7fffffff; + and %o3,MASK_0x007fffff,%o0 ! (Y1_2) ax0 &= 0x007fffff; + + cmp %i3,%o5 ! (Y1_2) ax0 ? 0x7f800000 + add %l2,%g5,%g1 ! (Y0_2) (char*)__mt_constlog4f + ind0 + faddd %f60,%f48,%f12 ! (Y1_1) yy0 = dtmp0 + dtmp1; + + fmuld %f26,%f28,%f50 ! 
(Y2_1) dtmp0 *= y0; + srl %i3,23,%i3 ! (Y1_2) exp0 >>= 23; + add %o0,CONST_0x8000,%i1 ! (Y1_2) i0 = ax0 + 0x8000; + fcmped %fcc1,LTHRESHOLD,%f58 ! (Y0_1) if (yy0 <= LTHRESH) + + fmuld %f56,%f46,%f46 ! (Y1_1) dtmp0 *= y0; + ldd [%g1+8],%f48 ! (Y0_2) dtmp1 = *(double *)((char*)__mt_constlog4f + ind0 + 8); + sub %i3,127,%i3 ! (Y1_2) exp0 -= 127; + fitod %f14,%f60 ! (Y0_2) dtmp0 = (double) i0; + + sll %i3,8,%i2 ! (Y1_2) exp0 <<= 8; + and %i1,%l6,%i1 ! (Y1_2) i0 &= 0xffff0000; + st %i2,[%fp+tmp4] ! (Y1_2) STORE exp0 + + sub %o0,%i1,%o0 ! (Y1_2) i0 = ax0 - i0; + st %o0,[%fp+tmp5] ! (Y1_2) STORE i0 + bge,pn %icc,.update7 ! (Y1_2) if(ax0 >= 0x7f800000) + nop +.cont7: + lda [stridey+%o4]0x82,%i3 ! Y(2_1) ay0 = *py0 + cmp %o3,MASK_0x007fffff ! (Y1_2) ux0 ? 0x800000 + add %o4,stridey,%i2 ! py += stridey; + fmovdl %fcc0,HTHRESHOLD,%f58 ! (Y0_1) yy0 = HTHRESH; + + fmuld %f60,%f48,%f48 ! (Y0_2) y0 = dtmp0 * dtmp1; + lda [stridey+%o4]0x82,%f16 ! (Y2_1) ftmp0 = *py0; + ble,pn %icc,.update8 ! (Y1_2) if(ux0 < 0x800000) + faddd %f50,KA1,%f52 ! (Y2_1) dtmp0 += KA1; +.cont8: + and %i3,MASK_0x7fffffff,%i3 ! (Y2_1) ay0 &= 0x7fffffff + ld [%fp+tmp6],%f17 ! (Y2_1) dtmp1 = (double) exp0; + faddd %f12,%f46,%f60 ! (Y1_1) yy0 += dtmp0; + + cmp %i3,%o5 ! (Y2_1) ay0 ? 0x7f800000 + bge,pn %icc,.update9 ! (Y2_1) if(ay0 >= 0x7f800000) + nop + +.cont9: + fmovdg %fcc1,LTHRESHOLD,%f58 ! (Y0_1) yy0 = LTHRESH; + + fmuld KA3,%f48,%f62 ! (Y0_2) dtmp0 = KA3 * y0; + fstod %f16,%f54 ! (Y2_1) dtmp0 = (double)ftmp0; + + fmuld %f22,%f60,%f56 ! (Y1_1) yy0 *= dtmp0; + + fitod %f17,%f24 ! (Y2_1) dtmp1 = (double) exp0; + + fmuld %f52,%f28,%f52 ! (Y2_1) dtmp0 *= y0; + fdtoi %f58,%f10 ! (Y0_1) ind0 = (int) yy0; + + st %f10,[%fp+tmp0] ! (Y0_1) STORE ind0 + faddd %f62,KA2,%f22 ! (Y0_2) dtmp0 += KA2; + + fcmped %fcc0,HTHRESHOLD,%f56 ! (Y1_1) if (yy0 >= HTHRESH) + ldd [%l2+%o1],%f60 ! (Y2_1) dtmp0 = *(double *)((char*)__mt_constlog4f + ind0); + + sra %i1,12,%o1 ! (Y1_2) ind0 = i0 >> 12; + add %o2,stridex,%i3 ! 
px += stridex + lda [stridex+%o2]0x82,%g1 ! (Y2_2) ax0 = *px; + + and %o1,-8,%o0 ! (Y1_2) ind0 &= -8; + add %i2,stridey,%i2 ! py += stridey + ld [%fp+tmp5],%f12 ! (Y1_2) LOAD i0 + faddd %f52,KA0,%f4 ! (Y2_1) dtmp0 += KA0; + + and %g1,MASK_0x7fffffff,%i1 ! (Y2_2) exp0 = ax0 & 0x7fffffff; + and %g1,MASK_0x007fffff,%o2 ! (Y2_2) ax0 &= 0x007fffff; + lda [%i2]0x82,%f0 ! (Y0_2) ftmp0 = *py0; + fitod %f10,%f52 ! (Y0_1) dtmp0 = (double)ind0; + + srl %i1,23,%o3 ! (Y2_2) exp0 >>= 23; + cmp %i1,%o5 ! (Y2_2) ax0 ? 0x7f800000 + faddd %f60,%f24,%f18 ! (Y2_1) yy0 = dtmp0 + dtmp1; + + fmuld %f22,%f48,%f26 ! (Y0_2) dtmp0 *= y0; + add %l2,%o0,%i1 ! (Y1_2) (char*)__mt_constlog4f + ind0 + sub %o3,127,%l7 ! (Y2_2) exp0 -= 127; + fcmped %fcc1,LTHRESHOLD,%f56 ! (Y1_1) if (yy0 <= LTHRESH) + + fmuld %f4,%f28,%f24 ! (Y2_1) dtmp0 *= y0; + add %o2,CONST_0x8000,%o1 ! (Y2_2) i0 = ax0 + 0x8000; + ldd [%i1+8],%f50 ! (Y1_2) dtmp1 = *(double *)((char*)__mt_constlog4f + ind0 + 8); + fitod %f12,%f28 ! (Y1_2) dtmp0 = (double) i0; + + sll %l7,8,%l7 ! (Y2_2) exp0 <<= 8; + and %o1,%l6,%o1 ! (Y2_2) i0 &= 0xffff0000; + st %l7,[%fp+tmp6] ! (Y2_2) STORE exp0 + fsubd %f58,%f52,%f60 ! (Y0_1) y0 = yy0 - dtmp0; + + + sub %o2,%o1,%i1 ! (Y2_2) i0 = ax0 - i0; + st %i1,[%fp+tmp2] ! (Y2_2) STORE i0 + bge,pn %icc,.update10 ! (Y2_2) if(ax0 >= 0x7f800000) + nop +.cont10: + lda [%i2]0x82,%o2 ! (Y0_2) ay0 = *(int*)py0; + cmp %g1,MASK_0x007fffff ! (Y2_2) ux0 ? 0x800000 + fmovdl %fcc0,HTHRESHOLD,%f56 ! (Y1_1) yy0 = HTHRESH; + + fmuld %f28,%f50,%f46 ! (Y1_2) y0 = dtmp0 * dtmp1; + ble,pn %icc,.update11 ! (Y2_2) if(ux0 < 0x800000) + faddd %f26,KA1,%f50 ! (Y0_2) dtmp0 += KA1; +.cont11: + fmuld KB2,%f60,%f62 ! (Y0_1) dtmp0 = KB2 * y0; + and %o2,MASK_0x7fffffff,%o2 ! (Y0_2) ay0 &= 0x7fffffff + ld [%fp+tmp3],%f4 ! (Y0_2) dtmp1 = (double) exp0; + faddd %f18,%f24,%f52 ! (Y2_1) yy0 += dtmp0; + + ld [%fp+tmp0],%g1 ! (Y0_1) LAOD ind0 + cmp %o2,%o5 ! (Y0_2) ay0 ? 0x7f800000 + bge,pn %icc,.update12 ! 
(Y0_2) if( ay0 >= 0x7f800000) + nop +.cont12: + fstod %f0,%f24 ! (Y0_2) dtmp0 = (double)ftmp0; + + cmp counter,6 ! counter + bl,pn %icc,.tail + sub %i5,stridez,%o4 + + ba .main_loop + nop + + .align 16 +.main_loop: + fmuld KA3,%f46,%f28 ! (Y1_1) dtmp0 = KA3 * y0; + and %g1,255,%o2 ! (Y0_0) ind0 &= 255; + sub counter,3,counter ! counter + fmovdg %fcc1,LTHRESHOLD,%f56 ! (Y1_0) yy0 = LTHRESH; + + fmuld %f54,%f52,%f18 ! (Y2_0) yy0 *= dtmp0; + sll %o2,3,%i1 ! (Y0_0) ind0 <<= 3; + add %o4,stridez,%l7 ! pz += stridez + faddd %f62,KB1,%f62 ! (Y0_0) dtmp0 += KB1; + + fpackfix %f10,%f10 ! (Y0_0) dtmp1 = vis_fpackfix(dtmp1); + fitod %f4,%f26 ! (Y0_1) dtmp1 = (double) exp0; + ldd [%l0+%i1],%f58 ! (Y0_0) di0 = *(double*)((char*)__mt_constexp2f + ind0); + + fmuld %f50,%f48,%f50 ! (Y0_1) dtmp0 *= y0; + fdtoi %f56,%f20 ! (Y1_0) ind0 = (int) yy0; + st %f20,[%fp+tmp1] ! (Y1_0) STORE ind0 + + faddd %f28,KA2,%f28 ! (Y1_1) dtmp0 += KA2; + + fmuld %f62,%f60,%f62 ! (Y0_0) yy0 = dtmp0 * y0; + ldd [%l2+%g5],%f60 ! (Y0_1) dtmp0 = *(double *)((char*)__mt_constlog4f + ind0); + add %i3,stridex,%o2 ! px += stridex + fcmped %fcc0,HTHRESHOLD,%f18 ! (Y2_0) if (yy0 >= HTHRESH) + + lda [%o2]0x82,%i1 ! (Y0_2) ax0 = *px; + sra %o1,12,%g5 ! (Y2_1) ind0 = i0 >> 12; + fpadd32 %f10,%f58,%f22 ! (Y0_0) di0 = vis_fpadd32(di0,dtmp1); + + faddd %f50,KA0,%f58 ! (Y0_1) dtmp0 += KA0; + and %g5,-8,%o1 ! (Y2_1) ind0 &= -8; + ld [%fp+tmp2],%f6 ! (Y2_1) dtmp0 = (double) i0; + + fitod %f20,%f52 ! (Y1_0) dtmp0 = (double)ind0; + and %i1,MASK_0x7fffffff,%i3 ! (Y0_2) exp0 = ax0 & 0x7fffffff; + and %i1,MASK_0x007fffff,%g5 ! (Y0_2) ax0 &= 0x007fffff; + + fmuld %f62,%f22,%f62 ! (Y0_0) yy0 *= di0; + srl %i3,23,%o3 ! (Y0_2) exp0 >>= 23; + add %l2,%o1,%g1 ! (Y2_1) (char*)__mt_constlog4f + ind0 + faddd %f60,%f26,%f26 ! (Y0_1) yy0 = dtmp0 + dtmp1; + + fmuld %f28,%f46,%f50 ! (Y1_1) dtmp0 *= y0; + sub %o3,127,%o3 ! (Y0_2) exp0 -= 127; + cmp %i3,%o5 ! (Y0_2) ax0 ? 0x7f800000 + fcmped %fcc1,LTHRESHOLD,%f18 ! 
(Y2_0) if (yy0 <= LTHRESH) + + fmuld %f58,%f48,%f48 ! (Y0_1) dtmp0 *= y0; + add %g5,CONST_0x8000,%i3 ! (Y0_2) i0 = ax0 + 0x8000; + ldd [%g1+8],%f58 ! (Y2_1) dtmp1 = *(double *)((char*)__mt_constlog4f + ind0 + 8); + fitod %f6,%f54 ! (Y2_1) dtmp0 = (double) i0; + + sll %o3,8,%o4 ! (Y0_2) exp0 <<= 8; + and %i3,%l6,%i3 ! (Y0_2) i0 &= 0xffff0000; + st %o4,[%fp+tmp3] ! (Y0_2) STORE exp0 + fsubd %f56,%f52,%f52 ! (Y1_0) y0 = yy0 - dtmp0; + + sub %g5,%i3,%o4 ! (Y0_2) i0 = ax0 - i0; + st %o4,[%fp+tmp2] ! (Y0_2) STORE i0 + bge,pn %icc,.update13 ! (Y0_2) if( ax0 >= 0x7f800000 ) + faddd %f62,%f22,%f62 ! (Y0_0) yy0 += di0; +.cont13: + lda [stridey+%i2]0x82,%g1 ! (Y1_1) ay0 = *(unsigned*)py0 + add %i2,stridey,%o4 ! py += stridey + cmp %i1,MASK_0x007fffff ! (Y0_2) ux0 ? 0x800000 + fmovdl %fcc0,HTHRESHOLD,%f18 ! (Y2_0) yy0 = HTHRESH; + + fmuld %f54,%f58,%f28 ! (Y2_1) y0 = dtmp0 * dtmp1; + lda [stridey+%i2]0x82,%f2 ! (Y1_1) ftmp0 = *py0; + ble,pn %icc,.update14 ! (Y0_2) if(ux0 < 0x800000) + faddd %f50,KA1,%f54 ! (Y1_1) dtmp0 += KA1; +.cont14: + fmuld KB2,%f52,%f56 ! (Y1_0) dtmp0 = KB2 * y0; + and %g1,MASK_0x7fffffff,%g1 ! (Y1_1) ay0 &= 0x7fffffff; + ld [%fp+tmp4],%f1 ! (Y1_1) LOAD exp0 + faddd %f26,%f48,%f58 ! (Y0_1) yy0 += dtmp0; + + ld [%fp+tmp1],%g5 ! (Y1_0) ind0 = (int) yy0; + cmp %g1,%o5 ! (Y1_1) ay0 ? 0x7f800000 + bge,pn %icc,.update15 ! (Y1_1) if(ay0 >= 0x7f800000) + fdtos %f62,%f8 ! (Y0_0) ftmp0 = (float)yy0; +.cont15: + st %f8,[%l7] ! (Y0_0) *pz0 = ftmp0; + fmovdg %fcc1,LTHRESHOLD,%f18 ! (Y2_0) yy0 = LTHRESH; + + add %l7,stridez,%l7 ! pz += stridez + fmuld KA3,%f28,%f62 ! (Y2_1) dtmp0 = KA3 * y0; + and %g5,255,%g5 ! (Y1_0) ind0 &= 255; + fstod %f2,%f22 ! (Y1_1) dtmp0 = (double)ftmp0; + + fmuld %f24,%f58,%f58 ! (Y0_1) yy0 *= dtmp0; + sll %g5,3,%i2 ! (Y1_0) ind0 <<= 3; + faddd %f56,KB1,%f60 ! (Y1_0) dtmp0 += KB1; + + fpackfix %f20,%f20 ! (Y1_0) dtmp1 = vis_fpackfix(dtmp1); + fitod %f1,%f48 ! (Y1_1) dtmp1 = (double) exp0; + ldd [%l0+%i2],%f56 ! 
(Y1_0) di0 = *(double*)((char*)__mt_constexp2f + ind0); + + fmuld %f54,%f46,%f54 ! (Y1_1) dtmp0 *= y0; + fdtoi %f18,%f2 ! (Y2_0) ind0 = (int) yy0; + st %f2,[%fp+tmp1] ! (Y2_0) STORE ind0 + + faddd %f62,KA2,%f26 ! (Y2_1) dtmp0 += KA2; + + fmuld %f60,%f52,%f62 ! (Y1_0) yy0 = dtmp0 * y0; + add %o2,stridex,%o2 ! px += stridex + ldd [%l2+%o0],%f60 ! (Y1_1) dtmp0 = *(double *)((char*)__mt_constlog4f + ind0); + fcmped %fcc0,HTHRESHOLD,%f58 ! (Y0_1) if (yy0 >= HTHRESH) + + fpadd32 %f20,%f56,%f52 ! (Y1_0) di0 = vis_fpadd32(di0,dtmp1); + sra %i3,12,%o0 ! (Y0_2) ind0 = i0 >> 12; + lda [%o2]0x82,%o3 ! (Y1_2) ax0 = *px; + + faddd %f54,KA0,%f56 ! (Y1_1) dtmp0 += KA0; + and %o0,-8,%g5 ! (Y0_2) ind0 &= -8; + ld [%fp+tmp2],%f14 ! (Y0_2) dtmp0 = (double) i0; + + fitod %f2,%f54 ! (Y2_0) dtmp0 = (double)ind0; + and %o3,MASK_0x7fffffff,%i3 ! (Y1_2) exp0 = ax0 & 0x7fffffff; + and %o3,MASK_0x007fffff,%o0 ! (Y1_2) ax0 &= 0x007fffff; + + fmuld %f62,%f52,%f62 ! (Y1_0) yy0 *= di0; + cmp %i3,%o5 ! (Y1_2) ax0 ? 0x7f800000 + add %l2,%g5,%g1 ! (Y0_2) (char*)__mt_constlog4f + ind0 + faddd %f60,%f48,%f12 ! (Y1_1) yy0 = dtmp0 + dtmp1; + + fmuld %f26,%f28,%f50 ! (Y2_1) dtmp0 *= y0; + srl %i3,23,%i3 ! (Y1_2) exp0 >>= 23; + add %o0,CONST_0x8000,%i1 ! (Y1_2) i0 = ax0 + 0x8000; + fcmped %fcc1,LTHRESHOLD,%f58 ! (Y0_1) if (yy0 <= LTHRESH) + + fmuld %f56,%f46,%f46 ! (Y1_1) dtmp0 *= y0; + ldd [%g1+8],%f48 ! (Y0_2) dtmp1 = *(double *)((char*)__mt_constlog4f + ind0 + 8); + sub %i3,127,%i3 ! (Y1_2) exp0 -= 127; + fitod %f14,%f60 ! (Y0_2) dtmp0 = (double) i0; + + sll %i3,8,%i2 ! (Y1_2) exp0 <<= 8; + and %i1,%l6,%i1 ! (Y1_2) i0 &= 0xffff0000; + st %i2,[%fp+tmp4] ! (Y1_2) STORE exp0 + fsubd %f18,%f54,%f26 ! (Y2_0) y0 = yy0 - dtmp0; + + sub %o0,%i1,%o0 ! (Y1_2) i0 = ax0 - i0; + st %o0,[%fp+tmp5] ! (Y1_2) STORE i0 + bge,pn %icc,.update16 ! (Y1_2) if(ax0 >= 0x7f800000) + faddd %f62,%f52,%f54 ! (Y1_0) yy0 += di0; +.cont16: + lda [stridey+%o4]0x82,%i3 ! Y(2_1) ay0 = *py0 + cmp %o3,MASK_0x007fffff ! (Y1_2) ux0 ? 
0x800000 + add %o4,stridey,%i2 ! py += stridey; + fmovdl %fcc0,HTHRESHOLD,%f58 ! (Y0_1) yy0 = HTHRESH; + + fmuld %f60,%f48,%f48 ! (Y0_2) y0 = dtmp0 * dtmp1; + lda [stridey+%o4]0x82,%f16 ! (Y2_1) ftmp0 = *py0; + ble,pn %icc,.update17 ! (Y1_2) if(ux0 < 0x800000) + faddd %f50,KA1,%f52 ! (Y2_1) dtmp0 += KA1; +.cont17: + fmuld KB2,%f26,%f4 ! (Y2_0) dtmp0 = KB2 * y0; + and %i3,MASK_0x7fffffff,%i3 ! (Y2_1) ay0 &= 0x7fffffff + ld [%fp+tmp6],%f17 ! (Y2_1) dtmp1 = (double) exp0; + faddd %f12,%f46,%f60 ! (Y1_1) yy0 += dtmp0; + + ld [%fp+tmp1],%o0 + cmp %i3,%o5 ! (Y2_1) ay0 ? 0x7f800000 + bge,pn %icc,.update18 ! (Y2_1) if(ay0 >= 0x7f800000) + fdtos %f54,%f15 ! (Y1_0) ftmp0 = (float)yy0; +.cont18: + st %f15,[%l7] ! (Y1_0) *pz0 = ftmp0; + add %l7,stridez,%o4 ! pz += stridez + fmovdg %fcc1,LTHRESHOLD,%f58 ! (Y0_1) yy0 = LTHRESH; + + fmuld KA3,%f48,%f62 ! (Y0_2) dtmp0 = KA3 * y0; + and %o0,255,%o0 ! (Y2_0) ind0 &= 255; + fstod %f16,%f54 ! (Y2_1) dtmp0 = (double)ftmp0; + + fmuld %f22,%f60,%f56 ! (Y1_1) yy0 *= dtmp0; + sll %o0,3,%l7 ! (Y2_0) ind0 <<= 3; + faddd %f4,KB1,%f60 ! (Y2_0) dtmp0 += KB1; + + fpackfix %f2,%f2 ! (Y2_0) dtmp1 = vis_fpackfix(dtmp1); + fitod %f17,%f24 ! (Y2_1) dtmp1 = (double) exp0; + ldd [%l0+%l7],%f4 ! (Y2_0) di0 = *(double*)((char*)__mt_constexp2f + ind0); + + fmuld %f52,%f28,%f52 ! (Y2_1) dtmp0 *= y0; + fdtoi %f58,%f10 ! (Y0_1) ind0 = (int) yy0; + + st %f10,[%fp+tmp0] ! (Y0_1) STORE ind0 + faddd %f62,KA2,%f22 ! (Y0_2) dtmp0 += KA2; + + fmuld %f60,%f26,%f62 ! (Y2_0) yy0 = dtmp0 * y0; + fcmped %fcc0,HTHRESHOLD,%f56 ! (Y1_1) if (yy0 >= HTHRESH) + ldd [%l2+%o1],%f60 ! (Y2_1) dtmp0 = *(double *)((char*)__mt_constlog4f + ind0); + + sra %i1,12,%o1 ! (Y1_2) ind0 = i0 >> 12; + add %o2,stridex,%i3 ! px += stridex + lda [stridex+%o2]0x82,%g1 ! (Y2_2) ax0 = *px; + fpadd32 %f2,%f4,%f46 ! (Y2_0) di0 = vis_fpadd32(di0,dtmp1); + + and %o1,-8,%o0 ! (Y1_2) ind0 &= -8; + add %i2,stridey,%i2 ! py += stridey + ld [%fp+tmp5],%f12 ! (Y1_2) LOAD i0 + faddd %f52,KA0,%f4 ! 
(Y2_1) dtmp0 += KA0; + + and %g1,MASK_0x7fffffff,%i1 ! (Y2_2) exp0 = ax0 & 0x7fffffff; + and %g1,MASK_0x007fffff,%o2 ! (Y2_2) ax0 &= 0x007fffff; + lda [%i2]0x82,%f0 ! (Y0_2) ftmp0 = *py0; + fitod %f10,%f52 ! (Y0_1) dtmp0 = (double)ind0; + + fmuld %f62,%f46,%f62 ! (Y2_0) yy0 *= di0; + srl %i1,23,%o3 ! (Y2_2) exp0 >>= 23; + cmp %i1,%o5 ! (Y2_2) ax0 ? 0x7f800000 + faddd %f60,%f24,%f18 ! (Y2_1) yy0 = dtmp0 + dtmp1; + + fmuld %f22,%f48,%f26 ! (Y0_2) dtmp0 *= y0; + add %l2,%o0,%i1 ! (Y1_2) (char*)__mt_constlog4f + ind0 + sub %o3,127,%l7 ! (Y2_2) exp0 -= 127; + fcmped %fcc1,LTHRESHOLD,%f56 ! (Y1_1) if (yy0 <= LTHRESH) + + fmuld %f4,%f28,%f24 ! (Y2_1) dtmp0 *= y0; + add %o2,CONST_0x8000,%o1 ! (Y2_2) i0 = ax0 + 0x8000; + ldd [%i1+8],%f50 ! (Y1_2) dtmp1 = *(double *)((char*)__mt_constlog4f + ind0 + 8); + fitod %f12,%f28 ! (Y1_2) dtmp0 = (double) i0; + + sll %l7,8,%l7 ! (Y2_2) exp0 <<= 8; + and %o1,%l6,%o1 ! (Y2_2) i0 &= 0xffff0000; + st %l7,[%fp+tmp6] ! (Y2_2) STORE exp0 + fsubd %f58,%f52,%f60 ! (Y0_1) y0 = yy0 - dtmp0; + + sub %o2,%o1,%i1 ! (Y2_2) i0 = ax0 - i0; + st %i1,[%fp+tmp2] ! (Y2_2) STORE i0 + bge,pn %icc,.update19 ! (Y2_2) if(ax0 >= 0x7f800000) + faddd %f62,%f46,%f22 ! (Y2_0) yy0 += di0; +.cont19: + lda [%i2]0x82,%o2 ! (Y0_2) ay0 = *(int*)py0; + cmp %g1,MASK_0x007fffff ! (Y2_2) ux0 ? 0x800000 + fmovdl %fcc0,HTHRESHOLD,%f56 ! (Y1_1) yy0 = HTHRESH; + + fmuld %f28,%f50,%f46 ! (Y1_2) y0 = dtmp0 * dtmp1; + ble,pn %icc,.update20 ! (Y2_2) if(ux0 < 0x800000) + faddd %f26,KA1,%f50 ! (Y0_2) dtmp0 += KA1; +.cont20: + fmuld KB2,%f60,%f62 ! (Y0_1) dtmp0 = KB2 * y0; + and %o2,MASK_0x7fffffff,%o2 ! (Y0_2) ay0 &= 0x7fffffff + ld [%fp+tmp3],%f4 ! (Y0_2) dtmp1 = (double) exp0; + faddd %f18,%f24,%f52 ! (Y2_1) yy0 += dtmp0; + + ld [%fp+tmp0],%g1 ! (Y0_1) LAOD ind0 + cmp %o2,%o5 ! (Y0_2) ay0 ? 0x7f800000 + bge,pn %icc,.update21 ! (Y0_2) if( ay0 >= 0x7f800000) + fdtos %f22,%f12 ! (Y2_0) ftmp0 = (float)yy0; +.cont21: + st %f12,[%o4] ! (Y2_0) *pz0 = ftmp0; + cmp counter,6 ! 
counter + bge,pt %icc,.main_loop + fstod %f0,%f24 ! (Y0_2) dtmp0 = (double)ftmp0; + +.tail: + subcc counter,1,counter + bneg,pn %icc,.begin + add %o4,stridez,%i5 + + fmuld KA3,%f46,%f28 ! (Y1_1) dtmp0 = KA3 * y0; + and %g1,255,%o2 ! (Y0_0) ind0 &= 255; + fmovdg %fcc1,LTHRESHOLD,%f56 ! (Y1_0) yy0 = LTHRESH; + + fmuld %f54,%f52,%f18 ! (Y2_0) yy0 *= dtmp0; + sll %o2,3,%i1 ! (Y0_0) ind0 <<= 3; + add %o4,stridez,%l7 ! pz += stridez + faddd %f62,KB1,%f62 ! (Y0_0) dtmp0 += KB1; + + fpackfix %f10,%f10 ! (Y0_0) dtmp1 = vis_fpackfix(dtmp1); + fitod %f4,%f26 ! (Y0_1) dtmp1 = (double) exp0; + ldd [%l0+%i1],%f58 ! (Y0_0) di0 = *(double*)((char*)__mt_constexp2f + ind0); + + fmuld %f50,%f48,%f50 ! (Y0_1) dtmp0 *= y0; + fdtoi %f56,%f20 ! (Y1_0) ind0 = (int) yy0; + st %f20,[%fp+tmp1] ! (Y1_0) STORE ind0 + + faddd %f28,KA2,%f28 ! (Y1_1) dtmp0 += KA2; + + fmuld %f62,%f60,%f62 ! (Y0_0) yy0 = dtmp0 * y0; + ldd [%l2+%g5],%f60 ! (Y0_1) dtmp0 = *(double *)((char*)__mt_constlog4f + ind0); + fcmped %fcc0,HTHRESHOLD,%f18 ! (Y2_0) if (yy0 >= HTHRESH) + + fpadd32 %f10,%f58,%f22 ! (Y0_0) di0 = vis_fpadd32(di0,dtmp1); + + faddd %f50,KA0,%f58 ! (Y0_1) dtmp0 += KA0; + + fitod %f20,%f52 ! (Y1_0) dtmp0 = (double)ind0; + + fmuld %f62,%f22,%f62 ! (Y0_0) yy0 *= di0; + faddd %f60,%f26,%f26 ! (Y0_1) yy0 = dtmp0 + dtmp1; + + fmuld %f28,%f46,%f50 ! (Y1_1) dtmp0 *= y0; + fcmped %fcc1,LTHRESHOLD,%f18 ! (Y2_0) if (yy0 <= LTHRESH) + + fmuld %f58,%f48,%f48 ! (Y0_1) dtmp0 *= y0; + + fsubd %f56,%f52,%f52 ! (Y1_0) y0 = yy0 - dtmp0; + + faddd %f62,%f22,%f62 ! (Y0_0) yy0 += di0; + + lda [stridey+%i2]0x82,%g1 ! (Y1_1) ay0 = *(unsigned*)py0 + add %i2,stridey,%o4 ! py += stridey + fmovdl %fcc0,HTHRESHOLD,%f18 ! (Y2_0) yy0 = HTHRESH; + + lda [stridey+%i2]0x82,%f2 ! (Y1_1) ftmp0 = *py0; + faddd %f50,KA1,%f54 ! (Y1_1) dtmp0 += KA1; + + fmuld KB2,%f52,%f56 ! (Y1_0) dtmp0 = KB2 * y0; + and %g1,MASK_0x7fffffff,%g1 ! (Y1_1) ay0 &= 0x7fffffff; + ld [%fp+tmp4],%f1 ! (Y1_1) LOAD exp0 + faddd %f26,%f48,%f58 ! 
(Y0_1) yy0 += dtmp0; + + ld [%fp+tmp1],%g5 ! (Y1_0) ind0 = (int) yy0; + cmp %g1,%o5 ! (Y1_1) ay0 ? 0x7f800000 + bge,pn %icc,.update22 ! (Y1_1) if(ay0 >= 0x7f800000) + fdtos %f62,%f8 ! (Y0_0) ftmp0 = (float)yy0; +.cont22: + st %f8,[%l7] ! (Y0_0) *pz0 = ftmp0; + fmovdg %fcc1,LTHRESHOLD,%f18 ! (Y2_0) yy0 = LTHRESH; + + subcc counter,1,counter + bneg,pn %icc,.begin + add %l7,stridez,%i5 + + add %l7,stridez,%l7 ! pz += stridez + and %g5,255,%g5 ! (Y1_0) ind0 &= 255; + fstod %f2,%f22 ! (Y1_1) dtmp0 = (double)ftmp0; + + fmuld %f24,%f58,%f58 ! (Y0_1) yy0 *= dtmp0; + sll %g5,3,%i2 ! (Y1_0) ind0 <<= 3; + faddd %f56,KB1,%f60 ! (Y1_0) dtmp0 += KB1; + + fpackfix %f20,%f20 ! (Y1_0) dtmp1 = vis_fpackfix(dtmp1); + fitod %f1,%f48 ! (Y1_1) dtmp1 = (double) exp0; + ldd [%l0+%i2],%f56 ! (Y1_0) di0 = *(double*)((char*)__mt_constexp2f + ind0); + + fmuld %f54,%f46,%f54 ! (Y1_1) dtmp0 *= y0; + fdtoi %f18,%f2 ! (Y2_0) ind0 = (int) yy0; + st %f2,[%fp+tmp1] ! (Y2_0) STORE ind0 + + + fmuld %f60,%f52,%f62 ! (Y1_0) yy0 = dtmp0 * y0; + ldd [%l2+%o0],%f60 ! (Y1_1) dtmp0 = *(double *)((char*)__mt_constlog4f + ind0); + fcmped %fcc0,HTHRESHOLD,%f58 ! (Y0_1) if (yy0 >= HTHRESH) + + fpadd32 %f20,%f56,%f52 ! (Y1_0) di0 = vis_fpadd32(di0,dtmp1); + + faddd %f54,KA0,%f56 ! (Y1_1) dtmp0 += KA0; + + fitod %f2,%f54 ! (Y2_0) dtmp0 = (double)ind0; + + fmuld %f62,%f52,%f62 ! (Y1_0) yy0 *= di0; + faddd %f60,%f48,%f12 ! (Y1_1) yy0 = dtmp0 + dtmp1; + + fcmped %fcc1,LTHRESHOLD,%f58 ! (Y0_1) if (yy0 <= LTHRESH) + + fmuld %f56,%f46,%f46 ! (Y1_1) dtmp0 *= y0; + + fsubd %f18,%f54,%f26 ! (Y2_0) y0 = yy0 - dtmp0; + + faddd %f62,%f52,%f54 ! (Y1_0) yy0 += di0; + + fmovdl %fcc0,HTHRESHOLD,%f58 ! (Y0_1) yy0 = HTHRESH; + + + fmuld KB2,%f26,%f4 ! (Y2_0) dtmp0 = KB2 * y0; + faddd %f12,%f46,%f60 ! (Y1_1) yy0 += dtmp0; + + ld [%fp+tmp1],%o0 + fdtos %f54,%f15 ! (Y1_0) ftmp0 = (float)yy0; + + st %f15,[%l7] ! (Y1_0) *pz0 = ftmp0; + add %l7,stridez,%o4 ! pz += stridez + fmovdg %fcc1,LTHRESHOLD,%f58 ! 
(Y0_1) yy0 = LTHRESH; + + subcc counter,1,counter + bneg,pn %icc,.begin + or %g0,%o4,%i5 + + and %o0,255,%o0 ! (Y2_0) ind0 &= 255; + + fmuld %f22,%f60,%f56 ! (Y1_1) yy0 *= dtmp0; + sll %o0,3,%l7 ! (Y2_0) ind0 <<= 3; + faddd %f4,KB1,%f60 ! (Y2_0) dtmp0 += KB1; + + fpackfix %f2,%f2 ! (Y2_0) dtmp1 = vis_fpackfix(dtmp1); + ldd [%l0+%l7],%f4 ! (Y2_0) di0 = *(double*)((char*)__mt_constexp2f + ind0); + + fdtoi %f58,%f10 ! (Y0_1) ind0 = (int) yy0; + + st %f10,[%fp+tmp0] ! (Y0_1) STORE ind0 + + fmuld %f60,%f26,%f62 ! (Y2_0) yy0 = dtmp0 * y0; + fcmped %fcc0,HTHRESHOLD,%f56 ! (Y1_1) if (yy0 >= HTHRESH) + + fpadd32 %f2,%f4,%f46 ! (Y2_0) di0 = vis_fpadd32(di0,dtmp1); + + add %i2,stridey,%i2 ! py += stridey + + fitod %f10,%f52 ! (Y0_1) dtmp0 = (double)ind0; + + fmuld %f62,%f46,%f62 ! (Y2_0) yy0 *= di0; + + fcmped %fcc1,LTHRESHOLD,%f56 ! (Y1_1) if (yy0 <= LTHRESH) + + + fsubd %f58,%f52,%f60 ! (Y0_1) y0 = yy0 - dtmp0; + + faddd %f62,%f46,%f22 ! (Y2_0) yy0 += di0; + + fmovdl %fcc0,HTHRESHOLD,%f56 ! (Y1_1) yy0 = HTHRESH; + + fmuld KB2,%f60,%f62 ! (Y0_1) dtmp0 = KB2 * y0; + + ld [%fp+tmp0],%g1 ! (Y0_1) LAOD ind0 + fdtos %f22,%f12 ! (Y2_0) ftmp0 = (float)yy0; + + st %f12,[%o4] ! (Y2_0) *pz0 = ftmp0; + + subcc counter,1,counter + bneg,pn %icc,.begin + add %o4,stridez,%i5 + + and %g1,255,%o2 ! (Y0_0) ind0 &= 255; + fmovdg %fcc1,LTHRESHOLD,%f56 ! (Y1_0) yy0 = LTHRESH; + + sll %o2,3,%i1 ! (Y0_0) ind0 <<= 3; + add %o4,stridez,%l7 ! pz += stridez + faddd %f62,KB1,%f62 ! (Y0_0) dtmp0 += KB1; + + fpackfix %f10,%f10 ! (Y0_0) dtmp1 = vis_fpackfix(dtmp1); + ldd [%l0+%i1],%f58 ! (Y0_0) di0 = *(double*)((char*)__mt_constexp2f + ind0); + + fdtoi %f56,%f20 ! (Y1_0) ind0 = (int) yy0; + st %f20,[%fp+tmp1] ! (Y1_0) STORE ind0 + + fmuld %f62,%f60,%f62 ! (Y0_0) yy0 = dtmp0 * y0; + + fpadd32 %f10,%f58,%f22 ! (Y0_0) di0 = vis_fpadd32(di0,dtmp1); + + fitod %f20,%f52 ! (Y1_0) dtmp0 = (double)ind0; + + fmuld %f62,%f22,%f62 ! (Y0_0) yy0 *= di0; + + fsubd %f56,%f52,%f52 ! 
(Y1_0) y0 = yy0 - dtmp0; + + faddd %f62,%f22,%f62 ! (Y0_0) yy0 += di0; + + fmuld KB2,%f52,%f56 ! (Y1_0) dtmp0 = KB2 * y0; + + ld [%fp+tmp1],%g5 ! (Y1_0) ind0 = (int) yy0; + fdtos %f62,%f8 ! (Y0_0) ftmp0 = (float)yy0; + st %f8,[%l7] ! (Y0_0) *pz0 = ftmp0; + + subcc counter,1,counter + bneg .begin + add %l7,stridez,%i5 + + add %l7,stridez,%l7 ! pz += stridez + and %g5,255,%g5 ! (Y1_0) ind0 &= 255; + + sll %g5,3,%i2 ! (Y1_0) ind0 <<= 3; + faddd %f56,KB1,%f60 ! (Y1_0) dtmp0 += KB1; + + fpackfix %f20,%f20 ! (Y1_0) dtmp1 = vis_fpackfix(dtmp1); + ldd [%l0+%i2],%f56 ! (Y1_0) di0 = *(double*)((char*)__mt_constexp2f + ind0); + + fmuld %f60,%f52,%f62 ! (Y1_0) yy0 = dtmp0 * y0; + + fpadd32 %f20,%f56,%f52 ! (Y1_0) di0 = vis_fpadd32(di0,dtmp1); + + fmuld %f62,%f52,%f62 ! (Y1_0) yy0 *= di0; + + faddd %f62,%f52,%f54 ! (Y1_0) yy0 += di0; + + fdtos %f54,%f15 ! (Y1_0) ftmp0 = (float)yy0; + + st %f15,[%l7] ! (Y1_0) *pz0 = ftmp0; + ba .begin + add %l7,stridez,%i5 ! pz += stridez + +.exit: + ret + restore + + .align 16 +.specs_exit: + add %i1,stridex,%o2 + add %i3,stridey,%i2 + st %f4,[%i5] + + sub counter,1,counter + ba .begin1 + add %i5,stridez,%i5 + +.spec1: + ld [%l0+2048+64],%f0 ! LOAD 1.0f + or %g0,%i1,%o1 + or %g0,%i3,%o3 + + ld [%o2],%f4 ! *px + or %g0,%o2,%i1 + or %g0,%i2,%i3 + + ld [%i3],%f6 ! *py + or %g0,%l7,%o2 + fsubs %f0,%f0,%f5 ! 0.0f + + sethi %hi(0x7f800000),%l6 + cmp %o4,0 ! ay ? 0 + be,a,pn %icc,.specs_exit ! if(ay == 0) + fmovs %f0,%f4 ! return 1.0f + + cmp %o3,%l6 ! ax0 ? 0x7f800000 + bgu,a %icc,.specs_exit ! ax0 > 0x7f800000 + fmuls %f4,%f6,%f4 ! return *px * *py; /* |X| or |Y| = Nan */ + + cmp %o4,%l6 ! ay ? 0x7f800000 + bgu,a .specs_exit ! ay > 0x7f800000 + fmuls %f4,%f6,%f4 ! return *px * *py; /* |X| or |Y| = Nan */ + + sethi %hi(0x3f800000),%o5 + bne,a %icc,1f ! if (ay != 0x7f800000) { /* |Y| = Inf */ + srl %o1,31,%o1 ! sx = ux >> 31 + + cmp %o3,%o5 ! ax0 ? 0x3f800000 + be,a .specs_exit ! if (ax0 == 0x3f800000) + fmuls %f6,%f5,%f4 ! 
return *py * 0.0f; /* +-1 ** +-Inf = NaN */ + + sub %o3,%o5,%o3 ! ax0 - 0x3f800000 + srl %o2,31,%o2 ! uy >> 31 + + srlx %o3,63,%o3 ! (ax0 - 0x3f800000) << 63 + + cmp %o3,%o2 ! ((ax0 - 0x3f800000) << 63) ? (uy >> 31) + bne,a .specs_exit + fzeros %f4 ! return 0.f; + + ba .specs_exit + fabss %f6,%f4 ! return fabss(*py) +1: + cmp %o1,0 ! sx ? 0 + be,pn %icc,.spec1_exit ! if (sx == 0) + or %g0,%g0,%o5 ! yisint0 = 0; + + srl %o4,23,%l7 ! exp = ay >> 23; + cmp %l7,0x97 ! exp ? 0x97 + bge,a,pn %icc,.spec1_exit ! if (exp >= 0x97) /* |Y| >= 2^24 */ + add %g0,2,%o5 ! yisint = 2; + + cmp %l7,0x7f ! exp ? 0x7f + bl,pn %icc,.spec1_exit ! if (exp < 0x7f) + sub %g0,%l7,%l7 ! exp = -exp; + + add %l7,(0x7f + 23),%l7 ! exp += (0x07f + 23); + srl %o4,%l7,%l6 ! i0 = ay >> exp + sll %l6,%l7,%l7 ! i0 << exp + + cmp %l7,%o4 ! (i0 << exp) ? ay + bne,pn %icc,.spec1_exit ! if((i0 << exp) != ay) + and %l6,1,%l6 ! i0 &= 1 + + sub %g0,%l6,%l6 ! i0 = -i0; + add %l6,2,%o5 ! yisint0 = 2 + i0; + +.spec1_exit: + srl %o2,31,%o2 ! uy >> 31 + cmp %o2,0 ! (uy >> 31) ? 0 + movne %icc,%g0,%o3 ! if (uy >> 31) ax0 = 0; + + sll %o5,31,%o5 ! yisint0 <<= 31; + add %o5,%o3,%o5 ! ax0 += yisint0; + + add %i1,stridex,%o2 ! px += stridex; + add %i3,stridey,%i2 ! py += stridey; + st %o5,[%i5] ! return *(float*)&ax0; + + sub counter,1,counter ! counter--; + ba .begin1 + add %i5,stridez,%i5 ! pz += stridez; + +.spec2: + or %g0,%i1,%o1 + or %g0,%i3,%o3 + ld [%l0+2048+64],%f0 ! LOAD 1.0f + or %g0,%o2,%i1 + or %g0,%i2,%i3 + + or %g0,%l7,%o2 + cmp %o4,0 ! ay ? 0 + be,a,pn %icc,.specs_exit ! if(ay == 0) + fmovs %f0,%f4 ! return 1.0f + + srl %o3,23,%l7 ! exp0 = (ax0 >> 23); + sub %l7,127,%l7 ! exp = exp0 = exp0 - 127; + + or %g0,%g0,%o5 ! yisint = 0; + cmp %o3,MASK_0x007fffff ! (int)ax0 ? 0x00800000 + bg,pn %icc,1f ! if ((int)ax0 >= 0x00800000) + nop + + ! X = denormal or negative + st %o3,[%fp+tmp0] ! 
*((float*) &ax0) = (float) (int)ax0; + ld [%fp+tmp0],%f4 + fitos %f4,%f4 + st %f4,[%fp+tmp0] + ld [%fp+tmp0],%o3 + + srl %o3,23,%l7 ! exp = (ax0 >> 23) + sub %l7,127+149,%l7 ! exp -= (127+149) +1: + cmp %o1,0 ! ux ? 0 + bg,a %icc,.spec_proc ! if((int)ux > 0) + sethi %hi(0xffff0000),%l6 + + srl %o4,23,%o0 ! exp = ay >> 23; + cmp %o0,0x97 ! exp ? 0x97 + bge,a,pn %icc,2f ! if (exp >= 0x97) /* |Y| >= 2^24 */ + add %g0,2,%o5 ! yisint0 = 2; /* Y - even */ + + cmp %o0,0x7f ! exp ? 0x7f + bl,pn %icc,2f ! if(exp < 0x7f) + nop + + sub %g0,%o0,%o0 ! exp = -exp; + add %o0,(0x7f + 23),%o0 ! exp += (0x7f + 23) + srl %o4,%o0,%l6 ! i0 = ay >> ((0x7f + 23) - exp); + sll %l6,%o0,%o0 ! i0 << ((0x7f + 23) - exp + cmp %o0,%o4 ! (i0 << ((0x7f + 23) - exp)) ? ay + bne,pn %icc,2f ! if(i0 << ((0x7f + 23) - exp)) != ay) + nop + + and %l6,1,%l6 ! i0 &= 1; + sub %g0,%l6,%l6 ! i0 = -i0; + add %l6,2,%o5 ! yisint = i0 + 2; +2: + cmp %o3,0 ! ax0 ? 0 + bne,pn %icc,4f ! if(ax0 != 0) + nop + + srl %o1,31,%o1 ! sx = ux >> 31 + srl %o2,31,%o2 ! uy >> 31 + + cmp %o2,0 ! (uy >> 31) ? 0 + be,a,pn %icc,3f ! if((uy >> 31) == 0) + fzeros %f4 ! return ZERO + + fdivs %f0,%f3,%f4 ! fy = ONE/ZERO +3: + andcc %o1,%o5,%g0 ! sx & yisint0 + be,pn %icc,.specs_exit ! if( (sx & yisint0) == 0 ) + nop + + ba .specs_exit + fnegs %f4,%f4 ! fy = -fy; +4: + cmp %o5,0 ! ysisint0 ? 0 + be,a %icc,.specs_exit ! if(yisint0 == 0) + fdivs %f3,%f3,%f4 ! return ZERO/ZERO + + sethi %hi(0xffff0000),%l6 + +.spec_proc: + sll %l7,8,%l7 ! exp0 = exp0 << 8; + st %l7,[%fp+tmp1] ! STORE exp0 + and %o3,MASK_0x007fffff,%g5 ! ax0 &= 0x007fffff; + ld [%i3],%f14 ! ftmp0 = py[0] + sllx %o5,63,%o5 ! ysisint0 <<= 63; + add %g5,CONST_0x8000,%o3 ! i0 = ax0 + 0x8000; + stx %o5,[%fp+tmp5] ! STORE yisint0 + and %o3,%l6,%l7 ! i0 &= 0xffff0000; + sub %g5,%l7,%o1 ! i0 = ax0 - i0; + sra %l7,12,%g5 ! ind0 = i0 >> 12; + st %o1,[%fp+tmp2] ! STORE i0 + fstod %f14,%f54 ! dtmp1 = (double)ftmp0 + and %g5,-8,%g5 ! ind0 &= -8; + add %l2,%g5,%l7 ! 
(char*)__mt_constlog4f + ind0 + ld [%fp+tmp1],%f18 ! LOAD exp0 + ld [%fp+tmp2],%f16 ! LOAD i0 + ldd [%l7+8],%f62 ! dtmp2 = *(double *)((char*)__mt_constlog4f + ind0 + 8); + ldd [%l2+%g5],%f56 ! dtmp3 = *(double *)((char*)__mt_constlog4f + ind0); + fitod %f18,%f58 ! dtmp4 = (double)exp0 + fitod %f16,%f60 ! dtmp5 = (double)i0 + fmuld %f60,%f62,%f60 ! y0 = dtmp5 * dtmp2; + faddd %f56,%f58,%f58 ! yy0 = dtmp3 + dtmp4; + fmuld KA3,%f60,%f52 ! dtmp0 = KA3 * y0; + faddd %f52,KA2,%f50 ! dtmp0 += KA2; + fmuld %f50,%f60,%f48 ! dtmp0 *= y0; + faddd %f48,KA1,%f46 ! dtmp0 += KA1; + fmuld %f46,%f60,%f62 ! dtmp0 *= y0; + ldd [%fp+tmp5],%f24 ! LOAD yisint0 + faddd %f62,KA0,%f56 ! dtmp0 += KA0; + fmuld %f56,%f60,%f52 ! dtmp0 *= y0; + faddd %f58,%f52,%f50 ! yy0 += dtmp1; + fmuld %f54,%f50,%f52 ! yy0 *= dtmp1; + fcmped %fcc0,HTHRESHOLD,%f52 ! if (yy0 >= HTHRESH) + fcmped %fcc1,LTHRESHOLD,%f52 ! yy0 = HTHRESH; + fmovdl %fcc0,HTHRESHOLD,%f52 ! if (yy0 <= LTHRESH) + fmovdg %fcc1,LTHRESHOLD,%f52 ! yy0 = LTHRESH; + fdtoi %f52,%f20 ! ind0 = (int) yy0; + st %f20,[%fp+tmp3] ! STORE ind0 + fitod %f20,%f58 ! dtmp0 = (double) ind0; + fpackfix %f20,%f20 ! dtmp1 = vis_fpackfix(dtmp1) + ld [%fp+tmp3],%g1 ! LOAD ind0 + fsubd %f52,%f58,%f46 ! y0 = yy0 - dtmp0; + fpadd32 %f20,%f24,%f56 ! dtmp1 += yisint0 + and %g1,255,%o4 ! ind0 &= 255; + sll %o4,3,%o3 ! ind0 <<= 3; + ldd [%l0+%o3],%f54 ! di0 = *(double*)((char*)__mt_constexp2f + ind0); + fmuld KB2,%f46,%f48 ! dtmp0 = KB2 * y0; + fpadd32 %f56,%f54,%f56 ! di0 = vis_fpadd32(di0,dtmp1); + faddd %f48,KB1,%f62 ! dtmp0 += KB1; + fmuld %f62,%f46,%f60 ! yy0 = dtmp0 * y0; + fmuld %f60,%f56,%f52 ! yy0 *= di0; + faddd %f52,%f56,%f58 ! yy0 += di0; + ba .specs_exit + fdtos %f58,%f4 ! 
ftmp0 = (float)yy0; + + .align 16 +.update0: + cmp counter,1 + ble .cont0 + nop + + add %i2,stridey,%o1 + stx %o2,[%fp+tmp_px] + + stx %o1,[%fp+tmp_py] + sub counter,1,counter + + st counter,[%fp+tmp_counter] + ba .cont0 + or %g0,1,counter + + .align 16 +.update1: + cmp counter,1 + ble .cont1 + nop + + add %i2,stridey,%o1 + stx %o2,[%fp+tmp_px] + + stx %o1,[%fp+tmp_py] + sub counter,1,counter + + st counter,[%fp+tmp_counter] + ba .cont1 + or %g0,1,counter + + .align 16 +.update2: + cmp counter,2 + ble .cont2 + nop + + add %i2,stridey,%o2 + stx %i3,[%fp+tmp_px] + + add %o2,stridey,%o2 + stx %o2,[%fp+tmp_py] + + sub counter,2,counter + st counter,[%fp+tmp_counter] + ba .cont2 + or %g0,2,counter + + .align 16 +.update3: + cmp counter,2 + ble .cont3 + nop + + add %i2,stridey,%o2 + stx %i3,[%fp+tmp_px] + + add %o2,stridey,%o2 + stx %o2,[%fp+tmp_py] + + sub counter,2,counter + st counter,[%fp+tmp_counter] + ba .cont3 + or %g0,2,counter + + .align 16 +.update4: + cmp counter,3 + ble .cont4 + nop + + sll stridey,1,%g5 + add %i2,stridey,%o3 + stx %o2,[%fp+tmp_px] + + add %o3,%g5,%o3 + stx %o3,[%fp+tmp_py] + + sub counter,3,counter + st counter,[%fp+tmp_counter] + ba .cont4 + or %g0,3,counter + + .align 16 +.update5: + cmp counter,3 + ble .cont5 + nop + + sll stridey,1,%g5 + add %i2,stridey,%o3 + stx %o2,[%fp+tmp_px] + + add %o3,%g5,%o3 + stx %o3,[%fp+tmp_py] + + sub counter,3,counter + st counter,[%fp+tmp_counter] + ba .cont5 + or %g0,3,counter + + .align 16 +.update6: + fzeros %f2 + cmp counter,1 + ble .cont6 + nop + + ld [%fp+tmp_counter],%g1 + + sub %o2,stridex,%o3 + stx %o4,[%fp+tmp_py] + + sub %o3,stridex,%o3 + add %g1,counter,counter + stx %o3,[%fp+tmp_px] + + sub counter,1,counter + st counter,[%fp+tmp_counter] + ba .cont6 + or %g0,1,counter + + .align 16 +.update7: + cmp counter,4 + ble .cont7 + nop + + sll stridey,1,%g1 + add %o4,stridey,%o0 + stx %o2,[%fp+tmp_px] + + add %o0,%g1,%o0 + stx %o0,[%fp+tmp_py] + + sub counter,4,counter + st counter,[%fp+tmp_counter] + 
ba .cont7 + or %g0,4,counter + + .align 16 +.update8: + cmp counter,4 + ble .cont8 + nop + + sll stridey,1,%g1 + add %o4,stridey,%o0 + stx %o2,[%fp+tmp_px] + + add %o0,%g1,%o0 + stx %o0,[%fp+tmp_py] + + sub counter,4,counter + st counter,[%fp+tmp_counter] + ba .cont8 + or %g0,4,counter + + .align 16 +.update9: + cmp counter,2 + ble .cont9 + fzeros %f16 + + ld [%fp+tmp_counter],%i3 + + sub %o2,stridex,%g1 + stx %i2,[%fp+tmp_py] + + sub %g1,stridex,%g1 + add %i3,counter,counter + stx %g1,[%fp+tmp_px] + + sub counter,2,counter + st counter,[%fp+tmp_counter] + ba .cont9 + or %g0,2,counter + + .align 16 +.update10: + cmp counter,5 + ble .cont10 + nop + + add %i2,stridey,%i1 + stx %i3,[%fp+tmp_px] + + add %i1,stridey,%i1 + stx %i1,[%fp+tmp_py] + + sub counter,5,counter + st counter,[%fp+tmp_counter] + ba .cont10 + or %g0,5,counter + + .align 16 +.update11: + cmp counter,5 + ble .cont11 + nop + + add %i2,stridey,%i1 + stx %i3,[%fp+tmp_px] + + add %i1,stridey,%i1 + stx %i1,[%fp+tmp_py] + + sub counter,5,counter + st counter,[%fp+tmp_counter] + ba .cont11 + or %g0,5,counter + + .align 16 +.update12: + fzeros %f0 + cmp counter,3 + ble .cont12 + nop + + ld [%fp+tmp_counter],%o2 + + sub %i3,stridex,%i1 + stx %i2,[%fp+tmp_py] + + sub %i1,stridex,%i1 + add %o2,counter,counter + stx %i1,[%fp+tmp_px] + + sub counter,3,counter + st counter,[%fp+tmp_counter] + ba .cont12 + or %g0,3,counter + + .align 16 +.update13: + cmp counter,3 + ble .cont13 + nop + + sll stridey,1,%g5 + add %i2,stridey,%o3 + stx %o2,[%fp+tmp_px] + + add %o3,%g5,%o3 + stx %o3,[%fp+tmp_py] + + sub counter,3,counter + st counter,[%fp+tmp_counter] + ba .cont13 + or %g0,3,counter + + .align 16 +.update14: + cmp counter,3 + ble .cont14 + nop + + sll stridey,1,%g5 + add %i2,stridey,%o3 + stx %o2,[%fp+tmp_px] + + add %o3,%g5,%o3 + stx %o3,[%fp+tmp_py] + + sub counter,3,counter + st counter,[%fp+tmp_counter] + ba .cont14 + or %g0,3,counter + + .align 16 +.update15: + cmp counter,1 + ble .cont15 + fzeros %f2 + + ld 
[%fp+tmp_counter],%g1 + + sub %o2,stridex,%o3 + stx %o4,[%fp+tmp_py] + + sub %o3,stridex,%o3 + add %g1,counter,counter + stx %o3,[%fp+tmp_px] + + sub counter,1,counter + st counter,[%fp+tmp_counter] + ba .cont15 + or %g0,1,counter + + .align 16 +.update16: + cmp counter,4 + ble .cont16 + nop + + sll stridey,1,%g1 + add %o4,stridey,%o0 + stx %o2,[%fp+tmp_px] + + add %o0,%g1,%o0 + stx %o0,[%fp+tmp_py] + + sub counter,4,counter + st counter,[%fp+tmp_counter] + ba .cont16 + or %g0,4,counter + + .align 16 +.update17: + cmp counter,4 + ble .cont17 + nop + + sll stridey,1,%g1 + add %o4,stridey,%o0 + stx %o2,[%fp+tmp_px] + + add %o0,%g1,%o0 + stx %o0,[%fp+tmp_py] + + sub counter,4,counter + st counter,[%fp+tmp_counter] + ba .cont17 + or %g0,4,counter + + .align 16 +.update18: + fzeros %f16 + cmp counter,2 + ble .cont18 + nop + + ld [%fp+tmp_counter],%i3 + + sub %o2,stridex,%g1 + stx %i2,[%fp+tmp_py] + + sub %g1,stridex,%g1 + add %i3,counter,counter + stx %g1,[%fp+tmp_px] + + sub counter,2,counter + st counter,[%fp+tmp_counter] + ba .cont18 + or %g0,2,counter + + .align 16 +.update19: + cmp counter,5 + ble .cont19 + nop + + add %i2,stridey,%i1 + stx %i3,[%fp+tmp_px] + + add %i1,stridey,%i1 + stx %i1,[%fp+tmp_py] + + sub counter,5,counter + st counter,[%fp+tmp_counter] + ba .cont19 + or %g0,5,counter + + .align 16 +.update20: + cmp counter,5 + ble .cont20 + nop + + add %i2,stridey,%i1 + stx %i3,[%fp+tmp_px] + + add %i1,stridey,%i1 + stx %i1,[%fp+tmp_py] + + sub counter,5,counter + st counter,[%fp+tmp_counter] + ba .cont20 + or %g0,5,counter + + .align 16 +.update21: + cmp counter,3 + ble .cont21 + fzeros %f0 + + ld [%fp+tmp_counter],%o2 + + sub %i3,stridex,%i1 + stx %i2,[%fp+tmp_py] + + sub %i1,stridex,%i1 + add %o2,counter,counter + stx %i1,[%fp+tmp_px] + + + sub counter,3,counter + st counter,[%fp+tmp_counter] + ba .cont21 + or %g0,3,counter + + .align 16 +.update22: + cmp counter,3 + ble .cont22 + fzeros %f2 + + ld [%fp+tmp_counter],%g1 + + sub %i3,stridex,%i2 + stx 
%i2,[%fp+tmp_px] + + add %g1,counter,counter + stx %o4,[%fp+tmp_py] + + sub counter,3,counter + st counter,[%fp+tmp_counter] + ba .cont22 + or %g0,3,counter + +.stridex_zero: + ld [%fp+tmp_counter],counter + + stx %i3,[%fp+tmp_py] + + cmp counter,0 + ble,pn %icc,.exit + lda [%i1]0x82,%i1 ! (Y0_2) ax0 = *px; + + and %i1,MASK_0x7fffffff,%i3 ! (Y0_2) exp0 = ax0 & 0x7fffffff; + sub %i3,%l6,%l6 + and %i1,MASK_0x007fffff,%g5 ! (Y0_2) ax0 &= 0x007fffff; + srl %i3,23,%o3 ! (Y0_2) exp0 >>= 23; + srl %l6,31,%l6 + st %l6,[%fp+tmp5] + add %g5,CONST_0x8000,%i3 ! (Y0_2) i0 = ax0 + 0x8000; + sethi %hi(0xffff0000),%l6 + sub %o3,127,%o3 ! (Y0_2) exp0 -= 127; + and %i3,%l6,%i3 ! (Y0_2) i0 &= 0xffff0000; + sll %o3,8,%o4 ! (Y0_2) exp0 <<= 8; + st %o4,[%fp+tmp3] ! (Y0_2) STORE exp0 + sra %i3,12,%o0 ! (Y0_2) ind0 = i0 >> 12; + sub %g5,%i3,%o4 ! (Y0_2) i0 = ax0 - i0; + st %o4,[%fp+tmp2] ! (Y0_2) STORE i0 + and %o0,-8,%g5 ! (Y0_2) ind0 &= -8; + ld [%fp+tmp2],%f14 ! (Y0_2) dtmp0 = (double) i0; + add %l2,%g5,%g1 ! (Y0_2) (char*)__mt_constlog4f + ind0 + ldd [%g1+8],%f48 ! (Y0_2) dtmp1 = *(double *)((char*)__mt_constlog4f + ind0 + 8); + fitod %f14,%f60 ! (Y0_2) dtmp0 = (double) i0; + fmuld %f60,%f48,%f48 ! (Y0_2) y0 = dtmp0 * dtmp1; + fmuld KA3,%f48,%f62 ! (Y0_2) dtmp0 = KA3 * y0; + faddd %f62,KA2,%f22 ! (Y0_2) dtmp0 += KA2; + fmuld %f22,%f48,%f26 ! (Y0_2) dtmp0 *= y0; + faddd %f26,KA1,%f50 ! (Y0_2) dtmp0 += KA1; + ld [%fp+tmp3],%f4 ! (Y0_2) dtmp1 = (double) exp0; + fitod %f4,%f26 ! (Y0_1) dtmp1 = (double) exp0; + fmuld %f50,%f48,%f50 ! (Y0_1) dtmp0 *= y0; + ldd [%l2+%g5],%f60 ! (Y0_1) dtmp0 = *(double *)((char*)__mt_constlog4f + ind0); + faddd %f50,KA0,%f58 ! (Y0_1) dtmp0 += KA0; + faddd %f60,%f26,%f26 ! (Y0_1) yy0 = dtmp0 + dtmp1; + fmuld %f58,%f48,%f48 ! (Y0_1) dtmp0 *= y0; + sub %l2,3200,%o4 + sub %l2,1152-600,%o3 + faddd %f26,%f48,%f46 ! 
(Y0_1) yy0 += dtmp0; + or %g0,%i5,%g1 + sethi %hi(0x7f800000),%o1 + +.xbegin: + ld [%fp+tmp_counter],counter + ldx [%fp+tmp_py],%o5 + st %g0,[%fp+tmp_counter] +.xbegin1: + subcc counter,1,counter + bneg,pn %icc,.exit + nop + + lda [%o5]0x82,%i5 ! (Y0_0) ay = py[0]; + + lda [%o5]0x82,%f5 ! (Y0_0) ftmp0 = py[0]; + + and %i5,MASK_0x7fffffff,%i3 ! (Y0_0) ay &= 0x7fffffff + + cmp %i3,%o1 + bge,pn %icc,.xspec + nop + + fstod %f5,%f52 ! (Y0_0) dtmp0 = (double)ftmp0; + + fmuld %f52,%f46,%f26 ! (Y0_0) yy0 = dtmp0 * yy; + add %o5,stridey,%o5 ! py += stridey + + lda [%o5]0x82,%i5 ! (Y1_0) ay = ((int*)py)[0]; + + lda [%o5]0x82,%f7 ! (Y1_0) ftmp0 = py[0]; + + and %i5,MASK_0x7fffffff,%i5 ! (Y1_0) ay &= 0x7fffffff + fcmped %fcc0,HTHRESHOLD,%f26 ! (Y0_0) if (yy0 >= HTHRESH) + + cmp %i5,%o1 + bge,pn %icc,.xupdate0 + nop + +.xcont0: + fstod %f7,%f48 ! (Y1_0) dtmp0 = (double)ftmp0; + + fcmped %fcc1,LTHRESHOLD,%f26 ! (Y0_1) if (yy0 <= LTHRESH) + + add %o5,stridey,%o5 ! py += stridey + fmuld %f48,%f46,%f28 ! (Y1_1) yy0 = dtmp0 * yy; + + lda [%o5]0x82,%i3 ! (Y0_0) ay = py[0]; + + lda [%o5]0x82,%f5 ! (Y0_0) ftmp0 = py[0]; + + and %i3,MASK_0x7fffffff,%i3 ! (Y0_0) ay &= 0x7fffffff + fmovdl %fcc0,HTHRESHOLD,%f26 ! (Y0_1) yy0 = HTHRESH; + + cmp %i3,%o1 + bge,pn %icc,.xupdate1 + fcmped %fcc2,HTHRESHOLD,%f28 ! (Y1_1) if (yy0 >= HTHRESH) +.xcont1: + fstod %f5,%f52 ! (Y0_0) dtmp0 = (double)ftmp0; + + fmovdg %fcc1,LTHRESHOLD,%f26 ! (Y0_1) yy0 = LTHRESH; + + fcmped %fcc3,LTHRESHOLD,%f28 ! (Y1_1) if (yy0 <= LTHRESH) + + fmuld %f52,%f46,%f22 ! (Y0_0) yy0 = dtmp0 * yy; + + fdtoi %f26,%f0 ! (Y0_1) ii0 = (int) yy0; + + add %o5,stridey,%o5 ! py += stridey + st %f0,[%fp+tmp1] ! (Y0_1) STORE ii0 + + lda [%o5]0x82,%l7 ! (Y1_0) ay = ((int*)py)[0]; + + lda [%o5]0x82,%f7 ! (Y1_0) ftmp0 = py[0]; + fmovdl %fcc2,HTHRESHOLD,%f28 ! (Y1_1) yy0 = HTHRESH; + + and %l7,MASK_0x7fffffff,%l7 ! (Y1_0) ay &= 0x7fffffff + fcmped %fcc0,HTHRESHOLD,%f22 ! 
(Y0_0) if (yy0 >= HTHRESH) + + cmp %l7,%o1 + bge,pn %icc,.xupdate2 + nop +.xcont2: + fstod %f7,%f48 ! (Y1_0) dtmp0 = (double)ftmp0; + + fmovdg %fcc3,LTHRESHOLD,%f28 ! (Y1_2) yy0 = LTHRESH; + + fcmped %fcc1,LTHRESHOLD,%f22 ! (Y0_1) if (yy0 <= LTHRESH) + + fitod %f0,%f52 ! (Y0_2) dtmp0 = (double)ii0; + + add %o5,stridey,%o5 ! py += stridey + fmuld %f48,%f46,%f24 ! (Y1_1) yy0 = dtmp0 * yy; + + fdtoi %f28,%f3 ! (Y1_2) ii0 = (int) yy0; + lda [%o5]0x82,%i3 ! (Y0_0) ay = py[0]; + + st %f3,[%fp+tmp0] ! (Y1_2) STORE ii0 + + fsubd %f26,%f52,%f40 ! (Y0_2) y0 = yy0 - dtmp0; + lda [%o5]0x82,%f5 ! (Y0_0) ftmp0 = py[0]; + + and %i3,MASK_0x7fffffff,%i3 ! (Y0_0) ay &= 0x7fffffff + fmovdl %fcc0,HTHRESHOLD,%f22 ! (Y0_1) yy0 = HTHRESH; + + cmp %i3,%o1 + bge,pn %icc,.xupdate3 + fcmped %fcc2,HTHRESHOLD,%f24 ! (Y1_1) if (yy0 >= HTHRESH) +.xcont3: + ld [%fp+tmp1],%i2 ! (Y0_2) LOAD ii0 + fmuld KB2,%f40,%f36 ! (Y0_2) dtmp0 = KB2 * y0; + fstod %f5,%f52 ! (Y0_0) dtmp0 = (double)ftmp0; + + fmovdg %fcc1,LTHRESHOLD,%f22 ! (Y0_1) yy0 = LTHRESH; + + sra %i2,6,%l6 ! (Y0_2) i0 = ii0 >> 6; + and %i2,255,%l7 ! (Y0_2) ii0 &= 255; + fcmped %fcc3,LTHRESHOLD,%f24 ! (Y1_1) if (yy0 <= LTHRESH) + + fitod %f3,%f56 ! (Y1_2) dtmp0 = (double)ii0; + sll %l7,3,%o0 ! (Y0_2) ii0 <<= 3; + and %l6,-4,%g5 ! (Y0_2) i0 &= -4; + + faddd %f36,KB1,%f60 ! (Y0_2) dtmp0 += KB1; + fmuld %f52,%f46,%f26 ! (Y0_0) yy0 = dtmp0 * yy; + ld [%g5+%o3],%f10 ! (Y0_2) di0 = ((double*)((char*)(__mt_constexp2fb + 150 ) + i0))[0] + + fdtoi %f22,%f0 ! (Y0_1) ii0 = (int) yy0; + ldd [%o4+%o0],%f62 ! (Y0_2) dtmp0 = ((double*)((char*)__mt_constexp2fa + ii0))[0]; + + add %o5,stridey,%o5 ! py += stridey + st %f0,[%fp+tmp1] ! (Y0_1) STORE ii0 + + fsubd %f28,%f56,%f56 ! (Y1_2) y0 = yy0 - dtmp0; + lda [%o5]0x82,%i5 ! (Y1_0) ay = ((int*)py)[0]; + + fmuld %f60,%f40,%f60 ! (Y0_2) yy0 = dtmp0 * y0; + fmovdl %fcc2,HTHRESHOLD,%f24 ! (Y1_1) yy0 = HTHRESH; + lda [%o5]0x82,%f7 ! (Y1_0) ftmp0 = py[0]; + + fmuld %f10,%f62,%f62 ! 
(Y0_2) di0 *= dtmp0; + ld [%fp+tmp0],%g5 ! (Y1_2) LOAD ii0 + and %i5,MASK_0x7fffffff,%i5 ! (Y1_0) ay &= 0x7fffffff + fcmped %fcc0,HTHRESHOLD,%f26 ! (Y0_0) if (yy0 >= HTHRESH) + + cmp %i5,%o1 + bge,pn %icc,.xupdate4 +.xcont4: + fmuld KB2,%f56,%f58 ! (Y1_2) dtmp0 = KB2 * y0; + fstod %f7,%f48 ! (Y1_0) dtmp0 = (double)ftmp0; + + fmovdg %fcc3,LTHRESHOLD,%f24 ! (Y1_2) yy0 = LTHRESH; + sra %g5,6,%i0 ! (Y1_3) i0 = ii0 >> 6; + and %g5,255,%i1 ! (Y1_3) ii0 &= 255; + fmuld %f60,%f62,%f40 ! (Y0_3) dtmp0 = yy0 * di0; + + fcmped %fcc1,LTHRESHOLD,%f26 ! (Y0_1) if (yy0 <= LTHRESH) + sll %i1,3,%i3 ! (Y1_3) ii0 <<= 3; + and %i0,-4,%i0 ! (Y1_3) i0 &= -4; + + fitod %f0,%f52 ! (Y0_2) dtmp0 = (double)ii0; + ld [%i0+%o3],%f10 ! (Y1_3) di0 = ((double*)((char*)(__mt_constexp2fb + 150) + i0))[0]; + + faddd %f58,KB1,%f58 ! (Y1_3) dtmp0 += KB1; + add %o5,stridey,%o5 ! py += stridey + ldd [%o4+%i3],%f18 ! (Y1_3) dtmp0 = ((double*)((char*)__mt_constexp2fa + ii0))[0]; + fmuld %f48,%f46,%f28 ! (Y1_1) yy0 = dtmp0 * yy; + + fdtoi %f24,%f3 ! (Y1_2) ii0 = (int) yy0; + lda [%o5]0x82,%i3 ! (Y0_0) ay = py[0]; + + faddd %f40,%f62,%f60 ! (Y0_3) dtmp0 += di0; + st %f3,[%fp+tmp0] ! (Y1_2) STORE ii0 + + fsubd %f22,%f52,%f40 ! (Y0_2) y0 = yy0 - dtmp0; + lda [%o5]0x82,%f5 ! (Y0_0) ftmp0 = py[0]; + + fmuld %f58,%f56,%f56 ! (Y1_3) yy0 = dtmp0 * y0; + and %i3,MASK_0x7fffffff,%i3 ! (Y0_0) ay &= 0x7fffffff + fmovdl %fcc0,HTHRESHOLD,%f26 ! (Y0_1) yy0 = HTHRESH; + + fmuld %f10,%f18,%f50 ! (Y1_3) di0 *= dtmp0; + cmp %i3,%o1 + bge,pn %icc,.xupdate5 + fcmped %fcc2,HTHRESHOLD,%f28 ! (Y1_1) if (yy0 >= HTHRESH) +.xcont5: + fdtos %f60,%f1 ! (Y0_3) ftmp0 = (float)dtmp0; + add %g1,stridez,%i3 ! pz += stridez + st %f1,[%g1] ! (Y0_3) pz[0] = ftmp0; + + subcc counter,1,counter + bneg,pn %icc,.xbegin + or %g0,%i3,%g1 + + ld [%fp+tmp1],%i2 ! (Y0_2) LOAD ii0 + fmuld KB2,%f40,%f36 ! (Y0_2) dtmp0 = KB2 * y0; + fstod %f5,%f52 ! (Y0_0) dtmp0 = (double)ftmp0; + + fmovdg %fcc1,LTHRESHOLD,%f26 ! 
(Y0_1) yy0 = LTHRESH; + + fmuld %f56,%f50,%f58 ! (Y1_3) dtmp0 = yy0 * di0; + sra %i2,6,%l6 ! (Y0_2) i0 = ii0 >> 6; + and %i2,255,%l7 ! (Y0_2) ii0 &= 255; + fcmped %fcc3,LTHRESHOLD,%f28 ! (Y1_1) if (yy0 <= LTHRESH) + + fitod %f3,%f56 ! (Y1_2) dtmp0 = (double)ii0; + sll %l7,3,%o0 ! (Y0_2) ii0 <<= 3; + and %l6,-4,%g5 ! (Y0_2) i0 &= -4; + + faddd %f36,KB1,%f60 ! (Y0_2) dtmp0 += KB1; + fmuld %f52,%f46,%f22 ! (Y0_0) yy0 = dtmp0 * yy; + ld [%g5+%o3],%f10 ! (Y0_2) di0 = ((double*)((char*)(__mt_constexp2fb + 150 ) + i0))[0] + + fdtoi %f26,%f0 ! (Y0_1) ii0 = (int) yy0; + ldd [%o4+%o0],%f62 ! (Y0_2) dtmp0 = ((double*)((char*)__mt_constexp2fa + ii0))[0]; + + faddd %f58,%f50,%f58 ! (Y1_3) dtmp0 += di0; + add %o5,stridey,%o5 ! py += stridey + st %f0,[%fp+tmp1] ! (Y0_1) STORE ii0 + + fsubd %f24,%f56,%f56 ! (Y1_2) y0 = yy0 - dtmp0; + lda [%o5]0x82,%l7 ! (Y1_0) ay = ((int*)py)[0]; + + fmuld %f60,%f40,%f60 ! (Y0_2) yy0 = dtmp0 * y0; + add %i3,stridez,%i5 ! pz += stridez + lda [%o5]0x82,%f7 ! (Y1_0) ftmp0 = py[0]; + fmovdl %fcc2,HTHRESHOLD,%f28 ! (Y1_1) yy0 = HTHRESH; + + fmuld %f10,%f62,%f62 ! (Y0_2) di0 *= dtmp0; + and %l7,MASK_0x7fffffff,%l7 ! (Y1_0) ay &= 0x7fffffff + ld [%fp+tmp0],%g5 ! (Y1_2) LOAD ii0 + fcmped %fcc0,HTHRESHOLD,%f22 ! (Y0_0) if (yy0 >= HTHRESH) + + fdtos %f58,%f9 ! (Y1_3) ftmp0 = (float)dtmp0; + st %f9,[%i3] ! (Y1_3) pz[0] = ftmp0; + cmp %l7,%o1 + bge,pn %icc,.xupdate6 + +.xcont6: + fmuld KB2,%f56,%f58 ! (Y1_2) dtmp0 = KB2 * y0; + fstod %f7,%f48 ! (Y1_0) dtmp0 = (double)ftmp0; + + cmp counter,8 + bl,pn %icc,.xtail + nop + + ba .xmain_loop + nop + + .align 16 +.xmain_loop: + fmovdg %fcc3,LTHRESHOLD,%f28 ! (Y1_2) yy0 = LTHRESH; + sra %g5,6,%i0 ! (Y1_3) i0 = ii0 >> 6; + and %g5,255,%i1 ! (Y1_3) ii0 &= 255; + fmuld %f60,%f62,%f40 ! (Y0_3) dtmp0 = yy0 * di0; + + fcmped %fcc1,LTHRESHOLD,%f22 ! (Y0_1) if (yy0 <= LTHRESH) + sll %i1,3,%i3 ! (Y1_3) ii0 <<= 3; + and %i0,-4,%i0 ! (Y1_3) i0 &= -4; + + fitod %f0,%f52 ! 
(Y0_2) dtmp0 = (double)ii0; + sub counter,4,counter + ld [%i0+%o3],%f10 ! (Y1_3) di0 = ((double*)((char*)(__mt_constexp2fb + 150 ) + i0))[0]; + + faddd %f58,KB1,%f58 ! (Y1_3) dtmp0 += KB1; + add %o5,stridey,%o5 ! py += stridey + ldd [%o4+%i3],%f18 ! (Y1_3) dtmp0 = ((double*)((char*)__mt_constexp2fa + ii0))[0]; + fmuld %f48,%f46,%f24 ! (Y1_1) yy0 = dtmp0 * yy; + + fdtoi %f28,%f3 ! (Y1_2) ii0 = (int) yy0; + lda [%o5]0x82,%i3 ! (Y0_0) ay = py[0]; + + faddd %f40,%f62,%f60 ! (Y0_3) dtmp0 += di0; + st %f3,[%fp+tmp0] ! (Y1_2) STORE ii0 + + fsubd %f26,%f52,%f40 ! (Y0_2) y0 = yy0 - dtmp0; + lda [%o5]0x82,%f5 ! (Y0_0) ftmp0 = py[0]; + + fmuld %f58,%f56,%f56 ! (Y1_3) yy0 = dtmp0 * y0; + and %i3,MASK_0x7fffffff,%i3 ! (Y0_0) ay &= 0x7fffffff + fmovdl %fcc0,HTHRESHOLD,%f22 ! (Y0_1) yy0 = HTHRESH; + + fmuld %f10,%f18,%f50 ! (Y1_3) di0 *= dtmp0; + cmp %i3,%o1 + bge,pn %icc,.xupdate7 + fcmped %fcc2,HTHRESHOLD,%f24 ! (Y1_1) if (yy0 >= HTHRESH) +.xcont7: + fdtos %f60,%f1 ! (Y0_3) ftmp0 = (float)dtmp0; + add %i5,stridez,%i3 ! pz += stridez + st %f1,[%i5] ! (Y0_3) pz[0] = ftmp0; + + ld [%fp+tmp1],%i2 ! (Y0_2) LOAD ii0 + fmuld KB2,%f40,%f36 ! (Y0_2) dtmp0 = KB2 * y0; + fstod %f5,%f52 ! (Y0_0) dtmp0 = (double)ftmp0; + + fmovdg %fcc1,LTHRESHOLD,%f22 ! (Y0_1) yy0 = LTHRESH; + + fmuld %f56,%f50,%f58 ! (Y1_3) dtmp0 = yy0 * di0; + sra %i2,6,%l6 ! (Y0_2) i0 = ii0 >> 6; + and %i2,255,%l7 ! (Y0_2) ii0 &= 255; + fcmped %fcc3,LTHRESHOLD,%f24 ! (Y1_1) if (yy0 <= LTHRESH) + + fitod %f3,%f56 ! (Y1_2) dtmp0 = (double)ii0; + sll %l7,3,%o0 ! (Y0_2) ii0 <<= 3; + and %l6,-4,%g5 ! (Y0_2) i0 &= -4; + + faddd %f36,KB1,%f60 ! (Y0_2) dtmp0 += KB1; + fmuld %f52,%f46,%f26 ! (Y0_0) yy0 = dtmp0 * yy; + ld [%g5+%o3],%f10 ! (Y0_2) di0 = ((double*)((char*)(__mt_constexp2fb + 150 ) + i0))[0] + + fdtoi %f22,%f0 ! (Y0_1) ii0 = (int) yy0; + ldd [%o4+%o0],%f62 ! (Y0_2) dtmp0 = ((double*)((char*)__mt_constexp2fa + ii0))[0]; + + faddd %f58,%f50,%f58 ! (Y1_3) dtmp0 += di0; + add %o5,stridey,%o5 ! 
py += stridey + st %f0,[%fp+tmp1] ! (Y0_1) STORE ii0 + + fsubd %f28,%f56,%f56 ! (Y1_2) y0 = yy0 - dtmp0; + lda [%o5]0x82,%i5 ! (Y1_0) ay = ((int*)py)[0]; + + fmuld %f60,%f40,%f60 ! (Y0_2) yy0 = dtmp0 * y0; + fmovdl %fcc2,HTHRESHOLD,%f24 ! (Y1_1) yy0 = HTHRESH; + lda [%o5]0x82,%f7 ! (Y1_0) ftmp0 = py[0]; + + fmuld %f10,%f62,%f62 ! (Y0_2) di0 *= dtmp0; + ld [%fp+tmp0],%g5 ! (Y1_2) LOAD ii0 + and %i5,MASK_0x7fffffff,%i5 ! (Y1_0) ay &= 0x7fffffff + fcmped %fcc0,HTHRESHOLD,%f26 ! (Y0_0) if (yy0 >= HTHRESH) + + fdtos %f58,%f9 ! (Y1_3) ftmp0 = (float)dtmp0; + cmp %i5,%o1 + bge,pn %icc,.xupdate8 + +.xcont8: + fmuld KB2,%f56,%f58 ! (Y1_2) dtmp0 = KB2 * y0; + add %i3,stridez,%i5 ! pz += stridez + st %f9,[%i3] ! (Y1_3) pz[0] = ftmp0; + fstod %f7,%f48 ! (Y1_0) dtmp0 = (double)ftmp0; + + fmovdg %fcc3,LTHRESHOLD,%f24 ! (Y1_2) yy0 = LTHRESH; + sra %g5,6,%i0 ! (Y1_3) i0 = ii0 >> 6; + and %g5,255,%i1 ! (Y1_3) ii0 &= 255; + fmuld %f60,%f62,%f40 ! (Y0_3) dtmp0 = yy0 * di0; + + fcmped %fcc1,LTHRESHOLD,%f26 ! (Y0_1) if (yy0 <= LTHRESH) + sll %i1,3,%i3 ! (Y1_3) ii0 <<= 3; + and %i0,-4,%i0 ! (Y1_3) i0 &= -4; + + fitod %f0,%f52 ! (Y0_2) dtmp0 = (double)ii0; + ld [%i0+%o3],%f10 ! (Y1_3) di0 = ((double*)((char*)(__mt_constexp2fb + 150 ) + i0))[0]; + + faddd %f58,KB1,%f58 ! (Y1_3) dtmp0 += KB1; + add %o5,stridey,%o5 ! py += stridey + ldd [%o4+%i3],%f18 ! (Y1_3) dtmp0 = ((double*)((char*)__mt_constexp2fa + ii0))[0]; + fmuld %f48,%f46,%f28 ! (Y1_1) yy0 = dtmp0 * yy; + + fdtoi %f24,%f3 ! (Y1_2) ii0 = (int) yy0; + lda [%o5]0x82,%i3 ! (Y0_0) ay = py[0]; + + faddd %f40,%f62,%f60 ! (Y0_3) dtmp0 += di0; + st %f3,[%fp+tmp0] ! (Y1_2) STORE ii0 + + fsubd %f22,%f52,%f40 ! (Y0_2) y0 = yy0 - dtmp0; + lda [%o5]0x82,%f5 ! (Y0_0) ftmp0 = py[0]; + + fmuld %f58,%f56,%f56 ! (Y1_3) yy0 = dtmp0 * y0; + and %i3,MASK_0x7fffffff,%i3 ! (Y0_0) ay &= 0x7fffffff + fmovdl %fcc0,HTHRESHOLD,%f26 ! (Y0_1) yy0 = HTHRESH; + + fmuld %f10,%f18,%f50 ! 
(Y1_3) di0 *= dtmp0; + cmp %i3,%o1 + bge,pn %icc,.xupdate9 + fcmped %fcc2,HTHRESHOLD,%f28 ! (Y1_1) if (yy0 >= HTHRESH) +.xcont9: + fdtos %f60,%f1 ! (Y0_3) ftmp0 = (float)dtmp0; + add %i5,stridez,%i3 ! pz += stridez + st %f1,[%i5] ! (Y0_3) pz[0] = ftmp0; + + ld [%fp+tmp1],%i2 ! (Y0_2) LOAD ii0 + fmuld KB2,%f40,%f36 ! (Y0_2) dtmp0 = KB2 * y0; + fstod %f5,%f52 ! (Y0_0) dtmp0 = (double)ftmp0; + + fmovdg %fcc1,LTHRESHOLD,%f26 ! (Y0_1) yy0 = LTHRESH; + + fmuld %f56,%f50,%f58 ! (Y1_3) dtmp0 = yy0 * di0; + sra %i2,6,%l6 ! (Y0_2) i0 = ii0 >> 6; + and %i2,255,%l7 ! (Y0_2) ii0 &= 255; + fcmped %fcc3,LTHRESHOLD,%f28 ! (Y1_1) if (yy0 <= LTHRESH) + + fitod %f3,%f56 ! (Y1_2) dtmp0 = (double)ii0; + sll %l7,3,%o0 ! (Y0_2) ii0 <<= 3; + and %l6,-4,%g5 ! (Y0_2) i0 &= -4; + + faddd %f36,KB1,%f60 ! (Y0_2) dtmp0 += KB1; + fmuld %f52,%f46,%f22 ! (Y0_0) yy0 = dtmp0 * yy; + ld [%g5+%o3],%f10 ! (Y0_2) di0 = ((double*)((char*)(__mt_constexp2fb + 150 ) + i0))[0] + + fdtoi %f26,%f0 ! (Y0_1) ii0 = (int) yy0; + ldd [%o4+%o0],%f62 ! (Y0_2) dtmp0 = ((double*)((char*)__mt_constexp2fa + ii0))[0]; + + faddd %f58,%f50,%f58 ! (Y1_3) dtmp0 += di0; + add %o5,stridey,%o5 ! py += stridey + st %f0,[%fp+tmp1] ! (Y0_1) STORE ii0 + + fsubd %f24,%f56,%f56 ! (Y1_2) y0 = yy0 - dtmp0; + lda [%o5]0x82,%l7 ! (Y1_0) ay = ((int*)py)[0]; + + fmuld %f60,%f40,%f60 ! (Y0_2) yy0 = dtmp0 * y0; + add %i3,stridez,%i5 ! pz += stridez + lda [%o5]0x82,%f7 ! (Y1_0) ftmp0 = py[0]; + fmovdl %fcc2,HTHRESHOLD,%f28 ! (Y1_1) yy0 = HTHRESH; + + fmuld %f10,%f62,%f62 ! (Y0_2) di0 *= dtmp0; + and %l7,MASK_0x7fffffff,%l7 ! (Y1_0) ay &= 0x7fffffff + ld [%fp+tmp0],%g5 ! (Y1_2) LOAD ii0 + fcmped %fcc0,HTHRESHOLD,%f22 ! (Y0_0) if (yy0 >= HTHRESH) + + fdtos %f58,%f9 ! (Y1_3) ftmp0 = (float)dtmp0; + st %f9,[%i3] ! (Y1_3) pz[0] = ftmp0; + cmp %l7,%o1 + bge,pn %icc,.xupdate10 +.xcont10: + fmuld KB2,%f56,%f58 ! (Y1_2) dtmp0 = KB2 * y0; + cmp counter,4 + bge,pt %icc,.xmain_loop + fstod %f7,%f48 ! 
(Y1_0) dtmp0 = (double)ftmp0; + +.xtail: + subcc counter,1,counter + bneg,pn %icc,.xbegin + or %g0,%i5,%g1 + + fmovdg %fcc3,LTHRESHOLD,%f28 ! (Y1_2) yy0 = LTHRESH; + sra %g5,6,%i0 ! (Y1_3) i0 = ii0 >> 6; + and %g5,255,%i1 ! (Y1_3) ii0 &= 255; + fmuld %f60,%f62,%f40 ! (Y0_3) dtmp0 = yy0 * di0; + + fcmped %fcc1,LTHRESHOLD,%f22 ! (Y0_1) if (yy0 <= LTHRESH) + sll %i1,3,%i3 ! (Y1_3) ii0 <<= 3; + and %i0,-4,%i0 ! (Y1_3) i0 &= -4; + + fitod %f0,%f52 ! (Y0_2) dtmp0 = (double)ii0; + ld [%i0+%o3],%f10 ! (Y1_3) di0 = ((double*)((char*)(__mt_constexp2fb + 150 ) + i0))[0]; + + faddd %f58,KB1,%f58 ! (Y1_3) dtmp0 += KB1; + add %o5,stridey,%o5 ! py += stridey + ldd [%o4+%i3],%f18 ! (Y1_3) dtmp0 = ((double*)((char*)__mt_constexp2fa + ii0))[0]; + fmuld %f48,%f46,%f24 ! (Y1_1) yy0 = dtmp0 * yy; + + fdtoi %f28,%f3 ! (Y1_2) ii0 = (int) yy0; + lda [%o5]0x82,%i3 ! (Y0_0) ay = py[0]; + + faddd %f40,%f62,%f60 ! (Y0_3) dtmp0 += di0; + st %f3,[%fp+tmp0] ! (Y1_2) STORE ii0 + + fsubd %f26,%f52,%f40 ! (Y0_2) y0 = yy0 - dtmp0; + lda [%o5]0x82,%f5 ! (Y0_0) ftmp0 = py[0]; + + fmuld %f58,%f56,%f56 ! (Y1_3) yy0 = dtmp0 * y0; + and %i3,MASK_0x7fffffff,%i3 ! (Y0_0) ay &= 0x7fffffff + fmovdl %fcc0,HTHRESHOLD,%f22 ! (Y0_1) yy0 = HTHRESH; + + fmuld %f10,%f18,%f50 ! (Y1_3) di0 *= dtmp0; + cmp %i3,%o1 + bge,pn %icc,.xupdate11 + fcmped %fcc2,HTHRESHOLD,%f24 ! (Y1_1) if (yy0 >= HTHRESH) +.xcont11: + fdtos %f60,%f1 ! (Y0_3) ftmp0 = (float)dtmp0; + add %i5,stridez,%i3 ! pz += stridez + st %f1,[%i5] ! (Y0_3) pz[0] = ftmp0; + + subcc counter,1,counter + bneg,pn %icc,.xbegin + or %g0,%i3,%g1 + + ld [%fp+tmp1],%i2 ! (Y0_2) LOAD ii0 + fmuld KB2,%f40,%f36 ! (Y0_2) dtmp0 = KB2 * y0; + fstod %f5,%f52 ! (Y0_0) dtmp0 = (double)ftmp0; + + fmovdg %fcc1,LTHRESHOLD,%f22 ! (Y0_1) yy0 = LTHRESH; + + fmuld %f56,%f50,%f58 ! (Y1_3) dtmp0 = yy0 * di0; + sra %i2,6,%l6 ! (Y0_2) i0 = ii0 >> 6; + and %i2,255,%l7 ! (Y0_2) ii0 &= 255; + fcmped %fcc3,LTHRESHOLD,%f24 ! (Y1_1) if (yy0 <= LTHRESH) + + fitod %f3,%f56 ! 
(Y1_2) dtmp0 = (double)ii0; + sll %l7,3,%o0 ! (Y0_2) ii0 <<= 3; + and %l6,-4,%g5 ! (Y0_2) i0 &= -4; + + faddd %f36,KB1,%f60 ! (Y0_2) dtmp0 += KB1; + fmuld %f52,%f46,%f26 ! (Y0_0) yy0 = dtmp0 * yy; + ld [%g5+%o3],%f10 ! (Y0_2) di0 = ((double*)((char*)(__mt_constexp2fb + 150 ) + i0))[0] + + fdtoi %f22,%f0 ! (Y0_1) ii0 = (int) yy0; + ldd [%o4+%o0],%f62 ! (Y0_2) dtmp0 = ((double*)((char*)__mt_constexp2fa + ii0))[0]; + + faddd %f58,%f50,%f58 ! (Y1_3) dtmp0 += di0; + st %f0,[%fp+tmp1] ! (Y0_1) STORE ii0 + + fsubd %f28,%f56,%f56 ! (Y1_2) y0 = yy0 - dtmp0; + + fmuld %f60,%f40,%f60 ! (Y0_2) yy0 = dtmp0 * y0; + fmovdl %fcc2,HTHRESHOLD,%f24 ! (Y1_1) yy0 = HTHRESH; + + fmuld %f10,%f62,%f62 ! (Y0_2) di0 *= dtmp0; + ld [%fp+tmp0],%g5 ! (Y1_2) LOAD ii0 + fcmped %fcc0,HTHRESHOLD,%f26 ! (Y0_0) if (yy0 >= HTHRESH) + + fdtos %f58,%f9 ! (Y1_3) ftmp0 = (float)dtmp0; + + fmuld KB2,%f56,%f58 ! (Y1_2) dtmp0 = KB2 * y0; + add %i3,stridez,%i5 ! pz += stridez + st %f9,[%i3] ! (Y1_3) pz[0] = ftmp0; + + subcc counter,1,counter + bneg,pn %icc,.xbegin + or %g0,%i5,%g1 + + fmovdg %fcc3,LTHRESHOLD,%f24 ! (Y1_2) yy0 = LTHRESH; + sra %g5,6,%i0 ! (Y1_3) i0 = ii0 >> 6; + and %g5,255,%i1 ! (Y1_3) ii0 &= 255; + fmuld %f60,%f62,%f40 ! (Y0_3) dtmp0 = yy0 * di0; + + fcmped %fcc1,LTHRESHOLD,%f26 ! (Y0_1) if (yy0 <= LTHRESH) + sll %i1,3,%i3 ! (Y1_3) ii0 <<= 3; + and %i0,-4,%i0 ! (Y1_3) i0 &= -4; + + fitod %f0,%f52 ! (Y0_2) dtmp0 = (double)ii0; + ld [%i0+%o3],%f10 ! (Y1_3) di0 = ((double*)((char*)(__mt_constexp2fb + 150 ) + i0))[0]; + + faddd %f58,KB1,%f58 ! (Y1_3) dtmp0 += KB1; + ldd [%o4+%i3],%f18 ! (Y1_3) dtmp0 = ((double*)((char*)__mt_constexp2fa + ii0))[0]; + + fdtoi %f24,%f3 ! (Y1_2) ii0 = (int) yy0; + + faddd %f40,%f62,%f60 ! (Y0_3) dtmp0 += di0; + st %f3,[%fp+tmp0] ! (Y1_2) STORE ii0 + + fsubd %f22,%f52,%f40 ! (Y0_2) y0 = yy0 - dtmp0; + + fmuld %f58,%f56,%f56 ! (Y1_3) yy0 = dtmp0 * y0; + fmovdl %fcc0,HTHRESHOLD,%f26 ! (Y0_1) yy0 = HTHRESH; + + fmuld %f10,%f18,%f50 ! 
(Y1_3) di0 *= dtmp0; + + fdtos %f60,%f1 ! (Y0_3) ftmp0 = (float)dtmp0; + add %i5,stridez,%i3 ! pz += stridez + st %f1,[%i5] ! (Y0_3) pz[0] = ftmp0; + + subcc counter,1,counter + bneg,pn %icc,.xbegin + or %g0,%i3,%g1 + + ld [%fp+tmp1],%i2 ! (Y0_2) LOAD ii0 + fmuld KB2,%f40,%f36 ! (Y0_2) dtmp0 = KB2 * y0; + + fmovdg %fcc1,LTHRESHOLD,%f26 ! (Y0_1) yy0 = LTHRESH; + + fmuld %f56,%f50,%f58 ! (Y1_3) dtmp0 = yy0 * di0; + sra %i2,6,%l6 ! (Y0_2) i0 = ii0 >> 6; + and %i2,255,%l7 ! (Y0_2) ii0 &= 255; + + fitod %f3,%f56 ! (Y1_2) dtmp0 = (double)ii0; + sll %l7,3,%o0 ! (Y0_2) ii0 <<= 3; + and %l6,-4,%g5 ! (Y0_2) i0 &= -4; + + faddd %f36,KB1,%f60 ! (Y0_2) dtmp0 += KB1; + ld [%g5+%o3],%f10 ! (Y0_2) di0 = ((double*)((char*)(__mt_constexp2fb + 150) + i0))[0]; + + fdtoi %f26,%f0 ! (Y0_1) ii0 = (int) yy0; + ldd [%o4+%o0],%f62 ! (Y0_2) dtmp0 = ((double*)((char*)__mt_constexp2fa + ii0))[0]; + + faddd %f58,%f50,%f58 ! (Y1_3) dtmp0 += di0; + st %f0,[%fp+tmp1] ! (Y0_1) STORE ii0 + + fsubd %f24,%f56,%f56 ! (Y1_2) y0 = yy0 - dtmp0; + + fmuld %f60,%f40,%f60 ! (Y0_2) yy0 = dtmp0 * y0; + add %i3,stridez,%i5 ! pz += stridez + + fmuld %f10,%f62,%f62 ! (Y0_2) di0 *= dtmp0; + ld [%fp+tmp0],%g5 ! (Y1_2) LOAD ii0 + + fdtos %f58,%f9 ! (Y1_3) ftmp0 = (float)dtmp0; + st %f9,[%i3] ! (Y1_3) pz[0] = ftmp0; + + subcc counter,1,counter + bneg,pn %icc,.xbegin + or %g0,%i5,%g1 + + fmuld KB2,%f56,%f58 ! (Y1_2) dtmp0 = KB2 * y0; + + sra %g5,6,%i0 ! (Y1_3) i0 = ii0 >> 6; + and %g5,255,%i1 ! (Y1_3) ii0 &= 255; + fmuld %f60,%f62,%f40 ! (Y0_3) dtmp0 = yy0 * di0; + + sll %i1,3,%i3 ! (Y1_3) ii0 <<= 3; + and %i0,-4,%i0 ! (Y1_3) i0 &= -4; + + fitod %f0,%f52 ! (Y0_2) dtmp0 = (double)ii0; + ld [%i0+%o3],%f10 ! (Y1_3) di0 = ((double*)((char*)(__mt_constexp2fb + 150 ) + i0))[0]; + + faddd %f58,KB1,%f58 ! (Y1_3) dtmp0 += KB1; + ldd [%o4+%i3],%f18 ! (Y1_3) dtmp0 = ((double*)((char*)__mt_constexp2fa + ii0))[0]; + + faddd %f40,%f62,%f60 ! (Y0_3) dtmp0 += di0; + + fsubd %f26,%f52,%f40 ! 
(Y0_2) y0 = yy0 - dtmp0; + + fmuld %f58,%f56,%f56 ! (Y1_3) yy0 = dtmp0 * y0; + + fmuld %f10,%f18,%f50 ! (Y1_3) di0 *= dtmp0; + + fdtos %f60,%f1 ! (Y0_3) ftmp0 = (float)dtmp0; + add %i5,stridez,%i3 ! pz += stridez + st %f1,[%i5] ! (Y0_3) pz[0] = ftmp0; + + subcc counter,1,counter + bneg,pn %icc,.xbegin + or %g0,%i3,%g1 + + ld [%fp+tmp1],%i2 ! (Y0_2) LOAD ii0 + fmuld KB2,%f40,%f36 ! (Y0_2) dtmp0 = KB2 * y0; + + fmuld %f56,%f50,%f58 ! (Y1_3) dtmp0 = yy0 * di0; + sra %i2,6,%l6 ! (Y0_2) i0 = ii0 >> 6; + and %i2,255,%l7 ! (Y0_2) ii0 &= 255; + + sll %l7,3,%o0 ! (Y0_2) ii0 <<= 3; + and %l6,-4,%g5 ! (Y0_2) i0 &= -4; + + faddd %f36,KB1,%f60 ! (Y0_2) dtmp0 += KB1; + ld [%g5+%o3],%f10 ! (Y0_2) di0 = ((double*)((char*)(__mt_constexp2fb + 150 ) + i0))[0] + + ldd [%o4+%o0],%f62 ! (Y0_2) dtmp0 = ((double*)((char*)__mt_constexp2fa + ii0))[0]; + + faddd %f58,%f50,%f58 ! (Y1_3) dtmp0 += di0; + + fmuld %f60,%f40,%f60 ! (Y0_2) yy0 = dtmp0 * y0; + + fmuld %f10,%f62,%f62 ! (Y0_2) di0 *= dtmp0; + + fdtos %f58,%f9 ! (Y1_3) ftmp0 = (float)dtmp0; + add %i3,stridez,%i5 ! pz += stridez + st %f9,[%i3] ! (Y1_3) pz[0] = ftmp0; + + subcc counter,1,counter + bneg,pn %icc,.xbegin + or %g0,%i5,%g1 + + fmuld %f60,%f62,%f40 ! (Y0_3) dtmp0 = yy0 * di0; + + faddd %f40,%f62,%f60 ! (Y0_3) dtmp0 += di0; + + fdtos %f60,%f1 ! (Y0_3) ftmp0 = (float)dtmp0; + add %i5,stridez,%i3 ! pz += stridez + st %f1,[%i5] ! (Y0_3) pz[0] = ftmp0; + + ba .xbegin + or %g0,%i3,%g1 + +.xspec: + bg,a,pn %icc,.yisnan ! if (ay > 0x7f800000) /* |Y| = Nan */ + ld [%o5],%f8 ! fy = *py; + + ld [%fp+tmp5],%l6 ! LOAD (ax-0x3f800000)<<63 + srl %i5,31,%i5 ! uy >> 31 + + cmp %l6,%i5 ! if((ax < 0x3f800000) != (uy >> 31)) + be,a,pn %icc,.xspec_exit ! if((ax < 0x3f800000) != (uy >> 31)) + st %i3,[%g1] ! fy = *(float*)&ay; + + st %g0,[%g1] ! fy = ZERO + add %g1,stridez,%g1 + ba .xbegin1 + add %o5,stridey,%o5 + +.yisnan: + fmuls %f8,%f8,%f8 ! 
fy = *py * *py; /* |Y| = Nan */ + st %f8,[%g1] + +.xspec_exit: + add %g1,stridez,%g1 + ba .xbegin1 + add %o5,stridey,%o5 + + .align 16 +.xupdate0: + cmp counter,0 + ble .xcont0 + fzeros %f7 + + stx %o5,[%fp+tmp_py] + + st counter,[%fp+tmp_counter] + ba .xcont0 + or %g0,0,counter + + .align 16 +.xupdate1: + cmp counter,1 + ble .xcont1 + fzeros %f5 + + sub counter,1,counter + stx %o5,[%fp+tmp_py] + + st counter,[%fp+tmp_counter] + ba .xcont1 + or %g0,1,counter + + .align 16 +.xupdate2: + cmp counter,2 + ble .xcont2 + fzeros %f7 + + sub counter,2,counter + stx %o5,[%fp+tmp_py] + + st counter,[%fp+tmp_counter] + ba .xcont2 + or %g0,2,counter + + .align 16 +.xupdate3: + cmp counter,3 + ble .xcont3 + fzeros %f5 + + sub counter,3,counter + stx %o5,[%fp+tmp_py] + + st counter,[%fp+tmp_counter] + ba .xcont3 + or %g0,3,counter + + .align 16 +.xupdate4: + cmp counter,4 + ble .xcont4 + fzeros %f7 + + sub counter,4,counter + stx %o5,[%fp+tmp_py] + + st counter,[%fp+tmp_counter] + ba .xcont4 + or %g0,4,counter + + .align 16 +.xupdate5: + cmp counter,5 + ble .xcont5 + fzeros %f5 + + sub counter,5,counter + stx %o5,[%fp+tmp_py] + + st counter,[%fp+tmp_counter] + ba .xcont5 + or %g0,5,counter + + .align 16 +.xupdate6: + cmp counter,5 + ble .xcont6 + fzeros %f7 + + sub counter,5,counter + stx %o5,[%fp+tmp_py] + + st counter,[%fp+tmp_counter] + ba .xcont6 + or %g0,5,counter + + .align 16 +.xupdate7: + cmp counter,2 + ble .xcont7 + fzeros %f5 + + sub counter,2,counter + stx %o5,[%fp+tmp_py] + + st counter,[%fp+tmp_counter] + ba .xcont7 + or %g0,2,counter + + .align 16 +.xupdate8: + cmp counter,3 + ble .xcont8 + fzeros %f7 + + sub counter,3,counter + stx %o5,[%fp+tmp_py] + + st counter,[%fp+tmp_counter] + ba .xcont8 + or %g0,3,counter + + .align 16 +.xupdate9: + cmp counter,4 + ble .xcont9 + fzeros %f5 + + sub counter,4,counter + stx %o5,[%fp+tmp_py] + + st counter,[%fp+tmp_counter] + ba .xcont9 + or %g0,4,counter + + .align 16 +.xupdate10: + cmp counter,5 + ble .xcont10 + fzeros %f7 + 
+ sub counter,5,counter + stx %o5,[%fp+tmp_py] + + st counter,[%fp+tmp_counter] + ba .xcont10 + or %g0,5,counter + + .align 16 +.xupdate11: + cmp counter,5 + ble .xcont11 + fzeros %f5 + + sub counter,5,counter + stx %o5,[%fp+tmp_py] + + st counter,[%fp+tmp_counter] + ba .xcont11 + or %g0,5,counter + + SET_SIZE(__vpowf) + diff --git a/usr/src/lib/libmvec/common/vis/__vrhypot.S b/usr/src/lib/libmvec/common/vis/__vrhypot.S new file mode 100644 index 0000000000..dc53584864 --- /dev/null +++ b/usr/src/lib/libmvec/common/vis/__vrhypot.S @@ -0,0 +1,3879 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + + .file "__vrhypot.S" + +#include "libm.h" + + RO_DATA + .align 64 + +.CONST_TBL: + .word 0x7fe00000, 0x7fdfc07f, 0x7fdf81f8, 0x7fdf4465, + .word 0x7fdf07c1, 0x7fdecc07, 0x7fde9131, 0x7fde573a, + .word 0x7fde1e1e, 0x7fdde5d6, 0x7fddae60, 0x7fdd77b6, + .word 0x7fdd41d4, 0x7fdd0cb5, 0x7fdcd856, 0x7fdca4b3, + .word 0x7fdc71c7, 0x7fdc3f8f, 0x7fdc0e07, 0x7fdbdd2b, + .word 0x7fdbacf9, 0x7fdb7d6c, 0x7fdb4e81, 0x7fdb2036, + .word 0x7fdaf286, 0x7fdac570, 0x7fda98ef, 0x7fda6d01, + .word 0x7fda41a4, 0x7fda16d3, 0x7fd9ec8e, 0x7fd9c2d1, + .word 0x7fd99999, 0x7fd970e4, 0x7fd948b0, 0x7fd920fb, + .word 0x7fd8f9c1, 0x7fd8d301, 0x7fd8acb9, 0x7fd886e5, + .word 0x7fd86186, 0x7fd83c97, 0x7fd81818, 0x7fd7f405, + .word 0x7fd7d05f, 0x7fd7ad22, 0x7fd78a4c, 0x7fd767dc, + .word 0x7fd745d1, 0x7fd72428, 0x7fd702e0, 0x7fd6e1f7, + .word 0x7fd6c16c, 0x7fd6a13c, 0x7fd68168, 0x7fd661ec, + .word 0x7fd642c8, 0x7fd623fa, 0x7fd60581, 0x7fd5e75b, + .word 0x7fd5c988, 0x7fd5ac05, 0x7fd58ed2, 0x7fd571ed, + .word 0x7fd55555, 0x7fd53909, 0x7fd51d07, 0x7fd50150, + .word 0x7fd4e5e0, 0x7fd4cab8, 0x7fd4afd6, 0x7fd49539, + .word 0x7fd47ae1, 0x7fd460cb, 0x7fd446f8, 0x7fd42d66, + .word 0x7fd41414, 0x7fd3fb01, 0x7fd3e22c, 0x7fd3c995, + .word 0x7fd3b13b, 0x7fd3991c, 0x7fd38138, 0x7fd3698d, + .word 0x7fd3521c, 0x7fd33ae4, 0x7fd323e3, 0x7fd30d19, + .word 0x7fd2f684, 0x7fd2e025, 0x7fd2c9fb, 0x7fd2b404, + .word 0x7fd29e41, 0x7fd288b0, 0x7fd27350, 0x7fd25e22, + .word 0x7fd24924, 0x7fd23456, 0x7fd21fb7, 0x7fd20b47, + .word 0x7fd1f704, 0x7fd1e2ef, 0x7fd1cf06, 0x7fd1bb4a, + .word 0x7fd1a7b9, 0x7fd19453, 0x7fd18118, 0x7fd16e06, + .word 0x7fd15b1e, 0x7fd1485f, 0x7fd135c8, 0x7fd12358, + .word 0x7fd11111, 0x7fd0fef0, 0x7fd0ecf5, 0x7fd0db20, + .word 0x7fd0c971, 0x7fd0b7e6, 0x7fd0a681, 0x7fd0953f, + .word 0x7fd08421, 0x7fd07326, 0x7fd0624d, 0x7fd05197, + .word 0x7fd04104, 0x7fd03091, 0x7fd02040, 0x7fd01010, + + .word 0x42300000, 0 ! D2ON36 = 2**36 + .word 0xffffff00, 0 ! DA0 + .word 0xfff00000, 0 ! 
DA1 + .word 0x3ff00000, 0 ! DONE = 1.0 + .word 0x40000000, 0 ! DTWO = 2.0 + .word 0x7fd00000, 0 ! D2ON1022 + .word 0x3cb00000, 0 ! D2ONM52 + .word 0x43200000, 0 ! D2ON51 + .word 0x0007ffff, 0xffffffff ! 0x0007ffffffffffff + +#define stridex %l2 +#define stridey %l3 +#define stridez %l5 + +#define TBL_SHIFT 512 + +#define TBL %l1 +#define counter %l4 + +#define _0x7ff00000 %l0 +#define _0x00100000 %o5 +#define _0x7fffffff %l6 + +#define D2ON36 %f4 +#define DTWO %f6 +#define DONE %f8 +#define DA0 %f58 +#define DA1 %f56 + +#define dtmp0 STACK_BIAS-0x80 +#define dtmp1 STACK_BIAS-0x78 +#define dtmp2 STACK_BIAS-0x70 +#define dtmp3 STACK_BIAS-0x68 +#define dtmp4 STACK_BIAS-0x60 +#define dtmp5 STACK_BIAS-0x58 +#define dtmp6 STACK_BIAS-0x50 +#define dtmp7 STACK_BIAS-0x48 +#define dtmp8 STACK_BIAS-0x40 +#define dtmp9 STACK_BIAS-0x38 +#define dtmp10 STACK_BIAS-0x30 +#define dtmp11 STACK_BIAS-0x28 +#define dtmp12 STACK_BIAS-0x20 +#define dtmp13 STACK_BIAS-0x18 +#define dtmp14 STACK_BIAS-0x10 +#define dtmp15 STACK_BIAS-0x08 + +#define ftmp0 STACK_BIAS-0x100 +#define tmp_px STACK_BIAS-0x98 +#define tmp_py STACK_BIAS-0x90 +#define tmp_counter STACK_BIAS-0x88 + +! sizeof temp storage - must be a multiple of 16 for V9 +#define tmps 0x100 + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +! !!!!! algorithm !!!!! +! hx0 = *(int*)px; +! hy0 = *(int*)py; +! +! ((float*)&x0)[0] = ((float*)px)[0]; +! ((float*)&x0)[1] = ((float*)px)[1]; +! ((float*)&y0)[0] = ((float*)py)[0]; +! ((float*)&y0)[1] = ((float*)py)[1]; +! +! hx0 &= 0x7fffffff; +! hy0 &= 0x7fffffff; +! +! diff0 = hy0 - hx0; +! j0 = diff0 >> 31; +! j0 &= diff0; +! j0 = hy0 - j0; +! j0 &= 0x7ff00000; +! +! j0 = 0x7ff00000 - j0; +! ll = (long long)j0 << 32; +! *(long long*)&scl0 = ll; +! +! if ( hx0 >= 0x7ff00000 || hy0 >= 0x7ff00000 ) +! { +! lx = ((int*)px)[1]; +! ly = ((int*)py)[1]; +! +! if ( hx0 == 0x7ff00000 && lx == 0 ) res0 = 0.0; +! else if ( hy0 == 0x7ff00000 && ly == 0 ) res0 = 0.0; +! 
else res0 = fabs(x0) * fabs(y0); +! +! ((float*)pz)[0] = ((float*)&res0)[0]; +! ((float*)pz)[1] = ((float*)&res0)[1]; +! +! px += stridex; +! py += stridey; +! pz += stridez; +! continue; +! } +! if ( hx0 < 0x00100000 && hy0 < 0x00100000 ) +! { +! lx = ((int*)px)[1]; +! ly = ((int*)py)[1]; +! ii = hx0 | hy0; +! ii |= lx; +! ii |= ly; +! if ( ii == 0 ) +! { +! res0 = 1.0 / 0.0; +! ((float*)pz)[0] = ((float*)&res0)[0]; +! ((float*)pz)[1] = ((float*)&res0)[1]; +! +! px += stridex; +! py += stridey; +! pz += stridez; +! continue; +! } +! x0 = fabs(x0); +! y0 = fabs(y0); +! if ( hx0 < 0x00080000 ) +! { +! x0 = *(long long*)&x0; +! } +! else +! { +! ((long long*)&dtmp0)[0] = 0x0007ffffffffffffULL; +! x0 = vis_fand(x0, dtmp0); +! x0 = *(long long*)&x0; +! x0 += D2ON51; +! } +! x0 *= D2ONM52; +! if ( hy0 < 0x00080000 ) +! { +! y0 = *(long long*)&y0; +! } +! else +! { +! ((long long*)&dtmp0)[0] = 0x0007ffffffffffffULL; +! y0 = vis_fand(y0, dtmp0); +! y0 = *(long long*)&y0; +! y0 += D2ON51; +! } +! y0 *= D2ONM52; +! *(long long*)&scl0 = 0x7fd0000000000000ULL; +! } +! else +! { +! x0 *= scl0; +! y0 *= scl0; +! } +! +! x_hi0 = x0 + D2ON36; +! y_hi0 = y0 + D2ON36; +! x_hi0 -= D2ON36; +! y_hi0 -= D2ON36; +! x_lo0 = x0 - x_hi0; +! y_lo0 = y0 - y_hi0; +! res0_hi = x_hi0 * x_hi0; +! dtmp0 = y_hi0 * y_hi0; +! res0_hi += dtmp0; +! res0_lo = x0 + x_hi0; +! res0_lo *= x_lo0; +! dtmp1 = y0 + y_hi0; +! dtmp1 *= y_lo0; +! res0_lo += dtmp1; +! +! dres = res0_hi + res0_lo; +! dexp0 = vis_fand(dres,DA1); +! iarr = ((int*)&dres)[0]; +! +! iarr >>= 11; +! iarr &= 0x1fc; +! dtmp0 = ((double*)((char*)dll1 + iarr))[0]; +! dd = vis_fpsub32(dtmp0, dexp0); +! +! dtmp0 = dd * dres; +! dtmp0 = DTWO - dtmp0; +! dd *= dtmp0; +! dtmp1 = dd * dres; +! dtmp1 = DTWO - dtmp1; +! dd *= dtmp1; +! dtmp2 = dd * dres; +! dtmp2 = DTWO - dtmp2; +! dres = dd * dtmp2; +! +! res0 = vis_fand(dres,DA0); +! +! dtmp0 = res0_hi * res0; +! dtmp0 = DONE - dtmp0; +! dtmp1 = res0_lo * res0; +! dtmp0 -= dtmp1; +! 
dtmp0 *= dres; +! res0 += dtmp0; +! +! res0 = sqrt ( res0 ); +! +! res0 = scl0 * res0; +! +! ((float*)pz)[0] = ((float*)&res0)[0]; +! ((float*)pz)[1] = ((float*)&res0)[1]; +! +! px += stridex; +! py += stridey; +! pz += stridez; +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + ENTRY(__vrhypot) + save %sp,-SA(MINFRAME)-tmps,%sp + PIC_SETUP(l7) + PIC_SET(l7,.CONST_TBL,l1) + wr %g0,0x82,%asi + +#ifdef __sparcv9 + ldx [%fp+STACK_BIAS+176],stridez +#else + ld [%fp+STACK_BIAS+92],stridez +#endif + + sll %i2,3,stridex + sethi %hi(0x7ff00000),_0x7ff00000 + st %i0,[%fp+tmp_counter] + + sll %i4,3,stridey + sethi %hi(0x00100000),_0x00100000 + stx %i1,[%fp+tmp_px] + + sll stridez,3,stridez + sethi %hi(0x7ffffc00),_0x7fffffff + stx %i3,[%fp+tmp_py] + + ldd [TBL+TBL_SHIFT],D2ON36 + add _0x7fffffff,1023,_0x7fffffff + + ldd [TBL+TBL_SHIFT+8],DA0 + + ldd [TBL+TBL_SHIFT+16],DA1 + + ldd [TBL+TBL_SHIFT+24],DONE + + ldd [TBL+TBL_SHIFT+32],DTWO + +.begin: + ld [%fp+tmp_counter],counter + ldx [%fp+tmp_px],%i4 + ldx [%fp+tmp_py],%i3 + st %g0,[%fp+tmp_counter] +.begin1: + cmp counter,0 + ble,pn %icc,.exit + + lda [%i4]0x82,%o1 ! (7_0) hx0 = *(int*)px; + add %i4,stridex,%i1 + + lda [%i3]0x82,%o4 ! (7_0) hy0 = *(int*)py; + add %i3,stridey,%i0 ! py += stridey + + and %o1,_0x7fffffff,%o7 ! (7_0) hx0 &= 0x7fffffff; + + cmp %o7,_0x7ff00000 ! (7_0) hx0 ? 0x7ff00000 + bge,pn %icc,.spec0 ! (7_0) if ( hx0 >= 0x7ff00000 ) + and %o4,_0x7fffffff,%l7 ! (7_0) hy0 &= 0x7fffffff; + + cmp %l7,_0x7ff00000 ! (7_0) hy0 ? 0x7ff00000 + bge,pn %icc,.spec0 ! (7_0) if ( hy0 >= 0x7ff00000 ) + sub %l7,%o7,%o1 ! (7_0) diff0 = hy0 - hx0; + + sra %o1,31,%o3 ! (7_0) j0 = diff0 >> 31; + cmp %o7,_0x00100000 ! (7_0) hx0 ? 0x00100000 + bl,pn %icc,.spec1 ! (7_0) if ( hx0 < 0x00100000 ) + + and %o1,%o3,%o1 ! (7_0) j0 &= diff0; +.cont_spec0: + sub %l7,%o1,%o4 ! (7_0) j0 = hy0 - j0; + + and %o4,%l0,%o4 ! (7_0) j0 &= 0x7ff00000; + + sub %l0,%o4,%g1 ! (7_0) j0 = 0x7ff00000 - j0; + + sllx %g1,32,%g1 ! 
(7_0) ll = (long long)j0 << 32; + + stx %g1,[%fp+dtmp15] ! (7_0) *(long long*)&scl0 = ll; + + stx %g1,[%fp+dtmp0] ! (7_1) *(long long*)&scl0 = ll; +.cont_spec1: + lda [%i1]0x82,%o1 ! (0_0) hx0 = *(int*)px; + mov %i1,%i2 + + lda [%i0]0x82,%o4 ! (0_0) hy0 = *(int*)py; + + and %o1,_0x7fffffff,%o7 ! (0_0) hx0 &= 0x7fffffff; + mov %i0,%o0 + + cmp %o7,_0x7ff00000 ! (0_0) hx0 ? 0x7ff00000 + bge,pn %icc,.update0 ! (0_0) if ( hx0 >= 0x7ff00000 ) + and %o4,_0x7fffffff,%l7 ! (0_0) hy0 &= 0x7fffffff; + + cmp %l7,_0x7ff00000 ! (0_0) hy0 ? 0x7ff00000 + sub %l7,%o7,%o1 ! (0_0) diff0 = hy0 - hx0; + bge,pn %icc,.update0 ! (0_0) if ( hy0 >= 0x7ff00000 ) + sra %o1,31,%o3 ! (0_0) j0 = diff0 >> 31; + + cmp %o7,_0x00100000 ! (0_0) hx0 ? 0x00100000 + + and %o1,%o3,%o1 ! (0_0) j0 &= diff0; + bl,pn %icc,.update1 ! (0_0) if ( hx0 < 0x00100000 ) + sub %l7,%o1,%o4 ! (0_0) j0 = hy0 - j0; +.cont0: + and %o4,%l0,%o4 ! (0_0) j0 &= 0x7ff00000; + + sub %l0,%o4,%o4 ! (0_0) j0 = 0x7ff00000 - j0; +.cont1: + sllx %o4,32,%o4 ! (0_0) ll = (long long)j0 << 32; + stx %o4,[%fp+dtmp1] ! (0_0) *(long long*)&scl0 = ll; + + ldd [%fp+dtmp15],%f62 ! (7_1) *(long long*)&scl0 = ll; + + lda [%i4]%asi,%f10 ! (7_1) ((float*)&x0)[0] = ((float*)px)[0]; + + lda [%i4+4]%asi,%f11 ! (7_1) ((float*)&x0)[1] = ((float*)px)[1]; + + lda [%i3]%asi,%f12 ! (7_1) ((float*)&y0)[0] = ((float*)py)[0]; + + add %i1,stridex,%i4 ! px += stridex + lda [%i3+4]%asi,%f13 ! (7_1) ((float*)&y0)[1] = ((float*)py)[1]; + + fmuld %f10,%f62,%f10 ! (7_1) x0 *= scl0; + add %i4,stridex,%i1 ! px += stridex + + fmuld %f12,%f62,%f60 ! (7_1) y0 *= scl0; + + lda [%i4]0x82,%o1 ! (1_0) hx0 = *(int*)px; + + add %i0,stridey,%i3 ! py += stridey + faddd %f10,D2ON36,%f46 ! (7_1) x_hi0 = x0 + D2ON36; + + lda [%i3]0x82,%g1 ! (1_0) hy0 = *(int*)py; + add %i3,stridey,%i0 ! py += stridey + faddd %f60,D2ON36,%f50 ! (7_1) y_hi0 = y0 + D2ON36; + + and %o1,_0x7fffffff,%o7 ! (1_0) hx0 &= 0x7fffffff; + + cmp %o7,_0x7ff00000 ! (1_0) hx0 ? 0x7ff00000 + stx %o4,[%fp+dtmp2] ! 
(0_0) *(long long*)&scl0 = ll; + + and %g1,_0x7fffffff,%l7 ! (1_0) hy0 &= 0x7fffffff; + bge,pn %icc,.update2 ! (1_0) if ( hx0 >= 0x7ff00000 ) + fsubd %f46,D2ON36,%f20 ! (7_1) x_hi0 -= D2ON36; + + cmp %l7,_0x7ff00000 ! (1_0) hy0 ? 0x7ff00000 + sub %l7,%o7,%o1 ! (1_0) diff0 = hy0 - hx0; + bge,pn %icc,.update3 ! (1_0) if ( hy0 >= 0x7ff00000 ) + fsubd %f50,D2ON36,%f54 ! (7_1) y_hi0 -= D2ON36; + + sra %o1,31,%o3 ! (1_0) j0 = diff0 >> 31; + + and %o1,%o3,%o1 ! (1_0) j0 &= diff0; + + fmuld %f20,%f20,%f2 ! (7_1) res0_hi = x_hi0 * x_hi0; + sub %l7,%o1,%o4 ! (1_0) j0 = hy0 - j0; + cmp %o7,_0x00100000 ! (1_0) hx0 ? 0x00100000 + fsubd %f10,%f20,%f0 ! (7_1) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (7_1) dtmp0 = y_hi0 * y_hi0; + and %o4,%l0,%o4 ! (1_0) j0 &= 0x7ff00000; + bl,pn %icc,.update4 ! (1_0) if ( hx0 < 0x00100000 ) + faddd %f10,%f20,%f62 ! (7_1) res0_lo = x0 + x_hi0; + + sub %l0,%o4,%o4 ! (1_0) j0 = 0x7ff00000 - j0; +.cont4: + sllx %o4,32,%o4 ! (1_0) ll = (long long)j0 << 32; + stx %o4,[%fp+dtmp3] ! (1_0) *(long long*)&scl0 = ll; + faddd %f60,%f54,%f50 ! (7_1) dtmp1 = y0 + y_hi0; + + fsubd %f60,%f54,%f12 ! (7_1) y_lo0 = y0 - y_hi0; + + fmuld %f62,%f0,%f0 ! (7_1) res0_lo *= x_lo0; + ldd [%fp+dtmp1],%f62 ! (0_0) *(long long*)&scl0 = ll; + faddd %f2,%f46,%f44 ! (7_1) res0_hi += dtmp0; + + lda [%i2]%asi,%f10 ! (0_0) ((float*)&x0)[0] = ((float*)px)[0]; + + lda [%i2+4]%asi,%f11 ! (0_0) ((float*)&x0)[1] = ((float*)px)[1]; + + fmuld %f50,%f12,%f26 ! (7_1) dtmp1 *= y_lo0; + lda [%o0]%asi,%f12 ! (0_0) ((float*)&y0)[0] = ((float*)py)[0]; + + lda [%o0+4]%asi,%f13 ! (0_0) ((float*)&y0)[1] = ((float*)py)[1]; + + fmuld %f10,%f62,%f10 ! (0_0) x0 *= scl0; + + fmuld %f12,%f62,%f60 ! (0_0) y0 *= scl0; + faddd %f0,%f26,%f38 ! (7_1) res0_lo += dtmp1; + + lda [%i1]0x82,%o1 ! (2_0) hx0 = *(int*)px; + mov %i1,%i2 + + faddd %f10,D2ON36,%f46 ! (0_0) x_hi0 = x0 + D2ON36; + + lda [%i0]0x82,%g1 ! (2_0) hy0 = *(int*)py; + mov %i0,%o0 + faddd %f60,D2ON36,%f12 ! 
(0_0) y_hi0 = y0 + D2ON36; + + faddd %f44,%f38,%f14 ! (7_1) dres = res0_hi + res0_lo; + and %o1,_0x7fffffff,%o7 ! (2_0) hx0 &= 0x7fffffff; + + cmp %o7,_0x7ff00000 ! (2_0) hx0 ? 0x7ff00000 + bge,pn %icc,.update5 ! (2_0) if ( hx0 >= 0x7ff00000 ) + stx %o4,[%fp+dtmp4] ! (1_0) *(long long*)&scl0 = ll; + + and %g1,_0x7fffffff,%l7 ! (2_0) hy0 &= 0x7fffffff; + st %f14,[%fp+ftmp0] ! (7_1) iarr = ((int*)&dres)[0]; + fsubd %f46,D2ON36,%f20 ! (0_0) x_hi0 -= D2ON36; + + sub %l7,%o7,%o1 ! (2_0) diff0 = hy0 - hx0; + cmp %l7,_0x7ff00000 ! (2_0) hy0 ? 0x7ff00000 + bge,pn %icc,.update6 ! (2_0) if ( hy0 >= 0x7ff00000 ) + fsubd %f12,D2ON36,%f54 ! (0_0) y_hi0 -= D2ON36; + + sra %o1,31,%o3 ! (2_0) j0 = diff0 >> 31; + + and %o1,%o3,%o1 ! (2_0) j0 &= diff0; + + fmuld %f20,%f20,%f2 ! (0_0) res0_hi = x_hi0 * x_hi0; + cmp %o7,_0x00100000 ! (2_0) hx0 ? 0x00100000 + sub %l7,%o1,%o4 ! (2_0) j0 = hy0 - j0; + fsubd %f10,%f20,%f0 ! (0_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (0_0) dtmp0 = y_hi0 * y_hi0; + and %o4,%l0,%o4 ! (2_0) j0 &= 0x7ff00000; + bl,pn %icc,.update7 ! (2_0) if ( hx0 < 0x00100000 ) + faddd %f10,%f20,%f62 ! (0_0) res0_lo = x0 + x_hi0; +.cont7: + sub %l0,%o4,%g1 ! (2_0) j0 = 0x7ff00000 - j0; + + sllx %g1,32,%g1 ! (2_0) ll = (long long)j0 << 32; +.cont8: + stx %g1,[%fp+dtmp5] ! (2_0) *(long long*)&scl0 = ll; + faddd %f60,%f54,%f50 ! (0_0) dtmp1 = y0 + y_hi0; + + fsubd %f60,%f54,%f12 ! (0_0) y_lo0 = y0 - y_hi0; + + fmuld %f62,%f0,%f0 ! (0_0) res0_lo *= x_lo0; + ldd [%fp+dtmp3],%f62 ! (1_0) *(long long*)&scl0 = ll; + faddd %f2,%f46,%f32 ! (0_0) res0_hi += dtmp0; + + lda [%i4]%asi,%f10 ! (1_0) ((float*)&x0)[0] = ((float*)px)[0]; + + lda [%i4+4]%asi,%f11 ! (1_0) ((float*)&x0)[1] = ((float*)px)[1]; + + fmuld %f50,%f12,%f28 ! (0_0) dtmp1 *= y_lo0; + lda [%i3]%asi,%f12 ! (1_0) ((float*)&y0)[0] = ((float*)py)[0]; + + add %i1,stridex,%i4 ! px += stridex + lda [%i3+4]%asi,%f13 ! (1_0) ((float*)&y0)[1] = ((float*)py)[1]; + + ld [%fp+ftmp0],%o2 ! 
(7_1) iarr = ((int*)&dres)[0]; + add %i4,stridex,%i1 ! px += stridex + fand %f14,DA1,%f2 ! (7_1) dexp0 = vis_fand(dres,DA1); + + fmuld %f10,%f62,%f10 ! (1_0) x0 *= scl0; + + fmuld %f12,%f62,%f60 ! (1_0) y0 *= scl0; + sra %o2,11,%i3 ! (7_1) iarr >>= 11; + faddd %f0,%f28,%f36 ! (0_0) res0_lo += dtmp1; + + and %i3,0x1fc,%i3 ! (7_1) iarr &= 0x1fc; + + add %i3,TBL,%o4 ! (7_1) (char*)dll1 + iarr + lda [%i4]0x82,%o1 ! (3_0) hx0 = *(int*)px; + + add %i0,stridey,%i3 ! py += stridey + ld [%o4],%f26 ! (7_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; + faddd %f10,D2ON36,%f46 ! (1_0) x_hi0 = x0 + D2ON36; + + lda [%i3]0x82,%o4 ! (3_0) hy0 = *(int*)py; + add %i3,stridey,%i0 ! py += stridey + faddd %f60,D2ON36,%f12 ! (1_0) y_hi0 = y0 + D2ON36; + + faddd %f32,%f36,%f22 ! (0_0) dres = res0_hi + res0_lo; + and %o1,_0x7fffffff,%o7 ! (3_0) hx0 &= 0x7fffffff; + + cmp %o7,_0x7ff00000 ! (3_0) hx0 ? 0x7ff00000 + stx %g1,[%fp+dtmp6] ! (2_0) *(long long*)&scl0 = ll; + bge,pn %icc,.update9 ! (3_0) if ( hx0 >= 0x7ff00000 ) + fpsub32 %f26,%f2,%f26 ! (7_1) dd = vis_fpsub32(dtmp0, dexp0); + + and %o4,_0x7fffffff,%l7 ! (3_0) hy0 &= 0x7fffffff; + st %f22,[%fp+ftmp0] ! (0_0) iarr = ((int*)&dres)[0]; + fsubd %f46,D2ON36,%f20 ! (1_0) x_hi0 -= D2ON36; + + sub %l7,%o7,%o1 ! (3_0) diff0 = hy0 - hx0; + cmp %l7,_0x7ff00000 ! (3_0) hy0 ? 0x7ff00000 + bge,pn %icc,.update10 ! (3_0) if ( hy0 >= 0x7ff00000 ) + fsubd %f12,D2ON36,%f54 ! (1_0) y_hi0 -= D2ON36; + + fmuld %f26,%f14,%f50 ! (7_1) dtmp0 = dd * dres; + sra %o1,31,%o3 ! (3_0) j0 = diff0 >> 31; + + and %o1,%o3,%o1 ! (3_0) j0 &= diff0; + + fmuld %f20,%f20,%f2 ! (1_0) res0_hi = x_hi0 * x_hi0; + cmp %o7,_0x00100000 ! (3_0) hx0 ? 0x00100000 + sub %l7,%o1,%o4 ! (3_0) j0 = hy0 - j0; + fsubd %f10,%f20,%f0 ! (1_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (1_0) dtmp0 = y_hi0 * y_hi0; + and %o4,%l0,%o4 ! (3_0) j0 &= 0x7ff00000; + bl,pn %icc,.update11 ! (3_0) if ( hx0 < 0x00100000 ) + faddd %f10,%f20,%f62 ! 
(1_0) res0_lo = x0 + x_hi0; +.cont11: + sub %l0,%o4,%g1 ! (3_0) j0 = 0x7ff00000 - j0; + fsubd DTWO,%f50,%f20 ! (7_1) dtmp0 = DTWO - dtmp0; +.cont12: + sllx %g1,32,%g1 ! (3_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp7] ! (3_0) *(long long*)&scl0 = ll; + faddd %f60,%f54,%f50 ! (1_0) dtmp1 = y0 + y_hi0; + + fsubd %f60,%f54,%f12 ! (1_0) y_lo0 = y0 - y_hi0 + + fmuld %f62,%f0,%f0 ! (1_0) res0_lo *= x_lo0; + ldd [%fp+dtmp5],%f62 ! (2_0) *(long long*)&scl0 = ll; + faddd %f2,%f46,%f42 ! (1_0) res0_hi += dtmp0; + + lda [%i2]%asi,%f10 ! (2_0) ((float*)&x0)[0] = ((float*)px)[0]; + fmuld %f26,%f20,%f54 ! (7_1) dd *= dtmp0; + + lda [%i2+4]%asi,%f11 ! (2_0) ((float*)&x0)[1] = ((float*)px)[1]; + + fmuld %f50,%f12,%f26 ! (1_0) dtmp1 *= y_lo0; + lda [%o0]%asi,%f12 ! (2_0) ((float*)&y0)[0] = ((float*)py)[0]; + + lda [%o0+4]%asi,%f13 ! (2_0) ((float*)&y0)[1] = ((float*)py)[1]; + + fmuld %f54,%f14,%f50 ! (7_1) dtmp1 = dd * dres; + ld [%fp+ftmp0],%o2 ! (0_0) iarr = ((int*)&dres)[0]; + fand %f22,DA1,%f2 ! (0_0) dexp0 = vis_fand(dres,DA1); + + fmuld %f10,%f62,%f10 ! (2_0) x0 *= scl0; + + fmuld %f12,%f62,%f60 ! (2_0) y0 *= scl0; + sra %o2,11,%o4 ! (0_0) iarr >>= 11; + faddd %f0,%f26,%f34 ! (1_0) res0_lo += dtmp1; + + and %o4,0x1fc,%o4 ! (0_0) iarr &= 0x1fc; + + add %o4,TBL,%o4 ! (0_0) (char*)dll1 + iarr + mov %i1,%i2 + lda [%i1]0x82,%o1 ! (4_0) hx0 = *(int*)px; + fsubd DTWO,%f50,%f20 ! (7_1) dtmp1 = DTWO - dtmp1; + + ld [%o4],%f28 ! (0_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; + faddd %f10,D2ON36,%f46 ! (2_0) x_hi0 = x0 + D2ON36; + + lda [%i0]0x82,%o4 ! (4_0) hy0 = *(int*)py; + mov %i0,%o0 + faddd %f60,D2ON36,%f50 ! (2_0) y_hi0 = y0 + D2ON36; + + and %o1,_0x7fffffff,%o7 ! (4_0) hx0 &= 0x7fffffff; + faddd %f42,%f34,%f18 ! (1_0) dres = res0_hi + res0_lo; + + fmuld %f54,%f20,%f16 ! (7_1) dd *= dtmp1; + cmp %o7,_0x7ff00000 ! (4_0) hx0 ? 0x7ff00000 + stx %g1,[%fp+dtmp8] ! (3_0) *(long long*)&scl0 = ll; + fpsub32 %f28,%f2,%f28 ! 
(0_0) dd = vis_fpsub32(dtmp0, dexp0); + + and %o4,_0x7fffffff,%l7 ! (4_0) hy0 &= 0x7fffffff; + bge,pn %icc,.update13 ! (4_0) if ( hx0 >= 0x7ff00000 ) + st %f18,[%fp+ftmp0] ! (1_0) iarr = ((int*)&dres)[0]; + fsubd %f46,D2ON36,%f20 ! (2_0) x_hi0 -= D2ON36; + + sub %l7,%o7,%o1 ! (4_0) diff0 = hy0 - hx0; + cmp %l7,_0x7ff00000 ! (4_0) hy0 ? 0x7ff00000 + bge,pn %icc,.update14 ! (4_0) if ( hy0 >= 0x7ff00000 ) + fsubd %f50,D2ON36,%f54 ! (2_0) y_hi0 -= D2ON36; + + fmuld %f28,%f22,%f50 ! (0_0) dtmp0 = dd * dres; + sra %o1,31,%o3 ! (4_0) j0 = diff0 >> 31; + + and %o1,%o3,%o1 ! (4_0) j0 &= diff0; + + fmuld %f20,%f20,%f2 ! (2_0) res0_hi = x_hi0 * x_hi0; + sub %l7,%o1,%o4 ! (4_0) j0 = hy0 - j0; + cmp %o7,_0x00100000 ! (4_0) hx0 ? 0x00100000 + fsubd %f10,%f20,%f0 ! (2_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (2_0) dtmp0 = y_hi0 * y_hi0; + and %o4,%l0,%o4 ! (4_0) j0 &= 0x7ff00000; + bl,pn %icc,.update15 ! (4_0) if ( hx0 < 0x00100000 ) + faddd %f10,%f20,%f62 ! (2_0) res0_lo = x0 + x_hi0; +.cont15: + sub %l0,%o4,%g1 ! (4_0) j0 = 0x7ff00000 - j0; + fsubd DTWO,%f50,%f20 ! (0_0) dtmp0 = DTWO - dtmp0; +.cont16: + fmuld %f16,%f14,%f14 ! (7_1) dtmp2 = dd * dres; + sllx %g1,32,%g1 ! (4_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp9] ! (4_0) *(long long*)&scl0 = ll; + faddd %f60,%f54,%f50 ! (2_0) dtmp1 = y0 + y_hi0; + + fsubd %f60,%f54,%f12 ! (2_0) y_lo0 = y0 - y_hi0; + + fmuld %f62,%f0,%f0 ! (2_0) res0_lo *= x_lo0; + ldd [%fp+dtmp7],%f62 ! (3_0) *(long long*)&scl0 = ll; + faddd %f2,%f46,%f30 ! (2_0) res0_hi += dtmp0; + + lda [%i4]%asi,%f10 ! (3_0) ((float*)&x0)[0] = ((float*)px)[0]; + fmuld %f28,%f20,%f54 ! (0_0) dd *= dtmp0; + + lda [%i4+4]%asi,%f11 ! (3_0) ((float*)&x0)[1] = ((float*)px)[1]; + + fmuld %f50,%f12,%f28 ! (2_0) dtmp1 *= y_lo0; + lda [%i3]%asi,%f12 ! (3_0) ((float*)&y0)[0] = ((float*)py)[0]; + fsubd DTWO,%f14,%f20 ! (7_1) dtmp2 = DTWO - dtmp2; + + lda [%i3+4]%asi,%f13 ! (3_0) ((float*)&y0)[1] = ((float*)py)[1]; + add %i1,stridex,%i4 ! 
px += stridex + + fmuld %f54,%f22,%f50 ! (0_0) dtmp1 = dd * dres; + ld [%fp+ftmp0],%o2 ! (1_0) iarr = ((int*)&dres)[0]; + add %i4,stridex,%i1 ! px += stridex + fand %f18,DA1,%f2 ! (1_0) dexp0 = vis_fand(dres,DA1); + + fmuld %f10,%f62,%f10 ! (3_0) x0 *= scl0; + + fmuld %f12,%f62,%f60 ! (3_0) y0 *= scl0; + sra %o2,11,%i3 ! (1_0) iarr >>= 11; + faddd %f0,%f28,%f40 ! (2_0) res0_lo += dtmp1; + + and %i3,0x1fc,%i3 ! (1_0) iarr &= 0x1fc; + fmuld %f16,%f20,%f28 ! (7_1) dres = dd * dtmp2; + + add %i3,TBL,%o4 ! (1_0) (char*)dll1 + iarr + lda [%i4]0x82,%o1 ! (5_0) hx0 = *(int*)px; + fsubd DTWO,%f50,%f20 ! (0_0) dtmp1 = DTWO - dtmp1; + + add %i0,stridey,%i3 ! py += stridey + ld [%o4],%f26 ! (1_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; + faddd %f10,D2ON36,%f46 ! (3_0) x_hi0 = x0 + D2ON36; + + lda [%i3]0x82,%o4 ! (5_0) hy0 = *(int*)py; + add %i3,stridey,%i0 ! py += stridey + faddd %f60,D2ON36,%f50 ! (3_0) y_hi0 = y0 + D2ON36; + + and %o1,_0x7fffffff,%o7 ! (5_0) hx0 &= 0x7fffffff; + faddd %f30,%f40,%f14 ! (2_0) dres = res0_hi + res0_lo; + + fmuld %f54,%f20,%f24 ! (0_0) dd *= dtmp1; + cmp %o7,_0x7ff00000 ! (5_0) hx0 ? 0x7ff00000 + stx %g1,[%fp+dtmp10] ! (4_0) *(long long*)&scl0 = ll; + fpsub32 %f26,%f2,%f26 ! (1_0) dd = vis_fpsub32(dtmp0, dexp0); + + and %o4,_0x7fffffff,%l7 ! (5_0) hy0 &= 0x7fffffff; + st %f14,[%fp+ftmp0] ! (2_0) iarr = ((int*)&dres)[0]; + bge,pn %icc,.update17 ! (5_0) if ( hx0 >= 0x7ff00000 ) + fsubd %f46,D2ON36,%f20 ! (3_0) x_hi0 -= D2ON36; + + sub %l7,%o7,%o1 ! (5_0) diff0 = hy0 - hx0; + cmp %l7,_0x7ff00000 ! (5_0) hy0 ? 0x7ff00000 + bge,pn %icc,.update18 ! (5_0) if ( hy0 >= 0x7ff00000 ) + fsubd %f50,D2ON36,%f54 ! (3_0) y_hi0 -= D2ON36; + + fmuld %f26,%f18,%f50 ! (1_0) dtmp0 = dd * dres; + sra %o1,31,%o3 ! (5_0) j0 = diff0 >> 31; + + and %o1,%o3,%o1 ! (5_0) j0 &= diff0; + fand %f28,DA0,%f48 ! (7_1) res0 = vis_fand(dres,DA0); + + fmuld %f20,%f20,%f2 ! (3_0) res0_hi = x_hi0 * x_hi0; + sub %l7,%o1,%o4 ! (5_0) j0 = hy0 - j0; + cmp %o7,_0x00100000 ! (5_0) hx0 ? 
0x00100000 + fsubd %f10,%f20,%f0 ! (3_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (3_0) dtmp0 = y_hi0 * y_hi0; + and %o4,%l0,%o4 ! (5_0) j0 &= 0x7ff00000; + bl,pn %icc,.update19 ! (5_0) if ( hx0 < 0x00100000 ) + faddd %f10,%f20,%f62 ! (3_0) res0_lo = x0 + x_hi0; +.cont19a: + fmuld %f44,%f48,%f10 ! (7_1) dtmp0 = res0_hi * res0; + sub %l0,%o4,%g1 ! (5_0) j0 = 0x7ff00000 - j0; + fsubd DTWO,%f50,%f20 ! (1_0) dtmp0 = DTWO - dtmp0; +.cont19b: + fmuld %f24,%f22,%f22 ! (0_0) dtmp2 = dd * dres; + sllx %g1,32,%g1 ! (5_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp11] ! (5_0) *(long long*)&scl0 = ll; + faddd %f60,%f54,%f50 ! (3_0) dtmp1 = y0 + y_hi0; + + fmuld %f38,%f48,%f38 ! (7_1) dtmp1 = res0_lo * res0; + fsubd %f60,%f54,%f12 ! (3_0) y_lo0 = y0 - y_hi0; +.cont20: + fmuld %f62,%f0,%f0 ! (3_0) res0_lo *= x_lo0; + ldd [%fp+dtmp9],%f62 ! (4_0) *(long long*)&scl0 = ll; + faddd %f2,%f46,%f44 ! (3_0) res0_hi += dtmp0; + + fsubd DONE,%f10,%f60 ! (7_1) dtmp0 = DONE - dtmp0; + lda [%i2]%asi,%f10 ! (4_0) ((float*)&x0)[0] = ((float*)px)[0]; + fmuld %f26,%f20,%f54 ! (1_0) dd *= dtmp0; + + lda [%i2+4]%asi,%f11 ! (4_0) ((float*)&x0)[1] = ((float*)px)[1]; + + fmuld %f50,%f12,%f26 ! (3_0) dtmp1 *= y_lo0; + lda [%o0]%asi,%f12 ! (4_0) ((float*)&y0)[0] = ((float*)py)[0]; + fsubd DTWO,%f22,%f20 ! (0_0) dtmp2 = DTWO - dtmp2; + + lda [%o0+4]%asi,%f13 ! (4_0) ((float*)&y0)[1] = ((float*)py)[1]; + + fmuld %f54,%f18,%f50 ! (1_0) dtmp1 = dd * dres; + ld [%fp+ftmp0],%o2 ! (2_0) iarr = ((int*)&dres)[0]; + fand %f14,DA1,%f2 ! (2_0) dexp0 = vis_fand(dres,DA1); + + fmuld %f10,%f62,%f10 ! (4_0) x0 *= scl0; + fsubd %f60,%f38,%f46 ! (7_1) dtmp0 -= dtmp1; + + fmuld %f12,%f62,%f60 ! (4_0) y0 *= scl0; + sra %o2,11,%o4 ! (2_0) iarr >>= 11; + faddd %f0,%f26,%f38 ! (3_0) res0_lo += dtmp1; + + and %o4,0x1fc,%o4 ! (2_0) iarr &= 0x1fc; + fmuld %f24,%f20,%f26 ! (0_0) dres = dd * dtmp2; + + add %o4,TBL,%o4 ! (2_0) (char*)dll1 + iarr + mov %i1,%i2 + lda [%i1]0x82,%o1 ! 
(6_0) hx0 = *(int*)px; + fsubd DTWO,%f50,%f52 ! (1_0) dtmp1 = DTWO - dtmp1; + + fmuld %f46,%f28,%f28 ! (7_1) dtmp0 *= dres; + ld [%o4],%f20 ! (2_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; + faddd %f10,D2ON36,%f46 ! (4_0) x_hi0 = x0 + D2ON36; + + lda [%i0]0x82,%o4 ! (6_0) hy0 = *(int*)py; + mov %i0,%o0 + faddd %f60,D2ON36,%f50 ! (4_0) y_hi0 = y0 + D2ON36; + + and %o1,_0x7fffffff,%o7 ! (6_0) hx0 &= 0x7fffffff; + faddd %f44,%f38,%f22 ! (3_0) dres = res0_hi + res0_lo; + + fmuld %f54,%f52,%f16 ! (1_0) dd *= dtmp1; + cmp %o7,_0x7ff00000 ! (6_0) hx0 ? 0x7ff00000 + stx %g1,[%fp+dtmp12] ! (5_0) *(long long*)&scl0 = ll; + fpsub32 %f20,%f2,%f52 ! (2_0) dd = vis_fpsub32(dtmp0, dexp0); + + and %o4,_0x7fffffff,%l7 ! (6_0) hy0 &= 0x7fffffff; + st %f22,[%fp+ftmp0] ! (3_0) iarr = ((int*)&dres)[0]; + bge,pn %icc,.update21 ! (6_0) if ( hx0 >= 0x7ff00000 ) + fsubd %f46,D2ON36,%f46 ! (4_0) x_hi0 -= D2ON36; + + sub %l7,%o7,%o1 ! (6_0) diff0 = hy0 - hx0; + cmp %l7,_0x7ff00000 ! (6_0) hy0 ? 0x7ff00000 + bge,pn %icc,.update22 ! (6_0) if ( hy0 >= 0x7ff00000 ) + fsubd %f50,D2ON36,%f54 ! (4_0) y_hi0 -= D2ON36; + + fmuld %f52,%f14,%f50 ! (2_0) dtmp0 = dd * dres; + sra %o1,31,%o3 ! (6_0) j0 = diff0 >> 31; + faddd %f48,%f28,%f48 ! (7_1) res0 += dtmp0; + + and %o1,%o3,%o1 ! (6_0) j0 &= diff0; + fand %f26,DA0,%f28 ! (0_0) res0 = vis_fand(dres,DA0); + + fmuld %f46,%f46,%f0 ! (4_0) res0_hi = x_hi0 * x_hi0; + sub %l7,%o1,%o4 ! (6_0) j0 = hy0 - j0; + cmp %o7,_0x00100000 ! (6_0) hx0 ? 0x00100000 + fsubd %f10,%f46,%f2 ! (4_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f20 ! (4_0) dtmp0 = y_hi0 * y_hi0; + and %o4,%l0,%o4 ! (6_0) j0 &= 0x7ff00000; + bl,pn %icc,.update23 ! (6_0) if ( hx0 < 0x00100000 ) + faddd %f10,%f46,%f62 ! (4_0) res0_lo = x0 + x_hi0; +.cont23a: + fmuld %f16,%f18,%f18 ! (1_0) dtmp2 = dd * dres; + sub %l0,%o4,%g1 ! (6_0) j0 = 0x7ff00000 - j0; + fsubd DTWO,%f50,%f10 ! (2_0) dtmp0 = DTWO - dtmp0; +.cont23b: + fmuld %f32,%f28,%f50 ! (0_0) dtmp0 = res0_hi * res0; + sllx %g1,32,%g1 ! 
(6_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp13] ! (6_0) *(long long*)&scl0 = ll; + faddd %f60,%f54,%f46 ! (4_0) dtmp1 = y0 + y_hi0; + + fmuld %f36,%f28,%f36 ! (0_0) dtmp1 = res0_lo * res0; + fsubd %f60,%f54,%f60 ! (4_0) y_lo0 = y0 - y_hi0; +.cont24: + fmuld %f62,%f2,%f2 ! (4_0) res0_lo *= x_lo0; + ldd [%fp+dtmp11],%f62 ! (5_0) *(long long*)&scl0 = ll; + faddd %f0,%f20,%f32 ! (4_0) res0_hi += dtmp0; + + lda [%i4]%asi,%f0 ! (5_0) ((float*)&x0)[0] = ((float*)px)[0]; + fmuld %f52,%f10,%f10 ! (2_0) dd *= dtmp0; + + lda [%i4+4]%asi,%f1 ! (5_0) ((float*)&x0)[1] = ((float*)px)[1]; + fsubd DONE,%f50,%f52 ! (0_0) dtmp0 = DONE - dtmp0; + + fmuld %f46,%f60,%f46 ! (4_0) dtmp1 *= y_lo0; + lda [%i3]%asi,%f12 ! (5_0) ((float*)&y0)[0] = ((float*)py)[0]; + fsubd DTWO,%f18,%f18 ! (1_0) dtmp2 = DTWO - dtmp2; + + add %i1,stridex,%i4 ! px += stridex + lda [%i3+4]%asi,%f13 ! (5_0) ((float*)&y0)[1] = ((float*)py)[1]; + + fmuld %f10,%f14,%f50 ! (2_0) dtmp1 = dd * dres; + add %i4,stridex,%i1 ! px += stridex + ld [%fp+ftmp0],%o2 ! (3_0) iarr = ((int*)&dres)[0]; + fand %f22,DA1,%f54 ! (3_0) dexp0 = vis_fand(dres,DA1); + + fmuld %f0,%f62,%f60 ! (5_0) x0 *= scl0; + fsubd %f52,%f36,%f20 ! (0_0) dtmp0 -= dtmp1; + + fmuld %f12,%f62,%f52 ! (5_0) y0 *= scl0; + sra %o2,11,%i3 ! (3_0) iarr >>= 11; + faddd %f2,%f46,%f36 ! (4_0) res0_lo += dtmp1; + + and %i3,0x1fc,%i3 ! (3_0) iarr &= 0x1fc; + fmuld %f16,%f18,%f16 ! (1_0) dres = dd * dtmp2; + + fsqrtd %f48,%f18 ! (7_1) res0 = sqrt ( res0 ); + add %i3,TBL,%o4 ! (3_0) (char*)dll1 + iarr + lda [%i4]0x82,%o1 ! (7_0) hx0 = *(int*)px; + fsubd DTWO,%f50,%f46 ! (2_0) dtmp1 = DTWO - dtmp1; + + fmuld %f20,%f26,%f48 ! (0_0) dtmp0 *= dres; + add %i0,stridey,%i3 ! py += stridey + ld [%o4],%f20 ! (3_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; + faddd %f60,D2ON36,%f50 ! (5_0) x_hi0 = x0 + D2ON36; + + lda [%i3]0x82,%o4 ! (7_0) hy0 = *(int*)py; + add %i3,stridey,%i0 ! py += stridey + faddd %f52,D2ON36,%f12 ! 
(5_0) y_hi0 = y0 + D2ON36; + + and %o1,_0x7fffffff,%o7 ! (7_0) hx0 &= 0x7fffffff; + faddd %f32,%f36,%f24 ! (4_0) dres = res0_hi + res0_lo; + + fmuld %f10,%f46,%f26 ! (2_0) dd *= dtmp1; + cmp %o7,_0x7ff00000 ! (7_0) hx0 ? 0x7ff00000 + stx %g1,[%fp+dtmp14] ! (6_0) *(long long*)&scl0 = ll; + fpsub32 %f20,%f54,%f10 ! (3_0) dd = vis_fpsub32(dtmp0, dexp0); + + and %o4,_0x7fffffff,%l7 ! (7_0) hy0 &= 0x7fffffff; + st %f24,[%fp+ftmp0] ! (4_0) iarr = ((int*)&dres)[0]; + bge,pn %icc,.update25 ! (7_0) if ( hx0 >= 0x7ff00000 ) + fsubd %f50,D2ON36,%f20 ! (5_0) x_hi0 -= D2ON36; + + sub %l7,%o7,%o1 ! (7_0) diff0 = hy0 - hx0; + cmp %l7,_0x7ff00000 ! (7_0) hy0 ? 0x7ff00000 + bge,pn %icc,.update26 ! (7_0) if ( hy0 >= 0x7ff00000 ) + fsubd %f12,D2ON36,%f54 ! (5_0) y_hi0 -= D2ON36; + + fmuld %f10,%f22,%f50 ! (3_0) dtmp0 = dd * dres; + sra %o1,31,%o3 ! (7_0) j0 = diff0 >> 31; + faddd %f28,%f48,%f48 ! (0_0) res0 += dtmp0; + + and %o1,%o3,%o1 ! (7_0) j0 &= diff0; + fand %f16,DA0,%f28 ! (1_0) res0 = vis_fand(dres,DA0); + + fmuld %f20,%f20,%f0 ! (5_0) res0_hi = x_hi0 * x_hi0; + sub %l7,%o1,%o4 ! (7_0) j0 = hy0 - j0; + cmp %o7,_0x00100000 ! (7_0) hx0 ? 0x00100000 + fsubd %f60,%f20,%f2 ! (5_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (5_0) dtmp0 = y_hi0 * y_hi0; + and %o4,%l0,%o4 ! (7_0) j0 &= 0x7ff00000; + bl,pn %icc,.update27 ! (7_0) if ( hx0 < 0x00100000 ) + faddd %f60,%f20,%f62 ! (5_0) res0_lo = x0 + x_hi0; +.cont27a: + fmuld %f26,%f14,%f14 ! (2_0) dtmp2 = dd * dres; + sub %l0,%o4,%g1 ! (7_0) j0 = 0x7ff00000 - j0; + fsubd DTWO,%f50,%f20 ! (3_0) dtmp0 = DTWO - dtmp0; +.cont27b: + fmuld %f42,%f28,%f60 ! (1_0) dtmp0 = res0_hi * res0; + sllx %g1,32,%g1 ! (7_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp15] ! (7_0) *(long long*)&scl0 = ll; + faddd %f52,%f54,%f50 ! (5_0) dtmp1 = y0 + y_hi0; + + fmuld %f34,%f28,%f34 ! (1_0) dtmp1 = res0_lo * res0; + fsubd %f52,%f54,%f54 ! (5_0) y_lo0 = y0 - y_hi0; +.cont28: + fmuld %f62,%f2,%f2 ! (5_0) res0_lo *= x_lo0; + ldd [%fp+dtmp13],%f62 ! 
(6_0) *(long long*)&scl0 = ll; + faddd %f0,%f46,%f42 ! (5_0) res0_hi += dtmp0; + + fmuld %f10,%f20,%f52 ! (3_0) dd *= dtmp0; + lda [%i2]%asi,%f10 ! (6_0) ((float*)&x0)[0] = ((float*)px)[0]; + + lda [%i2+4]%asi,%f11 ! (6_0) ((float*)&x0)[1] = ((float*)px)[1]; + fsubd DONE,%f60,%f60 ! (1_0) dtmp0 = DONE - dtmp0; + + fmuld %f50,%f54,%f46 ! (5_0) dtmp1 *= y_lo0; + lda [%o0]%asi,%f12 ! (6_0) ((float*)&y0)[0] = ((float*)py)[0]; + fsubd DTWO,%f14,%f14 ! (2_0) dtmp2 = DTWO - dtmp2; + + lda [%o0+4]%asi,%f13 ! (6_0) ((float*)&y0)[1] = ((float*)py)[1]; + + fmuld %f52,%f22,%f50 ! (3_0) dtmp1 = dd * dres; + ld [%fp+ftmp0],%o2 ! (4_0) iarr = ((int*)&dres)[0]; + fand %f24,DA1,%f54 ! (4_0) dexp0 = vis_fand(dres,DA1); + + fmuld %f10,%f62,%f10 ! (6_0) x0 *= scl0; + ldd [%fp+dtmp0],%f0 ! (7_1) *(long long*)&scl0 = ll; + fsubd %f60,%f34,%f20 ! (1_0) dtmp0 -= dtmp1; + + fmuld %f12,%f62,%f60 ! (6_0) y0 *= scl0; + sra %o2,11,%o4 ! (4_0) iarr >>= 11; + faddd %f2,%f46,%f34 ! (5_0) res0_lo += dtmp1; + + and %o4,0x1fc,%o4 ! (4_0) iarr &= 0x1fc; + fmuld %f26,%f14,%f26 ! (2_0) dres = dd * dtmp2; + + cmp counter,8 + bl,pn %icc,.tail + nop + + ba .main_loop + sub counter,8,counter + + .align 16 +.main_loop: + fsqrtd %f48,%f14 ! (0_1) res0 = sqrt ( res0 ); + add %o4,TBL,%o4 ! (4_1) (char*)dll1 + iarr + lda [%i1]0x82,%o1 ! (0_0) hx0 = *(int*)px; + fsubd DTWO,%f50,%f46 ! (3_1) dtmp1 = DTWO - dtmp1; + + fmuld %f20,%f16,%f48 ! (1_1) dtmp0 *= dres; + mov %i1,%i2 + ld [%o4],%f20 ! (4_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; + faddd %f10,D2ON36,%f50 ! (6_1) x_hi0 = x0 + D2ON36; + + nop + mov %i0,%o0 + lda [%i0]0x82,%o4 ! (0_0) hy0 = *(int*)py; + faddd %f60,D2ON36,%f2 ! (6_1) y_hi0 = y0 + D2ON36; + + faddd %f42,%f34,%f16 ! (5_1) dres = res0_hi + res0_lo; + and %o1,_0x7fffffff,%o7 ! (0_0) hx0 &= 0x7fffffff; + st %f16,[%fp+ftmp0] ! (5_1) iarr = ((int*)&dres)[0]; + fmuld %f0,%f18,%f0 ! (7_2) res0 = scl0 * res0; + + fmuld %f52,%f46,%f18 ! (3_1) dd *= dtmp1; + cmp %o7,_0x7ff00000 ! (0_0) hx0 ? 
0x7ff00000 + st %f0,[%i5] ! (7_2) ((float*)pz)[0] = ((float*)&res0)[0]; + fpsub32 %f20,%f54,%f54 ! (4_1) dd = vis_fpsub32(dtmp0, dexp0); + + and %o4,_0x7fffffff,%l7 ! (0_0) hy0 &= 0x7fffffff; + st %f1,[%i5+4] ! (7_2) ((float*)pz)[1] = ((float*)&res0)[1]; + bge,pn %icc,.update29 ! (0_0) if ( hx0 >= 0x7ff00000 ) + fsubd %f50,D2ON36,%f20 ! (6_1) x_hi0 -= D2ON36; + + cmp %l7,_0x7ff00000 ! (0_0) hy0 ? 0x7ff00000 + sub %l7,%o7,%o1 ! (0_0) diff0 = hy0 - hx0; + bge,pn %icc,.update30 ! (0_0) if ( hy0 >= 0x7ff00000 ) + fsubd %f2,D2ON36,%f2 ! (6_1) y_hi0 -= D2ON36; + + fmuld %f54,%f24,%f50 ! (4_1) dtmp0 = dd * dres; + sra %o1,31,%o3 ! (0_0) j0 = diff0 >> 31; + stx %g1,[%fp+dtmp0] ! (7_1) *(long long*)&scl0 = ll; + faddd %f28,%f48,%f52 ! (1_1) res0 += dtmp0; + + and %o1,%o3,%o1 ! (0_0) j0 &= diff0; + cmp %o7,_0x00100000 ! (0_0) hx0 ? 0x00100000 + bl,pn %icc,.update31 ! (0_0) if ( hx0 < 0x00100000 ) + fand %f26,DA0,%f48 ! (2_1) res0 = vis_fand(dres,DA0); +.cont31: + fmuld %f20,%f20,%f0 ! (6_1) res0_hi = x_hi0 * x_hi0; + sub %l7,%o1,%o4 ! (0_0) j0 = hy0 - j0; + nop + fsubd %f10,%f20,%f28 ! (6_1) x_lo0 = x0 - x_hi0; + + fmuld %f2,%f2,%f46 ! (6_1) dtmp0 = y_hi0 * y_hi0; + add %i5,stridez,%i5 ! pz += stridez + and %o4,%l0,%o4 ! (0_0) j0 &= 0x7ff00000; + faddd %f10,%f20,%f62 ! (6_1) res0_lo = x0 + x_hi0; + + fmuld %f18,%f22,%f22 ! (3_1) dtmp2 = dd * dres; + sub %l0,%o4,%o4 ! (0_0) j0 = 0x7ff00000 - j0; + nop + fsubd DTWO,%f50,%f20 ! (4_1) dtmp0 = DTWO - dtmp0; +.cont32: + fmuld %f30,%f48,%f12 ! (2_1) dtmp0 = res0_hi * res0; + sllx %o4,32,%o4 ! (0_0) ll = (long long)j0 << 32; + stx %o4,[%fp+dtmp1] ! (0_0) *(long long*)&scl0 = ll; + faddd %f60,%f2,%f50 ! (6_1) dtmp1 = y0 + y_hi0; + + fmuld %f40,%f48,%f40 ! (2_1) dtmp1 = res0_lo * res0; + nop + bn,pn %icc,.exit + fsubd %f60,%f2,%f2 ! (6_1) y_lo0 = y0 - y_hi0; + + fmuld %f62,%f28,%f28 ! (6_1) res0_lo *= x_lo0; + nop + ldd [%fp+dtmp15],%f62 ! (7_1) *(long long*)&scl0 = ll; + faddd %f0,%f46,%f30 ! 
(6_1) res0_hi += dtmp0; + + nop + nop + lda [%i4]%asi,%f10 ! (7_1) ((float*)&x0)[0] = ((float*)px)[0]; + fmuld %f54,%f20,%f54 ! (4_1) dd *= dtmp0; + + nop + nop + lda [%i4+4]%asi,%f11 ! (7_1) ((float*)&x0)[1] = ((float*)px)[1]; + fsubd DONE,%f12,%f60 ! (2_1) dtmp0 = DONE - dtmp0; + + fmuld %f50,%f2,%f46 ! (6_1) dtmp1 *= y_lo0; + nop + lda [%i3]%asi,%f12 ! (7_1) ((float*)&y0)[0] = ((float*)py)[0]; + fsubd DTWO,%f22,%f22 ! (3_1) dtmp2 = DTWO - dtmp2; + + add %i1,stridex,%i4 ! px += stridex + nop + lda [%i3+4]%asi,%f13 ! (7_1) ((float*)&y0)[1] = ((float*)py)[1]; + bn,pn %icc,.exit + + fmuld %f54,%f24,%f50 ! (4_1) dtmp1 = dd * dres; + add %i4,stridex,%i1 ! px += stridex + ld [%fp+ftmp0],%o2 ! (5_1) iarr = ((int*)&dres)[0]; + fand %f16,DA1,%f2 ! (5_1) dexp0 = vis_fand(dres,DA1); + + fmuld %f10,%f62,%f10 ! (7_1) x0 *= scl0; + nop + ldd [%fp+dtmp2],%f0 ! (0_1) *(long long*)&scl0 = ll; + fsubd %f60,%f40,%f20 ! (2_1) dtmp0 -= dtmp1; + + fmuld %f12,%f62,%f60 ! (7_1) y0 *= scl0; + sra %o2,11,%i3 ! (5_1) iarr >>= 11; + nop + faddd %f28,%f46,%f40 ! (6_1) res0_lo += dtmp1; + + and %i3,0x1fc,%i3 ! (5_1) iarr &= 0x1fc; + nop + bn,pn %icc,.exit + fmuld %f18,%f22,%f28 ! (3_1) dres = dd * dtmp2; + + fsqrtd %f52,%f22 ! (1_1) res0 = sqrt ( res0 ); + lda [%i4]0x82,%o1 ! (1_0) hx0 = *(int*)px; + add %i3,TBL,%g1 ! (5_1) (char*)dll1 + iarr + fsubd DTWO,%f50,%f62 ! (4_1) dtmp1 = DTWO - dtmp1; + + fmuld %f20,%f26,%f52 ! (2_1) dtmp0 *= dres; + add %i0,stridey,%i3 ! py += stridey + ld [%g1],%f26 ! (5_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; + faddd %f10,D2ON36,%f46 ! (7_1) x_hi0 = x0 + D2ON36; + + nop + add %i3,stridey,%i0 ! py += stridey + lda [%i3]0x82,%g1 ! (1_0) hy0 = *(int*)py; + faddd %f60,D2ON36,%f50 ! (7_1) y_hi0 = y0 + D2ON36; + + faddd %f30,%f40,%f18 ! (6_1) dres = res0_hi + res0_lo; + and %o1,_0x7fffffff,%o7 ! (1_0) hx0 &= 0x7fffffff; + st %f18,[%fp+ftmp0] ! (6_1) iarr = ((int*)&dres)[0]; + fmuld %f0,%f14,%f0 ! (0_1) res0 = scl0 * res0; + + fmuld %f54,%f62,%f14 ! 
(4_1) dd *= dtmp1; + cmp %o7,_0x7ff00000 ! (1_0) hx0 ? 0x7ff00000 + st %f0,[%i5] ! (0_1) ((float*)pz)[0] = ((float*)&res0)[0]; + fpsub32 %f26,%f2,%f26 ! (5_1) dd = vis_fpsub32(dtmp0, dexp0); + + and %g1,_0x7fffffff,%l7 ! (1_0) hy0 &= 0x7fffffff; + nop + bge,pn %icc,.update33 ! (1_0) if ( hx0 >= 0x7ff00000 ) + fsubd %f46,D2ON36,%f20 ! (7_1) x_hi0 -= D2ON36; + + cmp %l7,_0x7ff00000 ! (1_0) hy0 ? 0x7ff00000 + sub %l7,%o7,%o1 ! (1_0) diff0 = hy0 - hx0; + st %f1,[%i5+4] ! (0_1) ((float*)pz)[1] = ((float*)&res0)[1]; + fsubd %f50,D2ON36,%f54 ! (7_1) y_hi0 -= D2ON36; + + fmuld %f26,%f16,%f50 ! (5_1) dtmp0 = dd * dres; + sra %o1,31,%o3 ! (1_0) j0 = diff0 >> 31; + bge,pn %icc,.update34 ! (1_0) if ( hy0 >= 0x7ff00000 ) + faddd %f48,%f52,%f52 ! (2_1) res0 += dtmp0; + + and %o1,%o3,%o1 ! (1_0) j0 &= diff0; + add %i5,stridez,%i5 ! pz += stridez + stx %o4,[%fp+dtmp2] ! (0_0) *(long long*)&scl0 = ll; + fand %f28,DA0,%f48 ! (3_1) res0 = vis_fand(dres,DA0); + + fmuld %f20,%f20,%f2 ! (7_1) res0_hi = x_hi0 * x_hi0; + sub %l7,%o1,%o4 ! (1_0) j0 = hy0 - j0; + cmp %o7,_0x00100000 ! (1_0) hx0 ? 0x00100000 + fsubd %f10,%f20,%f0 ! (7_1) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (7_1) dtmp0 = y_hi0 * y_hi0; + and %o4,%l0,%o4 ! (1_0) j0 &= 0x7ff00000; + bl,pn %icc,.update35 ! (1_0) if ( hx0 < 0x00100000 ) + faddd %f10,%f20,%f62 ! (7_1) res0_lo = x0 + x_hi0; +.cont35a: + fmuld %f44,%f48,%f10 ! (3_1) dtmp0 = res0_hi * res0; + nop + sub %l0,%o4,%o4 ! (1_0) j0 = 0x7ff00000 - j0; + fsubd DTWO,%f50,%f20 ! (5_1) dtmp0 = DTWO - dtmp0; +.cont35b: + fmuld %f14,%f24,%f24 ! (4_1) dtmp2 = dd * dres; + sllx %o4,32,%o4 ! (1_0) ll = (long long)j0 << 32; + stx %o4,[%fp+dtmp3] ! (1_0) *(long long*)&scl0 = ll; + faddd %f60,%f54,%f50 ! (7_1) dtmp1 = y0 + y_hi0; + + fmuld %f38,%f48,%f38 ! (3_1) dtmp1 = res0_lo * res0; + nop + nop + fsubd %f60,%f54,%f12 ! (7_1) y_lo0 = y0 - y_hi0; +.cont36: + fmuld %f62,%f0,%f0 ! (7_1) res0_lo *= x_lo0; + nop + ldd [%fp+dtmp1],%f62 ! 
(0_0) *(long long*)&scl0 = ll; + faddd %f2,%f46,%f44 ! (7_1) res0_hi += dtmp0; + + fsubd DONE,%f10,%f60 ! (3_1) dtmp0 = DONE - dtmp0; + nop + lda [%i2]%asi,%f10 ! (0_0) ((float*)&x0)[0] = ((float*)px)[0]; + fmuld %f26,%f20,%f54 ! (5_1) dd *= dtmp0; + + nop + nop + lda [%i2+4]%asi,%f11 ! (0_0) ((float*)&x0)[1] = ((float*)px)[1]; + bn,pn %icc,.exit + + fmuld %f50,%f12,%f26 ! (7_1) dtmp1 *= y_lo0; + nop + lda [%o0]%asi,%f12 ! (0_0) ((float*)&y0)[0] = ((float*)py)[0]; + fsubd DTWO,%f24,%f24 ! (4_1) dtmp2 = DTWO - dtmp2; + + nop + nop + lda [%o0+4]%asi,%f13 ! (0_0) ((float*)&y0)[1] = ((float*)py)[1]; + bn,pn %icc,.exit + + fmuld %f54,%f16,%f46 ! (5_1) dtmp1 = dd * dres; + nop + ld [%fp+ftmp0],%o2 ! (6_1) iarr = ((int*)&dres)[0]; + fand %f18,DA1,%f2 ! (6_1) dexp0 = vis_fand(dres,DA1); + + fmuld %f10,%f62,%f10 ! (0_0) x0 *= scl0; + nop + ldd [%fp+dtmp4],%f50 ! (1_1) *(long long*)&scl0 = ll; + fsubd %f60,%f38,%f20 ! (3_1) dtmp0 -= dtmp1; + + fmuld %f12,%f62,%f60 ! (0_0) y0 *= scl0; + sra %o2,11,%g1 ! (6_1) iarr >>= 11; + nop + faddd %f0,%f26,%f38 ! (7_1) res0_lo += dtmp1; + + nop + and %g1,0x1fc,%g1 ! (6_1) iarr &= 0x1fc; + bn,pn %icc,.exit + fmuld %f14,%f24,%f26 ! (4_1) dres = dd * dtmp2; + + fsqrtd %f52,%f24 ! (2_1) res0 = sqrt ( res0 ); + lda [%i1]0x82,%o1 ! (2_0) hx0 = *(int*)px; + add %g1,TBL,%g1 ! (6_1) (char*)dll1 + iarr + fsubd DTWO,%f46,%f62 ! (5_1) dtmp1 = DTWO - dtmp1; + + fmuld %f20,%f28,%f52 ! (3_1) dtmp0 *= dres; + mov %i1,%i2 + ld [%g1],%f28 ! (6_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; + faddd %f10,D2ON36,%f46 ! (0_0) x_hi0 = x0 + D2ON36; + + nop + mov %i0,%o0 + lda [%i0]0x82,%g1 ! (2_0) hy0 = *(int*)py; + faddd %f60,D2ON36,%f12 ! (0_0) y_hi0 = y0 + D2ON36; + + faddd %f44,%f38,%f14 ! (7_1) dres = res0_hi + res0_lo; + and %o1,_0x7fffffff,%o7 ! (2_0) hx0 &= 0x7fffffff; + st %f14,[%fp+ftmp0] ! (7_1) iarr = ((int*)&dres)[0]; + fmuld %f50,%f22,%f0 ! (1_1) res0 = scl0 * res0; + + fmuld %f54,%f62,%f22 ! (5_1) dd *= dtmp1; + cmp %o7,_0x7ff00000 ! (2_0) hx0 ? 
0x7ff00000 + st %f0,[%i5] ! (1_1) ((float*)pz)[0] = ((float*)&res0)[0]; + fpsub32 %f28,%f2,%f28 ! (6_1) dd = vis_fpsub32(dtmp0, dexp0); + + and %g1,_0x7fffffff,%l7 ! (2_0) hx0 &= 0x7fffffff; + nop + bge,pn %icc,.update37 ! (2_0) if ( hx0 >= 0x7ff00000 ) + fsubd %f46,D2ON36,%f20 ! (0_0) x_hi0 -= D2ON36; + + sub %l7,%o7,%o1 ! (2_0) diff0 = hy0 - hx0; + cmp %l7,_0x7ff00000 ! (2_0) hy0 ? 0x7ff00000 + st %f1,[%i5+4] ! (1_1) ((float*)pz)[1] = ((float*)&res0)[1]; + fsubd %f12,D2ON36,%f54 ! (0_0) y_hi0 -= D2ON36; + + fmuld %f28,%f18,%f50 ! (6_1) dtmp0 = dd * dres; + sra %o1,31,%o3 ! (2_0) j0 = diff0 >> 31; + bge,pn %icc,.update38 ! (2_0) if ( hy0 >= 0x7ff00000 ) + faddd %f48,%f52,%f52 ! (3_1) res0 += dtmp0; + + and %o1,%o3,%o1 ! (2_0) j0 &= diff0; + add %i5,stridez,%i5 ! pz += stridez + stx %o4,[%fp+dtmp4] ! (1_0) *(long long*)&scl0 = ll; + fand %f26,DA0,%f48 ! (4_1) res0 = vis_fand(dres,DA0); + + fmuld %f20,%f20,%f2 ! (0_0) res0_hi = x_hi0 * x_hi0; + cmp %o7,_0x00100000 ! (2_0) hx0 ? 0x00100000 + sub %l7,%o1,%o4 ! (2_0) j0 = hy0 - j0; + fsubd %f10,%f20,%f0 ! (0_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (0_0) dtmp0 = y_hi0 * y_hi0; + and %o4,%l0,%o4 ! (2_0) j0 &= 0x7ff00000; + bl,pn %icc,.update39 ! (2_0) if ( hx0 < 0x00100000 ) + faddd %f10,%f20,%f62 ! (0_0) res0_lo = x0 + x_hi0; +.cont39a: + fmuld %f32,%f48,%f10 ! (4_1) dtmp0 = res0_hi * res0; + sub %l0,%o4,%g1 ! (2_0) j0 = 0x7ff00000 - j0; + nop + fsubd DTWO,%f50,%f20 ! (6_1) dtmp0 = DTWO - dtmp0; +.cont39b: + fmuld %f22,%f16,%f16 ! (5_1) dtmp2 = dd * dres; + sllx %g1,32,%g1 ! (2_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp5] ! (2_0) *(long long*)&scl0 = ll; + faddd %f60,%f54,%f50 ! (0_0) dtmp1 = y0 + y_hi0; + + fmuld %f36,%f48,%f36 ! (4_1) dtmp1 = res0_lo * res0; + nop + nop + fsubd %f60,%f54,%f12 ! (0_0) y_lo0 = y0 - y_hi0; +.cont40: + fmuld %f62,%f0,%f0 ! (0_0) res0_lo *= x_lo0; + nop + ldd [%fp+dtmp3],%f62 ! (1_0) *(long long*)&scl0 = ll; + faddd %f2,%f46,%f32 ! 
(0_0) res0_hi += dtmp0; + + fsubd DONE,%f10,%f60 ! (4_1) dtmp0 = DONE - dtmp0; + nop + lda [%i4]%asi,%f10 ! (1_0) ((float*)&x0)[0] = ((float*)px)[0]; + fmuld %f28,%f20,%f54 ! (6_1) dd *= dtmp0; + + nop + nop + lda [%i4+4]%asi,%f11 ! (1_0) ((float*)&x0)[1] = ((float*)px)[1]; + bn,pn %icc,.exit + + fmuld %f50,%f12,%f28 ! (0_0) dtmp1 *= y_lo0; + nop + lda [%i3]%asi,%f12 ! (1_0) ((float*)&y0)[0] = ((float*)py)[0]; + fsubd DTWO,%f16,%f16 ! (5_1) dtmp2 = DTWO - dtmp2; + + add %i1,stridex,%i4 ! px += stridex + nop + lda [%i3+4]%asi,%f13 ! (1_0) ((float*)&y0)[1] = ((float*)py)[1]; + bn,pn %icc,.exit + + fmuld %f54,%f18,%f46 ! (6_1) dtmp1 = dd * dres; + add %i4,stridex,%i1 ! px += stridex + ld [%fp+ftmp0],%o2 ! (7_1) iarr = ((int*)&dres)[0]; + fand %f14,DA1,%f2 ! (7_1) dexp0 = vis_fand(dres,DA1); + + fmuld %f10,%f62,%f10 ! (1_0) x0 *= scl0; + nop + ldd [%fp+dtmp6],%f50 ! (2_1) *(long long*)&scl0 = ll; + fsubd %f60,%f36,%f20 ! (4_1) dtmp0 -= dtmp1; + + fmuld %f12,%f62,%f60 ! (1_0) y0 *= scl0; + sra %o2,11,%i3 ! (7_1) iarr >>= 11; + nop + faddd %f0,%f28,%f36 ! (0_0) res0_lo += dtmp1; + + and %i3,0x1fc,%i3 ! (7_1) iarr &= 0x1fc; + nop + bn,pn %icc,.exit + fmuld %f22,%f16,%f28 ! (5_1) dres = dd * dtmp2; + + fsqrtd %f52,%f16 ! (3_1) res0 = sqrt ( res0 ); + add %i3,TBL,%o4 ! (7_1) (char*)dll1 + iarr + lda [%i4]0x82,%o1 ! (3_0) hx0 = *(int*)px; + fsubd DTWO,%f46,%f62 ! (6_1) dtmp1 = DTWO - dtmp1; + + fmuld %f20,%f26,%f52 ! (4_1) dtmp0 *= dres; + add %i0,stridey,%i3 ! py += stridey + ld [%o4],%f26 ! (7_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; + faddd %f10,D2ON36,%f46 ! (1_0) x_hi0 = x0 + D2ON36; + + nop + add %i3,stridey,%i0 ! py += stridey + lda [%i3]0x82,%o4 ! (3_0) hy0 = *(int*)py; + faddd %f60,D2ON36,%f12 ! (1_0) y_hi0 = y0 + D2ON36; + + faddd %f32,%f36,%f22 ! (0_0) dres = res0_hi + res0_lo; + and %o1,_0x7fffffff,%o7 ! (3_0) hx0 &= 0x7fffffff; + st %f22,[%fp+ftmp0] ! (0_0) iarr = ((int*)&dres)[0]; + fmuld %f50,%f24,%f0 ! 
(2_1) res0 = scl0 * res0; + + fmuld %f54,%f62,%f24 ! (6_1) dd *= dtmp1; + cmp %o7,_0x7ff00000 ! (3_0) hx0 ? 0x7ff00000 + st %f0,[%i5] ! (2_1) ((float*)pz)[0] = ((float*)&res0)[0]; + fpsub32 %f26,%f2,%f26 ! (7_1) dd = vis_fpsub32(dtmp0, dexp0); + + and %o4,_0x7fffffff,%l7 ! (3_0) hy0 &= 0x7fffffff; + nop + bge,pn %icc,.update41 ! (3_0) if ( hx0 >= 0x7ff00000 ) + fsubd %f46,D2ON36,%f20 ! (1_0) x_hi0 -= D2ON36; + + sub %l7,%o7,%o1 ! (3_0) diff0 = hy0 - hx0; + cmp %l7,_0x7ff00000 ! (3_0) hy0 ? 0x7ff00000 + st %f1,[%i5+4] ! (2_1) ((float*)pz)[1] = ((float*)&res0)[1]; + fsubd %f12,D2ON36,%f54 ! (1_0) y_hi0 -= D2ON36; + + fmuld %f26,%f14,%f50 ! (7_1) dtmp0 = dd * dres; + sra %o1,31,%o3 ! (3_0) j0 = diff0 >> 31; + bge,pn %icc,.update42 ! (3_0) if ( hy0 >= 0x7ff00000 ) + faddd %f48,%f52,%f52 ! (4_1) res0 += dtmp0; + + and %o1,%o3,%o1 ! (3_0) j0 &= diff0; + add %i5,stridez,%i5 ! pz += stridez + stx %g1,[%fp+dtmp6] ! (2_0) *(long long*)&scl0 = ll; + fand %f28,DA0,%f48 ! (5_1) res0 = vis_fand(dres,DA0); + + fmuld %f20,%f20,%f2 ! (1_0) res0_hi = x_hi0 * x_hi0; + cmp %o7,_0x00100000 ! (3_0) hx0 ? 0x00100000 + sub %l7,%o1,%o4 ! (3_0) j0 = hy0 - j0; + fsubd %f10,%f20,%f0 ! (1_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (1_0) dtmp0 = y_hi0 * y_hi0; + and %o4,%l0,%o4 ! (3_0) j0 &= 0x7ff00000; + bl,pn %icc,.update43 ! (3_0) if ( hx0 < 0x00100000 ) + faddd %f10,%f20,%f62 ! (1_0) res0_lo = x0 + x_hi0; +.cont43a: + fmuld %f42,%f48,%f10 ! (5_1) dtmp0 = res0_hi * res0; + nop + sub %l0,%o4,%g1 ! (3_0) j0 = 0x7ff00000 - j0; + fsubd DTWO,%f50,%f20 ! (7_1) dtmp0 = DTWO - dtmp0; +.cont43b: + fmuld %f24,%f18,%f18 ! (6_1) dtmp2 = dd * dres; + sllx %g1,32,%g1 ! (3_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp7] ! (3_0) *(long long*)&scl0 = ll; + faddd %f60,%f54,%f50 ! (1_0) dtmp1 = y0 + y_hi0; + + fmuld %f34,%f48,%f34 ! (5_1) dtmp1 = res0_lo * res0; + nop + nop + fsubd %f60,%f54,%f12 ! (1_0) y_lo0 = y0 - y_hi0 +.cont44: + fmuld %f62,%f0,%f0 ! 
(1_0) res0_lo *= x_lo0; + nop + ldd [%fp+dtmp5],%f62 ! (2_0) *(long long*)&scl0 = ll; + faddd %f2,%f46,%f42 ! (1_0) res0_hi += dtmp0; + + fsubd DONE,%f10,%f60 ! (5_1) dtmp0 = DONE - dtmp0; + nop + lda [%i2]%asi,%f10 ! (2_0) ((float*)&x0)[0] = ((float*)px)[0]; + fmuld %f26,%f20,%f54 ! (7_1) dd *= dtmp0; + + nop + nop + lda [%i2+4]%asi,%f11 ! (2_0) ((float*)&x0)[1] = ((float*)px)[1]; + bn,pn %icc,.exit + + fmuld %f50,%f12,%f26 ! (1_0) dtmp1 *= y_lo0; + nop + lda [%o0]%asi,%f12 ! (2_0) ((float*)&y0)[0] = ((float*)py)[0]; + fsubd DTWO,%f18,%f20 ! (6_1) dtmp2 = DTWO - dtmp2; + + nop + nop + lda [%o0+4]%asi,%f13 ! (2_0) ((float*)&y0)[1] = ((float*)py)[1]; + bn,pn %icc,.exit + + fmuld %f54,%f14,%f50 ! (7_1) dtmp1 = dd * dres; + nop + ld [%fp+ftmp0],%o2 ! (0_0) iarr = ((int*)&dres)[0]; + fand %f22,DA1,%f2 ! (0_0) dexp0 = vis_fand(dres,DA1); + + fmuld %f10,%f62,%f10 ! (2_0) x0 *= scl0; + nop + ldd [%fp+dtmp8],%f18 ! (3_1) *(long long*)&scl0 = ll; + fsubd %f60,%f34,%f46 ! (5_1) dtmp0 -= dtmp1; + + fmuld %f12,%f62,%f60 ! (2_0) y0 *= scl0; + sra %o2,11,%o4 ! (0_0) iarr >>= 11; + nop + faddd %f0,%f26,%f34 ! (1_0) res0_lo += dtmp1; + + and %o4,0x1fc,%o4 ! (0_0) iarr &= 0x1fc; + nop + bn,pn %icc,.exit + fmuld %f24,%f20,%f26 ! (6_1) dres = dd * dtmp2; + + fsqrtd %f52,%f24 ! (4_1) res0 = sqrt ( res0 ); + add %o4,TBL,%o4 ! (0_0) (char*)dll1 + iarr + lda [%i1]0x82,%o1 ! (4_0) hx0 = *(int*)px; + fsubd DTWO,%f50,%f20 ! (7_1) dtmp1 = DTWO - dtmp1; + + fmuld %f46,%f28,%f52 ! (5_1) dtmp0 -= dtmp1; + mov %i1,%i2 + ld [%o4],%f28 ! (0_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; + faddd %f10,D2ON36,%f46 ! (2_0) x_hi0 = x0 + D2ON36; + + nop + mov %i0,%o0 + lda [%i0]0x82,%o4 ! (4_0) hy0 = *(int*)py; + faddd %f60,D2ON36,%f50 ! (2_0) y_hi0 = y0 + D2ON36; + + fmuld %f18,%f16,%f0 ! (3_1) res0 = scl0 * res0; + nop + and %o1,_0x7fffffff,%o7 ! (4_0) hx0 &= 0x7fffffff; + faddd %f42,%f34,%f18 ! (1_0) dres = res0_hi + res0_lo; + + fmuld %f54,%f20,%f16 ! (7_1) dd *= dtmp1; + cmp %o7,_0x7ff00000 ! 
(4_0) hx0 ? 0x7ff00000 + st %f18,[%fp+ftmp0] ! (1_0) iarr = ((int*)&dres)[0]; + fpsub32 %f28,%f2,%f28 ! (0_0) dd = vis_fpsub32(dtmp0, dexp0); + + and %o4,_0x7fffffff,%l7 ! (4_0) hy0 &= 0x7fffffff; + st %f0,[%i5] ! (3_1) ((float*)pz)[0] = ((float*)&res0)[0]; + bge,pn %icc,.update45 ! (4_0) if ( hx0 >= 0x7ff00000 ) + fsubd %f46,D2ON36,%f20 ! (2_0) x_hi0 -= D2ON36; + + sub %l7,%o7,%o1 ! (4_0) diff0 = hy0 - hx0; + cmp %l7,_0x7ff00000 ! (4_0) hy0 ? 0x7ff00000 + bge,pn %icc,.update46 ! (4_0) if ( hy0 >= 0x7ff00000 ) + fsubd %f50,D2ON36,%f54 ! (2_0) y_hi0 -= D2ON36; + + fmuld %f28,%f22,%f50 ! (0_0) dtmp0 = dd * dres; + sra %o1,31,%o3 ! (4_0) j0 = diff0 >> 31; + st %f1,[%i5+4] ! (3_1) ((float*)pz)[1] = ((float*)&res0)[1]; + faddd %f48,%f52,%f52 ! (5_1) res0 += dtmp0; + + and %o1,%o3,%o1 ! (4_0) j0 &= diff0; + cmp %o7,_0x00100000 ! (4_0) hx0 ? 0x00100000 + bl,pn %icc,.update47 ! (4_0) if ( hx0 < 0x00100000 ) + fand %f26,DA0,%f48 ! (6_1) res0 = vis_fand(dres,DA0); +.cont47a: + fmuld %f20,%f20,%f2 ! (2_0) res0_hi = x_hi0 * x_hi0; + sub %l7,%o1,%o4 ! (4_0) j0 = hy0 - j0; + stx %g1,[%fp+dtmp8] ! (3_0) *(long long*)&scl0 = ll; + fsubd %f10,%f20,%f0 ! (2_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (2_0) dtmp0 = y_hi0 * y_hi0; + and %o4,%l0,%o4 ! (4_0) j0 &= 0x7ff00000; + add %i5,stridez,%i5 ! pz += stridez + faddd %f10,%f20,%f62 ! (2_0) res0_lo = x0 + x_hi0; + + fmuld %f30,%f48,%f10 ! (6_1) dtmp0 = res0_hi * res0; + nop + sub %l0,%o4,%g1 ! (4_0) j0 = 0x7ff00000 - j0; + fsubd DTWO,%f50,%f20 ! (0_0) dtmp0 = DTWO - dtmp0; +.cont47b: + fmuld %f16,%f14,%f14 ! (7_1) dtmp2 = dd * dres; + sllx %g1,32,%g1 ! (4_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp9] ! (4_0) *(long long*)&scl0 = ll; + faddd %f60,%f54,%f50 ! (2_0) dtmp1 = y0 + y_hi0; + + fmuld %f40,%f48,%f40 ! (6_1) dtmp1 = res0_lo * res0; + nop + nop + fsubd %f60,%f54,%f12 ! (2_0) y_lo0 = y0 - y_hi0; +.cont48: + fmuld %f62,%f0,%f0 ! (2_0) res0_lo *= x_lo0; + nop + ldd [%fp+dtmp7],%f62 ! 
(3_0) *(long long*)&scl0 = ll; + faddd %f2,%f46,%f30 ! (2_0) res0_hi += dtmp0; + + fsubd DONE,%f10,%f60 ! (6_1) dtmp0 = DONE - dtmp0; + nop + lda [%i4]%asi,%f10 ! (3_0) ((float*)&x0)[0] = ((float*)px)[0]; + fmuld %f28,%f20,%f54 ! (0_0) dd *= dtmp0; + + nop + nop + lda [%i4+4]%asi,%f11 ! (3_0) ((float*)&x0)[1] = ((float*)px)[1]; + bn,pn %icc,.exit + + fmuld %f50,%f12,%f28 ! (2_0) dtmp1 *= y_lo0; + nop + lda [%i3]%asi,%f12 ! (3_0) ((float*)&y0)[0] = ((float*)py)[0]; + fsubd DTWO,%f14,%f20 ! (7_1) dtmp2 = DTWO - dtmp2; + + lda [%i3+4]%asi,%f13 ! (3_0) ((float*)&y0)[1] = ((float*)py)[1]; + add %i1,stridex,%i4 ! px += stridex + nop + bn,pn %icc,.exit + + fmuld %f54,%f22,%f50 ! (0_0) dtmp1 = dd * dres; + add %i4,stridex,%i1 ! px += stridex + ld [%fp+ftmp0],%o2 ! (1_0) iarr = ((int*)&dres)[0]; + fand %f18,DA1,%f2 ! (1_0) dexp0 = vis_fand(dres,DA1); + + fmuld %f10,%f62,%f10 ! (3_0) x0 *= scl0; + nop + ldd [%fp+dtmp10],%f14 ! (4_1) *(long long*)&scl0 = ll; + fsubd %f60,%f40,%f46 ! (6_1) dtmp0 -= dtmp1; + + fmuld %f12,%f62,%f60 ! (3_0) y0 *= scl0; + sra %o2,11,%i3 ! (1_0) iarr >>= 11; + nop + faddd %f0,%f28,%f40 ! (2_0) res0_lo += dtmp1; + + and %i3,0x1fc,%i3 ! (1_0) iarr &= 0x1fc; + nop + bn,pn %icc,.exit + fmuld %f16,%f20,%f28 ! (7_1) dres = dd * dtmp2; + + fsqrtd %f52,%f16 ! (5_1) res0 = sqrt ( res0 ); + add %i3,TBL,%o4 ! (1_0) (char*)dll1 + iarr + lda [%i4]0x82,%o1 ! (5_0) hx0 = *(int*)px; + fsubd DTWO,%f50,%f20 ! (0_0) dtmp1 = DTWO - dtmp1; + + fmuld %f46,%f26,%f52 ! (6_1) dtmp0 *= dres; + add %i0,stridey,%i3 ! py += stridey + ld [%o4],%f26 ! (1_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; + faddd %f10,D2ON36,%f46 ! (3_0) x_hi0 = x0 + D2ON36; + + nop + add %i3,stridey,%i0 ! py += stridey + lda [%i3]0x82,%o4 ! (5_0) hy0 = *(int*)py; + faddd %f60,D2ON36,%f50 ! (3_0) y_hi0 = y0 + D2ON36; + + fmuld %f14,%f24,%f0 ! (4_1) res0 = scl0 * res0; + and %o1,_0x7fffffff,%o7 ! (5_0) hx0 &= 0x7fffffff; + nop + faddd %f30,%f40,%f14 ! 
(2_0) dres = res0_hi + res0_lo; + + fmuld %f54,%f20,%f24 ! (0_0) dd *= dtmp1; + cmp %o7,_0x7ff00000 ! (5_0) hx0 ? 0x7ff00000 + st %f14,[%fp+ftmp0] ! (2_0) iarr = ((int*)&dres)[0]; + fpsub32 %f26,%f2,%f26 ! (1_0) dd = vis_fpsub32(dtmp0, dexp0); + + and %o4,_0x7fffffff,%l7 ! (5_0) hy0 &= 0x7fffffff; + st %f0,[%i5] ! (4_1) ((float*)pz)[0] = ((float*)&res0)[0]; + bge,pn %icc,.update49 ! (5_0) if ( hx0 >= 0x7ff00000 ) + fsubd %f46,D2ON36,%f20 ! (3_0) x_hi0 -= D2ON36; + + sub %l7,%o7,%o1 ! (5_0) diff0 = hy0 - hx0; + cmp %l7,_0x7ff00000 ! (5_0) hy0 ? 0x7ff00000 + bge,pn %icc,.update50 ! (5_0) if ( hy0 >= 0x7ff00000 ) + fsubd %f50,D2ON36,%f54 ! (3_0) y_hi0 -= D2ON36; + + fmuld %f26,%f18,%f50 ! (1_0) dtmp0 = dd * dres; + sra %o1,31,%o3 ! (5_0) j0 = diff0 >> 31; + st %f1,[%i5+4] ! (4_1) ((float*)pz)[1] = ((float*)&res0)[1]; + faddd %f48,%f52,%f52 ! (6_1) res0 += dtmp0; + + and %o1,%o3,%o1 ! (5_0) j0 &= diff0; + cmp %o7,_0x00100000 ! (5_0) hx0 ? 0x00100000 + bl,pn %icc,.update51 ! (5_0) if ( hx0 < 0x00100000 ) + fand %f28,DA0,%f48 ! (7_1) res0 = vis_fand(dres,DA0); +.cont51a: + fmuld %f20,%f20,%f2 ! (3_0) res0_hi = x_hi0 * x_hi0; + sub %l7,%o1,%o4 ! (5_0) j0 = hy0 - j0; + stx %g1,[%fp+dtmp10] ! (4_0) *(long long*)&scl0 = ll; + fsubd %f10,%f20,%f0 ! (3_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (3_0) dtmp0 = y_hi0 * y_hi0; + and %o4,%l0,%o4 ! (5_0) j0 &= 0x7ff00000; + add %i5,stridez,%i5 ! pz += stridez + faddd %f10,%f20,%f62 ! (3_0) res0_lo = x0 + x_hi0; + + fmuld %f44,%f48,%f10 ! (7_1) dtmp0 = res0_hi * res0; + sub %l0,%o4,%g1 ! (5_0) j0 = 0x7ff00000 - j0; + nop + fsubd DTWO,%f50,%f20 ! (1_0) dtmp0 = DTWO - dtmp0; +.cont51b: + fmuld %f24,%f22,%f22 ! (0_0) dtmp2 = dd * dres; + sllx %g1,32,%g1 ! (5_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp11] ! (5_0) *(long long*)&scl0 = ll; + faddd %f60,%f54,%f50 ! (3_0) dtmp1 = y0 + y_hi0; + + fmuld %f38,%f48,%f38 ! (7_1) dtmp1 = res0_lo * res0; + nop + nop + fsubd %f60,%f54,%f12 ! 
(3_0) y_lo0 = y0 - y_hi0; +.cont52: + fmuld %f62,%f0,%f0 ! (3_0) res0_lo *= x_lo0; + nop + ldd [%fp+dtmp9],%f62 ! (4_0) *(long long*)&scl0 = ll; + faddd %f2,%f46,%f44 ! (3_0) res0_hi += dtmp0; + + fsubd DONE,%f10,%f60 ! (7_1) dtmp0 = DONE - dtmp0; + nop + lda [%i2]%asi,%f10 ! (4_0) ((float*)&x0)[0] = ((float*)px)[0]; + fmuld %f26,%f20,%f54 ! (1_0) dd *= dtmp0; + + nop + nop + lda [%i2+4]%asi,%f11 ! (4_0) ((float*)&x0)[1] = ((float*)px)[1]; + bn,pn %icc,.exit + + fmuld %f50,%f12,%f26 ! (3_0) dtmp1 *= y_lo0; + nop + lda [%o0]%asi,%f12 ! (4_0) ((float*)&y0)[0] = ((float*)py)[0]; + fsubd DTWO,%f22,%f20 ! (0_0) dtmp2 = DTWO - dtmp2; + + nop + nop + lda [%o0+4]%asi,%f13 ! (4_0) ((float*)&y0)[1] = ((float*)py)[1]; + bn,pn %icc,.exit + + fmuld %f54,%f18,%f50 ! (1_0) dtmp1 = dd * dres; + nop + ld [%fp+ftmp0],%o2 ! (2_0) iarr = ((int*)&dres)[0]; + fand %f14,DA1,%f2 ! (2_0) dexp0 = vis_fand(dres,DA1); + + fmuld %f10,%f62,%f10 ! (4_0) x0 *= scl0; + nop + ldd [%fp+dtmp12],%f22 ! (5_1) *(long long*)&scl0 = ll; + fsubd %f60,%f38,%f46 ! (7_1) dtmp0 -= dtmp1; + + fmuld %f12,%f62,%f60 ! (4_0) y0 *= scl0; + sra %o2,11,%o4 ! (2_0) iarr >>= 11; + nop + faddd %f0,%f26,%f38 ! (3_0) res0_lo += dtmp1; + + and %o4,0x1fc,%o4 ! (2_0) iarr &= 0x1fc; + nop + bn,pn %icc,.exit + fmuld %f24,%f20,%f26 ! (0_0) dres = dd * dtmp2; + + fsqrtd %f52,%f24 ! (6_1) res0 = sqrt ( res0 ); + add %o4,TBL,%o4 ! (2_0) (char*)dll1 + iarr + lda [%i1]0x82,%o1 ! (6_0) hx0 = *(int*)px; + fsubd DTWO,%f50,%f52 ! (1_0) dtmp1 = DTWO - dtmp1; + + fmuld %f46,%f28,%f28 ! (7_1) dtmp0 *= dres; + mov %i1,%i2 + ld [%o4],%f20 ! (2_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; + faddd %f10,D2ON36,%f46 ! (4_0) x_hi0 = x0 + D2ON36; + + nop + mov %i0,%o0 + lda [%i0]0x82,%o4 ! (6_0) hy0 = *(int*)py; + faddd %f60,D2ON36,%f50 ! (4_0) y_hi0 = y0 + D2ON36; + + fmuld %f22,%f16,%f0 ! (5_1) res0 = scl0 * res0; + and %o1,_0x7fffffff,%o7 ! (6_0) hx0 &= 0x7fffffff; + nop + faddd %f44,%f38,%f22 ! 
(3_0) dres = res0_hi + res0_lo; + + fmuld %f54,%f52,%f16 ! (1_0) dd *= dtmp1; + cmp %o7,_0x7ff00000 ! (6_0) hx0 ? 0x7ff00000 + st %f22,[%fp+ftmp0] ! (3_0) iarr = ((int*)&dres)[0]; + fpsub32 %f20,%f2,%f52 ! (2_0) dd = vis_fpsub32(dtmp0, dexp0); + + and %o4,_0x7fffffff,%l7 ! (6_0) hy0 &= 0x7fffffff; + st %f0,[%i5] ! (5_1) ((float*)pz)[0] = ((float*)&res0)[0]; + bge,pn %icc,.update53 ! (6_0) if ( hx0 >= 0x7ff00000 ) + fsubd %f46,D2ON36,%f46 ! (4_0) x_hi0 -= D2ON36; + + sub %l7,%o7,%o1 ! (6_0) diff0 = hy0 - hx0; + cmp %l7,_0x7ff00000 ! (6_0) hy0 ? 0x7ff00000 + bge,pn %icc,.update54 ! (6_0) if ( hy0 >= 0x7ff00000 ) + fsubd %f50,D2ON36,%f54 ! (4_0) y_hi0 -= D2ON36; + + fmuld %f52,%f14,%f50 ! (2_0) dtmp0 = dd * dres; + sra %o1,31,%o3 ! (6_0) j0 = diff0 >> 31; + st %f1,[%i5+4] ! (5_1) ((float*)pz)[1] = ((float*)&res0)[1]; + faddd %f48,%f28,%f48 ! (7_1) res0 += dtmp0; + + and %o1,%o3,%o1 ! (6_0) j0 &= diff0; + cmp %o7,_0x00100000 ! (6_0) hx0 ? 0x00100000 + bl,pn %icc,.update55 ! (6_0) if ( hx0 < 0x00100000 ) + fand %f26,DA0,%f28 ! (0_0) res0 = vis_fand(dres,DA0); +.cont55a: + fmuld %f46,%f46,%f0 ! (4_0) res0_hi = x_hi0 * x_hi0; + sub %l7,%o1,%o4 ! (6_0) j0 = hy0 - j0; + stx %g1,[%fp+dtmp12] ! (5_0) *(long long*)&scl0 = ll; + fsubd %f10,%f46,%f2 ! (4_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f20 ! (4_0) dtmp0 = y_hi0 * y_hi0; + and %o4,%l0,%o4 ! (6_0) j0 &= 0x7ff00000; + add %i5,stridez,%i5 ! pz += stridez + faddd %f10,%f46,%f62 ! (4_0) res0_lo = x0 + x_hi0; + + fmuld %f16,%f18,%f18 ! (1_0) dtmp2 = dd * dres; + sub %l0,%o4,%g1 ! (6_0) j0 = 0x7ff00000 - j0; + nop + fsubd DTWO,%f50,%f10 ! (2_0) dtmp0 = DTWO - dtmp0; +.cont55b: + fmuld %f32,%f28,%f50 ! (0_0) dtmp0 = res0_hi * res0; + sllx %g1,32,%g1 ! (6_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp13] ! (6_0) *(long long*)&scl0 = ll; + faddd %f60,%f54,%f46 ! (4_0) dtmp1 = y0 + y_hi0; + + fmuld %f36,%f28,%f36 ! (0_0) dtmp1 = res0_lo * res0; + nop + nop + fsubd %f60,%f54,%f60 ! 
(4_0) y_lo0 = y0 - y_hi0; +.cont56: + fmuld %f62,%f2,%f2 ! (4_0) res0_lo *= x_lo0; + nop + ldd [%fp+dtmp11],%f62 ! (5_0) *(long long*)&scl0 = ll; + faddd %f0,%f20,%f32 ! (4_0) res0_hi += dtmp0; + + lda [%i4]%asi,%f0 ! (5_0) ((float*)&x0)[0] = ((float*)px)[0]; + nop + nop + fmuld %f52,%f10,%f10 ! (2_0) dd *= dtmp0; + + lda [%i4+4]%asi,%f1 ! (5_0) ((float*)&x0)[1] = ((float*)px)[1]; + nop + nop + fsubd DONE,%f50,%f52 ! (0_0) dtmp0 = DONE - dtmp0; + + fmuld %f46,%f60,%f46 ! (4_0) dtmp1 *= y_lo0; + nop + lda [%i3]%asi,%f12 ! (5_0) ((float*)&y0)[0] = ((float*)py)[0]; + fsubd DTWO,%f18,%f18 ! (1_0) dtmp2 = DTWO - dtmp2; + + nop + add %i1,stridex,%i4 ! px += stridex + lda [%i3+4]%asi,%f13 ! (5_0) ((float*)&y0)[1] = ((float*)py)[1]; + bn,pn %icc,.exit + + fmuld %f10,%f14,%f50 ! (2_0) dtmp1 = dd * dres; + add %i4,stridex,%i1 ! px += stridex + ld [%fp+ftmp0],%o2 ! (3_0) iarr = ((int*)&dres)[0]; + fand %f22,DA1,%f54 ! (3_0) dexp0 = vis_fand(dres,DA1); + + fmuld %f0,%f62,%f60 ! (5_0) x0 *= scl0; + nop + ldd [%fp+dtmp14],%f0 ! (6_1) *(long long*)&scl0 = ll; + fsubd %f52,%f36,%f20 ! (0_0) dtmp0 -= dtmp1; + + fmuld %f12,%f62,%f52 ! (5_0) y0 *= scl0; + sra %o2,11,%i3 ! (3_0) iarr >>= 11; + nop + faddd %f2,%f46,%f36 ! (4_0) res0_lo += dtmp1; + + and %i3,0x1fc,%i3 ! (3_0) iarr &= 0x1fc; + nop + bn,pn %icc,.exit + fmuld %f16,%f18,%f16 ! (1_0) dres = dd * dtmp2; + + fsqrtd %f48,%f18 ! (7_1) res0 = sqrt ( res0 ); + add %i3,TBL,%o4 ! (3_0) (char*)dll1 + iarr + lda [%i4]0x82,%o1 ! (7_0) hx0 = *(int*)px; + fsubd DTWO,%f50,%f46 ! (2_0) dtmp1 = DTWO - dtmp1; + + fmuld %f20,%f26,%f48 ! (0_0) dtmp0 *= dres; + add %i0,stridey,%i3 ! py += stridey + ld [%o4],%f20 ! (3_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; + faddd %f60,D2ON36,%f50 ! (5_0) x_hi0 = x0 + D2ON36; + + nop + add %i3,stridey,%i0 ! py += stridey + lda [%i3]0x82,%o4 ! (7_0) hy0 = *(int*)py; + faddd %f52,D2ON36,%f12 ! (5_0) y_hi0 = y0 + D2ON36; + + fmuld %f0,%f24,%f2 ! (6_1) res0 = scl0 * res0; + and %o1,_0x7fffffff,%o7 ! 
(7_0) hx0 &= 0x7fffffff; + nop + faddd %f32,%f36,%f24 ! (4_0) dres = res0_hi + res0_lo; + + fmuld %f10,%f46,%f26 ! (2_0) dd *= dtmp1; + cmp %o7,_0x7ff00000 ! (7_0) hx0 ? 0x7ff00000 + st %f24,[%fp+ftmp0] ! (4_0) iarr = ((int*)&dres)[0]; + fpsub32 %f20,%f54,%f10 ! (3_0) dd = vis_fpsub32(dtmp0, dexp0); + + and %o4,_0x7fffffff,%l7 ! (7_0) hy0 &= 0x7fffffff; + st %f2,[%i5] ! (6_1) ((float*)pz)[0] = ((float*)&res0)[0]; + bge,pn %icc,.update57 ! (7_0) if ( hx0 >= 0x7ff00000 ) + fsubd %f50,D2ON36,%f20 ! (5_0) x_hi0 -= D2ON36; + + sub %l7,%o7,%o1 ! (7_0) diff0 = hy0 - hx0; + cmp %l7,_0x7ff00000 ! (7_0) hy0 ? 0x7ff00000 + bge,pn %icc,.update58 ! (7_0) if ( hy0 >= 0x7ff00000 ) + fsubd %f12,D2ON36,%f54 ! (5_0) y_hi0 -= D2ON36; + + fmuld %f10,%f22,%f50 ! (3_0) dtmp0 = dd * dres; + sra %o1,31,%o3 ! (7_0) j0 = diff0 >> 31; + st %f3,[%i5+4] ! (6_1) ((float*)pz)[1] = ((float*)&res0)[1]; + faddd %f28,%f48,%f48 ! (0_0) res0 += dtmp0; + + and %o1,%o3,%o1 ! (7_0) j0 &= diff0; + cmp %o7,_0x00100000 ! (7_0) hx0 ? 0x00100000 + bl,pn %icc,.update59 ! (7_0) if ( hx0 < 0x00100000 ) + fand %f16,DA0,%f28 ! (1_0) res0 = vis_fand(dres,DA0); +.cont59a: + fmuld %f20,%f20,%f0 ! (5_0) res0_hi = x_hi0 * x_hi0; + sub %l7,%o1,%o4 ! (7_0) j0 = hy0 - j0; + stx %g1,[%fp+dtmp14] ! (6_0) *(long long*)&scl0 = ll; + fsubd %f60,%f20,%f2 ! (5_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (5_0) dtmp0 = y_hi0 * y_hi0; + and %o4,%l0,%o4 ! (7_0) j0 &= 0x7ff00000; + add %i5,stridez,%i5 ! pz += stridez + faddd %f60,%f20,%f62 ! (5_0) res0_lo = x0 + x_hi0; + + fmuld %f26,%f14,%f14 ! (2_0) dtmp2 = dd * dres; + sub %l0,%o4,%g1 ! (7_0) j0 = 0x7ff00000 - j0; + nop + fsubd DTWO,%f50,%f20 ! (3_0) dtmp0 = DTWO - dtmp0; +.cont59b: + fmuld %f42,%f28,%f60 ! (1_0) dtmp0 = res0_hi * res0; + sllx %g1,32,%g1 ! (7_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp15] ! (7_0) *(long long*)&scl0 = ll; + faddd %f52,%f54,%f50 ! (5_0) dtmp1 = y0 + y_hi0; + + fmuld %f34,%f28,%f34 ! 
(1_0) dtmp1 = res0_lo * res0; + nop + nop + fsubd %f52,%f54,%f54 ! (5_0) y_lo0 = y0 - y_hi0; +.cont60: + fmuld %f62,%f2,%f2 ! (5_0) res0_lo *= x_lo0; + nop + ldd [%fp+dtmp13],%f62 ! (6_0) *(long long*)&scl0 = ll; + faddd %f0,%f46,%f42 ! (5_0) res0_hi += dtmp0; + + fmuld %f10,%f20,%f52 ! (3_0) dd *= dtmp0; + nop + lda [%i2]%asi,%f10 ! (6_0) ((float*)&x0)[0] = ((float*)px)[0]; + bn,pn %icc,.exit + + lda [%i2+4]%asi,%f11 ! (6_0) ((float*)&x0)[1] = ((float*)px)[1]; + nop + nop + fsubd DONE,%f60,%f60 ! (1_0) dtmp0 = DONE - dtmp0; + + fmuld %f50,%f54,%f46 ! (5_0) dtmp1 *= y_lo0; + nop + lda [%o0]%asi,%f12 ! (6_0) ((float*)&y0)[0] = ((float*)py)[0]; + fsubd DTWO,%f14,%f14 ! (2_0) dtmp2 = DTWO - dtmp2; + + nop + nop + lda [%o0+4]%asi,%f13 ! (6_0) ((float*)&y0)[1] = ((float*)py)[1]; + bn,pn %icc,.exit + + fmuld %f52,%f22,%f50 ! (3_0) dtmp1 = dd * dres; + nop + ld [%fp+ftmp0],%o2 ! (4_0) iarr = ((int*)&dres)[0]; + fand %f24,DA1,%f54 ! (4_0) dexp0 = vis_fand(dres,DA1); + + fmuld %f10,%f62,%f10 ! (6_0) x0 *= scl0; + nop + ldd [%fp+dtmp0],%f0 ! (7_1) *(long long*)&scl0 = ll; + fsubd %f60,%f34,%f20 ! (1_0) dtmp0 -= dtmp1; + + fmuld %f12,%f62,%f60 ! (6_0) y0 *= scl0; + sra %o2,11,%o4 ! (4_0) iarr >>= 11; + nop + faddd %f2,%f46,%f34 ! (5_0) res0_lo += dtmp1; + + and %o4,0x1fc,%o4 ! (4_0) iarr &= 0x1fc; + subcc counter,8,counter ! counter -= 8; + bpos,pt %icc,.main_loop + fmuld %f26,%f14,%f26 ! (2_0) dres = dd * dtmp2; + + add counter,8,counter + +.tail: + subcc counter,1,counter + bneg .begin + nop + + fsqrtd %f48,%f14 ! (0_1) res0 = sqrt ( res0 ); + add %o4,TBL,%o4 ! (4_1) (char*)dll1 + iarr + fsubd DTWO,%f50,%f46 ! (3_1) dtmp1 = DTWO - dtmp1; + + fmuld %f20,%f16,%f48 ! (1_1) dtmp0 *= dres; + ld [%o4],%f20 ! (4_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; + + fmuld %f0,%f18,%f0 ! (7_2) res0 = scl0 * res0; + st %f0,[%i5] ! (7_2) ((float*)pz)[0] = ((float*)&res0)[0]; + faddd %f42,%f34,%f16 ! (5_1) dres = res0_hi + res0_lo; + + subcc counter,1,counter + st %f1,[%i5+4] ! 
(7_2) ((float*)pz)[1] = ((float*)&res0)[1]; + bneg .begin + add %i5,stridez,%i5 ! pz += stridez + + fmuld %f52,%f46,%f18 ! (3_1) dd *= dtmp1; + st %f16,[%fp+ftmp0] ! (5_1) iarr = ((int*)&dres)[0]; + fpsub32 %f20,%f54,%f54 ! (4_1) dd = vis_fpsub32(dtmp0, dexp0); + + fmuld %f54,%f24,%f50 ! (4_1) dtmp0 = dd * dres; + faddd %f28,%f48,%f52 ! (1_1) res0 += dtmp0; + + + fand %f26,DA0,%f48 ! (2_1) res0 = vis_fand(dres,DA0); + + fmuld %f18,%f22,%f22 ! (3_1) dtmp2 = dd * dres; + fsubd DTWO,%f50,%f20 ! (4_1) dtmp0 = DTWO - dtmp0; + + fmuld %f30,%f48,%f12 ! (2_1) dtmp0 = res0_hi * res0; + + fmuld %f40,%f48,%f40 ! (2_1) dtmp1 = res0_lo * res0; + + fmuld %f54,%f20,%f54 ! (4_1) dd *= dtmp0; + + fsubd DONE,%f12,%f60 ! (2_1) dtmp0 = DONE - dtmp0; + + fsubd DTWO,%f22,%f22 ! (3_1) dtmp2 = DTWO - dtmp2; + + fmuld %f54,%f24,%f50 ! (4_1) dtmp1 = dd * dres; + ld [%fp+ftmp0],%o2 ! (5_1) iarr = ((int*)&dres)[0]; + fand %f16,DA1,%f2 ! (5_1) dexp0 = vis_fand(dres,DA1); + + ldd [%fp+dtmp2],%f0 ! (0_1) *(long long*)&scl0 = ll; + fsubd %f60,%f40,%f20 ! (2_1) dtmp0 -= dtmp1; + + sra %o2,11,%i3 ! (5_1) iarr >>= 11; + + and %i3,0x1fc,%i3 ! (5_1) iarr &= 0x1fc; + fmuld %f18,%f22,%f28 ! (3_1) dres = dd * dtmp2; + + fsqrtd %f52,%f22 ! (1_1) res0 = sqrt ( res0 ); + add %i3,TBL,%g1 ! (5_1) (char*)dll1 + iarr + fsubd DTWO,%f50,%f62 ! (4_1) dtmp1 = DTWO - dtmp1; + + fmuld %f20,%f26,%f52 ! (2_1) dtmp0 *= dres; + ld [%g1],%f26 ! (5_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; + + fmuld %f0,%f14,%f0 ! (0_1) res0 = scl0 * res0; + + fmuld %f54,%f62,%f14 ! (4_1) dd *= dtmp1; + fpsub32 %f26,%f2,%f26 ! (5_1) dd = vis_fpsub32(dtmp0, dexp0); + + st %f0,[%i5] ! (0_1) ((float*)pz)[0] = ((float*)&res0)[0]; + + fmuld %f26,%f16,%f50 ! (5_1) dtmp0 = dd * dres; + st %f1,[%i5+4] ! (0_1) ((float*)pz)[1] = ((float*)&res0)[1]; + faddd %f48,%f52,%f52 ! (2_1) res0 += dtmp0; + + subcc counter,1,counter + bneg .begin + add %i5,stridez,%i5 ! pz += stridez + + fand %f28,DA0,%f48 ! 
(3_1) res0 = vis_fand(dres,DA0); + + fmuld %f44,%f48,%f10 ! (3_1) dtmp0 = res0_hi * res0; + fsubd DTWO,%f50,%f20 ! (5_1) dtmp0 = DTWO - dtmp0; + + fmuld %f14,%f24,%f24 ! (4_1) dtmp2 = dd * dres; + + fmuld %f38,%f48,%f38 ! (3_1) dtmp1 = res0_lo * res0; + + fsubd DONE,%f10,%f60 ! (3_1) dtmp0 = DONE - dtmp0; + fmuld %f26,%f20,%f54 ! (5_1) dd *= dtmp0; + + fsubd DTWO,%f24,%f24 ! (4_1) dtmp2 = DTWO - dtmp2; + + fmuld %f54,%f16,%f46 ! (5_1) dtmp1 = dd * dres; + + ldd [%fp+dtmp4],%f50 ! (1_1) *(long long*)&scl0 = ll; + fsubd %f60,%f38,%f20 ! (3_1) dtmp0 -= dtmp1; + + fmuld %f14,%f24,%f26 ! (4_1) dres = dd * dtmp2; + + fsqrtd %f52,%f24 ! (2_1) res0 = sqrt ( res0 ); + fsubd DTWO,%f46,%f62 ! (5_1) dtmp1 = DTWO - dtmp1; + + fmuld %f20,%f28,%f52 ! (3_1) dtmp0 *= dres; + + fmuld %f50,%f22,%f0 ! (1_1) res0 = scl0 * res0; + + fmuld %f54,%f62,%f22 ! (5_1) dd *= dtmp1; + + st %f0,[%i5] ! (1_1) ((float*)pz)[0] = ((float*)&res0)[0]; + + subcc counter,1,counter + st %f1,[%i5+4] ! (1_1) ((float*)pz)[1] = ((float*)&res0)[1]; + bneg .begin + add %i5,stridez,%i5 ! pz += stridez + + faddd %f48,%f52,%f52 ! (3_1) res0 += dtmp0; + + fand %f26,DA0,%f48 ! (4_1) res0 = vis_fand(dres,DA0); + + fmuld %f32,%f48,%f10 ! (4_1) dtmp0 = res0_hi * res0; + + fmuld %f22,%f16,%f16 ! (5_1) dtmp2 = dd * dres; + + fmuld %f36,%f48,%f36 ! (4_1) dtmp1 = res0_lo * res0; + + fsubd DONE,%f10,%f60 ! (4_1) dtmp0 = DONE - dtmp0; + + fsubd DTWO,%f16,%f16 ! (5_1) dtmp2 = DTWO - dtmp2; + + ldd [%fp+dtmp6],%f50 ! (2_1) *(long long*)&scl0 = ll; + fsubd %f60,%f36,%f20 ! (4_1) dtmp0 -= dtmp1; + + fmuld %f22,%f16,%f28 ! (5_1) dres = dd * dtmp2; + + fsqrtd %f52,%f16 ! (3_1) res0 = sqrt ( res0 ); + + fmuld %f20,%f26,%f52 ! (4_1) dtmp0 *= dres; + + fmuld %f50,%f24,%f0 ! (2_1) res0 = scl0 * res0; + + st %f0,[%i5] ! (2_1) ((float*)pz)[0] = ((float*)&res0)[0]; + + st %f1,[%i5+4] ! (2_1) ((float*)pz)[1] = ((float*)&res0)[1]; + faddd %f48,%f52,%f52 ! 
(4_1) res0 += dtmp0; + + subcc counter,1,counter + bneg .begin + add %i5,stridez,%i5 ! pz += stridez + + fand %f28,DA0,%f48 ! (5_1) res0 = vis_fand(dres,DA0); + + fmuld %f42,%f48,%f10 ! (5_1) dtmp0 = res0_hi * res0; + + fmuld %f34,%f48,%f34 ! (5_1) dtmp1 = res0_lo * res0; + + fsubd DONE,%f10,%f60 ! (5_1) dtmp0 = DONE - dtmp0; + + ldd [%fp+dtmp8],%f18 ! (3_1) *(long long*)&scl0 = ll; + fsubd %f60,%f34,%f46 ! (5_1) dtmp0 -= dtmp1; + + fsqrtd %f52,%f24 ! (4_1) res0 = sqrt ( res0 ); + + fmuld %f46,%f28,%f52 ! (5_1) dtmp0 *= dres; + + fmuld %f18,%f16,%f0 ! (3_1) res0 = scl0 * res0; + st %f0,[%i5] ! (3_1) ((float*)pz)[0] = ((float*)&res0)[0]; + st %f1,[%i5+4] ! (3_1) ((float*)pz)[1] = ((float*)&res0)[1]; + faddd %f48,%f52,%f52 ! (5_1) res0 += dtmp0; + + subcc counter,1,counter + bneg .begin + add %i5,stridez,%i5 ! pz += stridez + + ldd [%fp+dtmp10],%f14 ! (4_1) *(long long*)&scl0 = ll; + + fsqrtd %f52,%f16 ! (5_1) res0 = sqrt ( res0 ); + + fmuld %f14,%f24,%f0 ! (4_1) res0 = scl0 * res0; + st %f0,[%i5] ! (4_1) ((float*)pz)[0] = ((float*)&res0)[0]; + st %f1,[%i5+4] ! (4_1) ((float*)pz)[1] = ((float*)&res0)[1]; + + subcc counter,1,counter + bneg .begin + add %i5,stridez,%i5 ! pz += stridez + + ldd [%fp+dtmp12],%f22 ! (5_1) *(long long*)&scl0 = ll; + + fmuld %f22,%f16,%f0 ! (5_1) res0 = scl0 * res0; + st %f0,[%i5] ! (5_1) ((float*)pz)[0] = ((float*)&res0)[0]; + st %f1,[%i5+4] ! (5_1) ((float*)pz)[1] = ((float*)&res0)[1]; + + ba .begin + add %i5,stridez,%i5 + + .align 16 +! .spec0: special-case path. +! If hx0 == 0x7ff00000 and lx == 0, or hy0 == 0x7ff00000 and ly == 0 (an infinite +! operand, assuming hx0/hy0 already have the sign bit cleared as in the main loop), +! label 3 stores 0. Otherwise label 2 stores fabs(x0) * fabs(y0). +! Both paths step px, py and pz by one element, decrement counter and resume at .begin1. +.spec0: + cmp %o7,_0x7ff00000 ! hx0 ? 0x7ff00000 + bne 1f ! if ( hx0 != 0x7ff00000 ) + ld [%i4+4],%i2 ! lx = ((int*)px)[1]; + + cmp %i2,0 ! lx ? 0 + be 3f ! if ( lx == 0 ) + nop +1: + cmp %l7,_0x7ff00000 ! hy0 ? 0x7ff00000 + bne 2f ! if ( hy0 != 0x7ff00000 ) + ld [%i3+4],%o2 ! ly = ((int*)py)[1]; + + cmp %o2,0 ! ly ? 0 + be 3f ! if ( ly == 0 ) +2: + ld [%i4],%f0 ! ((float*)&x0)[0] = ((float*)px)[0]; + ld [%i4+4],%f1 ! ((float*)&x0)[1] = ((float*)px)[1]; + + ld [%i3],%f2 !
((float*)&y0)[0] = ((float*)py)[0]; + add %i4,stridex,%i4 ! px += stridex + ld [%i3+4],%f3 ! ((float*)&y0)[1] = ((float*)py)[1]; + + fabsd %f0,%f0 + + fabsd %f2,%f2 + + fmuld %f0,%f2,%f0 ! res0 = fabs(x0) * fabs(y0); + add %i3,stridey,%i3 ! py += stridey; + st %f0,[%i5] ! ((float*)pz)[0] = ((float*)&res0)[0]; + + st %f1,[%i5+4] ! ((float*)pz)[1] = ((float*)&res0)[1]; + add %i5,stridez,%i5 ! pz += stridez + ba .begin1 + sub counter,1,counter +3: +! Zero result: write both words of *pz before pz is advanced, so the low word +! lands in the current element rather than in the next one. + add %i4,stridex,%i4 ! px += stridex + add %i3,stridey,%i3 ! py += stridey + st %g0,[%i5] ! ((int*)pz)[0] = 0; + + st %g0,[%i5+4] ! ((int*)pz)[1] = 0; + add %i5,stridez,%i5 ! pz += stridez; + ba .begin1 + sub counter,1,counter + + .align 16 +.spec1: + and %o1,%o3,%o1 ! (7_0) j0 &= diff0; + + cmp %l7,_0x00100000 ! (7_0) hy0 ? 0x00100000 + bge,pn %icc,.cont_spec0 ! (7_0) if ( hy0 < 0x00100000 ) + + ld [%i4+4],%i2 ! lx = ((int*)px)[1]; + or %o7,%l7,%g5 ! ii = hx0 | hy0; + fzero %f0 + + ld [%i3+4],%o2 ! ly = ((int*)py)[1]; + or %i2,%g5,%g5 ! ii |= lx; + + orcc %o2,%g5,%g5 ! ii |= ly; + bnz,a,pn %icc,1f ! if ( ii != 0 ) + sethi %hi(0x00080000),%i2 + + fdivd DONE,%f0,%f0 ! res0 = 1.0 / 0.0; + + st %f0,[%i5] ! ((float*)pz)[0] = ((float*)&res0)[0]; + + add %i4,stridex,%i4 ! px += stridex; + add %i3,stridey,%i3 ! py += stridey; + st %f1,[%i5+4] ! ((float*)pz)[1] = ((float*)&res0)[1]; + + add %i5,stridez,%i5 ! pz += stridez; + ba .begin1 + sub counter,1,counter +1: + ld [%i4],%f0 ! ((float*)&x0)[0] = ((float*)px)[0]; + + ld [%i4+4],%f1 ! ((float*)&x0)[1] = ((float*)px)[1]; + + ld [%i3],%f2 ! ((float*)&y0)[0] = ((float*)py)[0]; + + fabsd %f0,%f0 ! x0 = fabs(x0); + ld [%i3+4],%f3 ! ((float*)&y0)[1] = ((float*)py)[1]; + + ldd [TBL+TBL_SHIFT+64],%f12 ! ((long long*)&dtmp0)[0] = 0x0007ffffffffffffULL; + add %fp,dtmp2,%i4 + add %fp,dtmp3,%i3 + + fabsd %f2,%f2 ! y0 = fabs(y0); + ldd [TBL+TBL_SHIFT+56],%f10 ! D2ON51 + + ldx [TBL+TBL_SHIFT+48],%g5 ! D2ONM52 + cmp %o7,%i2 ! hx0 ? 0x00080000 + bl,a 1f ! if ( hx0 < 0x00080000 ) + fxtod %f0,%f0 !
x0 = *(long long*)&x0; + + fand %f0,%f12,%f0 ! x0 = vis_fand(x0, dtmp0); + fxtod %f0,%f0 ! x0 = *(long long*)&x0; + faddd %f0,%f10,%f0 ! x0 += D2ON51; +1: + std %f0,[%i4] + + ldx [TBL+TBL_SHIFT+40],%g1 ! D2ON1022 + cmp %l7,%i2 ! hy0 ? 0x00080000 + bl,a 1f ! if ( hy0 < 0x00080000 ) + fxtod %f2,%f2 ! y0 = *(long long*)&y0; + + fand %f2,%f12,%f2 ! y0 = vis_fand(y0, dtmp0); + fxtod %f2,%f2 ! y0 = *(long long*)&y0; + faddd %f2,%f10,%f2 ! y0 += D2ON51; +1: + std %f2,[%i3] + + stx %g5,[%fp+dtmp15] ! D2ONM52 + + ba .cont_spec1 + stx %g1,[%fp+dtmp0] ! D2ON1022 + + .align 16 +.update0: + cmp counter,1 + ble 1f + nop + + sub counter,1,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + + stx %o0,[%fp+tmp_py] + + mov 1,counter +1: + sethi %hi(0x3ff00000),%o4 + add TBL,TBL_SHIFT+24,%i2 + ba .cont1 + add TBL,TBL_SHIFT+24,%o0 + + .align 16 +.update1: + cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000 + bge,pn %icc,.cont0 ! (0_0) if ( hy0 < 0x00100000 ) + + cmp counter,1 + ble,a 1f + nop + + sub counter,1,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + + mov 1,counter + stx %o0,[%fp+tmp_py] +1: + sethi %hi(0x3ff00000),%o4 + add TBL,TBL_SHIFT+24,%i2 + ba .cont1 + add TBL,TBL_SHIFT+24,%o0 + + .align 16 +.update2: + cmp counter,2 + ble 1f + nop + + sub counter,2,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + stx %i3,[%fp+tmp_py] + + mov 2,counter +1: + fsubd %f50,D2ON36,%f54 ! (7_1) y_hi0 -= D2ON36; + + fmuld %f20,%f20,%f2 ! (7_1) res0_hi = x_hi0 * x_hi0; + fsubd %f10,%f20,%f0 ! (7_1) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (7_1) dtmp0 = y_hi0 * y_hi0; + faddd %f10,%f20,%f62 ! (7_1) res0_lo = x0 + x_hi0; + + sethi %hi(0x3ff00000),%o4 + add TBL,TBL_SHIFT+24,%i4 + ba .cont4 + add TBL,TBL_SHIFT+24,%i3 + + .align 16 +.update3: + cmp counter,2 + ble 1f + nop + + sub counter,2,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + stx %i3,[%fp+tmp_py] + + mov 2,counter +1: + fmuld %f20,%f20,%f2 ! 
(7_1) res0_hi = x_hi0 * x_hi0; + fsubd %f10,%f20,%f0 ! (7_1) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (7_1) dtmp0 = y_hi0 * y_hi0; + faddd %f10,%f20,%f62 ! (7_1) res0_lo = x0 + x_hi0; + + sethi %hi(0x3ff00000),%o4 + add TBL,TBL_SHIFT+24,%i4 + ba .cont4 + add TBL,TBL_SHIFT+24,%i3 + + .align 16 +.update4: + cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000 + bge,a,pn %icc,.cont4 ! (0_0) if ( hy0 < 0x00100000 ) + sub %l0,%o4,%o4 ! (1_0) j0 = 0x7ff00000 - j0; + + cmp counter,2 + ble,a 1f + nop + + sub counter,2,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + mov 2,counter + stx %i3,[%fp+tmp_py] +1: + sethi %hi(0x3ff00000),%o4 + add TBL,TBL_SHIFT+24,%i4 + ba .cont4 + add TBL,TBL_SHIFT+24,%i3 + + .align 16 +.update5: + cmp counter,3 + ble 1f + nop + + sub counter,3,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + + stx %o0,[%fp+tmp_py] + + mov 3,counter +1: + st %f14,[%fp+ftmp0] ! (7_1) iarr = ((int*)&dres)[0]; + fsubd %f46,D2ON36,%f20 ! (0_0) x_hi0 -= D2ON36; + + fsubd %f12,D2ON36,%f54 ! (0_0) y_hi0 -= D2ON36; + + fmuld %f20,%f20,%f2 ! (0_0) res0_hi = x_hi0 * x_hi0; + fsubd %f10,%f20,%f0 ! (0_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (0_0) dtmp0 = y_hi0 * y_hi0; + faddd %f10,%f20,%f62 ! (0_0) res0_lo = x0 + x_hi0; + + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i2 + + sllx %g1,32,%g1 + ba .cont8 + add TBL,TBL_SHIFT+24,%o0 + + .align 16 +.update6: + cmp counter,3 + ble 1f + nop + + sub counter,3,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + + stx %o0,[%fp+tmp_py] + + mov 3,counter +1: + fmuld %f20,%f20,%f2 ! (0_0) res0_hi = x_hi0 * x_hi0; + fsubd %f10,%f20,%f0 ! (0_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (0_0) dtmp0 = y_hi0 * y_hi0; + faddd %f10,%f20,%f62 ! (0_0) res0_lo = x0 + x_hi0; + + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i2 + + sllx %g1,32,%g1 + ba .cont8 + add TBL,TBL_SHIFT+24,%o0 + + .align 16 +.update7: + cmp %l7,_0x00100000 ! (0_0) hy0 ? 
0x00100000 + bge,pn %icc,.cont7 ! (0_0) if ( hy0 < 0x00100000 ) + + cmp counter,3 + ble,a 1f + nop + + sub counter,3,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + + mov 3,counter + stx %o0,[%fp+tmp_py] +1: + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i2 + + sllx %g1,32,%g1 + ba .cont8 + add TBL,TBL_SHIFT+24,%o0 + + .align 16 +.update9: + cmp counter,4 + ble 1f + nop + + sub counter,4,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + stx %i3,[%fp+tmp_py] + + mov 4,counter +1: + st %f22,[%fp+ftmp0] ! (0_0) iarr = ((int*)&dres)[0]; + fsubd %f46,D2ON36,%f20 ! (1_0) x_hi0 -= D2ON36; + + fsubd %f12,D2ON36,%f54 ! (1_0) y_hi0 -= D2ON36; + + fmuld %f26,%f14,%f50 ! (7_1) dtmp0 = dd * dres; + + + fmuld %f20,%f20,%f2 ! (1_0) res0_hi = x_hi0 * x_hi0; + fsubd %f10,%f20,%f0 ! (1_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (1_0) dtmp0 = y_hi0 * y_hi0; + faddd %f10,%f20,%f62 ! (1_0) res0_lo = x0 + x_hi0; + + fsubd DTWO,%f50,%f20 ! (7_1) dtmp0 = DTWO - dtmp0; + + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i4 + ba .cont12 + add TBL,TBL_SHIFT+24,%i3 + + .align 16 +.update10: + cmp counter,4 + ble 1f + nop + + sub counter,4,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + stx %i3,[%fp+tmp_py] + + mov 4,counter +1: + fmuld %f26,%f14,%f50 ! (7_1) dtmp0 = dd * dres; + + + fmuld %f20,%f20,%f2 ! (1_0) res0_hi = x_hi0 * x_hi0; + fsubd %f10,%f20,%f0 ! (1_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (1_0) dtmp0 = y_hi0 * y_hi0; + faddd %f10,%f20,%f62 ! (1_0) res0_lo = x0 + x_hi0; + + fsubd DTWO,%f50,%f20 ! (7_1) dtmp0 = DTWO - dtmp0; + + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i4 + ba .cont12 + add TBL,TBL_SHIFT+24,%i3 + + .align 16 +.update11: + cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000 + bge,pn %icc,.cont11 ! 
(0_0) if ( hy0 < 0x00100000 ) + + cmp counter,4 + ble,a 1f + nop + + sub counter,4,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + mov 4,counter + stx %i3,[%fp+tmp_py] +1: + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i4 + + fsubd DTWO,%f50,%f20 ! (7_1) dtmp0 = DTWO - dtmp0; + ba .cont12 + add TBL,TBL_SHIFT+24,%i3 + + .align 16 +.update13: + cmp counter,5 + ble 1f + nop + + sub counter,5,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + + stx %o0,[%fp+tmp_py] + + mov 5,counter +1: + fsubd %f46,D2ON36,%f20 ! (2_0) x_hi0 -= D2ON36; + + fsubd %f50,D2ON36,%f54 ! (2_0) y_hi0 -= D2ON36; + + fmuld %f28,%f22,%f50 ! (0_0) dtmp0 = dd * dres; + + fmuld %f20,%f20,%f2 ! (2_0) res0_hi = x_hi0 * x_hi0; + fsubd %f10,%f20,%f0 ! (2_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (2_0) dtmp0 = y_hi0 * y_hi0; + faddd %f10,%f20,%f62 ! (2_0) res0_lo = x0 + x_hi0; + + fsubd DTWO,%f50,%f20 ! (0_0) dtmp0 = DTWO - dtmp0; + + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i2 + ba .cont16 + add TBL,TBL_SHIFT+24,%o0 + + .align 16 +.update14: + cmp counter,5 + ble 1f + nop + + sub counter,5,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + + stx %o0,[%fp+tmp_py] + + mov 5,counter +1: + fmuld %f28,%f22,%f50 ! (0_0) dtmp0 = dd * dres; + + fmuld %f20,%f20,%f2 ! (2_0) res0_hi = x_hi0 * x_hi0; + fsubd %f10,%f20,%f0 ! (2_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (2_0) dtmp0 = y_hi0 * y_hi0; + faddd %f10,%f20,%f62 ! (2_0) res0_lo = x0 + x_hi0; + + fsubd DTWO,%f50,%f20 ! (0_0) dtmp0 = DTWO - dtmp0; + + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i2 + ba .cont16 + add TBL,TBL_SHIFT+24,%o0 + + .align 16 +.update15: + cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000 + bge,pn %icc,.cont15 ! 
(0_0) if ( hy0 < 0x00100000 ) + + cmp counter,5 + ble,a 1f + nop + + sub counter,5,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + + mov 5,counter + stx %o0,[%fp+tmp_py] +1: + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i2 + + fsubd DTWO,%f50,%f20 ! (0_0) dtmp0 = DTWO - dtmp0; + ba .cont16 + add TBL,TBL_SHIFT+24,%o0 + + .align 16 +.update17: + cmp counter,6 + ble 1f + nop + + sub counter,6,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + stx %i3,[%fp+tmp_py] + + mov 6,counter +1: + fsubd %f50,D2ON36,%f54 ! (3_0) y_hi0 -= D2ON36; + + fmuld %f26,%f18,%f50 ! (1_0) dtmp0 = dd * dres; + + fand %f28,DA0,%f48 ! (7_1) res0 = vis_fand(dres,DA0); + + fmuld %f20,%f20,%f2 ! (3_0) res0_hi = x_hi0 * x_hi0; + fsubd %f10,%f20,%f0 ! (3_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (3_0) dtmp0 = y_hi0 * y_hi0; + faddd %f10,%f20,%f62 ! (3_0) res0_lo = x0 + x_hi0; + + fmuld %f44,%f48,%f10 ! (7_1) dtmp0 = res0_hi * res0; + fsubd DTWO,%f50,%f20 ! (1_0) dtmp0 = DTWO - dtmp0; + + fmuld %f24,%f22,%f22 ! (0_0) dtmp2 = dd * dres; + faddd %f60,%f54,%f50 ! (3_0) dtmp1 = y0 + y_hi0; + + fmuld %f38,%f48,%f38 ! (7_1) dtmp1 = res0_lo * res0; + fsubd %f60,%f54,%f12 ! (3_0) y_lo0 = y0 - y_hi0; + + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i4 + + sllx %g1,32,%g1 ! (5_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp11] ! (5_0) *(long long*)&scl0 = ll; + ba .cont20 + add TBL,TBL_SHIFT+24,%i3 + + .align 16 +.update18: + cmp counter,6 + ble 1f + nop + + sub counter,6,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + stx %i3,[%fp+tmp_py] + + mov 6,counter +1: + fmuld %f26,%f18,%f50 ! (1_0) dtmp0 = dd * dres; + + fand %f28,DA0,%f48 ! (7_1) res0 = vis_fand(dres,DA0); + + fmuld %f20,%f20,%f2 ! (3_0) res0_hi = x_hi0 * x_hi0; + fsubd %f10,%f20,%f0 ! (3_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (3_0) dtmp0 = y_hi0 * y_hi0; + faddd %f10,%f20,%f62 ! (3_0) res0_lo = x0 + x_hi0; + + fmuld %f44,%f48,%f10 ! 
(7_1) dtmp0 = res0_hi * res0; + fsubd DTWO,%f50,%f20 ! (1_0) dtmp0 = DTWO - dtmp0; + + fmuld %f24,%f22,%f22 ! (0_0) dtmp2 = dd * dres; + faddd %f60,%f54,%f50 ! (3_0) dtmp1 = y0 + y_hi0; + + fmuld %f38,%f48,%f38 ! (7_1) dtmp1 = res0_lo * res0; + fsubd %f60,%f54,%f12 ! (3_0) y_lo0 = y0 - y_hi0; + + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i4 + + sllx %g1,32,%g1 ! (5_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp11] ! (5_0) *(long long*)&scl0 = ll; + ba .cont20 + add TBL,TBL_SHIFT+24,%i3 + + .align 16 +.update19: + cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000 + bge,pn %icc,.cont19a ! (0_0) if ( hy0 < 0x00100000 ) + + cmp counter,6 + ble,a 1f + nop + + sub counter,6,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + mov 6,counter + stx %i3,[%fp+tmp_py] +1: + fmuld %f44,%f48,%f10 ! (7_1) dtmp0 = res0_hi * res0; + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i4 + fsubd DTWO,%f50,%f20 ! (1_0) dtmp0 = DTWO - dtmp0; + + ba .cont19b + add TBL,TBL_SHIFT+24,%i3 + + .align 16 +.update21: + cmp counter,7 + ble 1f + nop + + sub counter,7,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + + stx %o0,[%fp+tmp_py] + + mov 7,counter +1: + fsubd %f50,D2ON36,%f54 ! (4_0) y_hi0 -= D2ON36; + + fmuld %f52,%f14,%f50 ! (2_0) dtmp0 = dd * dres; + faddd %f48,%f28,%f48 ! (7_1) res0 += dtmp0; + + fand %f26,DA0,%f28 ! (0_0) res0 = vis_fand(dres,DA0); + + fmuld %f46,%f46,%f0 ! (4_0) res0_hi = x_hi0 * x_hi0; + fsubd %f10,%f46,%f2 ! (4_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f20 ! (4_0) dtmp0 = y_hi0 * y_hi0; + faddd %f10,%f46,%f62 ! (4_0) res0_lo = x0 + x_hi0; + + fmuld %f16,%f18,%f18 ! (1_0) dtmp2 = dd * dres; + fsubd DTWO,%f50,%f10 ! (2_0) dtmp0 = DTWO - dtmp0; + + fmuld %f32,%f28,%f50 ! (0_0) dtmp0 = res0_hi * res0; + faddd %f60,%f54,%f46 ! (4_0) dtmp1 = y0 + y_hi0; + + fmuld %f36,%f28,%f36 ! (0_0) dtmp1 = res0_lo * res0; + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i2 + fsubd %f60,%f54,%f60 ! 
(4_0) y_lo0 = y0 - y_hi0; + + sllx %g1,32,%g1 ! (6_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp13] ! (6_0) *(long long*)&scl0 = ll; + ba .cont24 + add TBL,TBL_SHIFT+24,%o0 + + .align 16 +.update22: + cmp counter,7 + ble 1f + nop + + sub counter,7,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + + stx %o0,[%fp+tmp_py] + + mov 7,counter +1: + fmuld %f52,%f14,%f50 ! (2_0) dtmp0 = dd * dres; + faddd %f48,%f28,%f48 ! (7_1) res0 += dtmp0; + + fand %f26,DA0,%f28 ! (0_0) res0 = vis_fand(dres,DA0); + + fmuld %f46,%f46,%f0 ! (4_0) res0_hi = x_hi0 * x_hi0; + fsubd %f10,%f46,%f2 ! (4_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f20 ! (4_0) dtmp0 = y_hi0 * y_hi0; + faddd %f10,%f46,%f62 ! (4_0) res0_lo = x0 + x_hi0; + + fmuld %f16,%f18,%f18 ! (1_0) dtmp2 = dd * dres; + fsubd DTWO,%f50,%f10 ! (2_0) dtmp0 = DTWO - dtmp0; + + fmuld %f32,%f28,%f50 ! (0_0) dtmp0 = res0_hi * res0; + faddd %f60,%f54,%f46 ! (4_0) dtmp1 = y0 + y_hi0; + + fmuld %f36,%f28,%f36 ! (0_0) dtmp1 = res0_lo * res0; + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i2 + fsubd %f60,%f54,%f60 ! (4_0) y_lo0 = y0 - y_hi0; + + sllx %g1,32,%g1 ! (6_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp13] ! (6_0) *(long long*)&scl0 = ll; + ba .cont24 + add TBL,TBL_SHIFT+24,%o0 + + .align 16 +.update23: + cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000 + bge,pn %icc,.cont23a ! (0_0) if ( hy0 < 0x00100000 ) + + cmp counter,7 + ble,a 1f + nop + + sub counter,7,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + + mov 7,counter + stx %o0,[%fp+tmp_py] +1: + fmuld %f16,%f18,%f18 ! (1_0) dtmp2 = dd * dres; + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i2 + fsubd DTWO,%f50,%f10 ! (2_0) dtmp0 = DTWO - dtmp0; + + ba .cont23b + add TBL,TBL_SHIFT+24,%o0 + + .align 16 +.update25: + cmp counter,8 + ble 1f + nop + + sub counter,8,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + stx %i3,[%fp+tmp_py] + + mov 8,counter +1: + fsubd %f12,D2ON36,%f54 ! 
(5_0) y_hi0 -= D2ON36; + + fmuld %f10,%f22,%f50 ! (3_0) dtmp0 = dd * dres; + faddd %f28,%f48,%f48 ! (0_0) res0 += dtmp0; + + fand %f16,DA0,%f28 ! (1_0) res0 = vis_fand(dres,DA0); + + fmuld %f20,%f20,%f0 ! (5_0) res0_hi = x_hi0 * x_hi0; + fsubd %f60,%f20,%f2 ! (5_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (5_0) dtmp0 = y_hi0 * y_hi0; + faddd %f60,%f20,%f62 ! (5_0) res0_lo = x0 + x_hi0; + + fmuld %f26,%f14,%f14 ! (2_0) dtmp2 = dd * dres; + fsubd DTWO,%f50,%f20 ! (3_0) dtmp0 = DTWO - dtmp0; + + fmuld %f42,%f28,%f60 ! (1_0) dtmp0 = res0_hi * res0; + faddd %f52,%f54,%f50 ! (5_0) dtmp1 = y0 + y_hi0; + + fmuld %f34,%f28,%f34 ! (1_0) dtmp1 = res0_lo * res0; + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i4 + fsubd %f52,%f54,%f54 ! (5_0) y_lo0 = y0 - y_hi0; + + sllx %g1,32,%g1 ! (7_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp15] ! (7_0) *(long long*)&scl0 = ll; + ba .cont28 + add TBL,TBL_SHIFT+24,%i3 + + .align 16 +.update26: + cmp counter,8 + ble 1f + nop + + sub counter,8,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + stx %i3,[%fp+tmp_py] + + mov 8,counter +1: + fmuld %f10,%f22,%f50 ! (3_0) dtmp0 = dd * dres; + faddd %f28,%f48,%f48 ! (0_0) res0 += dtmp0; + + fand %f16,DA0,%f28 ! (1_0) res0 = vis_fand(dres,DA0); + + fmuld %f20,%f20,%f0 ! (5_0) res0_hi = x_hi0 * x_hi0; + fsubd %f60,%f20,%f2 ! (5_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (5_0) dtmp0 = y_hi0 * y_hi0; + faddd %f60,%f20,%f62 ! (5_0) res0_lo = x0 + x_hi0; + + fmuld %f26,%f14,%f14 ! (2_0) dtmp2 = dd * dres; + fsubd DTWO,%f50,%f20 ! (3_0) dtmp0 = DTWO - dtmp0; + + fmuld %f42,%f28,%f60 ! (1_0) dtmp0 = res0_hi * res0; + faddd %f52,%f54,%f50 ! (5_0) dtmp1 = y0 + y_hi0; + + fmuld %f34,%f28,%f34 ! (1_0) dtmp1 = res0_lo * res0; + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i4 + fsubd %f52,%f54,%f54 ! (5_0) y_lo0 = y0 - y_hi0; + + sllx %g1,32,%g1 ! (7_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp15] ! 
(7_0) *(long long*)&scl0 = ll; + ba .cont28 + add TBL,TBL_SHIFT+24,%i3 + + .align 16 +.update27: + cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000 + bge,pn %icc,.cont27a ! (0_0) if ( hy0 < 0x00100000 ) + + cmp counter,8 + ble,a 1f + nop + + sub counter,8,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + mov 8,counter + stx %i3,[%fp+tmp_py] +1: + fmuld %f26,%f14,%f14 ! (2_0) dtmp2 = dd * dres; + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i4 + fsubd DTWO,%f50,%f20 ! (3_0) dtmp0 = DTWO - dtmp0; + + ba .cont27b + add TBL,TBL_SHIFT+24,%i3 + + .align 16 +.update29: + cmp counter,1 + ble 1f + nop + + sub counter,1,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + + stx %o0,[%fp+tmp_py] + + mov 1,counter +1: + fsubd %f2,D2ON36,%f2 ! (6_1) y_hi0 -= D2ON36; + + fmuld %f54,%f24,%f50 ! (4_1) dtmp0 = dd * dres; + stx %g1,[%fp+dtmp0] ! (7_1) *(long long*)&scl0 = ll; + faddd %f28,%f48,%f52 ! (1_1) res0 += dtmp0; + + fand %f26,DA0,%f48 ! (2_1) res0 = vis_fand(dres,DA0); + + fmuld %f20,%f20,%f0 ! (6_1) res0_hi = x_hi0 * x_hi0; + fsubd %f10,%f20,%f28 ! (6_1) x_lo0 = x0 - x_hi0; + + fmuld %f2,%f2,%f46 ! (6_1) dtmp0 = y_hi0 * y_hi0; + add %i5,stridez,%i5 ! pz += stridez + faddd %f10,%f20,%f62 ! (6_1) res0_lo = x0 + x_hi0; + + fmuld %f18,%f22,%f22 ! (3_1) dtmp2 = dd * dres; + sethi %hi(0x3ff00000),%o4 + add TBL,TBL_SHIFT+24,%i2 + fsubd DTWO,%f50,%f20 ! (4_1) dtmp0 = DTWO - dtmp0; + + ba .cont32 + add TBL,TBL_SHIFT+24,%o0 + + .align 16 +.update30: + cmp counter,1 + ble 1f + nop + + sub counter,1,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + + stx %o0,[%fp+tmp_py] + + mov 1,counter +1: + fmuld %f54,%f24,%f50 ! (4_1) dtmp0 = dd * dres; + stx %g1,[%fp+dtmp0] ! (7_1) *(long long*)&scl0 = ll; + faddd %f28,%f48,%f52 ! (1_1) res0 += dtmp0; + + fand %f26,DA0,%f48 ! (2_1) res0 = vis_fand(dres,DA0); + + fmuld %f20,%f20,%f0 ! (6_1) res0_hi = x_hi0 * x_hi0; + fsubd %f10,%f20,%f28 ! (6_1) x_lo0 = x0 - x_hi0; + + fmuld %f2,%f2,%f46 ! 
(6_1) dtmp0 = y_hi0 * y_hi0; + add %i5,stridez,%i5 ! pz += stridez + faddd %f10,%f20,%f62 ! (6_1) res0_lo = x0 + x_hi0; + + fmuld %f18,%f22,%f22 ! (3_1) dtmp2 = dd * dres; + sethi %hi(0x3ff00000),%o4 + add TBL,TBL_SHIFT+24,%i2 + fsubd DTWO,%f50,%f20 ! (4_1) dtmp0 = DTWO - dtmp0; + + ba .cont32 + add TBL,TBL_SHIFT+24,%o0 + + .align 16 +.update31: + cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000 + bge,pn %icc,.cont31 ! (0_0) if ( hy0 < 0x00100000 ) + + cmp counter,1 + ble,a 1f + nop + + sub counter,1,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + + mov 1,counter + stx %o0,[%fp+tmp_py] +1: + fmuld %f20,%f20,%f0 ! (6_1) res0_hi = x_hi0 * x_hi0; + fsubd %f10,%f20,%f28 ! (6_1) x_lo0 = x0 - x_hi0; + + fmuld %f2,%f2,%f46 ! (6_1) dtmp0 = y_hi0 * y_hi0; + add %i5,stridez,%i5 ! pz += stridez + faddd %f10,%f20,%f62 ! (6_1) res0_lo = x0 + x_hi0; + + fmuld %f18,%f22,%f22 ! (3_1) dtmp2 = dd * dres; + sethi %hi(0x3ff00000),%o4 + add TBL,TBL_SHIFT+24,%i2 + fsubd DTWO,%f50,%f20 ! (4_1) dtmp0 = DTWO - dtmp0; + + ba .cont32 + add TBL,TBL_SHIFT+24,%o0 + + .align 16 +.update33: + cmp counter,2 + ble 1f + nop + + sub counter,2,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + stx %i3,[%fp+tmp_py] + + mov 2,counter +1: + st %f1,[%i5+4] ! (0_1) ((float*)pz)[1] = ((float*)&res0)[1]; + fsubd %f50,D2ON36,%f54 ! (7_1) y_hi0 -= D2ON36; + + fmuld %f26,%f16,%f50 ! (5_1) dtmp0 = dd * dres; + faddd %f48,%f52,%f52 ! (2_1) res0 += dtmp0; + + add %i5,stridez,%i5 ! pz += stridez + stx %o4,[%fp+dtmp2] ! (0_0) *(long long*)&scl0 = ll; + fand %f28,DA0,%f48 ! (3_1) res0 = vis_fand(dres,DA0); + + fmuld %f20,%f20,%f2 ! (7_1) res0_hi = x_hi0 * x_hi0; + fsubd %f10,%f20,%f0 ! (7_1) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (7_1) dtmp0 = y_hi0 * y_hi0; + faddd %f10,%f20,%f62 ! (7_1) res0_lo = x0 + x_hi0; + + fmuld %f44,%f48,%f10 ! (3_1) dtmp0 = res0_hi * res0; + fsubd DTWO,%f50,%f20 ! (5_1) dtmp0 = DTWO - dtmp0; + + fmuld %f14,%f24,%f24 ! 
(4_1) dtmp2 = dd * dres; + faddd %f60,%f54,%f50 ! (7_1) dtmp1 = y0 + y_hi0; + + fmuld %f38,%f48,%f38 ! (3_1) dtmp1 = res0_lo * res0; + sethi %hi(0x3ff00000),%o4 + add TBL,TBL_SHIFT+24,%i4 + fsubd %f60,%f54,%f12 ! (7_1) y_lo0 = y0 - y_hi0; + + sllx %o4,32,%o4 ! (1_0) ll = (long long)j0 << 32; + stx %o4,[%fp+dtmp3] ! (1_0) *(long long*)&scl0 = ll; + ba .cont36 + add TBL,TBL_SHIFT+24,%i3 + + .align 16 +.update34: + cmp counter,2 + ble 1f + nop + + sub counter,2,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + stx %i3,[%fp+tmp_py] + + mov 2,counter +1: + add %i5,stridez,%i5 ! pz += stridez + stx %o4,[%fp+dtmp2] ! (0_0) *(long long*)&scl0 = ll; + fand %f28,DA0,%f48 ! (3_1) res0 = vis_fand(dres,DA0); + + fmuld %f20,%f20,%f2 ! (7_1) res0_hi = x_hi0 * x_hi0; + fsubd %f10,%f20,%f0 ! (7_1) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (7_1) dtmp0 = y_hi0 * y_hi0; + faddd %f10,%f20,%f62 ! (7_1) res0_lo = x0 + x_hi0; + + fmuld %f44,%f48,%f10 ! (3_1) dtmp0 = res0_hi * res0; + fsubd DTWO,%f50,%f20 ! (5_1) dtmp0 = DTWO - dtmp0; + + fmuld %f14,%f24,%f24 ! (4_1) dtmp2 = dd * dres; + faddd %f60,%f54,%f50 ! (7_1) dtmp1 = y0 + y_hi0; + + fmuld %f38,%f48,%f38 ! (3_1) dtmp1 = res0_lo * res0; + sethi %hi(0x3ff00000),%o4 + add TBL,TBL_SHIFT+24,%i4 + fsubd %f60,%f54,%f12 ! (7_1) y_lo0 = y0 - y_hi0; + + sllx %o4,32,%o4 ! (1_0) ll = (long long)j0 << 32; + stx %o4,[%fp+dtmp3] ! (1_0) *(long long*)&scl0 = ll; + ba .cont36 + add TBL,TBL_SHIFT+24,%i3 + + .align 16 +.update35: + cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000 + bge,pn %icc,.cont35a ! (0_0) if ( hy0 < 0x00100000 ) + + cmp counter,2 + ble,a 1f + nop + + sub counter,2,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + mov 2,counter + stx %i3,[%fp+tmp_py] +1: + fmuld %f44,%f48,%f10 ! (3_1) dtmp0 = res0_hi * res0; + sethi %hi(0x3ff00000),%o4 + add TBL,TBL_SHIFT+24,%i4 + fsubd DTWO,%f50,%f20 ! 
(5_1) dtmp0 = DTWO - dtmp0; + + ba .cont35b + add TBL,TBL_SHIFT+24,%i3 + + .align 16 +.update37: + cmp counter,3 + ble 1f + nop + + sub counter,3,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + + stx %o0,[%fp+tmp_py] + + mov 3,counter +1: + st %f1,[%i5+4] ! (1_1) ((float*)pz)[1] = ((float*)&res0)[1]; + fsubd %f12,D2ON36,%f54 ! (0_0) y_hi0 -= D2ON36; + + fmuld %f28,%f18,%f50 ! (6_1) dtmp0 = dd * dres; + faddd %f48,%f52,%f52 ! (3_1) res0 += dtmp0; + + add %i5,stridez,%i5 ! pz += stridez + stx %o4,[%fp+dtmp4] ! (1_0) *(long long*)&scl0 = ll; + fand %f26,DA0,%f48 ! (4_1) res0 = vis_fand(dres,DA0); + + fmuld %f20,%f20,%f2 ! (0_0) res0_hi = x_hi0 * x_hi0; + fsubd %f10,%f20,%f0 ! (0_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (0_0) dtmp0 = y_hi0 * y_hi0; + faddd %f10,%f20,%f62 ! (0_0) res0_lo = x0 + x_hi0; + + fmuld %f32,%f48,%f10 ! (4_1) dtmp0 = res0_hi * res0; + fsubd DTWO,%f50,%f20 ! (6_1) dtmp0 = DTWO - dtmp0; + + fmuld %f22,%f16,%f16 ! (5_1) dtmp2 = dd * dres; + faddd %f60,%f54,%f50 ! (0_0) dtmp1 = y0 + y_hi0; + + fmuld %f36,%f48,%f36 ! (4_1) dtmp1 = res0_lo * res0; + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i2 + fsubd %f60,%f54,%f12 ! (0_0) y_lo0 = y0 - y_hi0; + + sllx %g1,32,%g1 ! (2_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp5] ! (2_0) *(long long*)&scl0 = ll; + ba .cont40 + add TBL,TBL_SHIFT+24,%o0 + + .align 16 +.update38: + cmp counter,3 + ble 1f + nop + + sub counter,3,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + + stx %o0,[%fp+tmp_py] + + mov 3,counter +1: + add %i5,stridez,%i5 ! pz += stridez + stx %o4,[%fp+dtmp4] ! (1_0) *(long long*)&scl0 = ll; + fand %f26,DA0,%f48 ! (4_1) res0 = vis_fand(dres,DA0); + + fmuld %f20,%f20,%f2 ! (0_0) res0_hi = x_hi0 * x_hi0; + fsubd %f10,%f20,%f0 ! (0_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (0_0) dtmp0 = y_hi0 * y_hi0; + faddd %f10,%f20,%f62 ! (0_0) res0_lo = x0 + x_hi0; + + fmuld %f32,%f48,%f10 ! (4_1) dtmp0 = res0_hi * res0; + fsubd DTWO,%f50,%f20 ! 
(6_1) dtmp0 = DTWO - dtmp0; + + fmuld %f22,%f16,%f16 ! (5_1) dtmp2 = dd * dres; + faddd %f60,%f54,%f50 ! (0_0) dtmp1 = y0 + y_hi0; + + fmuld %f36,%f48,%f36 ! (4_1) dtmp1 = res0_lo * res0; + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i2 + fsubd %f60,%f54,%f12 ! (0_0) y_lo0 = y0 - y_hi0; + + sllx %g1,32,%g1 ! (2_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp5] ! (2_0) *(long long*)&scl0 = ll; + ba .cont40 + add TBL,TBL_SHIFT+24,%o0 + + .align 16 +.update39: + cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000 + bge,pn %icc,.cont39a ! (0_0) if ( hy0 < 0x00100000 ) + + cmp counter,3 + ble,a 1f + nop + + sub counter,3,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + + mov 3,counter + stx %o0,[%fp+tmp_py] +1: + fmuld %f32,%f48,%f10 ! (4_1) dtmp0 = res0_hi * res0; + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i2 + fsubd DTWO,%f50,%f20 ! (6_1) dtmp0 = DTWO - dtmp0; + + ba .cont39b + add TBL,TBL_SHIFT+24,%o0 + + .align 16 +.update41: + cmp counter,4 + ble 1f + nop + + sub counter,4,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + stx %i3,[%fp+tmp_py] + + mov 4,counter +1: + st %f1,[%i5+4] ! (2_1) ((float*)pz)[1] = ((float*)&res0)[1]; + fsubd %f12,D2ON36,%f54 ! (1_0) y_hi0 -= D2ON36; + + fmuld %f26,%f14,%f50 ! (7_1) dtmp0 = dd * dres; + faddd %f48,%f52,%f52 ! (4_1) res0 += dtmp0; + + add %i5,stridez,%i5 ! pz += stridez + stx %g1,[%fp+dtmp6] ! (2_0) *(long long*)&scl0 = ll; + fand %f28,DA0,%f48 ! (5_1) res0 = vis_fand(dres,DA0); + + fmuld %f20,%f20,%f2 ! (1_0) res0_hi = x_hi0 * x_hi0; + fsubd %f10,%f20,%f0 ! (1_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (1_0) dtmp0 = y_hi0 * y_hi0; + faddd %f10,%f20,%f62 ! (1_0) res0_lo = x0 + x_hi0; + + fmuld %f42,%f48,%f10 ! (5_1) dtmp0 = res0_hi * res0; + fsubd DTWO,%f50,%f20 ! (7_1) dtmp0 = DTWO - dtmp0; + + fmuld %f24,%f18,%f18 ! (6_1) dtmp2 = dd * dres; + faddd %f60,%f54,%f50 ! (1_0) dtmp1 = y0 + y_hi0; + + fmuld %f34,%f48,%f34 ! 
(5_1) dtmp1 = res0_lo * res0; + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i4 + fsubd %f60,%f54,%f12 ! (1_0) y_lo0 = y0 - y_hi0 + + sllx %g1,32,%g1 ! (3_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp7] ! (3_0) *(long long*)&scl0 = ll; + ba .cont44 + add TBL,TBL_SHIFT+24,%i3 + + .align 16 +.update42: + cmp counter,4 + ble 1f + nop + + sub counter,4,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + stx %i3,[%fp+tmp_py] + + mov 4,counter +1: + add %i5,stridez,%i5 ! pz += stridez + stx %g1,[%fp+dtmp6] ! (2_0) *(long long*)&scl0 = ll; + fand %f28,DA0,%f48 ! (5_1) res0 = vis_fand(dres,DA0); + + fmuld %f20,%f20,%f2 ! (1_0) res0_hi = x_hi0 * x_hi0; + fsubd %f10,%f20,%f0 ! (1_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (1_0) dtmp0 = y_hi0 * y_hi0; + faddd %f10,%f20,%f62 ! (1_0) res0_lo = x0 + x_hi0; + + fmuld %f42,%f48,%f10 ! (5_1) dtmp0 = res0_hi * res0; + fsubd DTWO,%f50,%f20 ! (7_1) dtmp0 = DTWO - dtmp0; + + fmuld %f24,%f18,%f18 ! (6_1) dtmp2 = dd * dres; + faddd %f60,%f54,%f50 ! (1_0) dtmp1 = y0 + y_hi0; + + fmuld %f34,%f48,%f34 ! (5_1) dtmp1 = res0_lo * res0; + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i4 + fsubd %f60,%f54,%f12 ! (1_0) y_lo0 = y0 - y_hi0 + + sllx %g1,32,%g1 ! (3_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp7] ! (3_0) *(long long*)&scl0 = ll; + ba .cont44 + add TBL,TBL_SHIFT+24,%i3 + + .align 16 +.update43: + cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000 + bge,pn %icc,.cont43a ! (0_0) if ( hy0 < 0x00100000 ) + + cmp counter,4 + ble,a 1f + nop + + sub counter,4,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + mov 4,counter + stx %i3,[%fp+tmp_py] +1: + fmuld %f42,%f48,%f10 ! (5_1) dtmp0 = res0_hi * res0; + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i4 + fsubd DTWO,%f50,%f20 ! 
(7_1) dtmp0 = DTWO - dtmp0; + + ba .cont43b + add TBL,TBL_SHIFT+24,%i3 + + .align 16 +.update45: + cmp counter,5 + ble 1f + nop + + sub counter,5,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + + stx %o0,[%fp+tmp_py] + + mov 5,counter +1: + fsubd %f50,D2ON36,%f54 ! (2_0) y_hi0 -= D2ON36; + + fmuld %f28,%f22,%f50 ! (0_0) dtmp0 = dd * dres; + st %f1,[%i5+4] ! (3_1) ((float*)pz)[1] = ((float*)&res0)[1]; + faddd %f48,%f52,%f52 ! (5_1) res0 += dtmp0; + + fand %f26,DA0,%f48 ! (6_1) res0 = vis_fand(dres,DA0); + + fmuld %f20,%f20,%f2 ! (2_0) res0_hi = x_hi0 * x_hi0; + stx %g1,[%fp+dtmp8] ! (3_0) *(long long*)&scl0 = ll; + fsubd %f10,%f20,%f0 ! (2_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (2_0) dtmp0 = y_hi0 * y_hi0; + add %i5,stridez,%i5 ! pz += stridez + faddd %f10,%f20,%f62 ! (2_0) res0_lo = x0 + x_hi0; + + fmuld %f30,%f48,%f10 ! (6_1) dtmp0 = res0_hi * res0; + fsubd DTWO,%f50,%f20 ! (0_0) dtmp0 = DTWO - dtmp0; + + fmuld %f16,%f14,%f14 ! (7_1) dtmp2 = dd * dres; + faddd %f60,%f54,%f50 ! (2_0) dtmp1 = y0 + y_hi0; + + fmuld %f40,%f48,%f40 ! (6_1) dtmp1 = res0_lo * res0; + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i2 + fsubd %f60,%f54,%f12 ! (2_0) y_lo0 = y0 - y_hi0; + + sllx %g1,32,%g1 ! (4_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp9] ! (4_0) *(long long*)&scl0 = ll; + ba .cont48 + add TBL,TBL_SHIFT+24,%o0 + + .align 16 +.update46: + cmp counter,5 + ble 1f + nop + + sub counter,5,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + + stx %o0,[%fp+tmp_py] + + mov 5,counter +1: + fmuld %f28,%f22,%f50 ! (0_0) dtmp0 = dd * dres; + st %f1,[%i5+4] ! (3_1) ((float*)pz)[1] = ((float*)&res0)[1]; + faddd %f48,%f52,%f52 ! (5_1) res0 += dtmp0; + + fand %f26,DA0,%f48 ! (6_1) res0 = vis_fand(dres,DA0); + + fmuld %f20,%f20,%f2 ! (2_0) res0_hi = x_hi0 * x_hi0; + stx %g1,[%fp+dtmp8] ! (3_0) *(long long*)&scl0 = ll; + fsubd %f10,%f20,%f0 ! (2_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! 
(2_0) dtmp0 = y_hi0 * y_hi0; + add %i5,stridez,%i5 ! pz += stridez + faddd %f10,%f20,%f62 ! (2_0) res0_lo = x0 + x_hi0; + + fmuld %f30,%f48,%f10 ! (6_1) dtmp0 = res0_hi * res0; + fsubd DTWO,%f50,%f20 ! (0_0) dtmp0 = DTWO - dtmp0; + + fmuld %f16,%f14,%f14 ! (7_1) dtmp2 = dd * dres; + faddd %f60,%f54,%f50 ! (2_0) dtmp1 = y0 + y_hi0; + + fmuld %f40,%f48,%f40 ! (6_1) dtmp1 = res0_lo * res0; + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i2 + fsubd %f60,%f54,%f12 ! (2_0) y_lo0 = y0 - y_hi0; + + sllx %g1,32,%g1 ! (4_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp9] ! (4_0) *(long long*)&scl0 = ll; + ba .cont48 + add TBL,TBL_SHIFT+24,%o0 + + .align 16 +.update47: + cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000 + bge,pn %icc,.cont47a ! (0_0) if ( hy0 < 0x00100000 ) + + cmp counter,5 + ble,a 1f + nop + + sub counter,5,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + + mov 5,counter + stx %o0,[%fp+tmp_py] +1: + fmuld %f20,%f20,%f2 ! (2_0) res0_hi = x_hi0 * x_hi0; + stx %g1,[%fp+dtmp8] ! (3_0) *(long long*)&scl0 = ll; + fsubd %f10,%f20,%f0 ! (2_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (2_0) dtmp0 = y_hi0 * y_hi0; + add %i5,stridez,%i5 ! pz += stridez + faddd %f10,%f20,%f62 ! (2_0) res0_lo = x0 + x_hi0; + + fmuld %f30,%f48,%f10 ! (6_1) dtmp0 = res0_hi * res0; + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i2 + fsubd DTWO,%f50,%f20 ! (0_0) dtmp0 = DTWO - dtmp0; + + ba .cont47b + add TBL,TBL_SHIFT+24,%o0 + + .align 16 +.update49: + cmp counter,6 + ble 1f + nop + + sub counter,6,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + stx %i3,[%fp+tmp_py] + + mov 6,counter +1: + fsubd %f50,D2ON36,%f54 ! (3_0) y_hi0 -= D2ON36; + + fmuld %f26,%f18,%f50 ! (1_0) dtmp0 = dd * dres; + st %f1,[%i5+4] ! (4_1) ((float*)pz)[1] = ((float*)&res0)[1]; + faddd %f48,%f52,%f52 ! (6_1) res0 += dtmp0; + + fand %f28,DA0,%f48 ! (7_1) res0 = vis_fand(dres,DA0); + + fmuld %f20,%f20,%f2 ! (3_0) res0_hi = x_hi0 * x_hi0; + stx %g1,[%fp+dtmp10] ! 
(4_0) *(long long*)&scl0 = ll; + fsubd %f10,%f20,%f0 ! (3_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (3_0) dtmp0 = y_hi0 * y_hi0; + add %i5,stridez,%i5 ! pz += stridez + faddd %f10,%f20,%f62 ! (3_0) res0_lo = x0 + x_hi0; + + fmuld %f44,%f48,%f10 ! (7_1) dtmp0 = res0_hi * res0; + fsubd DTWO,%f50,%f20 ! (1_0) dtmp0 = DTWO - dtmp0; + + fmuld %f24,%f22,%f22 ! (0_0) dtmp2 = dd * dres; + faddd %f60,%f54,%f50 ! (3_0) dtmp1 = y0 + y_hi0; + + fmuld %f38,%f48,%f38 ! (7_1) dtmp1 = res0_lo * res0; + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i4 + fsubd %f60,%f54,%f12 ! (3_0) y_lo0 = y0 - y_hi0; + + sllx %g1,32,%g1 ! (5_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp11] ! (5_0) *(long long*)&scl0 = ll; + ba .cont52 + add TBL,TBL_SHIFT+24,%i3 + + .align 16 +.update50: + cmp counter,6 + ble 1f + nop + + sub counter,6,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + stx %i3,[%fp+tmp_py] + + mov 6,counter +1: + fmuld %f26,%f18,%f50 ! (1_0) dtmp0 = dd * dres; + st %f1,[%i5+4] ! (4_1) ((float*)pz)[1] = ((float*)&res0)[1]; + faddd %f48,%f52,%f52 ! (6_1) res0 += dtmp0; + + fand %f28,DA0,%f48 ! (7_1) res0 = vis_fand(dres,DA0); + + fmuld %f20,%f20,%f2 ! (3_0) res0_hi = x_hi0 * x_hi0; + stx %g1,[%fp+dtmp10] ! (4_0) *(long long*)&scl0 = ll; + fsubd %f10,%f20,%f0 ! (3_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (3_0) dtmp0 = y_hi0 * y_hi0; + add %i5,stridez,%i5 ! pz += stridez + faddd %f10,%f20,%f62 ! (3_0) res0_lo = x0 + x_hi0; + + fmuld %f44,%f48,%f10 ! (7_1) dtmp0 = res0_hi * res0; + fsubd DTWO,%f50,%f20 ! (1_0) dtmp0 = DTWO - dtmp0; + + fmuld %f24,%f22,%f22 ! (0_0) dtmp2 = dd * dres; + faddd %f60,%f54,%f50 ! (3_0) dtmp1 = y0 + y_hi0; + + fmuld %f38,%f48,%f38 ! (7_1) dtmp1 = res0_lo * res0; + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i4 + fsubd %f60,%f54,%f12 ! (3_0) y_lo0 = y0 - y_hi0; + + sllx %g1,32,%g1 ! (5_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp11] ! 
(5_0) *(long long*)&scl0 = ll; + ba .cont52 + add TBL,TBL_SHIFT+24,%i3 + + .align 16 +.update51: + cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000 + bge,pn %icc,.cont51a ! (0_0) if ( hy0 < 0x00100000 ) + + cmp counter,6 + ble,a 1f + nop + + sub counter,6,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + mov 6,counter + stx %i3,[%fp+tmp_py] +1: + fmuld %f20,%f20,%f2 ! (3_0) res0_hi = x_hi0 * x_hi0; + stx %g1,[%fp+dtmp10] ! (4_0) *(long long*)&scl0 = ll; + fsubd %f10,%f20,%f0 ! (3_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (3_0) dtmp0 = y_hi0 * y_hi0; + add %i5,stridez,%i5 ! pz += stridez + faddd %f10,%f20,%f62 ! (3_0) res0_lo = x0 + x_hi0; + + fmuld %f44,%f48,%f10 ! (7_1) dtmp0 = res0_hi * res0; + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i4 + fsubd DTWO,%f50,%f20 ! (1_0) dtmp0 = DTWO - dtmp0; + + ba .cont51b + add TBL,TBL_SHIFT+24,%i3 + + .align 16 +.update53: + cmp counter,7 + ble 1f + nop + + sub counter,7,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + + stx %o0,[%fp+tmp_py] + + mov 7,counter +1: + fsubd %f50,D2ON36,%f54 ! (4_0) y_hi0 -= D2ON36; + + fmuld %f52,%f14,%f50 ! (2_0) dtmp0 = dd * dres; + st %f1,[%i5+4] ! (5_1) ((float*)pz)[1] = ((float*)&res0)[1]; + faddd %f48,%f28,%f48 ! (7_1) res0 += dtmp0; + + fand %f26,DA0,%f28 ! (0_0) res0 = vis_fand(dres,DA0); + + fmuld %f46,%f46,%f0 ! (4_0) res0_hi = x_hi0 * x_hi0; + stx %g1,[%fp+dtmp12] ! (5_0) *(long long*)&scl0 = ll; + fsubd %f10,%f46,%f2 ! (4_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f20 ! (4_0) dtmp0 = y_hi0 * y_hi0; + add %i5,stridez,%i5 ! pz += stridez + faddd %f10,%f46,%f62 ! (4_0) res0_lo = x0 + x_hi0; + + fmuld %f16,%f18,%f18 ! (1_0) dtmp2 = dd * dres; + fsubd DTWO,%f50,%f10 ! (2_0) dtmp0 = DTWO - dtmp0; + + fmuld %f32,%f28,%f50 ! (0_0) dtmp0 = res0_hi * res0; + faddd %f60,%f54,%f46 ! (4_0) dtmp1 = y0 + y_hi0; + + fmuld %f36,%f28,%f36 ! (0_0) dtmp1 = res0_lo * res0; + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i2 + fsubd %f60,%f54,%f60 ! 
(4_0) y_lo0 = y0 - y_hi0; + + sllx %g1,32,%g1 ! (6_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp13] ! (6_0) *(long long*)&scl0 = ll; + ba .cont56 + add TBL,TBL_SHIFT+24,%o0 + + .align 16 +.update54: + cmp counter,7 + ble 1f + nop + + sub counter,7,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + + stx %o0,[%fp+tmp_py] + + mov 7,counter +1: + fmuld %f52,%f14,%f50 ! (2_0) dtmp0 = dd * dres; + st %f1,[%i5+4] ! (5_1) ((float*)pz)[1] = ((float*)&res0)[1]; + faddd %f48,%f28,%f48 ! (7_1) res0 += dtmp0; + + fand %f26,DA0,%f28 ! (0_0) res0 = vis_fand(dres,DA0); + + fmuld %f46,%f46,%f0 ! (4_0) res0_hi = x_hi0 * x_hi0; + stx %g1,[%fp+dtmp12] ! (5_0) *(long long*)&scl0 = ll; + fsubd %f10,%f46,%f2 ! (4_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f20 ! (4_0) dtmp0 = y_hi0 * y_hi0; + add %i5,stridez,%i5 ! pz += stridez + faddd %f10,%f46,%f62 ! (4_0) res0_lo = x0 + x_hi0; + + fmuld %f16,%f18,%f18 ! (1_0) dtmp2 = dd * dres; + fsubd DTWO,%f50,%f10 ! (2_0) dtmp0 = DTWO - dtmp0; + + fmuld %f32,%f28,%f50 ! (0_0) dtmp0 = res0_hi * res0; + faddd %f60,%f54,%f46 ! (4_0) dtmp1 = y0 + y_hi0; + + fmuld %f36,%f28,%f36 ! (0_0) dtmp1 = res0_lo * res0; + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i2 + fsubd %f60,%f54,%f60 ! (4_0) y_lo0 = y0 - y_hi0; + + sllx %g1,32,%g1 ! (6_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp13] ! (6_0) *(long long*)&scl0 = ll; + ba .cont56 + add TBL,TBL_SHIFT+24,%o0 + + .align 16 +.update55: + cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000 + bge,pn %icc,.cont55a ! (0_0) if ( hy0 < 0x00100000 ) + + cmp counter,7 + ble,a 1f + nop + + sub counter,7,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + + mov 7,counter + stx %o0,[%fp+tmp_py] +1: + fmuld %f46,%f46,%f0 ! (4_0) res0_hi = x_hi0 * x_hi0; + stx %g1,[%fp+dtmp12] ! (5_0) *(long long*)&scl0 = ll; + fsubd %f10,%f46,%f2 ! (4_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f20 ! (4_0) dtmp0 = y_hi0 * y_hi0; + add %i5,stridez,%i5 ! pz += stridez + faddd %f10,%f46,%f62 ! 
(4_0) res0_lo = x0 + x_hi0; + + fmuld %f16,%f18,%f18 ! (1_0) dtmp2 = dd * dres; + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i2 + fsubd DTWO,%f50,%f10 ! (2_0) dtmp0 = DTWO - dtmp0; + + ba .cont55b + add TBL,TBL_SHIFT+24,%o0 + + .align 16 +.update57: + cmp counter,8 + ble 1f + nop + + sub counter,8,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + stx %i3,[%fp+tmp_py] + + mov 8,counter +1: + fsubd %f12,D2ON36,%f54 ! (5_0) y_hi0 -= D2ON36; + + fmuld %f10,%f22,%f50 ! (3_0) dtmp0 = dd * dres; + st %f3,[%i5+4] ! (6_1) ((float*)pz)[1] = ((float*)&res0)[1]; + faddd %f28,%f48,%f48 ! (0_0) res0 += dtmp0; + + fand %f16,DA0,%f28 ! (1_0) res0 = vis_fand(dres,DA0); + + fmuld %f20,%f20,%f0 ! (5_0) res0_hi = x_hi0 * x_hi0; + stx %g1,[%fp+dtmp14] ! (6_0) *(long long*)&scl0 = ll; + fsubd %f60,%f20,%f2 ! (5_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (5_0) dtmp0 = y_hi0 * y_hi0; + add %i5,stridez,%i5 ! pz += stridez + faddd %f60,%f20,%f62 ! (5_0) res0_lo = x0 + x_hi0; + + fmuld %f26,%f14,%f14 ! (2_0) dtmp2 = dd * dres; + fsubd DTWO,%f50,%f20 ! (3_0) dtmp0 = DTWO - dtmp0; + + fmuld %f42,%f28,%f60 ! (1_0) dtmp0 = res0_hi * res0; + faddd %f52,%f54,%f50 ! (5_0) dtmp1 = y0 + y_hi0; + + fmuld %f34,%f28,%f34 ! (1_0) dtmp1 = res0_lo * res0; + fsubd %f52,%f54,%f54 ! (5_0) y_lo0 = y0 - y_hi0; + + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i4 + + sllx %g1,32,%g1 ! (7_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp15] ! (7_0) *(long long*)&scl0 = ll; + ba .cont60 + add TBL,TBL_SHIFT+24,%i3 + + .align 16 +.update58: + cmp counter,8 + ble 1f + nop + + sub counter,8,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + stx %i3,[%fp+tmp_py] + + mov 8,counter +1: + fmuld %f10,%f22,%f50 ! (3_0) dtmp0 = dd * dres; + st %f3,[%i5+4] ! (6_1) ((float*)pz)[1] = ((float*)&res0)[1]; + faddd %f28,%f48,%f48 ! (0_0) res0 += dtmp0; + + fand %f16,DA0,%f28 ! (1_0) res0 = vis_fand(dres,DA0); + + fmuld %f20,%f20,%f0 ! 
(5_0) res0_hi = x_hi0 * x_hi0; + stx %g1,[%fp+dtmp14] ! (6_0) *(long long*)&scl0 = ll; + fsubd %f60,%f20,%f2 ! (5_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (5_0) dtmp0 = y_hi0 * y_hi0; + add %i5,stridez,%i5 ! pz += stridez + faddd %f60,%f20,%f62 ! (5_0) res0_lo = x0 + x_hi0; + + fmuld %f26,%f14,%f14 ! (2_0) dtmp2 = dd * dres; + fsubd DTWO,%f50,%f20 ! (3_0) dtmp0 = DTWO - dtmp0; + + fmuld %f42,%f28,%f60 ! (1_0) dtmp0 = res0_hi * res0; + faddd %f52,%f54,%f50 ! (5_0) dtmp1 = y0 + y_hi0; + + fmuld %f34,%f28,%f34 ! (1_0) dtmp1 = res0_lo * res0; + fsubd %f52,%f54,%f54 ! (5_0) y_lo0 = y0 - y_hi0; + + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i4 + + sllx %g1,32,%g1 ! (7_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp15] ! (7_0) *(long long*)&scl0 = ll; + ba .cont60 + add TBL,TBL_SHIFT+24,%i3 + + .align 16 +.update59: + cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000 + bge,pn %icc,.cont59a ! (0_0) if ( hy0 < 0x00100000 ) + + cmp counter,8 + ble,a 1f + nop + + sub counter,8,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + mov 8,counter + stx %i3,[%fp+tmp_py] +1: + fmuld %f20,%f20,%f0 ! (5_0) res0_hi = x_hi0 * x_hi0; + stx %g1,[%fp+dtmp14] ! (6_0) *(long long*)&scl0 = ll; + fsubd %f60,%f20,%f2 ! (5_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (5_0) dtmp0 = y_hi0 * y_hi0; + add %i5,stridez,%i5 ! pz += stridez + faddd %f60,%f20,%f62 ! (5_0) res0_lo = x0 + x_hi0; + + fmuld %f26,%f14,%f14 ! (2_0) dtmp2 = dd * dres; + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i4 + fsubd DTWO,%f50,%f20 ! 
(3_0) dtmp0 = DTWO - dtmp0; + + ba .cont59b + add TBL,TBL_SHIFT+24,%i3 + + .align 16 +.exit: + ret + restore + SET_SIZE(__vrhypot) + diff --git a/usr/src/lib/libmvec/common/vis/__vrhypotf.S b/usr/src/lib/libmvec/common/vis/__vrhypotf.S new file mode 100644 index 0000000000..b8b01da025 --- /dev/null +++ b/usr/src/lib/libmvec/common/vis/__vrhypotf.S @@ -0,0 +1,1519 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .file "__vrhypotf.S" + +#include "libm.h" + + RO_DATA + .align 64 +.CONST_TBL: +! i = [0,63] +! TBL[2*i+0] = 1.0 / (*(double*)&(0x3ff0000000000000LL + (i << 46))); +! TBL[2*i+1] = (double)(0.5/sqrtl(2) / sqrtl(*(double*)&(0x3ff0000000000000LL + (i << 46)))); +! TBL[128+2*i+0] = 1.0 / (*(double*)&(0x3ff0000000000000LL + (i << 46))); +! 
TBL[128+2*i+1] = (double)(0.25 / sqrtl(*(double*)&(0x3ff0000000000000LL + (i << 46)))); + + .word 0x3ff00000, 0x00000000, 0x3fd6a09e, 0x667f3bcd, + .word 0x3fef81f8, 0x1f81f820, 0x3fd673e3, 0x2ef63a03, + .word 0x3fef07c1, 0xf07c1f08, 0x3fd6482d, 0x37a5a3d2, + .word 0x3fee9131, 0xabf0b767, 0x3fd61d72, 0xb7978671, + .word 0x3fee1e1e, 0x1e1e1e1e, 0x3fd5f3aa, 0x673fa911, + .word 0x3fedae60, 0x76b981db, 0x3fd5cacb, 0x7802f342, + .word 0x3fed41d4, 0x1d41d41d, 0x3fd5a2cd, 0x8c69d61a, + .word 0x3fecd856, 0x89039b0b, 0x3fd57ba8, 0xb0ee01b9, + .word 0x3fec71c7, 0x1c71c71c, 0x3fd55555, 0x55555555, + .word 0x3fec0e07, 0x0381c0e0, 0x3fd52fcc, 0x468d6b54, + .word 0x3febacf9, 0x14c1bad0, 0x3fd50b06, 0xa8fc6b70, + .word 0x3feb4e81, 0xb4e81b4f, 0x3fd4e6fd, 0xf33cf032, + .word 0x3feaf286, 0xbca1af28, 0x3fd4c3ab, 0xe93bcf74, + .word 0x3fea98ef, 0x606a63be, 0x3fd4a10a, 0x97af7b92, + .word 0x3fea41a4, 0x1a41a41a, 0x3fd47f14, 0x4fe17f9f, + .word 0x3fe9ec8e, 0x951033d9, 0x3fd45dc3, 0xa3c34fa3, + .word 0x3fe99999, 0x9999999a, 0x3fd43d13, 0x6248490f, + .word 0x3fe948b0, 0xfcd6e9e0, 0x3fd41cfe, 0x93ff5199, + .word 0x3fe8f9c1, 0x8f9c18fa, 0x3fd3fd80, 0x77e70577, + .word 0x3fe8acb9, 0x0f6bf3aa, 0x3fd3de94, 0x8077db58, + .word 0x3fe86186, 0x18618618, 0x3fd3c036, 0x50e00e03, + .word 0x3fe81818, 0x18181818, 0x3fd3a261, 0xba6d7a37, + .word 0x3fe7d05f, 0x417d05f4, 0x3fd38512, 0xba21f51e, + .word 0x3fe78a4c, 0x8178a4c8, 0x3fd36845, 0x766eec92, + .word 0x3fe745d1, 0x745d1746, 0x3fd34bf6, 0x3d156826, + .word 0x3fe702e0, 0x5c0b8170, 0x3fd33021, 0x8127c0e0, + .word 0x3fe6c16c, 0x16c16c17, 0x3fd314c3, 0xd92a9e91, + .word 0x3fe68168, 0x16816817, 0x3fd2f9d9, 0xfd52fd50, + .word 0x3fe642c8, 0x590b2164, 0x3fd2df60, 0xc5df2c9e, + .word 0x3fe60581, 0x60581606, 0x3fd2c555, 0x2988e428, + .word 0x3fe5c988, 0x2b931057, 0x3fd2abb4, 0x3c0eb0f4, + .word 0x3fe58ed2, 0x308158ed, 0x3fd2927b, 0x2cd320f5, + .word 0x3fe55555, 0x55555555, 0x3fd279a7, 0x4590331c, + .word 0x3fe51d07, 0xeae2f815, 0x3fd26135, 0xe91daf55, + 
.word 0x3fe4e5e0, 0xa72f0539, 0x3fd24924, 0x92492492, + .word 0x3fe4afd6, 0xa052bf5b, 0x3fd23170, 0xd2be638a, + .word 0x3fe47ae1, 0x47ae147b, 0x3fd21a18, 0x51ff630a, + .word 0x3fe446f8, 0x6562d9fb, 0x3fd20318, 0xcc6a8f5d, + .word 0x3fe41414, 0x14141414, 0x3fd1ec70, 0x124e98f9, + .word 0x3fe3e22c, 0xbce4a902, 0x3fd1d61c, 0x070ae7d3, + .word 0x3fe3b13b, 0x13b13b14, 0x3fd1c01a, 0xa03be896, + .word 0x3fe38138, 0x13813814, 0x3fd1aa69, 0xe4f2777f, + .word 0x3fe3521c, 0xfb2b78c1, 0x3fd19507, 0xecf5b9e9, + .word 0x3fe323e3, 0x4a2b10bf, 0x3fd17ff2, 0xe00ec3ee, + .word 0x3fe2f684, 0xbda12f68, 0x3fd16b28, 0xf55d72d4, + .word 0x3fe2c9fb, 0x4d812ca0, 0x3fd156a8, 0x72b5ef62, + .word 0x3fe29e41, 0x29e4129e, 0x3fd1426f, 0xac0654db, + .word 0x3fe27350, 0xb8812735, 0x3fd12e7d, 0x02c40253, + .word 0x3fe24924, 0x92492492, 0x3fd11ace, 0xe560242a, + .word 0x3fe21fb7, 0x8121fb78, 0x3fd10763, 0xcec30b26, + .word 0x3fe1f704, 0x7dc11f70, 0x3fd0f43a, 0x45cdedad, + .word 0x3fe1cf06, 0xada2811d, 0x3fd0e150, 0xdce2b60c, + .word 0x3fe1a7b9, 0x611a7b96, 0x3fd0cea6, 0x317186dc, + .word 0x3fe18118, 0x11811812, 0x3fd0bc38, 0xeb8ba412, + .word 0x3fe15b1e, 0x5f75270d, 0x3fd0aa07, 0xbd7b7488, + .word 0x3fe135c8, 0x1135c811, 0x3fd09811, 0x63615499, + .word 0x3fe11111, 0x11111111, 0x3fd08654, 0xa2d4f6db, + .word 0x3fe0ecf5, 0x6be69c90, 0x3fd074d0, 0x4a8b1438, + .word 0x3fe0c971, 0x4fbcda3b, 0x3fd06383, 0x31ff307a, + .word 0x3fe0a681, 0x0a6810a7, 0x3fd0526c, 0x39213bfa, + .word 0x3fe08421, 0x08421084, 0x3fd0418a, 0x4806de7d, + .word 0x3fe0624d, 0xd2f1a9fc, 0x3fd030dc, 0x4ea03a72, + .word 0x3fe04104, 0x10410410, 0x3fd02061, 0x446ffa9a, + .word 0x3fe02040, 0x81020408, 0x3fd01018, 0x28467ee9, + .word 0x3ff00000, 0x00000000, 0x3fd00000, 0x00000000, + .word 0x3fef81f8, 0x1f81f820, 0x3fcfc0bd, 0x88a0f1d9, + .word 0x3fef07c1, 0xf07c1f08, 0x3fcf82ec, 0x882c0f9b, + .word 0x3fee9131, 0xabf0b767, 0x3fcf467f, 0x2814b0cc, + .word 0x3fee1e1e, 0x1e1e1e1e, 0x3fcf0b68, 0x48d2af1c, + .word 0x3fedae60, 0x76b981db, 
0x3fced19b, 0x75e78957, + .word 0x3fed41d4, 0x1d41d41d, 0x3fce990c, 0xdad55ed2, + .word 0x3fecd856, 0x89039b0b, 0x3fce61b1, 0x38f18adc, + .word 0x3fec71c7, 0x1c71c71c, 0x3fce2b7d, 0xddfefa66, + .word 0x3fec0e07, 0x0381c0e0, 0x3fcdf668, 0x9b7e6350, + .word 0x3febacf9, 0x14c1bad0, 0x3fcdc267, 0xbea45549, + .word 0x3feb4e81, 0xb4e81b4f, 0x3fcd8f72, 0x08e6b82d, + .word 0x3feaf286, 0xbca1af28, 0x3fcd5d7e, 0xa914b937, + .word 0x3fea98ef, 0x606a63be, 0x3fcd2c85, 0x34ed6d86, + .word 0x3fea41a4, 0x1a41a41a, 0x3fccfc7d, 0xa32a9213, + .word 0x3fe9ec8e, 0x951033d9, 0x3fcccd60, 0x45f5d358, + .word 0x3fe99999, 0x9999999a, 0x3fcc9f25, 0xc5bfedd9, + .word 0x3fe948b0, 0xfcd6e9e0, 0x3fcc71c7, 0x1c71c71c, + .word 0x3fe8f9c1, 0x8f9c18fa, 0x3fcc453d, 0x90f057a2, + .word 0x3fe8acb9, 0x0f6bf3aa, 0x3fcc1982, 0xb2ece47b, + .word 0x3fe86186, 0x18618618, 0x3fcbee90, 0x56fb9c39, + .word 0x3fe81818, 0x18181818, 0x3fcbc460, 0x92eb3118, + .word 0x3fe7d05f, 0x417d05f4, 0x3fcb9aed, 0xba588347, + .word 0x3fe78a4c, 0x8178a4c8, 0x3fcb7232, 0x5b79db11, + .word 0x3fe745d1, 0x745d1746, 0x3fcb4a29, 0x3c1d9550, + .word 0x3fe702e0, 0x5c0b8170, 0x3fcb22cd, 0x56d87d7e, + .word 0x3fe6c16c, 0x16c16c17, 0x3fcafc19, 0xd8606169, + .word 0x3fe68168, 0x16816817, 0x3fcad60a, 0x1d0fb394, + .word 0x3fe642c8, 0x590b2164, 0x3fcab099, 0xae8f539a, + .word 0x3fe60581, 0x60581606, 0x3fca8bc4, 0x41a3d02c, + .word 0x3fe5c988, 0x2b931057, 0x3fca6785, 0xb41bacf7, + .word 0x3fe58ed2, 0x308158ed, 0x3fca43da, 0x0adc6899, + .word 0x3fe55555, 0x55555555, 0x3fca20bd, 0x700c2c3e, + .word 0x3fe51d07, 0xeae2f815, 0x3fc9fe2c, 0x315637ee, + .word 0x3fe4e5e0, 0xa72f0539, 0x3fc9dc22, 0xbe484458, + .word 0x3fe4afd6, 0xa052bf5b, 0x3fc9ba9d, 0xa6c73588, + .word 0x3fe47ae1, 0x47ae147b, 0x3fc99999, 0x9999999a, + .word 0x3fe446f8, 0x6562d9fb, 0x3fc97913, 0x63068b54, + .word 0x3fe41414, 0x14141414, 0x3fc95907, 0xeb87ab44, + .word 0x3fe3e22c, 0xbce4a902, 0x3fc93974, 0x368cfa31, + .word 0x3fe3b13b, 0x13b13b14, 0x3fc91a55, 0x6151761c, + .word 
0x3fe38138, 0x13813814, 0x3fc8fba8, 0xa1bf6f96, + .word 0x3fe3521c, 0xfb2b78c1, 0x3fc8dd6b, 0x4563a009, + .word 0x3fe323e3, 0x4a2b10bf, 0x3fc8bf9a, 0xb06e1af3, + .word 0x3fe2f684, 0xbda12f68, 0x3fc8a234, 0x5cc04426, + .word 0x3fe2c9fb, 0x4d812ca0, 0x3fc88535, 0xd90703c6, + .word 0x3fe29e41, 0x29e4129e, 0x3fc8689c, 0xc7e07e7d, + .word 0x3fe27350, 0xb8812735, 0x3fc84c66, 0xdf0ca4c2, + .word 0x3fe24924, 0x92492492, 0x3fc83091, 0xe6a7f7e7, + .word 0x3fe21fb7, 0x8121fb78, 0x3fc8151b, 0xb86fee1d, + .word 0x3fe1f704, 0x7dc11f70, 0x3fc7fa02, 0x3f1068d1, + .word 0x3fe1cf06, 0xada2811d, 0x3fc7df43, 0x7579b9b5, + .word 0x3fe1a7b9, 0x611a7b96, 0x3fc7c4dd, 0x663ebb88, + .word 0x3fe18118, 0x11811812, 0x3fc7aace, 0x2afa8b72, + .word 0x3fe15b1e, 0x5f75270d, 0x3fc79113, 0xebbd7729, + .word 0x3fe135c8, 0x1135c811, 0x3fc777ac, 0xde80baea, + .word 0x3fe11111, 0x11111111, 0x3fc75e97, 0x46a0b098, + .word 0x3fe0ecf5, 0x6be69c90, 0x3fc745d1, 0x745d1746, + .word 0x3fe0c971, 0x4fbcda3b, 0x3fc72d59, 0xc45f1fc5, + .word 0x3fe0a681, 0x0a6810a7, 0x3fc7152e, 0x9f44f01f, + .word 0x3fe08421, 0x08421084, 0x3fc6fd4e, 0x79325467, + .word 0x3fe0624d, 0xd2f1a9fc, 0x3fc6e5b7, 0xd16657e1, + .word 0x3fe04104, 0x10410410, 0x3fc6ce69, 0x31d5858d, + .word 0x3fe02040, 0x81020408, 0x3fc6b761, 0x2ec892f6, + + .word 0x000fffff, 0xffffffff ! DC0 + .word 0x3ff00000, 0 ! DC1 + .word 0x7fffc000, 0 ! DC2 + .word 0x7fe00000, 0 ! DA0 + .word 0x60000000, 0 ! DA1 + .word 0x80808080, 0x3f800000 ! SCALE , FONE = 1.0f + .word 0x3fefffff, 0xfee7f18f ! KA0 = 9.99999997962321453275e-01 + .word 0xbfdfffff, 0xfe07e52f ! KA1 = -4.99999998166077580600e-01 + .word 0x3fd80118, 0x0ca296d9 ! KA2 = 3.75066768969515586277e-01 + .word 0xbfd400fc, 0x0bbb8e78 ! 
KA3 = -3.12560092408808548438e-01 + +#define _0x7f800000 %o0 /* %o0 is set to 0x7f800000 at entry (sethi) and used for the ax/ay >= 0x7f800000 tests */ +#define _0x7fffffff %o7 /* %o7 is set to 0x7fffffff at entry (sethi 0x7ffffc00, then add 1023); used to mask off the sign bit of ax/ay */ +#define TBL %l2 /* address of .CONST_TBL, established by PIC_SET at entry */ + +#define TBL_SHIFT 2048 /* byte offset from TBL of the scalar constants DC0..KA3, which follow the 128 16-byte lookup entries */ + +#define stridex %l3 /* input strides; the entry code shifts them left by 2 (element count to byte offset) */ +#define stridey %l4 +#define stridez %l5 /* output stride, likewise shifted left by 2 at entry */ +#define counter %i0 /* number of elements still to process; the main loop exits when it is <= 0 */ + +#define DA0 %f52 +#define DA1 %f44 +#define SCALE %f6 /* loaded with ldd from the "SCALE , FONE" constant pair; presumably %f7 then holds FONE = 1.0f - confirm */ + +#define DC0 %f46 +#define DC1 %f8 +#define FZERO %f9 /* odd half of the DC1 pair (%f8/%f9); the second word of DC1 in the table is 0, so this presumably reads as 0.0f - confirm */ +#define DC2 %f50 + +#define KA3 %f56 +#define KA2 %f58 +#define KA1 %f60 +#define KA0 %f54 + +#define tmp_counter STACK_BIAS-0x04 /* 4-byte slot (accessed with st/ld) holding the saved element count */ +#define tmp_px STACK_BIAS-0x20 /* 8-byte slot (accessed with stx/ldx) holding the saved px pointer */ +#define tmp_py STACK_BIAS-0x18 /* 8-byte slot (accessed with stx/ldx) holding the saved py pointer */ + +#define ftmp0 STACK_BIAS-0x10 +#define ftmp1 STACK_BIAS-0x0c +#define ftmp2 STACK_BIAS-0x10 +#define ftmp3 STACK_BIAS-0x0c +#define ftmp4 STACK_BIAS-0x08 /* note: ftmp2 and ftmp3 name the same stack words as ftmp0 and ftmp1 (-0x10 and -0x0c); only three distinct 4-byte scratch words exist */ + +! sizeof temp storage - must be a multiple of 16 for V9 +#define tmps 0x20 /* 0x20 bytes reaches the lowest slot defined above, tmp_px at STACK_BIAS-0x20 */ + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +! !!!!! algorithm !!!!! +! x0 = *px; +! ax = *(int*)px; +! +! y0 = *py; +! ay = *(int*)py; +! +! ax &= 0x7fffffff; +! ay &= 0x7fffffff; +! +! px += stridex; +! py += stridey; +! +! if ( ax >= 0x7f800000 || ay >= 0x7f800000 ) +! { +! *pz = fabsf(x0) * fabsf(y0); +! if( ax == 0x7f800000 ) *pz = 0.0f; +! else if( ay == 0x7f800000 ) *pz = 0.0f; +! pz += stridez; +! continue; +! } +! +! if ( ay == 0 ) +! { +! if ( ax == 0 ) +! { +! *pz = 1.0f / 0.0f; +! pz += stridez; +! continue; +! } +! } +! +! hyp0 = x0 * (double)x0; +! dtmp0 = y0 * (double)y0; +! hyp0 += dtmp0; +! +! ibase0 = ((int*)&hyp0)[0]; +! +! dbase0 = vis_fand(hyp0,DA0); +! dbase0 = vis_fmul8x16(SCALE, dbase0); +! dbase0 = vis_fpsub32(DA1,dbase0); +! +! hyp0 = vis_fand(hyp0,DC0); +! hyp0 = vis_for(hyp0,DC1); +! h_hi0 = vis_fand(hyp0,DC2); +! +! ibase0 >>= 10; +! si0 = ibase0 & 0x7f0; +! xx0 = ((double*)((char*)TBL + si0))[0]; +! +! dtmp1 = hyp0 - h_hi0; +! xx0 = dtmp1 * xx0; +! res0 = ((double*)((char*)arr + si0))[1]; /* NOTE(review): 'arr' presumably denotes the same table as TBL in the xx0 line above - confirm */ +! dtmp2 = KA3 * xx0; +! dtmp2 += KA2; +! dtmp2 *= xx0; +! dtmp2 += KA1; +! dtmp2 *= xx0; +! dtmp2 += KA0; +! res0 *= dtmp2; +! res0 *= dbase0; +! ftmp0 = (float)res0; +! 
*pz = ftmp0; +! pz += stridez; +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + ENTRY(__vrhypotf) + save %sp,-SA(MINFRAME)-tmps,%sp + PIC_SETUP(l7) + PIC_SET(l7,.CONST_TBL,l2) + wr %g0,0x82,%asi + +#ifdef __sparcv9 + ldx [%fp+STACK_BIAS+176],stridez +#else + ld [%fp+STACK_BIAS+92],stridez +#endif + + stx %i1,[%fp+tmp_px] + sll %i2,2,stridex + + stx %i3,[%fp+tmp_py] + sll %i4,2,stridey + + st %i0,[%fp+tmp_counter] + sll stridez,2,stridez + mov %i5,%o1 + + ldd [TBL+TBL_SHIFT],DC0 + ldd [TBL+TBL_SHIFT+8],DC1 + ldd [TBL+TBL_SHIFT+16],DC2 + ldd [TBL+TBL_SHIFT+24],DA0 + ldd [TBL+TBL_SHIFT+32],DA1 + ldd [TBL+TBL_SHIFT+40],SCALE + ldd [TBL+TBL_SHIFT+48],KA0 + + ldd [TBL+TBL_SHIFT+56],KA1 + sethi %hi(0x7f800000),%o0 + + ldd [TBL+TBL_SHIFT+64],KA2 + sethi %hi(0x7ffffc00),%o7 + + ldd [TBL+TBL_SHIFT+72],KA3 + add %o7,1023,%o7 + +.begin: + ld [%fp+tmp_counter],counter + ldx [%fp+tmp_px],%o4 + ldx [%fp+tmp_py],%i2 + st %g0,[%fp+tmp_counter] +.begin1: + cmp counter,0 + ble,pn %icc,.exit + nop + + lda [%i2]0x82,%l6 ! (3_0) ay = *(int*)py; + + lda [%o4]0x82,%i5 ! (3_0) ax = *(int*)px; + + lda [%i2]0x82,%f2 ! (3_0) y0 = *py; + and %l6,_0x7fffffff,%l6 ! (3_0) ay &= 0x7fffffff; + + and %i5,_0x7fffffff,%i5 ! (3_0) ax &= 0x7fffffff; + cmp %l6,_0x7f800000 ! (3_0) ay ? 0x7f800000 + bge,pn %icc,.spec0 ! (3_0) if ( ay >= 0x7f800000 ) + lda [%o4]0x82,%f4 ! (3_0) x0 = *px; + + cmp %i5,_0x7f800000 ! (3_0) ax ? 0x7f800000 + bge,pn %icc,.spec0 ! (3_0) if ( ax >= 0x7f800000 ) + nop + + cmp %l6,0 ! (3_0) + be,pn %icc,.spec1 ! (3_0) if ( ay == 0 ) + fsmuld %f4,%f4,%f36 ! (3_0) hyp0 = x0 * (double)x0; +.cont_spec1: + lda [%i2+stridey]0x82,%l6 ! (4_0) ay = *(int*)py; + + fsmuld %f2,%f2,%f62 ! (3_0) dtmp0 = y0 * (double)y0; + lda [stridex+%o4]0x82,%i5 ! (4_0) ax = *(int*)px; + + add %o4,stridex,%l0 ! px += stridex + + add %i2,stridey,%i2 ! py += stridey + and %l6,_0x7fffffff,%l6 ! (4_0) ay &= 0x7fffffff; + + and %i5,_0x7fffffff,%i5 ! 
(4_0) ax &= 0x7fffffff; + lda [%i2]0x82,%f2 ! (4_0) y0 = *py; + + faddd %f36,%f62,%f20 ! (3_0) hyp0 += dtmp0; + cmp %l6,_0x7f800000 ! (4_0) ay ? 0x7f800000 + + bge,pn %icc,.update0 ! (4_0) if ( ay >= 0x7f800000 ) + lda [stridex+%o4]0x82,%f4 ! (4_0) x0 = *px; +.cont0: + cmp %i5,_0x7f800000 ! (4_0) ax ? 0x7f800000 + bge,pn %icc,.update1 ! (4_0) if ( ax >= 0x7f800000 ) + st %f20,[%fp+ftmp4] ! (3_0) ibase0 = ((int*)&hyp0)[0]; +.cont1: + cmp %l6,0 ! (4_1) ay ? 0 + be,pn %icc,.update2 ! (4_1) if ( ay == 0 ) + fsmuld %f4,%f4,%f38 ! (4_1) hyp0 = x0 * (double)x0; +.cont2: + lda [%i2+stridey]0x82,%l6 ! (0_0) ay = *(int*)py; + + fsmuld %f2,%f2,%f62 ! (4_1) dtmp0 = y0 * (double)y0; + lda [%l0+stridex]0x82,%i5 ! (0_0) ax = *(int*)px; + + add %l0,stridex,%i1 ! px += stridex + + add %i2,stridey,%i2 ! py += stridey + and %l6,_0x7fffffff,%l6 ! (0_0) ay &= 0x7fffffff; + + and %i5,_0x7fffffff,%i5 ! (0_0) ax &= 0x7fffffff; + lda [%i2]0x82,%f2 ! (0_0) y0 = *py; + + cmp %l6,_0x7f800000 ! (0_0) ay ? 0x7f800000 + bge,pn %icc,.update3 ! (0_0) if ( ay >= 0x7f800000 ) + faddd %f38,%f62,%f12 ! (4_1) hyp0 += dtmp0; +.cont3: + lda [%i1]0x82,%f4 ! (0_0) x0 = *px; + + cmp %i5,_0x7f800000 ! (0_0) ax ? 0x7f800000 + bge,pn %icc,.update4 ! (0_0) if ( ax >= 0x7f800000 ) + st %f12,[%fp+ftmp0] ! (4_1) ibase0 = ((int*)&hyp0)[0]; +.cont4: + cmp %l6,0 ! (0_0) ay ? 0 + be,pn %icc,.update5 ! (0_0) if ( ay == 0 ) + fsmuld %f4,%f4,%f38 ! (0_0) hyp0 = x0 * (double)x0; +.cont5: + lda [%i2+stridey]0x82,%l6 ! (1_0) ay = *(int*)py; + + fsmuld %f2,%f2,%f62 ! (0_0) dtmp0 = y0 * (double)y0; + lda [%i1+stridex]0x82,%i5 ! (1_0) ax = *(int*)px; + + add %i1,stridex,%g5 ! px += stridex + + add %i2,stridey,%o3 ! py += stridey + and %l6,_0x7fffffff,%l6 ! (1_0) ay &= 0x7fffffff; + fand %f20,DC0,%f30 ! (3_1) hyp0 = vis_fand(hyp0,DC0); + + and %i5,_0x7fffffff,%i5 ! (1_0) ax &= 0x7fffffff; + lda [%o3]0x82,%f2 ! (1_0) y0 = *py; + + faddd %f38,%f62,%f14 ! (0_0) hyp0 += dtmp0; + cmp %l6,_0x7f800000 ! (1_0) ay ? 
0x7f800000 + + lda [%g5]0x82,%f4 ! (1_0) x0 = *px; + bge,pn %icc,.update6 ! (1_0) if ( ay >= 0x7f800000 ) + for %f30,DC1,%f28 ! (3_1) hyp0 = vis_for(hyp0,DC1); +.cont6: + cmp %i5,_0x7f800000 ! (1_0) ax ? 0x7f800000 + bge,pn %icc,.update7 ! (1_0) if ( ax >= 0x7f800000 ) + ld [%fp+ftmp4],%l1 ! (3_1) ibase0 = ((int*)&hyp0)[0]; +.cont7: + st %f14,[%fp+ftmp1] ! (0_0) ibase0 = ((int*)&hyp0)[0]; + + cmp %l6,0 ! (1_0) ay ? 0 + be,pn %icc,.update8 ! (1_0) if ( ay == 0 ) + fand %f28,DC2,%f30 ! (3_1) h_hi0 = vis_fand(hyp0,DC2); +.cont8: + fsmuld %f4,%f4,%f38 ! (1_0) hyp0 = x0 * (double)x0; + sra %l1,10,%o5 ! (3_1) ibase0 >>= 10; + + and %o5,2032,%o4 ! (3_1) si0 = ibase0 & 0x7f0; + lda [%o3+stridey]0x82,%l6 ! (2_0) ay = *(int*)py; + + fsmuld %f2,%f2,%f62 ! (1_0) dtmp0 = y0 * (double)y0; + add %o4,TBL,%l7 ! (3_1) (char*)TBL + si0 + lda [stridex+%g5]0x82,%i5 ! (2_0) ax = *(int*)px; + fsubd %f28,%f30,%f28 ! (3_1) dtmp1 = hyp0 - h_hi0; + + add %g5,stridex,%i4 ! px += stridex + ldd [TBL+%o4],%f42 ! (3_1) xx0 = ((double*)((char*)TBL + si0))[0]; + + and %l6,_0x7fffffff,%l6 ! (2_0) ay &= 0x7fffffff; + add %o3,stridey,%i2 ! py += stridey + fand %f12,DC0,%f30 ! (4_1) hyp0 = vis_fand(hyp0,DC0); + + and %i5,_0x7fffffff,%i5 ! (2_0) ax &= 0x7fffffff; + lda [%i2]0x82,%f2 ! (2_0) y0 = *py; + + faddd %f38,%f62,%f16 ! (1_0) hyp0 += dtmp0; + cmp %l6,_0x7f800000 ! (2_0) ay ? 0x7f800000 + fmuld %f28,%f42,%f26 ! (3_1) xx0 = dtmp1 * xx0; + + lda [stridex+%g5]0x82,%f4 ! (2_0) x0 = *px; + bge,pn %icc,.update9 ! (2_0) if ( ay >= 0x7f800000 + for %f30,DC1,%f28 ! (4_1) hyp0 = vis_for(hyp0,DC1); +.cont9: + cmp %i5,_0x7f800000 ! (2_0) ax ? 0x7f800000 + bge,pn %icc,.update10 ! (2_0) if ( ax >= 0x7f800000 ) + ld [%fp+ftmp0],%i3 ! (4_1) ibase0 = ((int*)&hyp0)[0]; +.cont10: + st %f16,[%fp+ftmp2] ! (1_0) ibase0 = ((int*)&hyp0)[0]; + + fmuld KA3,%f26,%f34 ! (3_1) dtmp2 = KA3 * xx0; + cmp %l6,0 ! (2_0) ay ? 0 + be,pn %icc,.update11 ! (2_0) if ( ay == 0 ) + fand %f28,DC2,%f30 ! 
(4_1) h_hi0 = vis_fand(hyp0,DC2); +.cont11: + fsmuld %f4,%f4,%f36 ! (2_0) hyp0 = x0 * (double)x0; + sra %i3,10,%i3 ! (4_1) ibase0 >>= 10; + + and %i3,2032,%i3 ! (4_1) si0 = ibase0 & 0x7f0; + lda [%i2+stridey]0x82,%l6 ! (3_0) ay = *(int*)py; + + fsmuld %f2,%f2,%f62 ! (2_0) dtmp0 = y0 * (double)y0; + add %i3,TBL,%i3 ! (4_1) (char*)TBL + si0 + lda [%i4+stridex]0x82,%i5 ! (3_0) ax = *(int*)px; + fsubd %f28,%f30,%f28 ! (4_1) dtmp1 = hyp0 - h_hi0; + + add %i4,stridex,%o4 ! px += stridex + ldd [%i3],%f42 ! (4_1) xx0 = ((double*)((char*)TBL + si0))[0]; + faddd %f34,KA2,%f10 ! (3_1) dtmp2 += KA2; + + add %i2,stridey,%i2 ! py += stridey + and %l6,_0x7fffffff,%l6 ! (3_0) ay &= 0x7fffffff; + fand %f14,DC0,%f30 ! (0_0) hyp0 = vis_fand(hyp0,DC0); + + and %i5,_0x7fffffff,%i5 ! (3_0) ax &= 0x7fffffff; + lda [%i2]0x82,%f2 ! (3_0) y0 = *py; + + faddd %f36,%f62,%f18 ! (2_0) hyp0 += dtmp0; + cmp %l6,_0x7f800000 ! (3_0) ay ? 0x7f800000 + fmuld %f28,%f42,%f32 ! (4_1) xx0 = dtmp1 * xx0; + + fmuld %f10,%f26,%f10 ! (3_1) dtmp2 *= xx0; + lda [%o4]0x82,%f4 ! (3_0) x0 = *px; + bge,pn %icc,.update12 ! (3_0) if ( ay >= 0x7f800000 ) + for %f30,DC1,%f28 ! (0_0) hyp0 = vis_for(hyp0,DC1); +.cont12: + cmp %i5,_0x7f800000 ! (3_0) ax ? 0x7f800000 + bge,pn %icc,.update13 ! (3_0) if ( ax >= 0x7f800000 ) + ld [%fp+ftmp1],%i1 ! (0_0) ibase0 = ((int*)&hyp0)[0]; +.cont13: + st %f18,[%fp+ftmp3] ! (2_0) ibase0 = ((int*)&hyp0)[0]; + + fmuld KA3,%f32,%f34 ! (4_1) dtmp2 = KA3 * xx0; + cmp %l6,0 ! (3_0) + be,pn %icc,.update14 ! (3_0) if ( ay == 0 ) + fand %f28,DC2,%f30 ! (0_0) h_hi0 = vis_fand(hyp0,DC2); +.cont14: + fsmuld %f4,%f4,%f36 ! (3_0) hyp0 = x0 * (double)x0; + sra %i1,10,%l1 ! (0_0) ibase0 >>= 10; + faddd %f10,KA1,%f40 ! (3_1) dtmp2 += KA1; + + and %l1,2032,%o5 ! (0_0) si0 = ibase0 & 0x7f0; + lda [%i2+stridey]0x82,%l6 ! (4_0) ay = *(int*)py; + + fsmuld %f2,%f2,%f62 ! (3_0) dtmp0 = y0 * (double)y0; + add %o5,TBL,%l1 ! (0_0) (char*)TBL + si0 + lda [stridex+%o4]0x82,%i5 ! 
(4_0) ax = *(int*)px; + fsubd %f28,%f30,%f28 ! (0_0) dtmp1 = hyp0 - h_hi0; + + add %o4,stridex,%l0 ! px += stridex + ldd [TBL+%o5],%f42 ! (0_0) xx0 = ((double*)((char*)TBL + si0))[0]; + faddd %f34,KA2,%f10 ! (4_1) dtmp2 += KA2; + + fmuld %f40,%f26,%f40 ! (3_1) dtmp2 *= xx0; + add %i2,stridey,%i2 ! py += stridey + and %l6,_0x7fffffff,%l6 ! (4_0) ay &= 0x7fffffff; + fand %f16,DC0,%f30 ! (1_0) hyp0 = vis_fand(hyp0,DC0); + + and %i5,_0x7fffffff,%i5 ! (4_0) ax &= 0x7fffffff; + lda [%i2]0x82,%f2 ! (4_0) y0 = *py; + fand %f20,DA0,%f24 ! (3_1) dbase0 = vis_fand(hyp0,DA0); + + faddd %f36,%f62,%f20 ! (3_0) hyp0 += dtmp0; + cmp %l6,_0x7f800000 ! (4_0) ay ? 0x7f800000 + ldd [%l7+8],%f36 ! (3_1) res0 = ((double*)((char*)arr + si0))[1]; + fmuld %f28,%f42,%f26 ! (0_0) xx0 = dtmp1 * xx0; + + fmuld %f10,%f32,%f10 ! (4_1) dtmp2 *= xx0; + lda [stridex+%o4]0x82,%f4 ! (4_0) x0 = *px; + bge,pn %icc,.update15 ! (4_0) if ( ay >= 0x7f800000 ) + for %f30,DC1,%f28 ! (1_0) hyp0 = vis_for(hyp0,DC1); +.cont15: + fmul8x16 SCALE,%f24,%f24 ! (3_1) dbase0 = vis_fmul8x16(SCALE, dbase0); + cmp %i5,_0x7f800000 ! (4_0) ax ? 0x7f800000 + ld [%fp+ftmp2],%i1 ! (1_0) ibase0 = ((int*)&hyp0)[0]; + faddd %f40,KA0,%f62 ! (3_1) dtmp2 += KA0; + + bge,pn %icc,.update16 ! (4_0) if ( ax >= 0x7f800000 ) + st %f20,[%fp+ftmp4] ! (3_0) ibase0 = ((int*)&hyp0)[0]; +.cont16: + fmuld KA3,%f26,%f34 ! (0_0) dtmp2 = KA3 * xx0; + fand %f28,DC2,%f30 ! (1_0) h_hi0 = vis_fand(hyp0,DC2); + + mov %o1,%i4 + cmp counter,5 + bl,pn %icc,.tail + nop + + ba .main_loop + sub counter,5,counter + + .align 16 +.main_loop: + fsmuld %f4,%f4,%f38 ! (4_1) hyp0 = x0 * (double)x0; + sra %i1,10,%o2 ! (1_1) ibase0 >>= 10; + cmp %l6,0 ! (4_1) ay ? 0 + faddd %f10,KA1,%f40 ! (4_2) dtmp2 += KA1; + + fmuld %f36,%f62,%f36 ! (3_2) res0 *= dtmp2; + and %o2,2032,%o2 ! (1_1) si0 = ibase0 & 0x7f0; + lda [%i2+stridey]0x82,%l6 ! (0_0) ay = *(int*)py; + fpsub32 DA1,%f24,%f24 ! (3_2) dbase0 = vis_fpsub32(DA1,dbase0); + + fsmuld %f2,%f2,%f62 ! 
(4_1) dtmp0 = y0 * (double)y0; + add %o2,TBL,%o2 ! (1_1) (char*)TBL + si0 + lda [%l0+stridex]0x82,%o1 ! (0_0) ax = *(int*)px; + fsubd %f28,%f30,%f28 ! (1_1) dtmp1 = hyp0 - h_hi0; + + add %l0,stridex,%i1 ! px += stridex + ldd [%o2],%f42 ! (1_1) xx0 = ((double*)((char*)TBL + si0))[0]; + be,pn %icc,.update17 ! (4_1) if ( ay == 0 ) + faddd %f34,KA2,%f10 ! (0_1) dtmp2 += KA2; +.cont17: + fmuld %f40,%f32,%f40 ! (4_2) dtmp2 *= xx0; + add %i2,stridey,%i2 ! py += stridey + and %l6,_0x7fffffff,%l6 ! (0_0) ay &= 0x7fffffff; + fand %f18,DC0,%f30 ! (2_1) hyp0 = vis_fand(hyp0,DC0); + + fmuld %f36,%f24,%f32 ! (3_2) res0 *= dbase0; + and %o1,_0x7fffffff,%o1 ! (0_0) ax &= 0x7fffffff; + lda [%i2]0x82,%f2 ! (0_0) y0 = *py; + fand %f12,DA0,%f24 ! (4_2) dbase0 = vis_fand(hyp0,DA0); + + faddd %f38,%f62,%f12 ! (4_1) hyp0 += dtmp0; + cmp %l6,_0x7f800000 ! (0_0) ay ? 0x7f800000 + ldd [%i3+8],%f62 ! (4_2) res0 = ((double*)((char*)arr + si0))[1]; + fmuld %f28,%f42,%f36 ! (1_1) xx0 = dtmp1 * xx0; + + fmuld %f10,%f26,%f10 ! (0_1) dtmp2 *= xx0; + lda [%i1]0x82,%f4 ! (0_0) x0 = *px; + bge,pn %icc,.update18 ! (0_0) if ( ay >= 0x7f800000 ) + for %f30,DC1,%f28 ! (2_1) hyp0 = vis_for(hyp0,DC1); +.cont18: + fmul8x16 SCALE,%f24,%f24 ! (4_2) dbase0 = vis_fmul8x16(SCALE, dbase0); + cmp %o1,_0x7f800000 ! (0_0) ax ? 0x7f800000 + ld [%fp+ftmp3],%l0 ! (2_1) ibase0 = ((int*)&hyp0)[0]; + faddd %f40,KA0,%f42 ! (4_2) dtmp2 += KA0; + + add %i4,stridez,%i3 ! pz += stridez + st %f12,[%fp+ftmp0] ! (4_1) ibase0 = ((int*)&hyp0)[0]; + bge,pn %icc,.update19 ! (0_0) if ( ax >= 0x7f800000 ) + fdtos %f32,%f1 ! (3_2) ftmp0 = (float)res0; +.cont19: + fmuld KA3,%f36,%f34 ! (1_1) dtmp2 = KA3 * xx0; + cmp %l6,0 ! (0_0) ay ? 0 + st %f1,[%i4] ! (3_2) *pz = ftmp0; + fand %f28,DC2,%f30 ! (2_1) h_hi0 = vis_fand(hyp0,DC2); + + fsmuld %f4,%f4,%f38 ! (0_0) hyp0 = x0 * (double)x0; + sra %l0,10,%i4 ! (2_1) ibase0 >>= 10; + be,pn %icc,.update20 ! (0_0) if ( ay == 0 ) + faddd %f10,KA1,%f40 ! 
(0_1) dtmp2 += KA1; +.cont20: + fmuld %f62,%f42,%f32 ! (4_2) res0 *= dtmp2; + and %i4,2032,%g1 ! (2_1) si0 = ibase0 & 0x7f0; + lda [%i2+stridey]0x82,%l6 ! (1_0) ay = *(int*)py; + fpsub32 DA1,%f24,%f24 ! (4_2) dbase0 = vis_fpsub32(DA1,dbase0); + + fsmuld %f2,%f2,%f62 ! (0_0) dtmp0 = y0 * (double)y0; + add %g1,TBL,%l0 ! (2_1) (char*)TBL + si0 + lda [%i1+stridex]0x82,%i5 ! (1_0) ax = *(int*)px; + fsubd %f28,%f30,%f28 ! (2_1) dtmp1 = hyp0 - h_hi0; + + nop + add %i1,stridex,%g5 ! px += stridex + ldd [TBL+%g1],%f42 ! (2_1) xx0 = ((double*)((char*)TBL + si0))[0]; + faddd %f34,KA2,%f10 ! (1_1) dtmp2 += KA2; + + fmuld %f40,%f26,%f40 ! (0_1) dtmp2 *= xx0; + add %i2,stridey,%o3 ! py += stridey + and %l6,_0x7fffffff,%l6 ! (1_0) ay &= 0x7fffffff; + fand %f20,DC0,%f30 ! (3_1) hyp0 = vis_fand(hyp0,DC0); + + fmuld %f32,%f24,%f26 ! (4_2) res0 *= dbase0; + and %i5,_0x7fffffff,%i5 ! (1_0) ax &= 0x7fffffff; + lda [%o3]0x82,%f2 ! (1_0) y0 = *py; + fand %f14,DA0,%f24 ! (0_1) dbase0 = vis_fand(hyp0,DA0); + + faddd %f38,%f62,%f14 ! (0_0) hyp0 += dtmp0; + cmp %l6,_0x7f800000 ! (1_0) ay ? 0x7f800000 + ldd [%l1+8],%f62 ! (0_1) res0 = ((double*)((char*)arr + si0))[1]; + fmuld %f28,%f42,%f32 ! (2_1) xx0 = dtmp1 * xx0; + + fmuld %f10,%f36,%f10 ! (1_1) dtmp2 *= xx0; + lda [%g5]0x82,%f4 ! (1_0) x0 = *px; + bge,pn %icc,.update21 ! (1_0) if ( ay >= 0x7f800000 ) + for %f30,DC1,%f28 ! (3_1) hyp0 = vis_for(hyp0,DC1); +.cont21: + fmul8x16 SCALE,%f24,%f24 ! (0_1) dbase0 = vis_fmul8x16(SCALE, dbase0); + cmp %i5,_0x7f800000 ! (1_0) ax ? 0x7f800000 + ld [%fp+ftmp4],%l1 ! (3_1) ibase0 = ((int*)&hyp0)[0]; + faddd %f40,KA0,%f42 ! (0_1) dtmp2 += KA0 + + add %i3,stridez,%o1 ! pz += stridez + st %f14,[%fp+ftmp1] ! (0_0) ibase0 = ((int*)&hyp0)[0]; + bge,pn %icc,.update22 ! (1_0) if ( ax >= 0x7f800000 ) + fdtos %f26,%f1 ! (4_2) ftmp0 = (float)res0; +.cont22: + fmuld KA3,%f32,%f34 ! (2_1) dtmp2 = KA3 * xx0; + cmp %l6,0 ! (1_0) ay ? 0 + st %f1,[%i3] ! (4_2) *pz = ftmp0; + fand %f28,DC2,%f30 ! 
(3_1) h_hi0 = vis_fand(hyp0,DC2); + + fsmuld %f4,%f4,%f38 ! (1_0) hyp0 = x0 * (double)x0; + sra %l1,10,%o5 ! (3_1) ibase0 >>= 10; + be,pn %icc,.update23 ! (1_0) if ( ay == 0 ) + faddd %f10,KA1,%f40 ! (1_1) dtmp2 += KA1; +.cont23: + fmuld %f62,%f42,%f26 ! (0_1) res0 *= dtmp2; + and %o5,2032,%o4 ! (3_1) si0 = ibase0 & 0x7f0; + lda [%o3+stridey]0x82,%l6 ! (2_0) ay = *(int*)py; + fpsub32 DA1,%f24,%f24 ! (0_1) dbase0 = vis_fpsub32(DA1,dbase0); + + fsmuld %f2,%f2,%f62 ! (1_0) dtmp0 = y0 * (double)y0; + add %o4,TBL,%l7 ! (3_1) (char*)TBL + si0 + lda [stridex+%g5]0x82,%i5 ! (2_0) ax = *(int*)px; + fsubd %f28,%f30,%f28 ! (3_1) dtmp1 = hyp0 - h_hi0; + + nop + add %g5,stridex,%i4 ! px += stridex + ldd [TBL+%o4],%f42 ! (3_1) xx0 = ((double*)((char*)TBL + si0))[0]; + faddd %f34,KA2,%f10 ! (2_1) dtmp2 += KA2; + + fmuld %f40,%f36,%f40 ! (1_1) dtmp2 *= xx0; + and %l6,_0x7fffffff,%l6 ! (2_0) ay &= 0x7fffffff; + add %o3,stridey,%i2 ! py += stridey + fand %f12,DC0,%f30 ! (4_1) hyp0 = vis_fand(hyp0,DC0); + + fmuld %f26,%f24,%f36 ! (0_1) res0 *= dbase0; + and %i5,_0x7fffffff,%i5 ! (2_0) ax &= 0x7fffffff; + lda [%i2]0x82,%f2 ! (2_0) y0 = *py; + fand %f16,DA0,%f24 ! (1_1) dbase0 = vis_fand(hyp0,DA0); + + faddd %f38,%f62,%f16 ! (1_0) hyp0 += dtmp0; + cmp %l6,_0x7f800000 ! (2_0) ay ? 0x7f800000 + ldd [%o2+8],%f38 ! (1_1) res0 = ((double*)((char*)arr + si0))[1]; + fmuld %f28,%f42,%f26 ! (3_1) xx0 = dtmp1 * xx0; + + fmuld %f10,%f32,%f10 ! (2_1) dtmp2 *= xx0; + lda [stridex+%g5]0x82,%f4 ! (2_0) x0 = *px; + bge,pn %icc,.update24 ! (2_0) if ( ay >= 0x7f800000 + for %f30,DC1,%f28 ! (4_1) hyp0 = vis_for(hyp0,DC1); +.cont24: + fmul8x16 SCALE,%f24,%f24 ! (1_1) dbase0 = vis_fmul8x16(SCALE, dbase0); + cmp %i5,_0x7f800000 ! (2_0) ax ? 0x7f800000 + ld [%fp+ftmp0],%i3 ! (4_1) ibase0 = ((int*)&hyp0)[0]; + faddd %f40,KA0,%f62 ! (1_1) dtmp2 += KA0; + + add %o1,stridez,%g1 ! pz += stridez + st %f16,[%fp+ftmp2] ! (1_0) ibase0 = ((int*)&hyp0)[0]; + bge,pn %icc,.update25 ! 
(2_0) if ( ax >= 0x7f800000 ) + fdtos %f36,%f1 ! (0_1) ftmp0 = (float)res0; +.cont25: + fmuld KA3,%f26,%f34 ! (3_1) dtmp2 = KA3 * xx0; + cmp %l6,0 ! (2_0) ay ? 0 + st %f1,[%o1] ! (0_1) *pz = ftmp0; + fand %f28,DC2,%f30 ! (4_1) h_hi0 = vis_fand(hyp0,DC2); + + fsmuld %f4,%f4,%f36 ! (2_0) hyp0 = x0 * (double)x0; + sra %i3,10,%i3 ! (4_1) ibase0 >>= 10; + be,pn %icc,.update26 ! (2_0) if ( ay == 0 ) + faddd %f10,KA1,%f40 ! (2_1) dtmp2 += KA1; +.cont26: + fmuld %f38,%f62,%f38 ! (1_1) res0 *= dtmp2; + and %i3,2032,%i3 ! (4_1) si0 = ibase0 & 0x7f0; + lda [%i2+stridey]0x82,%l6 ! (3_0) ay = *(int*)py; + fpsub32 DA1,%f24,%f24 ! (1_1) dbase0 = vis_fpsub32(DA1,dbase0); + + fsmuld %f2,%f2,%f62 ! (2_0) dtmp0 = y0 * (double)y0; + add %i3,TBL,%i3 ! (4_1) (char*)TBL + si0 + lda [%i4+stridex]0x82,%i5 ! (3_0) ax = *(int*)px; + fsubd %f28,%f30,%f28 ! (4_1) dtmp1 = hyp0 - h_hi0; + + nop + add %i4,stridex,%o4 ! px += stridex + ldd [%i3],%f42 ! (4_1) xx0 = ((double*)((char*)TBL + si0))[0]; + faddd %f34,KA2,%f10 ! (3_1) dtmp2 += KA2; + + fmuld %f40,%f32,%f40 ! (2_1) dtmp2 *= xx0; + add %i2,stridey,%i2 ! py += stridey + and %l6,_0x7fffffff,%l6 ! (3_0) ay &= 0x7fffffff; + fand %f14,DC0,%f30 ! (0_0) hyp0 = vis_fand(hyp0,DC0); + + fmuld %f38,%f24,%f38 ! (1_1) res0 *= dbase0; + and %i5,_0x7fffffff,%i5 ! (3_0) ax &= 0x7fffffff; + lda [%i2]0x82,%f2 ! (3_0) y0 = *py; + fand %f18,DA0,%f24 ! (2_1) dbase0 = vis_fand(hyp0,DA0); + + faddd %f36,%f62,%f18 ! (2_0) hyp0 += dtmp0; + cmp %l6,_0x7f800000 ! (3_0) ay ? 0x7f800000 + ldd [%l0+8],%f62 ! (2_1) res0 = ((double*)((char*)arr + si0))[1]; + fmuld %f28,%f42,%f32 ! (4_1) xx0 = dtmp1 * xx0; + + fmuld %f10,%f26,%f10 ! (3_1) dtmp2 *= xx0; + lda [%o4]0x82,%f4 ! (3_0) x0 = *px; + bge,pn %icc,.update27 ! (3_0) if ( ay >= 0x7f800000 ) + for %f30,DC1,%f28 ! (0_0) hyp0 = vis_for(hyp0,DC1); +.cont27: + fmul8x16 SCALE,%f24,%f24 ! (2_1) dbase0 = vis_fmul8x16(SCALE, dbase0); + cmp %i5,_0x7f800000 ! (3_0) ax ? 0x7f800000 + ld [%fp+ftmp1],%i1 ! 
(0_0) ibase0 = ((int*)&hyp0)[0]; + faddd %f40,KA0,%f42 ! (2_1) dtmp2 += KA0; + + add %g1,stridez,%o3 ! pz += stridez + st %f18,[%fp+ftmp3] ! (2_0) ibase0 = ((int*)&hyp0)[0]; + bge,pn %icc,.update28 ! (3_0) if ( ax >= 0x7f800000 ) + fdtos %f38,%f1 ! (1_1) ftmp0 = (float)res0; +.cont28: + fmuld KA3,%f32,%f34 ! (4_1) dtmp2 = KA3 * xx0; + cmp %l6,0 ! (3_0) + st %f1,[%g1] ! (1_1) *pz = ftmp0; + fand %f28,DC2,%f30 ! (0_0) h_hi0 = vis_fand(hyp0,DC2); + + fsmuld %f4,%f4,%f36 ! (3_0) hyp0 = x0 * (double)x0; + sra %i1,10,%l1 ! (0_0) ibase0 >>= 10; + be,pn %icc,.update29 ! (3_0) if ( ay == 0 ) + faddd %f10,KA1,%f40 ! (3_1) dtmp2 += KA1; +.cont29: + fmuld %f62,%f42,%f38 ! (2_1) res0 *= dtmp2; + and %l1,2032,%o5 ! (0_0) si0 = ibase0 & 0x7f0; + lda [%i2+stridey]0x82,%l6 ! (4_0) ay = *(int*)py; + fpsub32 DA1,%f24,%f24 ! (2_1) dbase0 = vis_fpsub32(DA1,dbase0); + + fsmuld %f2,%f2,%f62 ! (3_0) dtmp0 = y0 * (double)y0; + add %o5,TBL,%l1 ! (0_0) (char*)TBL + si0 + lda [stridex+%o4]0x82,%i5 ! (4_0) ax = *(int*)px; + fsubd %f28,%f30,%f28 ! (0_0) dtmp1 = hyp0 - h_hi0; + + add %o3,stridez,%i4 ! pz += stridez + add %o4,stridex,%l0 ! px += stridex + ldd [TBL+%o5],%f42 ! (0_0) xx0 = ((double*)((char*)TBL + si0))[0]; + faddd %f34,KA2,%f10 ! (4_1) dtmp2 += KA2; + + fmuld %f40,%f26,%f40 ! (3_1) dtmp2 *= xx0; + add %i2,stridey,%i2 ! py += stridey + and %l6,_0x7fffffff,%l6 ! (4_0) ay &= 0x7fffffff; + fand %f16,DC0,%f30 ! (1_0) hyp0 = vis_fand(hyp0,DC0); + + fmuld %f38,%f24,%f38 ! (2_1) res0 *= dbase0; + and %i5,_0x7fffffff,%i5 ! (4_0) ax &= 0x7fffffff; + lda [%i2]0x82,%f2 ! (4_0) y0 = *py; + fand %f20,DA0,%f24 ! (3_1) dbase0 = vis_fand(hyp0,DA0); + + faddd %f36,%f62,%f20 ! (3_0) hyp0 += dtmp0; + cmp %l6,_0x7f800000 ! (4_0) ay ? 0x7f800000 + ldd [%l7+8],%f36 ! (3_1) res0 = ((double*)((char*)arr + si0))[1]; + fmuld %f28,%f42,%f26 ! (0_0) xx0 = dtmp1 * xx0; + + fmuld %f10,%f32,%f10 ! (4_1) dtmp2 *= xx0; + lda [stridex+%o4]0x82,%f4 ! (4_0) x0 = *px; + bge,pn %icc,.update30 ! 
(4_0) if ( ay >= 0x7f800000 ) + for %f30,DC1,%f28 ! (1_0) hyp0 = vis_for(hyp0,DC1); +.cont30: + fmul8x16 SCALE,%f24,%f24 ! (3_1) dbase0 = vis_fmul8x16(SCALE, dbase0); + cmp %i5,_0x7f800000 ! (4_0) ax ? 0x7f800000 + ld [%fp+ftmp2],%i1 ! (1_0) ibase0 = ((int*)&hyp0)[0]; + faddd %f40,KA0,%f62 ! (3_1) dtmp2 += KA0; + + bge,pn %icc,.update31 ! (4_0) if ( ax >= 0x7f800000 ) + st %f20,[%fp+ftmp4] ! (3_0) ibase0 = ((int*)&hyp0)[0]; +.cont31: + subcc counter,5,counter ! counter -= 5; + fdtos %f38,%f1 ! (2_1) ftmp0 = (float)res0; + + fmuld KA3,%f26,%f34 ! (0_0) dtmp2 = KA3 * xx0; + st %f1,[%o3] ! (2_1) *pz = ftmp0; + bpos,pt %icc,.main_loop + fand %f28,DC2,%f30 ! (1_0) h_hi0 = vis_fand(hyp0,DC2); + + add counter,5,counter + +.tail: + subcc counter,1,counter + bneg .begin + mov %i4,%o1 + + sra %i1,10,%o2 ! (1_1) ibase0 >>= 10; + faddd %f10,KA1,%f40 ! (4_2) dtmp2 += KA1; + + fmuld %f36,%f62,%f36 ! (3_2) res0 *= dtmp2; + and %o2,2032,%o2 ! (1_1) si0 = ibase0 & 0x7f0; + fpsub32 DA1,%f24,%f24 ! (3_2) dbase0 = vis_fpsub32(DA1,dbase0); + + add %o2,TBL,%o2 ! (1_1) (char*)TBL + si0 + fsubd %f28,%f30,%f28 ! (1_1) dtmp1 = hyp0 - h_hi0; + + ldd [%o2],%f42 ! (1_1) xx0 = ((double*)((char*)TBL + si0))[0]; + faddd %f34,KA2,%f10 ! (0_1) dtmp2 += KA2; + + fmuld %f40,%f32,%f40 ! (4_2) dtmp2 *= xx0; + + fmuld %f36,%f24,%f32 ! (3_2) res0 *= dbase0; + fand %f12,DA0,%f24 ! (4_2) dbase0 = vis_fand(hyp0,DA0); + + ldd [%i3+8],%f62 ! (4_2) res0 = ((double*)((char*)arr + si0))[1]; + fmuld %f28,%f42,%f36 ! (1_1) xx0 = dtmp1 * xx0; + + fmuld %f10,%f26,%f10 ! (0_1) dtmp2 *= xx0; + + fmul8x16 SCALE,%f24,%f24 ! (4_2) dbase0 = vis_fmul8x16(SCALE, dbase0); + faddd %f40,KA0,%f42 ! (4_2) dtmp2 += KA0; + + add %i4,stridez,%i3 ! pz += stridez + fdtos %f32,%f1 ! (3_2) ftmp0 = (float)res0; + + fmuld KA3,%f36,%f34 ! (1_1) dtmp2 = KA3 * xx0; + st %f1,[%i4] ! (3_2) *pz = ftmp0; + + subcc counter,1,counter + bneg .begin + mov %i3,%o1 + + faddd %f10,KA1,%f40 ! (0_1) dtmp2 += KA1; + + fmuld %f62,%f42,%f32 ! 
(4_2) res0 *= dtmp2; + fpsub32 DA1,%f24,%f24 ! (4_2) dbase0 = vis_fpsub32(DA1,dbase0); + + + faddd %f34,KA2,%f10 ! (1_1) dtmp2 += KA2; + + fmuld %f40,%f26,%f40 ! (0_1) dtmp2 *= xx0; + + fmuld %f32,%f24,%f26 ! (4_2) res0 *= dbase0; + fand %f14,DA0,%f24 ! (0_1) dbase0 = vis_fand(hyp0,DA0); + + ldd [%l1+8],%f62 ! (0_1) res0 = ((double*)((char*)arr + si0))[1]; + + fmuld %f10,%f36,%f10 ! (1_1) dtmp2 *= xx0; + + fmul8x16 SCALE,%f24,%f24 ! (0_1) dbase0 = vis_fmul8x16(SCALE, dbase0); + faddd %f40,KA0,%f42 ! (0_1) dtmp2 += KA0 + + add %i3,stridez,%o1 ! pz += stridez + fdtos %f26,%f1 ! (4_2) ftmp0 = (float)res0; + + st %f1,[%i3] ! (4_2) *pz = ftmp0; + + subcc counter,1,counter + bneg .begin + nop + + faddd %f10,KA1,%f40 ! (1_1) dtmp2 += KA1; + + fmuld %f62,%f42,%f26 ! (0_1) res0 *= dtmp2; + fpsub32 DA1,%f24,%f24 ! (0_1) dbase0 = vis_fpsub32(DA1,dbase0); + + fmuld %f40,%f36,%f40 ! (1_1) dtmp2 *= xx0; + + fmuld %f26,%f24,%f36 ! (0_1) res0 *= dbase0; + fand %f16,DA0,%f24 ! (1_1) dbase0 = vis_fand(hyp0,DA0); + + ldd [%o2+8],%f38 ! (1_1) res0 = ((double*)((char*)arr + si0))[1]; + + fmul8x16 SCALE,%f24,%f24 ! (1_1) dbase0 = vis_fmul8x16(SCALE, dbase0); + faddd %f40,KA0,%f62 ! (1_1) dtmp2 += KA0; + + add %o1,stridez,%g1 ! pz += stridez + fdtos %f36,%f1 ! (0_1) ftmp0 = (float)res0; + + st %f1,[%o1] ! (0_1) *pz = ftmp0; + + subcc counter,1,counter + bneg .begin + mov %g1,%o1 + + fmuld %f38,%f62,%f38 ! (1_1) res0 *= dtmp2; + fpsub32 DA1,%f24,%f24 ! (1_1) dbase0 = vis_fpsub32(DA1,dbase0); + + fmuld %f38,%f24,%f38 ! (1_1) res0 *= dbase0; + + fdtos %f38,%f1 ! (1_1) ftmp0 = (float)res0; + st %f1,[%g1] ! (1_1) *pz = ftmp0; + + ba .begin + add %g1,stridez,%o1 ! pz += stridez + + .align 16 +.spec0: + fabss %f2,%f2 ! fabsf(y0); + + fabss %f4,%f4 ! fabsf(x0); + + fcmps %f2,%f4 + + cmp %l6,_0x7f800000 ! ay ? 0x7f800000 + be,a 1f ! if( ay == 0x7f800000 ) + st %g0,[%o1] ! *pz = 0.0f; + + cmp %i5,_0x7f800000 ! ax ? 0x7f800000 + be,a 1f ! if( ax == 0x7f800000 ) + st %g0,[%o1] ! 
*pz = 0.0f; + + fmuls %f2,%f4,%f2 ! fabsf(x0) * fabsf(y0); + st %f2,[%o1] ! *pz = fabsf(x0) * fabsf(y0); +1: + add %o4,stridex,%o4 ! px += stridex; + add %i2,stridey,%i2 ! py += stridey; + + add %o1,stridez,%o1 ! pz += stridez; + ba .begin1 + sub counter,1,counter ! counter--; + + .align 16 +! .spec1: entered from .begin1 when ay == 0. If ax is nonzero the element is ordinary and +! control returns to .cont_spec1; otherwise the result of the fdivs below (annotated as +! 1.0f / 0.0f) is stored and the loop moves on to the next element. +.spec1: + cmp %i5,0 ! ax ? 0 + bne,pt %icc,.cont_spec1 ! if ( ax != 0 ) + nop + + add %o4,stridex,%o4 ! px += stridex; + add %i2,stridey,%i2 ! py += stridey; + + fdivs %f7,%f9,%f2 ! 1.0f / 0.0f + st %f2,[%o1] ! *pz = 1.0f / 0.0f; + + add %o1,stridez,%o1 ! pz += stridez; + ba .begin1 + sub counter,1,counter ! counter--; + + .align 16 +! .updateN handlers: entered when the element being loaded ahead into the pipeline is special +! (ax or ay >= 0x7f800000, or ay == 0 with ax == 0; for the ay == 0 handlers a nonzero ax +! returns straight to .contN). If more than K elements remain, the remaining count and the +! px/py of the special element are saved in tmp_counter/tmp_px/tmp_py, which .begin reloads, +! and counter is cut to K so the elements before it are finished first. The ld in the ble delay +! slot always executes and replaces the operand (%f2 or %f4) with a word from the constant +! table; NOTE(review): presumably so the abandoned slot computes on a harmless value - the +! table contents are outside this view, confirm. +.update0: + cmp counter,1 + ble .cont0 + ld [TBL+TBL_SHIFT+44],%f2 + + sub counter,1,counter + st counter,[%fp+tmp_counter] + + stx %l0,[%fp+tmp_px] + + stx %i2,[%fp+tmp_py] + ba .cont0 + mov 1,counter + + .align 16 +.update1: + cmp counter,1 + ble .cont1 + ld [TBL+TBL_SHIFT+44],%f4 + + sub counter,1,counter + st counter,[%fp+tmp_counter] + + stx %l0,[%fp+tmp_px] + + stx %i2,[%fp+tmp_py] + ba .cont1 + mov 1,counter + + .align 16 +.update2: + cmp %i5,0 + bne .cont2 + + cmp counter,1 + ble .cont2 + ld [TBL+TBL_SHIFT+44],%f2 + + sub counter,1,counter + st counter,[%fp+tmp_counter] + + stx %l0,[%fp+tmp_px] + + stx %i2,[%fp+tmp_py] + ba .cont2 + mov 1,counter + + .align 16 +.update3: + cmp counter,2 + ble .cont3 + ld [TBL+TBL_SHIFT+44],%f2 + + sub counter,2,counter + st counter,[%fp+tmp_counter] + + stx %i1,[%fp+tmp_px] + + stx %i2,[%fp+tmp_py] + ba .cont3 + mov 2,counter + + .align 16 +.update4: + cmp counter,2 + ble .cont4 + ld [TBL+TBL_SHIFT+44],%f4 + + sub counter,2,counter + st counter,[%fp+tmp_counter] + + stx %i1,[%fp+tmp_px] + + stx %i2,[%fp+tmp_py] + ba .cont4 + mov 2,counter + + .align 16 +.update5: + cmp %i5,0 + bne .cont5 + + cmp counter,2 + ble .cont5 + ld [TBL+TBL_SHIFT+44],%f2 + + sub counter,2,counter + st counter,[%fp+tmp_counter] + + stx %i1,[%fp+tmp_px] + + stx %i2,[%fp+tmp_py] + ba .cont5 + mov 2,counter + + .align 16 +.update6: +
cmp counter,3 + ble .cont6 + ld [TBL+TBL_SHIFT+44],%f2 + + sub counter,3,counter + st counter,[%fp+tmp_counter] + + stx %g5,[%fp+tmp_px] + + stx %o3,[%fp+tmp_py] + ba .cont6 + mov 3,counter + + .align 16 +.update7: + cmp counter,3 + ble .cont7 + ld [TBL+TBL_SHIFT+44],%f4 + + sub counter,3,counter + st counter,[%fp+tmp_counter] + + stx %g5,[%fp+tmp_px] + + stx %o3,[%fp+tmp_py] + ba .cont7 + mov 3,counter + + .align 16 +.update8: + cmp %i5,0 + bne .cont8 + + cmp counter,3 + ble .cont8 + ld [TBL+TBL_SHIFT+44],%f2 + + sub counter,3,counter + st counter,[%fp+tmp_counter] + + stx %g5,[%fp+tmp_px] + + stx %o3,[%fp+tmp_py] + ba .cont8 + mov 3,counter + + .align 16 +.update9: + cmp counter,4 + ble .cont9 + ld [TBL+TBL_SHIFT+44],%f2 + + sub counter,4,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + stx %i2,[%fp+tmp_py] + ba .cont9 + mov 4,counter + + .align 16 +.update10: + cmp counter,4 + ble .cont10 + ld [TBL+TBL_SHIFT+44],%f4 + + sub counter,4,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + stx %i2,[%fp+tmp_py] + ba .cont10 + mov 4,counter + + .align 16 +.update11: + cmp %i5,0 + bne .cont11 + + cmp counter,4 + ble .cont11 + ld [TBL+TBL_SHIFT+44],%f2 + + sub counter,4,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + stx %i2,[%fp+tmp_py] + ba .cont11 + mov 4,counter + + .align 16 +.update12: + cmp counter,5 + ble .cont12 + ld [TBL+TBL_SHIFT+44],%f2 + + sub counter,5,counter + st counter,[%fp+tmp_counter] + + stx %o4,[%fp+tmp_px] + + stx %i2,[%fp+tmp_py] + ba .cont12 + mov 5,counter + + .align 16 +.update13: + cmp counter,5 + ble .cont13 + ld [TBL+TBL_SHIFT+44],%f4 + + sub counter,5,counter + st counter,[%fp+tmp_counter] + + stx %o4,[%fp+tmp_px] + + stx %i2,[%fp+tmp_py] + ba .cont13 + mov 5,counter + + .align 16 +.update14: + cmp %i5,0 + bne .cont14 + + cmp counter,5 + ble .cont14 + ld [TBL+TBL_SHIFT+44],%f2 + + sub counter,5,counter + st counter,[%fp+tmp_counter] + + stx %o4,[%fp+tmp_px] + + stx %i2,[%fp+tmp_py] + ba 
.cont14 + mov 5,counter + + .align 16 +! Out-of-line handlers for special inputs met while the pipeline is loading ahead: each one +! saves the remaining count and the px/py of the special element for a restart at .begin and +! cuts counter so that the elements before it are finished first. +.update15: + cmp counter,6 + ble .cont15 + ld [TBL+TBL_SHIFT+44],%f2 + + sub counter,6,counter + st counter,[%fp+tmp_counter] + + stx %l0,[%fp+tmp_px] + + stx %i2,[%fp+tmp_py] + ba .cont15 + mov 6,counter + + .align 16 +.update16: + cmp counter,6 + ble .cont16 + ld [TBL+TBL_SHIFT+44],%f4 + + sub counter,6,counter + st counter,[%fp+tmp_counter] + + stx %l0,[%fp+tmp_px] + + stx %i2,[%fp+tmp_py] + ba .cont16 + mov 6,counter + + .align 16 +! .update17: slot (4_1) has ay == 0 inside .main_loop. dtmp0 = y0 * y0 is already in %f62 +! when the branch here is taken, so this handler overwrites %f62 with DC1 instead of +! reloading %f2 as the other handlers do. +.update17: + cmp %i5,0 + bne .cont17 + + cmp counter,1 + ble .cont17 + fmovd DC1,%f62 + + sub counter,1,counter + st counter,[%fp+tmp_counter] + + stx %l0,[%fp+tmp_px] + + stx %i2,[%fp+tmp_py] + ba .cont17 + mov 1,counter + + .align 16 +.update18: + cmp counter,2 + ble .cont18 + ld [TBL+TBL_SHIFT+44],%f2 + + sub counter,2,counter + st counter,[%fp+tmp_counter] + + stx %i1,[%fp+tmp_px] + + stx %i2,[%fp+tmp_py] + ba .cont18 + mov 2,counter + + .align 16 +.update19: + cmp counter,2 + ble .cont19 + ld [TBL+TBL_SHIFT+44],%f4 + + sub counter,2,counter + st counter,[%fp+tmp_counter] + + stx %i1,[%fp+tmp_px] + + stx %i2,[%fp+tmp_py] + ba .cont19 + mov 2,counter + + .align 16 +! .update20: in .main_loop the (0_0) ax value is loaded into %o1 rather than %i5, hence the +! test of %o1 here. +.update20: + cmp %o1,0 + bne .cont20 + + cmp counter,2 + ble .cont20 + ld [TBL+TBL_SHIFT+44],%f2 + + sub counter,2,counter + st counter,[%fp+tmp_counter] + + stx %i1,[%fp+tmp_px] + + stx %i2,[%fp+tmp_py] + ba .cont20 + mov 2,counter + + .align 16 +.update21: + cmp counter,3 + ble .cont21 + ld [TBL+TBL_SHIFT+44],%f2 + + sub counter,3,counter + st counter,[%fp+tmp_counter] + + stx %g5,[%fp+tmp_px] + + stx %o3,[%fp+tmp_py] + ba .cont21 + mov 3,counter + + .align 16 +.update22: + cmp counter,3 + ble .cont22 + ld [TBL+TBL_SHIFT+44],%f4 + + sub counter,3,counter + st counter,[%fp+tmp_counter] + + stx %g5,[%fp+tmp_px] + + stx %o3,[%fp+tmp_py] + ba .cont22 + mov 3,counter + + .align 16 +.update23: + cmp %i5,0 + bne .cont23 + + cmp counter,3 + ble .cont23 + ld [TBL+TBL_SHIFT+44],%f2 + + sub counter,3,counter + st counter,[%fp+tmp_counter] +
+ stx %g5,[%fp+tmp_px] + + stx %o3,[%fp+tmp_py] + ba .cont23 + mov 3,counter + + .align 16 +.update24: + cmp counter,4 + ble .cont24 + ld [TBL+TBL_SHIFT+44],%f2 + + sub counter,4,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + stx %i2,[%fp+tmp_py] + ba .cont24 + mov 4,counter + + .align 16 +.update25: + cmp counter,4 + ble .cont25 + ld [TBL+TBL_SHIFT+44],%f4 + + sub counter,4,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + stx %i2,[%fp+tmp_py] + ba .cont25 + mov 4,counter + + .align 16 +.update26: + cmp %i5,0 + bne .cont26 + + cmp counter,4 + ble .cont26 + ld [TBL+TBL_SHIFT+44],%f2 + + sub counter,4,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + stx %i2,[%fp+tmp_py] + ba .cont26 + mov 4,counter + + .align 16 +.update27: + cmp counter,5 + ble .cont27 + ld [TBL+TBL_SHIFT+44],%f2 + + sub counter,5,counter + st counter,[%fp+tmp_counter] + + stx %o4,[%fp+tmp_px] + + stx %i2,[%fp+tmp_py] + ba .cont27 + mov 5,counter + + .align 16 +.update28: + cmp counter,5 + ble .cont28 + ld [TBL+TBL_SHIFT+44],%f4 + + sub counter,5,counter + st counter,[%fp+tmp_counter] + + stx %o4,[%fp+tmp_px] + + stx %i2,[%fp+tmp_py] + ba .cont28 + mov 5,counter + + .align 16 +.update29: + cmp %i5,0 + bne .cont29 + + cmp counter,5 + ble .cont29 + ld [TBL+TBL_SHIFT+44],%f2 + + sub counter,5,counter + st counter,[%fp+tmp_counter] + + stx %o4,[%fp+tmp_px] + + stx %i2,[%fp+tmp_py] + ba .cont29 + mov 5,counter + + .align 16 +.update30: + cmp counter,6 + ble .cont30 + ld [TBL+TBL_SHIFT+44],%f2 + + sub counter,6,counter + st counter,[%fp+tmp_counter] + + stx %l0,[%fp+tmp_px] + + stx %i2,[%fp+tmp_py] + ba .cont30 + mov 6,counter + + .align 16 +.update31: + cmp counter,6 + ble .cont31 + ld [TBL+TBL_SHIFT+44],%f4 + + sub counter,6,counter + st counter,[%fp+tmp_counter] + + stx %l0,[%fp+tmp_px] + + stx %i2,[%fp+tmp_py] + ba .cont31 + mov 6,counter + + .align 16 +.exit: + ret + restore + SET_SIZE(__vrhypotf) + diff --git 
a/usr/src/lib/libmvec/common/vis/__vrsqrt.S b/usr/src/lib/libmvec/common/vis/__vrsqrt.S new file mode 100644 index 0000000000..50329eb2b9 --- /dev/null +++ b/usr/src/lib/libmvec/common/vis/__vrsqrt.S @@ -0,0 +1,2157 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .file "__vrsqrt.S" + +#include "libm.h" + + RO_DATA + .align 64 + +.CONST_TBL: + .word 0xbfe00000, 0x0000002f ! K1 =-5.00000000000005209867e-01; + .word 0x3fd80000, 0x00000058 ! K2 = 3.75000000000004884257e-01; + .word 0xbfd3ffff, 0xff444bc8 ! K3 =-3.12499999317136886551e-01; + .word 0x3fd17fff, 0xff5006fe ! K4 = 2.73437499359815081532e-01; + .word 0xbfcf80bb, 0xb33ef574 ! K5 =-2.46116125605037803130e-01; + .word 0x3fcce0af, 0xf8156949 ! K6 = 2.25606914648617522896e-01; + + .word 0x001fffff, 0xffffffff ! DC0 + .word 0x3fe00000, 0x00000000 ! DC1 + .word 0x00002000, 0x00000000 ! DC2 + .word 0x7fffc000, 0x00000000 ! DC3 + .word 0x0007ffff, 0xffffffff ! DC4 + + .word 0x43200000, 0x00000000 ! D2ON51 = pow(2,51) + .word 0x3ff00000, 0x00000000 ! 
DONE = 1.0 + +#define stridex %l5 +#define stridey %l7 +#define counter %l0 +#define TBL %l3 +#define _0x7ff00000 %o0 +#define _0x00100000 %o1 + +#define DC0 %f56 +#define DC1 %f54 +#define DC2 %f48 +#define DC3 %f46 +#define K6 %f42 +#define K5 %f20 +#define K4 %f52 +#define K3 %f50 +#define K2 %f14 +#define K1 %f12 +#define DONE %f4 + +#define tmp_counter %g5 +#define tmp_px %o5 + +#define tmp0 STACK_BIAS-0x40 +#define tmp1 STACK_BIAS-0x38 +#define tmp2 STACK_BIAS-0x30 +#define tmp3 STACK_BIAS-0x28 +#define tmp4 STACK_BIAS-0x20 +#define tmp5 STACK_BIAS-0x18 +#define tmp6 STACK_BIAS-0x10 +#define tmp7 STACK_BIAS-0x08 + +! sizeof temp storage - must be a multiple of 16 for V9 +#define tmps 0x40 + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +! !!!!! algorithm !!!!! +! ((float*)&res)[0] = ((float*)px)[0]; +! ((float*)&res)[1] = ((float*)px)[1]; +! hx = *(int*)px; +! if ( hx >= 0x7ff00000 ) +! { +! res = DONE / res; +! ((float*)py)[0] = ((float*)&res)[0]; +! ((float*)py)[1] = ((float*)&res)[1]; +! px += stridex; +! py += stridey; +! continue; +! } +! if ( hx < 0x00100000 ) +! { +! ax = hx & 0x7fffffff; +! lx = ((int*)px)[1]; +! +! if ( (ax | lx) == 0 ) +! { +! res = DONE / res; +! ((float*)py)[0] = ((float*)&res)[0]; +! ((float*)py)[1] = ((float*)&res)[1]; +! px += stridex; +! py += stridey; +! continue; +! } +! else if ( hx >= 0 ) +! { +! if ( hx < 0x00080000 ) +! { +! res = *(long long*)&res; +! hx = *(int*)&res - (537 << 21); +! } +! else +! { +! res = vis_fand(res,DC4); +! res = *(long long*)&res; +! res += D2ON51; +! hx = *(int*)&res - (537 << 21); +! } +! } +! else +! { +! res = sqrt(res); +! ((float*)py)[0] = ((float*)&res)[0]; +! ((float*)py)[1] = ((float*)&res)[1]; +! px += stridex; +! py += stridey; +! continue; +! } +! } +! +! iexp = hx >> 21; +! iexp = -iexp; +! iexp += 0x5fe; +! lexp = iexp << 52; +! dlexp = *(double*)&lexp; +! hx >>= 10; +! hx &= 0x7f8; +! hx += 8; +! hx &= -16; +! +! res = vis_fand(res,DC0); +! 
res = vis_for(res,DC1); +! res_c = vis_fpadd32(res,DC2); +! res_c = vis_fand(res_c,DC3); +! +! addr = (char*)arr + hx; +! dexp_hi = ((double*)addr)[0]; +! dexp_lo = ((double*)addr)[1]; +! dtmp0 = dexp_hi * dexp_hi; +! xx = res - res_c; +! xx *= dtmp0; +! res = K6 * xx; +! res += K5; +! res *= xx; +! res += K4; +! res *= xx; +! res += K3; +! res *= xx; +! res += K2; +! res *= xx; +! res += K1; +! res *= xx; +! res = dexp_hi * res; +! res += dexp_lo; +! res += dexp_hi; +! +! res *= dlexp; +! +! ((float*)py)[0] = ((float*)&res)[0]; +! ((float*)py)[1] = ((float*)&res)[1]; +! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + ENTRY(__vrsqrt) + save %sp,-SA(MINFRAME)-tmps,%sp + PIC_SETUP(l7) + PIC_SET(l7,.CONST_TBL,o3) + PIC_SET(l7,__vlibm_TBL_rsqrt,l3) + wr %g0,0x82,%asi + + ldd [%o3],K1 + sethi %hi(0x7ff00000),%o0 + mov %i3,%o4 + + ldd [%o3+0x08],K2 + sethi %hi(0x00100000),%o1 + mov %i1,tmp_px + + ldd [%o3+0x10],K3 + sll %i2,3,stridex + mov %i0,tmp_counter + + ldd [%o3+0x18],K4 + sll %i4,3,stridey + + ldd [%o3+0x20],K5 + ldd [%o3+0x28],K6 + ldd [%o3+0x30],DC0 + ldd [%o3+0x38],DC1 + ldd [%o3+0x40],DC2 + ldd [%o3+0x48],DC3 + +.begin: + mov tmp_counter,counter + mov tmp_px,%i1 + clr tmp_counter +.begin1: + cmp counter,0 + ble,pn %icc,.exit + ldd [%o3+0x60],DONE + + lda [%i1]%asi,%f0 ! (6_0) ((float*)res)[0] = ((float*)px)[0]; + sethi %hi(0x7ffffc00),%i0 + + lda [%i1+4]%asi,%f1 ! (6_0) ((float*)res)[1] = ((float*)px)[1]; + add %i0,1023,%i0 + + fand %f0,DC0,%f16 ! (6_0) res = vis_fand(res,DC0); + + lda [%i1]%asi,%g1 ! (6_1) hx = *(int*)px; + sethi %hi(0x00080000),%i4 + + lda [%i1+4]%asi,%l4 + add %i1,stridex,%l6 ! px += stridex + + sra %g1,21,%o7 ! (6_1) iexp = hx >> 21; + lda [%l6]%asi,%f8 ! (0_0) ((float*)res)[0] = ((float*)px)[0]; + for %f16,DC1,%f44 ! (6_1) res = vis_for(res,DC1); + + lda [%l6+4]%asi,%f9 ! (0_0) ((float*)res)[1] = ((float*)px)[1]; + sra %g1,10,%o2 ! (6_1) hx >>= 10; + and %g1,%i0,%i2 + + cmp %g1,_0x7ff00000 ! (6_1) hx ? 
0x7ff00000 + bge,pn %icc,.spec0 ! (6_1) if ( hx >= 0x7ff00000 ) + and %o2,2040,%o2 ! (6_1) hx &= 0x7f8; + + cmp %g1,_0x00100000 ! (6_1) hx ? 0x00100000 + bl,pn %icc,.spec1 ! (6_1) if ( hx < 0x00100000 ) + sub %g0,%o7,%o7 ! (6_1) iexp = -iexp; +.cont_spec: + fand %f8,DC0,%f16 ! (0_0) res = vis_fand(res,DC0); + + fpadd32 %f44,DC2,%f18 ! (6_1) res_c = vis_fpadd32(res,DC2); + + add %o2,8,%l4 ! (6_1) hx += 8; + + add %o7,1534,%o7 ! (6_1) iexp += 0x5fe; + + lda [%l6]%asi,%g1 ! (0_0) hx = *(int*)px; + sllx %o7,52,%o7 ! (6_1) iexp << 52; + and %l4,-16,%l4 ! (6_1) hx = -16; + + add %l4,TBL,%l4 ! (6_1) addr = (char*)arr + hx; + stx %o7,[%fp+tmp1] ! (6_1) dlexp = *(double*)lexp; + + add %l6,stridex,%l6 ! px += stridex + ldd [%l4],%f30 ! (6_1) dtmp0 = ((double*)addr)[0]; + + sra %g1,21,%o7 ! (0_0) iexp = hx >> 21; + lda [%l6]%asi,%f0 ! (1_0) ((float*)res)[0] = ((float*)px)[0]; + for %f16,DC1,%f28 ! (0_0) res = vis_for(res,DC1); + + sra %g1,10,%o2 ! (0_0) hx >>= 10; + sub %g0,%o7,%o7 ! (0_0) iexp = -iexp; + lda [%l6+4]%asi,%f1 ! (1_0) ((float*)res)[1] = ((float*)px)[1]; + + cmp %g1,_0x7ff00000 ! (0_0) hx ? 0x7ff00000 + bge,pn %icc,.update0 ! (0_0) if ( hx >= 0x7ff00000 ) + fand %f18,DC3,%f6 ! (6_1) res_c = vis_fand(res_c,DC3); +.cont0: + and %o2,2040,%o2 ! (0_0) hx &= 0x7f8; + fmuld %f30,%f30,%f10 ! (6_1) dtmp0 = dexp_hi * dexp_hi; + + cmp %g1,_0x00100000 ! (0_0) hx ? 0x00100000 + bl,pn %icc,.update1 ! (0_0) if ( hx < 0x00100000 ) + add %o7,1534,%o7 ! (0_0) iexp += 0x5fe; +.cont1: + fand %f0,DC0,%f16 ! (1_0) res = vis_fand(res,DC0); + + fpadd32 %f28,DC2,%f18 ! (0_0) res_c = vis_fpadd32(res,DC2); + + add %o2,8,%l2 ! (0_0) hx += 8; + fsubd %f44,%f6,%f6 ! (6_1) xx = res - res_c; + + lda [%l6]%asi,%g1 ! (1_0) hx = *(int*)px; + sllx %o7,52,%o7 ! (0_0) iexp << 52; + and %l2,-16,%l2 ! (0_0) hx = -16; + + add %l2,TBL,%l2 ! (0_0) addr = (char*)arr + hx; + add %l6,stridex,%l6 ! px += stridex + stx %o7,[%fp+tmp2] ! (0_0) dlexp = *(double*)lexp; + + fmuld %f6,%f10,%f26 ! 
(6_1) xx *= dtmp0; + ldd [%l2],%f10 ! (0_0) dtmp0 = ((double*)addr)[0]; + + sra %g1,21,%o7 ! (1_0) iexp = hx >> 21; + lda [%l6]%asi,%f6 ! (2_0) ((float*)res)[0] = ((float*)px)[0]; + for %f16,DC1,%f44 ! (1_0) res = vis_for(res,DC1); + + sra %g1,10,%o2 ! (1_0) hx >>= 10; + cmp %g1,_0x7ff00000 ! (1_0) hx ? 0x7ff00000 + bge,pn %icc,.update2 ! (1_0) if ( hx >= 0x7ff00000 ) + lda [%l6+4]%asi,%f7 ! (2_0) ((float*)res)[1] = ((float*)px)[1]; +.cont2: + fand %f18,DC3,%f8 ! (0_0) res_c = vis_fand(res_c,DC3); + + fmuld %f10,%f10,%f10 ! (0_0) dtmp0 = dexp_hi * dexp_hi; + cmp %g1,_0x00100000 ! (1_0) hx ? 0x00100000 + bl,pn %icc,.update3 ! (1_0) if ( hx < 0x00100000 ) + and %o2,2040,%o2 ! (1_0) hx &= 0x7f8; +.cont3: + sub %g0,%o7,%o7 ! (1_0) iexp = -iexp; + fand %f6,DC0,%f16 ! (2_0) res = vis_fand(res,DC0); + + add %o7,1534,%o7 ! (1_0) iexp += 0x5fe; + fpadd32 %f44,DC2,%f18 ! (1_0) res_c = vis_fpadd32(res,DC2); + + fmuld K6,%f26,%f62 ! (6_1) res = K6 * xx; + add %o2,8,%i2 ! (1_0) hx += 8; + fsubd %f28,%f8,%f32 ! (0_0) xx = res - res_c; + + lda [%l6]%asi,%g1 ! (2_0) hx = *(int*)px; + sllx %o7,52,%o7 ! (1_0) iexp << 52; + and %i2,-16,%i2 ! (1_0) hx = -16; + + add %i2,TBL,%i2 ! (1_0) addr = (char*)arr + hx; + stx %o7,[%fp+tmp3] ! (1_0) dlexp = *(double*)lexp; + + fmuld %f32,%f10,%f32 ! (0_0) xx *= dtmp0; + add %l6,stridex,%l6 ! px += stridex + ldd [%i2],%f10 ! (1_0) dtmp0 = ((double*)addr)[0]; + faddd %f62,K5,%f62 ! (6_1) res += K5; + + sra %g1,21,%o7 ! (2_0) iexp = hx >> 21; + lda [%l6]%asi,%f0 ! (3_0) ((float*)res)[0] = ((float*)px)[0]; + for %f16,DC1,%f28 ! (2_0) res = vis_for(res,DC1); + + sra %g1,10,%o2 ! (2_0) hx >>= 10; + cmp %g1,_0x7ff00000 ! (2_0) hx ? 0x7ff00000 + bge,pn %icc,.update4 ! (2_0) if ( hx >= 0x7ff00000 ) + lda [%l6+4]%asi,%f1 ! (3_0) ((float*)res)[1] = ((float*)px)[1]; +.cont4: + fmuld %f62,%f26,%f40 ! (6_1) res *= xx; + fand %f18,DC3,%f8 ! (1_0) res_c = vis_fand(res_c,DC3); + + fmuld %f10,%f10,%f10 ! (1_0) dtmp0 = dexp_hi * dexp_hi; + cmp %g1,_0x00100000 ! 
(2_0) hx ? 0x00100000 + bl,pn %icc,.update5 ! (2_0) if ( hx < 0x00100000 ) + and %o2,2040,%o2 ! (2_0) hx &= 0x7f8; +.cont5: + sub %g0,%o7,%o7 ! (2_0) iexp = -iexp; + fand %f0,DC0,%f16 ! (3_0) res = vis_fand(res,DC0); + + add %o7,1534,%o7 ! (2_0) iexp += 0x5fe; + fpadd32 %f28,DC2,%f18 ! (2_0) res_c = vis_fpadd32(res,DC2); + + fmuld K6,%f32,%f62 ! (0_0) res = K6 * xx; + add %o2,8,%i4 ! (2_0) hx += 8; + fsubd %f44,%f8,%f6 ! (1_0) xx = res - res_c; + + faddd %f40,K4,%f40 ! (6_1) res += K4; + + lda [%l6]%asi,%g1 ! (3_0) hx = *(int*)px; + sllx %o7,52,%o7 ! (2_0) iexp << 52; + and %i4,-16,%i4 ! (2_0) hx = -16; + + add %i4,TBL,%i4 ! (2_0) addr = (char*)arr + hx; + stx %o7,[%fp+tmp4] ! (2_0) dlexp = *(double*)lexp; + + fmuld %f6,%f10,%f38 ! (1_0) xx *= dtmp0; + ldd [%i4],%f24 ! (2_0) dtmp0 = ((double*)addr)[0]; + faddd %f62,K5,%f62 ! (0_0) res += K5; + + fmuld %f40,%f26,%f34 ! (6_1) res *= xx; + add %l6,stridex,%l6 ! px += stridex + + sra %g1,21,%o7 ! (3_0) iexp = hx >> 21; + lda [%l6]%asi,%f8 ! (4_0) ((float*)res)[0] = ((float*)px)[0]; + for %f16,DC1,%f44 ! (3_0) res = vis_for(res,DC1); + + sra %g1,10,%o2 ! (3_0) hx >>= 10; + cmp %g1,_0x7ff00000 ! (3_0) hx ? 0x7ff00000 + bge,pn %icc,.update6 ! (3_0) if ( hx >= 0x7ff00000 ) + lda [%l6+4]%asi,%f9 ! (4_0) ((float*)res)[1] = ((float*)px)[1]; +.cont6: + fmuld %f62,%f32,%f60 ! (0_0) res *= xx; + cmp %g1,_0x00100000 ! (3_0) hx ? 0x00100000 + fand %f18,DC3,%f22 ! (2_0) res_c = vis_fand(res_c,DC3); + + fmuld %f24,%f24,%f24 ! (2_0) dtmp0 = dexp_hi * dexp_hi; + bl,pn %icc,.update7 ! (3_0) if ( hx < 0x00100000 ) + and %o2,2040,%o2 ! (3_0) hx &= 0x7f8; + faddd %f34,K3,%f6 ! (6_1) res += K3; +.cont7: + sub %g0,%o7,%o7 ! (3_0) iexp = -iexp; + fand %f8,DC0,%f16 ! (4_0) res = vis_fand(res,DC0); + + add %o7,1534,%o7 ! (3_0) iexp += 0x5fe; + fpadd32 %f44,DC2,%f18 ! (3_0) res_c = vis_fpadd32(res,DC2); + + fmuld K6,%f38,%f62 ! (1_0) res = K6 * xx; + add %o2,8,%i5 ! (3_0) hx += 8; + fsubd %f28,%f22,%f28 ! 
(2_0) xx = res - res_c; + + fmuld %f6,%f26,%f22 ! (6_1) res *= xx; + faddd %f60,K4,%f60 ! (0_0) res += K4; + + lda [%l6]%asi,%g1 ! (4_0) hx = *(int*)px; + sllx %o7,52,%o7 ! (3_0) iexp << 52; + and %i5,-16,%i5 ! (3_0) hx = -16; + + add %i5,TBL,%i5 ! (3_0) addr = (char*)arr + hx; + stx %o7,[%fp+tmp5] ! (3_0) dlexp = *(double*)lexp; + + fmuld %f28,%f24,%f36 ! (2_0) xx *= dtmp0; + add %l6,stridex,%i0 ! px += stridex + ldd [%i5],%f28 ! (3_0) dtmp0 = ((double*)addr)[0]; + faddd %f62,K5,%f62 ! (1_0) res += K5; + + faddd %f22,K2,%f10 ! (6_1) res += K2; + fmuld %f60,%f32,%f34 ! (0_0) res *= xx; + + sra %g1,21,%o7 ! (4_0) iexp = hx >> 21; + lda [%i0]%asi,%f0 ! (5_0) ((float*)res)[0] = ((float*)px)[0]; + for %f16,DC1,%f24 ! (4_0) res = vis_for(res,DC1); + + sra %g1,10,%o2 ! (4_0) hx >>= 10; + cmp %g1,_0x7ff00000 ! (4_0) hx ? 0x7ff00000 + bge,pn %icc,.update8 ! (4_0) if ( hx >= 0x7ff00000 ) + lda [%i0+4]%asi,%f1 ! (5_0) ((float*)res)[1] = ((float*)px)[1]; +.cont8: + fand %f18,DC3,%f40 ! (3_0) res_c = vis_fand(res_c,DC3); + fmuld %f62,%f38,%f62 ! (1_0) res *= xx; + + fmuld %f10,%f26,%f58 ! (6_1) res *= xx; + cmp %g1,_0x00100000 ! (4_0) hx ? 0x00100000 + and %o2,2040,%o2 ! (4_0) hx &= 0x7f8; + faddd %f34,K3,%f60 ! (0_0) res += K3; + + fmuld %f28,%f28,%f28 ! (3_0) dtmp0 = dexp_hi * dexp_hi; + bl,pn %icc,.update9 ! (4_0) if ( hx < 0x00100000 ) + sub %g0,%o7,%o7 ! (4_0) iexp = -iexp; + fand %f0,DC0,%f16 ! (5_0) res = vis_fand(res,DC0); +.cont9: + add %o7,1534,%o7 ! (4_0) iexp += 0x5fe; + fpadd32 %f24,DC2,%f18 ! (4_0) res_c = vis_fpadd32(res,DC2); + + fmuld K6,%f36,%f10 ! (2_0) res = K6 * xx; + add %o2,8,%l1 ! (4_0) hx += 8; + fsubd %f44,%f40,%f44 ! (3_0) xx = res - res_c; + + fmuld %f60,%f32,%f60 ! (0_0) res *= xx; + faddd %f62,K4,%f6 ! (1_0) res += K4; + + lda [%i0]%asi,%g1 ! (5_0) hx = *(int*)px; + sllx %o7,52,%o7 ! (4_0) iexp << 52; + and %l1,-16,%l1 ! (4_0) hx = -16; + faddd %f58,K1,%f58 ! (6_1) res += K1; + + add %i0,stridex,%i1 ! px += stridex + add %l1,TBL,%l1 ! 
(4_0) addr = (char*)arr + hx; + stx %o7,[%fp+tmp6] ! (4_0) dlexp = *(double*)lexp; + + fmuld %f44,%f28,%f40 ! (3_0) xx *= dtmp0; + ldd [%l1],%f44 ! (4_0) dtmp0 = ((double*)addr)[0]; + faddd %f10,K5,%f62 ! (2_0) res += K5; + + fmuld %f6,%f38,%f34 ! (1_0) res *= xx; + sra %g1,21,%o7 ! (5_0) iexp = hx >> 21; + nop + faddd %f60,K2,%f60 ! (0_0) res += K2; + + for %f16,DC1,%f28 ! (5_0) res = vis_for(res,DC1); + sub %g0,%o7,%o7 ! (5_0) iexp = -iexp; + lda [%i1]%asi,%f6 ! (6_0) ((float*)res)[0] = ((float*)px)[0]; + fmuld %f58,%f26,%f26 ! (6_1) res *= xx; + + sra %g1,10,%o2 ! (5_0) hx >>= 10; + cmp %g1,_0x7ff00000 ! (5_0) hx ? 0x7ff00000 + bge,pn %icc,.update10 ! (5_0) if ( hx >= 0x7ff00000 ) + lda [%i1+4]%asi,%f7 ! (6_0) ((float*)res)[1] = ((float*)px)[1]; +.cont10: + fand %f18,DC3,%f8 ! (4_0) res_c = vis_fand(res_c,DC3); + fmuld %f62,%f36,%f62 ! (2_0) res *= xx; + + fmuld %f60,%f32,%f58 ! (0_0) res *= xx; + cmp %g1,_0x00100000 ! (5_0) hx ? 0x00100000 + and %o2,2040,%o2 ! (5_0) hx &= 0x7f8; + faddd %f34,K3,%f34 ! (1_0) res += K3; + + fmuld %f30,%f26,%f26 ! (6_1) res = dexp_hi * res; + bl,pn %icc,.update11 ! (5_0) if ( hx < 0x00100000 ) + nop + fand %f6,DC0,%f16 ! (6_0) res = vis_fand(res,DC0); +.cont11: + ldd [%l4+8],%f60 ! (6_1) dexp_lo = ((double*)addr)[1]; + fmuld %f44,%f44,%f44 ! (4_0) dtmp0 = dexp_hi * dexp_hi; + fpadd32 %f28,DC2,%f18 ! (5_0) res_c = vis_fpadd32(res,DC2); + + fmuld K6,%f40,%f22 ! (3_0) res = K6 * xx; + add %o2,8,%i3 ! (5_0) hx += 8; + fsubd %f24,%f8,%f10 ! (4_0) xx = res - res_c; + + fmuld %f34,%f38,%f24 ! (1_0) res *= xx; + or %g0,%o4,%i0 + + cmp counter,7 + bl,pn %icc,.tail + faddd %f62,K4,%f34 ! (2_0) res += K4; + + ba .main_loop + sub counter,7,counter ! counter + + .align 16 +.main_loop: + add %o7,1534,%o7 ! (5_0) iexp += 0x5fe; + and %i3,-16,%i3 ! (5_1) hx = -16; + lda [%i1]%asi,%g1 ! (6_1) hx = *(int*)px; + faddd %f58,K1,%f58 ! (0_1) res += K1; + + add %i3,TBL,%i3 ! (5_1) addr = (char*)arr + hx; + sllx %o7,52,%o7 ! 
(5_1) iexp << 52; + stx %o7,[%fp+tmp0] ! (5_1) dlexp = *(double*)lexp; + faddd %f26,%f60,%f8 ! (6_2) res += dexp_lo; + + faddd %f22,K5,%f62 ! (3_1) res += K5; + add %i1,stridex,%l6 ! px += stridex + ldd [%i3],%f22 ! (5_1) dtmp0 = ((double*)addr)[0]; + fmuld %f10,%f44,%f60 ! (4_1) xx *= dtmp0; + + faddd %f24,K2,%f26 ! (1_1) res += K2; + add %i0,stridey,%i1 ! px += stridey + ldd [%l2],%f24 ! (0_1) dexp_hi = ((double*)addr)[0]; + fmuld %f34,%f36,%f34 ! (2_1) res *= xx; + + fmuld %f58,%f32,%f58 ! (0_1) res *= xx; + sra %g1,21,%o7 ! (6_1) iexp = hx >> 21; + lda [%l6]%asi,%f0 ! (0_0) ((float*)res)[0] = ((float*)px)[0]; + for %f16,DC1,%f44 ! (6_1) res = vis_for(res,DC1); + + lda [%l6+4]%asi,%f1 ! (0_0) ((float*)res)[1] = ((float*)px)[1]; + sra %g1,10,%o2 ! (6_1) hx >>= 10; + fmuld %f22,%f22,%f10 ! (5_1) dtmp0 = dexp_hi * dexp_hi; + faddd %f8,%f30,%f30 ! (6_2) res += dexp_hi; + + fmuld %f62,%f40,%f32 ! (3_1) res *= xx; + cmp %g1,_0x7ff00000 ! (6_1) hx ? 0x7ff00000 + ldd [%fp+tmp1],%f62 ! (6_2) dlexp = *(double*)lexp; + fand %f18,DC3,%f8 ! (5_1) res_c = vis_fand(res_c,DC3); + + fmuld %f26,%f38,%f26 ! (1_1) res *= xx; + bge,pn %icc,.update12 ! (6_1) if ( hx >= 0x7ff00000 ) + and %o2,2040,%o2 ! (6_1) hx &= 0x7f8; + faddd %f34,K3,%f34 ! (2_1) res += K3; +.cont12: + fmuld %f24,%f58,%f58 ! (0_1) res = dexp_hi * res; + cmp %g1,_0x00100000 ! (6_1) hx ? 0x00100000 + sub %g0,%o7,%o7 ! (6_1) iexp = -iexp; + fand %f0,DC0,%f16 ! (0_0) res = vis_fand(res,DC0); + + fmuld %f30,%f62,%f2 ! (6_2) res *= dlexp; + bl,pn %icc,.update13 ! (6_1) if ( hx < 0x00100000 ) + ldd [%l2+8],%f30 ! (0_1) dexp_lo = ((double*)addr)[1]; + fpadd32 %f44,DC2,%f18 ! (6_1) res_c = vis_fpadd32(res,DC2); +.cont13: + fmuld K6,%f60,%f62 ! (4_1) res = K6 * xx; + add %o2,8,%l4 ! (6_1) hx += 8; + st %f2,[%i0] ! (6_2) ((float*)py)[0] = ((float*)res)[0]; + fsubd %f28,%f8,%f6 ! (5_1) xx = res - res_c; + + fmuld %f34,%f36,%f28 ! (2_1) res *= xx; + add %o7,1534,%o7 ! (6_1) iexp += 0x5fe; + st %f3,[%i0+4] ! 
(6_2) ((float*)py)[1] = ((float*)res)[1]; + faddd %f32,K4,%f32 ! (3_1) res += K4; + + lda [%l6]%asi,%g1 ! (0_0) hx = *(int*)px; + sllx %o7,52,%o7 ! (6_1) iexp << 52; + and %l4,-16,%l4 ! (6_1) hx = -16; + faddd %f26,K1,%f26 ! (1_1) res += K1; + + add %i1,stridey,%i0 ! px += stridey + add %l4,TBL,%l4 ! (6_1) addr = (char*)arr + hx; + stx %o7,[%fp+tmp1] ! (6_1) dlexp = *(double*)lexp; + faddd %f58,%f30,%f8 ! (0_1) res += dexp_lo; + + fmuld %f6,%f10,%f58 ! (5_1) xx *= dtmp0; + add %l6,stridex,%l6 ! px += stridex + ldd [%l4],%f30 ! (6_1) dtmp0 = ((double*)addr)[0]; + faddd %f62,K5,%f62 ! (4_1) res += K5; + + fmuld %f32,%f40,%f34 ! (3_1) res *= xx; + sra %g1,10,%o2 ! (0_0) hx >>= 10; + ldd [%i2],%f4 ! (1_1) dexp_hi = ((double*)addr)[0]; + faddd %f28,K2,%f32 ! (2_1) res += K2; + + fmuld %f26,%f38,%f26 ! (1_1) res *= xx; + sra %g1,21,%o7 ! (0_0) iexp = hx >> 21; + lda [%l6]%asi,%f6 ! (1_0) ((float*)res)[0] = ((float*)px)[0]; + for %f16,DC1,%f28 ! (0_0) res = vis_for(res,DC1); + + fmuld %f30,%f30,%f30 ! (6_1) dtmp0 = dexp_hi * dexp_hi; + sub %g0,%o7,%o7 ! (0_0) iexp = -iexp; + lda [%l6+4]%asi,%f7 ! (1_0) ((float*)res)[1] = ((float*)px)[1]; + faddd %f8,%f24,%f24 ! (0_1) res += dexp_hi; + + fmuld %f62,%f60,%f38 ! (4_1) res *= xx; + cmp %g1,_0x7ff00000 ! (0_0) hx ? 0x7ff00000 + ldd [%fp+tmp2],%f62 ! (0_1) dlexp = *(double*)lexp; + fand %f18,DC3,%f8 ! (6_1) res_c = vis_fand(res_c,DC3); + + fmuld %f32,%f36,%f32 ! (2_1) res *= xx; + bge,pn %icc,.update14 ! (0_0) if ( hx >= 0x7ff00000 ) + and %o2,2040,%o2 ! (0_0) hx &= 0x7f8; + faddd %f34,K3,%f34 ! (3_1) res += K3; +.cont14: + fmuld %f4,%f26,%f26 ! (1_1) res = dexp_hi * res; + cmp %g1,_0x00100000 ! (0_0) hx ? 0x00100000 + add %o7,1534,%o7 ! (0_0) iexp += 0x5fe; + fand %f6,DC0,%f16 ! (1_0) res = vis_fand(res,DC0); + + fmuld %f24,%f62,%f2 ! (0_1) res *= dlexp; + bl,pn %icc,.update15 ! (0_0) if ( hx < 0x00100000 ) + ldd [%i2+8],%f24 ! (1_1) dexp_lo = ((double*)addr)[1]; + fpadd32 %f28,DC2,%f18 ! 
(0_0) res_c = vis_fpadd32(res,DC2); +.cont15: + fmuld K6,%f58,%f62 ! (5_1) res = K6 * xx; + add %o2,8,%l2 ! (0_0) hx += 8; + st %f2,[%i1] ! (0_1) ((float*)py)[0] = ((float*)res)[0]; + fsubd %f44,%f8,%f10 ! (6_1) xx = res - res_c; + + fmuld %f34,%f40,%f44 ! (3_1) res *= xx; + nop + st %f3,[%i1+4] ! (0_1) ((float*)py)[1] = ((float*)res)[1]; + faddd %f38,K4,%f38 ! (4_1) res += K4; + + lda [%l6]%asi,%g1 ! (1_0) hx = *(int*)px; + sllx %o7,52,%o7 ! (0_0) iexp << 52; + and %l2,-16,%l2 ! (0_0) hx = -16; + faddd %f32,K1,%f32 ! (2_1) res += K1; + + add %l2,TBL,%l2 ! (0_0) addr = (char*)arr + hx; + add %l6,stridex,%l6 ! px += stridex + stx %o7,[%fp+tmp2] ! (0_0) dlexp = *(double*)lexp; + faddd %f26,%f24,%f8 ! (1_1) res += dexp_lo; + + fmuld %f10,%f30,%f26 ! (6_1) xx *= dtmp0; + add %i0,stridey,%i1 ! px += stridey + ldd [%l2],%f30 ! (0_0) dtmp0 = ((double*)addr)[0]; + faddd %f62,K5,%f62 ! (5_1) res += K5; + + fmuld %f38,%f60,%f34 ! (4_1) res *= xx; + sra %g1,10,%o2 ! (1_0) hx >>= 10; + ldd [%i4],%f24 ! (2_1) dexp_hi = ((double*)addr)[0]; + faddd %f44,K2,%f38 ! (3_1) res += K2; + + fmuld %f32,%f36,%f32 ! (2_1) res *= xx; + sra %g1,21,%o7 ! (1_0) iexp = hx >> 21; + lda [%l6]%asi,%f0 ! (2_0) ((float*)res)[0] = ((float*)px)[0]; + for %f16,DC1,%f44 ! (1_0) res = vis_for(res,DC1); + + fmuld %f30,%f30,%f30 ! (0_0) dtmp0 = dexp_hi * dexp_hi; + cmp %g1,_0x7ff00000 ! (1_0) hx ? 0x7ff00000 + lda [%l6+4]%asi,%f1 ! (2_0) ((float*)res)[1] = ((float*)px)[1]; + faddd %f8,%f4,%f4 ! (1_1) res += dexp_hi; + + fmuld %f62,%f58,%f36 ! (5_1) res *= xx; + bge,pn %icc,.update16 ! (1_0) if ( hx >= 0x7ff00000 ) + ldd [%fp+tmp3],%f62 ! (1_1) dlexp = *(double*)lexp; + fand %f18,DC3,%f8 ! (0_0) res_c = vis_fand(res_c,DC3); +.cont16: + fmuld %f38,%f40,%f38 ! (3_1) res *= xx; + cmp %g1,_0x00100000 ! (1_0) hx ? 0x00100000 + and %o2,2040,%o2 ! (1_0) hx &= 0x7f8; + faddd %f34,K3,%f34 ! (4_1) res += K3; + + fmuld %f24,%f32,%f32 ! (2_1) res = dexp_hi * res; + bl,pn %icc,.update17 ! 
(1_0) if ( hx < 0x00100000 ) + sub %g0,%o7,%o7 ! (1_0) iexp = -iexp; + fand %f0,DC0,%f16 ! (2_0) res = vis_fand(res,DC0); +.cont17: + fmuld %f4,%f62,%f2 ! (1_1) res *= dlexp; + add %o7,1534,%o7 ! (1_0) iexp += 0x5fe; + ldd [%i4+8],%f4 ! (2_1) dexp_lo = ((double*)addr)[1]; + fpadd32 %f44,DC2,%f18 ! (1_0) res_c = vis_fpadd32(res,DC2); + + fmuld K6,%f26,%f62 ! (6_1) res = K6 * xx; + add %o2,8,%i2 ! (1_0) hx += 8; + st %f2,[%i0] ! (1_1) ((float*)py)[0] = ((float*)res)[0]; + fsubd %f28,%f8,%f6 ! (0_0) xx = res - res_c; + + fmuld %f34,%f60,%f28 ! (4_1) res *= xx; + nop + st %f3,[%i0+4] ! (1_1) ((float*)py)[1] = ((float*)res)[1]; + faddd %f36,K4,%f36 ! (5_1) res += K4; + + lda [%l6]%asi,%g1 ! (2_0) hx = *(int*)px; + sllx %o7,52,%o7 ! (1_0) iexp << 52; + and %i2,-16,%i2 ! (1_0) hx = -16; + faddd %f38,K1,%f38 ! (3_1) res += K1; + + add %i1,stridey,%i0 ! px += stridey + add %i2,TBL,%i2 ! (1_0) addr = (char*)arr + hx; + stx %o7,[%fp+tmp3] ! (1_0) dlexp = *(double*)lexp; + faddd %f32,%f4,%f8 ! (2_1) res += dexp_lo; + + fmuld %f6,%f30,%f32 ! (0_0) xx *= dtmp0; + add %l6,stridex,%l6 ! px += stridex + ldd [%i2],%f30 ! (1_0) dtmp0 = ((double*)addr)[0]; + faddd %f62,K5,%f62 ! (6_1) res += K5; + + fmuld %f36,%f58,%f34 ! (5_1) res *= xx; + sra %g1,10,%o2 ! (2_0) hx >>= 10; + ldd [%i5],%f4 ! (3_1) dexp_hi = ((double*)addr)[0]; + faddd %f28,K2,%f36 ! (4_1) res += K2; + + fmuld %f38,%f40,%f38 ! (3_1) res *= xx; + sra %g1,21,%o7 ! (2_0) iexp = hx >> 21; + lda [%l6]%asi,%f6 ! (3_0) ((float*)res)[0] = ((float*)px)[0]; + for %f16,DC1,%f28 ! (2_0) res = vis_for(res,DC1); + + fmuld %f30,%f30,%f30 ! (1_0) dtmp0 = dexp_hi * dexp_hi; + cmp %g1,_0x7ff00000 ! (2_0) hx ? 0x7ff00000 + lda [%l6+4]%asi,%f7 ! (3_0) ((float*)res)[1] = ((float*)px)[1]; + faddd %f8,%f24,%f24 ! (2_1) res += dexp_hi; + + fmuld %f62,%f26,%f40 ! (6_1) res *= xx; + bge,pn %icc,.update18 ! (2_0) if ( hx >= 0x7ff00000 ) + ldd [%fp+tmp4],%f62 ! (2_1) dlexp = *(double*)lexp; + fand %f18,DC3,%f8 ! 
(1_0) res_c = vis_fand(res_c,DC3); +.cont18: + fmuld %f36,%f60,%f36 ! (4_1) res *= xx; + cmp %g1,_0x00100000 ! (2_0) hx ? 0x00100000 + and %o2,2040,%o2 ! (2_0) hx &= 0x7f8; + faddd %f34,K3,%f34 ! (5_1) res += K3; + + fmuld %f4,%f38,%f38 ! (3_1) res = dexp_hi * res; + bl,pn %icc,.update19 ! (2_0) if ( hx < 0x00100000 ) + sub %g0,%o7,%o7 ! (2_0) iexp = -iexp; + fand %f6,DC0,%f16 ! (3_0) res = vis_fand(res,DC0); +.cont19: + fmuld %f24,%f62,%f2 ! (2_1) res *= dlexp; + add %o7,1534,%o7 ! (2_0) iexp += 0x5fe; + ldd [%i5+8],%f24 ! (3_1) dexp_lo = ((double*)addr)[1]; + fpadd32 %f28,DC2,%f18 ! (2_0) res_c = vis_fpadd32(res,DC2); + + fmuld K6,%f32,%f62 ! (0_0) res = K6 * xx; + add %o2,8,%i4 ! (2_0) hx += 8; + st %f2,[%i1] ! (2_1) ((float*)py)[0] = ((float*)res)[0]; + fsubd %f44,%f8,%f10 ! (1_0) xx = res - res_c; + + fmuld %f34,%f58,%f44 ! (5_1) res *= xx; + nop + st %f3,[%i1+4] ! (2_1) ((float*)py)[1] = ((float*)res)[1]; + faddd %f40,K4,%f40 ! (6_1) res += K4; + + lda [%l6]%asi,%g1 ! (3_0) hx = *(int*)px; + sllx %o7,52,%o7 ! (2_0) iexp << 52; + and %i4,-16,%i4 ! (2_0) hx = -16; + faddd %f36,K1,%f36 ! (4_1) res += K1; + + add %l6,stridex,%l6 ! px += stridex + add %i4,TBL,%i4 ! (2_0) addr = (char*)arr + hx; + stx %o7,[%fp+tmp4] ! (2_0) dlexp = *(double*)lexp; + faddd %f38,%f24,%f8 ! (3_1) res += dexp_lo; + + fmuld %f10,%f30,%f38 ! (1_0) xx *= dtmp0; + add %i0,stridey,%i1 ! px += stridey + ldd [%i4],%f24 ! (2_0) dtmp0 = ((double*)addr)[0]; + faddd %f62,K5,%f62 ! (0_0) res += K5; + + fmuld %f40,%f26,%f34 ! (6_1) res *= xx; + sra %g1,10,%o2 ! (3_0) hx >>= 10; + ldd [%l1],%f30 ! (4_1) dexp_hi = ((double*)addr)[0]; + faddd %f44,K2,%f40 ! (5_1) res += K2; + + fmuld %f36,%f60,%f36 ! (4_1) res *= xx; + sra %g1,21,%o7 ! (3_0) iexp = hx >> 21; + lda [%l6]%asi,%f0 ! (4_0) ((float*)res)[0] = ((float*)px)[0]; + for %f16,DC1,%f44 ! (3_0) res = vis_for(res,DC1); + + fmuld %f24,%f24,%f24 ! (2_0) dtmp0 = dexp_hi * dexp_hi; + cmp %g1,_0x7ff00000 ! (3_0) hx ? 0x7ff00000 + lda [%l6+4]%asi,%f1 ! 
(4_0) ((float*)res)[1] = ((float*)px)[1]; + faddd %f8,%f4,%f8 ! (3_1) res += dexp_hi; + + fmuld %f62,%f32,%f60 ! (0_0) res *= xx; + bge,pn %icc,.update20 ! (3_0) if ( hx >= 0x7ff00000 ) + ldd [%fp+tmp5],%f62 ! (3_1) dlexp = *(double*)lexp; + fand %f18,DC3,%f4 ! (2_0) res_c = vis_fand(res_c,DC3); +.cont20: + fmuld %f40,%f58,%f40 ! (5_1) res *= xx; + cmp %g1,_0x00100000 ! (3_0) hx ? 0x00100000 + and %o2,2040,%o2 ! (3_0) hx &= 0x7f8; + faddd %f34,K3,%f10 ! (6_1) res += K3; + + fmuld %f30,%f36,%f36 ! (4_1) res = dexp_hi * res; + bl,pn %icc,.update21 ! (3_0) if ( hx < 0x00100000 ) + sub %g0,%o7,%o7 ! (3_0) iexp = -iexp; + fand %f0,DC0,%f16 ! (4_0) res = vis_fand(res,DC0); +.cont21: + fmuld %f8,%f62,%f8 ! (3_1) res *= dlexp; + add %o7,1534,%o7 ! (3_0) iexp += 0x5fe; + ldd [%l1+8],%f34 ! (4_1) dexp_lo = ((double*)addr)[1]; + fpadd32 %f44,DC2,%f18 ! (3_0) res_c = vis_fpadd32(res,DC2); + + fmuld K6,%f38,%f62 ! (1_0) res = K6 * xx; + add %o2,8,%i5 ! (3_0) hx += 8; + st %f8,[%i0] ! (3_1) ((float*)py)[0] = ((float*)res)[0]; + fsubd %f28,%f4,%f28 ! (2_0) xx = res - res_c; + + fmuld %f10,%f26,%f4 ! (6_1) res *= xx; + nop + st %f9,[%i0+4] ! (3_1) ((float*)py)[1] = ((float*)res)[1]; + faddd %f60,K4,%f60 ! (0_0) res += K4; + + lda [%l6]%asi,%g1 ! (4_0) hx = *(int*)px; + sllx %o7,52,%o7 ! (3_0) iexp << 52; + and %i5,-16,%i5 ! (3_0) hx = -16; + faddd %f40,K1,%f40 ! (5_1) res += K1; + + add %l6,stridex,%i0 ! px += stridex + add %i5,TBL,%i5 ! (3_0) addr = (char*)arr + hx; + stx %o7,[%fp+tmp5] ! (3_0) dlexp = *(double*)lexp; + faddd %f36,%f34,%f8 ! (4_1) res += dexp_lo; + + fmuld %f28,%f24,%f36 ! (2_0) xx *= dtmp0; + add %i1,stridey,%l6 ! px += stridey + ldd [%i5],%f28 ! (3_0) dtmp0 = ((double*)addr)[0]; + faddd %f62,K5,%f62 ! (1_0) res += K5; + + faddd %f4,K2,%f10 ! (6_1) res += K2; + sra %g1,10,%o2 ! (4_0) hx >>= 10; + nop + fmuld %f60,%f32,%f34 ! (0_0) res *= xx; + + fmuld %f40,%f58,%f40 ! (5_1) res *= xx; + sra %g1,21,%o7 ! (4_0) iexp = hx >> 21; + lda [%i0]%asi,%f6 ! 
(5_0) ((float*)res)[0] = ((float*)px)[0]; + for %f16,DC1,%f24 ! (4_0) res = vis_for(res,DC1); + + fmuld %f28,%f28,%f28 ! (3_0) dtmp0 = dexp_hi * dexp_hi; + cmp %g1,_0x7ff00000 ! (4_0) hx ? 0x7ff00000 + lda [%i0+4]%asi,%f7 ! (5_0) ((float*)res)[1] = ((float*)px)[1]; + faddd %f8,%f30,%f30 ! (4_1) res += dexp_hi; + + fand %f18,DC3,%f8 ! (3_0) res_c = vis_fand(res_c,DC3); + bge,pn %icc,.update22 ! (4_0) if ( hx >= 0x7ff00000 ) + ldd [%fp+tmp6],%f18 ! (4_1) dlexp = *(double*)lexp; + fmuld %f62,%f38,%f62 ! (1_0) res *= xx; +.cont22: + fmuld %f10,%f26,%f58 ! (6_1) res *= xx; + cmp %g1,_0x00100000 ! (4_0) hx ? 0x00100000 + and %o2,2040,%o2 ! (4_0) hx &= 0x7f8; + faddd %f34,K3,%f60 ! (0_0) res += K3; + + fmuld %f22,%f40,%f40 ! (5_1) res = dexp_hi * res; + bl,pn %icc,.update23 ! (4_0) if ( hx < 0x00100000 ) + sub %g0,%o7,%o7 ! (4_0) iexp = -iexp; + fand %f6,DC0,%f16 ! (5_0) res = vis_fand(res,DC0); +.cont23: + fmuld %f30,%f18,%f6 ! (4_1) res *= dlexp; + add %o7,1534,%o7 ! (4_0) iexp += 0x5fe; + ldd [%i3+8],%f34 ! (5_1) dexp_lo = ((double*)addr)[1]; + fpadd32 %f24,DC2,%f18 ! (4_0) res_c = vis_fpadd32(res,DC2); + + fmuld K6,%f36,%f30 ! (2_0) res = K6 * xx; + add %o2,8,%l1 ! (4_0) hx += 8; + st %f6,[%i1] ! (4_1) ((float*)py)[0] = ((float*)res)[0]; + fsubd %f44,%f8,%f44 ! (3_0) xx = res - res_c; + + fmuld %f60,%f32,%f60 ! (0_0) res *= xx; + sllx %o7,52,%o7 ! (4_0) iexp << 52; + st %f7,[%i1+4] ! (4_1) ((float*)py)[1] = ((float*)res)[1]; + faddd %f62,K4,%f6 ! (1_0) res += K4; + + lda [%i0]%asi,%g1 ! (5_0) hx = *(int*)px; + add %i0,stridex,%i1 ! px += stridex + and %l1,-16,%l1 ! (4_0) hx = -16; + faddd %f58,K1,%f58 ! (6_1) res += K1; + + add %l1,TBL,%l1 ! (4_0) addr = (char*)arr + hx; + add %l6,stridey,%i0 ! px += stridey + stx %o7,[%fp+tmp6] ! (4_0) dlexp = *(double*)lexp; + faddd %f40,%f34,%f8 ! (5_1) res += dexp_lo; + + fmuld %f44,%f28,%f40 ! (3_0) xx *= dtmp0; + nop + ldd [%l1],%f44 ! (4_0) dtmp0 = ((double*)addr)[0]; + faddd %f30,K5,%f62 ! 
(2_0) res += K5; + + fmuld %f6,%f38,%f34 ! (1_0) res *= xx; + sra %g1,21,%o7 ! (5_0) iexp = hx >> 21; + ldd [%l4],%f30 ! (6_1) dexp_hi = ((double*)addr)[0]; + faddd %f60,K2,%f60 ! (0_0) res += K2; + + for %f16,DC1,%f28 ! (5_0) res = vis_for(res,DC1); + sub %g0,%o7,%o7 ! (5_0) iexp = -iexp; + lda [%i1]%asi,%f6 ! (6_0) ((float*)res)[0] = ((float*)px)[0]; + fmuld %f58,%f26,%f26 ! (6_1) res *= xx; + + fmuld %f44,%f44,%f44 ! (4_0) dtmp0 = dexp_hi * dexp_hi; + cmp %g1,_0x7ff00000 ! (5_0) hx ? 0x7ff00000 + lda [%i1+4]%asi,%f7 ! (6_0) ((float*)res)[1] = ((float*)px)[1]; + faddd %f8,%f22,%f22 ! (5_1) res += dexp_hi; + + fand %f18,DC3,%f8 ! (4_0) res_c = vis_fand(res_c,DC3); + bge,pn %icc,.update24 ! (5_0) if ( hx >= 0x7ff00000 ) + ldd [%fp+tmp0],%f18 ! (5_1) dlexp = *(double*)lexp; + fmuld %f62,%f36,%f62 ! (2_0) res *= xx; +.cont24: + fmuld %f60,%f32,%f58 ! (0_0) res *= xx; + sra %g1,10,%o2 ! (5_0) hx >>= 10; + cmp %g1,_0x00100000 ! (5_0) hx ? 0x00100000 + faddd %f34,K3,%f34 ! (1_0) res += K3; + + fmuld %f30,%f26,%f26 ! (6_1) res = dexp_hi * res; + bl,pn %icc,.update25 ! (5_0) if ( hx < 0x00100000 ) + and %o2,2040,%o2 ! (5_0) hx &= 0x7f8; + fand %f6,DC0,%f16 ! (6_0) res = vis_fand(res,DC0); +.cont25: + fmuld %f22,%f18,%f2 ! (5_1) res *= dlexp; + subcc counter,7,counter ! counter -= 7; + ldd [%l4+8],%f60 ! (6_1) dexp_lo = ((double*)addr)[1]; + fpadd32 %f28,DC2,%f18 ! (5_0) res_c = vis_fpadd32(res,DC2); + + fmuld K6,%f40,%f22 ! (3_0) res = K6 * xx; + add %o2,8,%i3 ! (5_0) hx += 8; + st %f2,[%l6] ! (5_1) ((float*)py)[0] = ((float*)res)[0]; + fsubd %f24,%f8,%f10 ! (4_0) xx = res - res_c; + + fmuld %f34,%f38,%f24 ! (1_0) res *= xx; + st %f3,[%l6+4] ! (5_1) ((float*)py)[1] = ((float*)res)[1]; + bpos,pt %icc,.main_loop + faddd %f62,K4,%f34 ! (2_0) res += K4; + + add counter,7,counter +.tail: + add %o7,1534,%o7 ! (5_0) iexp += 0x5fe; + subcc counter,1,counter + bneg,a .begin + mov %i0,%o4 + + faddd %f58,K1,%f58 ! (0_1) res += K1; + + faddd %f26,%f60,%f8 ! 
(6_2) res += dexp_lo; + + faddd %f22,K5,%f62 ! (3_1) res += K5; + fmuld %f10,%f44,%f60 ! (4_1) xx *= dtmp0; + + faddd %f24,K2,%f26 ! (1_1) res += K2; + add %i1,stridex,%l6 ! px += stridex + ldd [%l2],%f24 ! (0_1) dexp_hi = ((double*)addr)[0]; + fmuld %f34,%f36,%f34 ! (2_1) res *= xx; + + fmuld %f58,%f32,%f58 ! (0_1) res *= xx; + + add %i0,stridey,%i1 ! px += stridey + faddd %f8,%f30,%f30 ! (6_2) res += dexp_hi; + + fmuld %f62,%f40,%f32 ! (3_1) res *= xx; + ldd [%fp+tmp1],%f62 ! (6_2) dlexp = *(double*)lexp; + + fmuld %f26,%f38,%f26 ! (1_1) res *= xx; + faddd %f34,K3,%f34 ! (2_1) res += K3; + + fmuld %f24,%f58,%f58 ! (0_1) res = dexp_hi * res; + + fmuld %f30,%f62,%f2 ! (6_2) res *= dlexp; + ldd [%l2+8],%f30 ! (0_1) dexp_lo = ((double*)addr)[1]; + + fmuld K6,%f60,%f62 ! (4_1) res = K6 * xx; + st %f2,[%i0] ! (6_2) ((float*)py)[0] = ((float*)res)[0]; + + fmuld %f34,%f36,%f28 ! (2_1) res *= xx; + st %f3,[%i0+4] ! (6_2) ((float*)py)[1] = ((float*)res)[1]; + faddd %f32,K4,%f32 ! (3_1) res += K4; + + subcc counter,1,counter + bneg,a .begin + mov %i1,%o4 + + faddd %f26,K1,%f26 ! (1_1) res += K1; + + faddd %f58,%f30,%f8 ! (0_1) res += dexp_lo; + + add %l6,stridex,%l6 ! px += stridex + faddd %f62,K5,%f62 ! (4_1) res += K5; + + fmuld %f32,%f40,%f34 ! (3_1) res *= xx; + add %i1,stridey,%i0 ! px += stridey + ldd [%i2],%f22 ! (1_1) dexp_hi = ((double*)addr)[0]; + faddd %f28,K2,%f32 ! (2_1) res += K2; + + fmuld %f26,%f38,%f26 ! (1_1) res *= xx; + + faddd %f8,%f24,%f24 ! (0_1) res += dexp_hi; + + fmuld %f62,%f60,%f38 ! (4_1) res *= xx; + ldd [%fp+tmp2],%f62 ! (0_1) dlexp = *(double*)lexp; + + fmuld %f32,%f36,%f32 ! (2_1) res *= xx; + faddd %f34,K3,%f34 ! (3_1) res += K3; + + fmuld %f22,%f26,%f26 ! (1_1) res = dexp_hi * res; + + fmuld %f24,%f62,%f2 ! (0_1) res *= dlexp; + ldd [%i2+8],%f24 ! (1_1) dexp_lo = ((double*)addr)[1]; + + st %f2,[%i1] ! (0_1) ((float*)py)[0] = ((float*)res)[0]; + + fmuld %f34,%f40,%f44 ! (3_1) res *= xx; + st %f3,[%i1+4] ! 
(0_1) ((float*)py)[1] = ((float*)res)[1]; + faddd %f38,K4,%f38 ! (4_1) res += K4; + + subcc counter,1,counter + bneg,a .begin + mov %i0,%o4 + + faddd %f32,K1,%f32 ! (2_1) res += K1; + + add %l6,stridex,%l6 ! px += stridex + faddd %f26,%f24,%f8 ! (1_1) res += dexp_lo; + + add %i0,stridey,%i1 ! px += stridey + + fmuld %f38,%f60,%f34 ! (4_1) res *= xx; + ldd [%i4],%f24 ! (2_1) dexp_hi = ((double*)addr)[0]; + faddd %f44,K2,%f38 ! (3_1) res += K2; + + fmuld %f32,%f36,%f32 ! (2_1) res *= xx; + + faddd %f8,%f22,%f22 ! (1_1) res += dexp_hi; + + ldd [%fp+tmp3],%f62 ! (1_1) dlexp = *(double*)lexp; + + fmuld %f38,%f40,%f38 ! (3_1) res *= xx; + faddd %f34,K3,%f34 ! (4_1) res += K3; + + fmuld %f24,%f32,%f32 ! (2_1) res = dexp_hi * res; + + fmuld %f22,%f62,%f2 ! (1_1) res *= dlexp; + ldd [%i4+8],%f22 ! (2_1) dexp_lo = ((double*)addr)[1]; + + st %f2,[%i0] ! (1_1) ((float*)py)[0] = ((float*)res)[0]; + + fmuld %f34,%f60,%f28 ! (4_1) res *= xx; + st %f3,[%i0+4] ! (1_1) ((float*)py)[1] = ((float*)res)[1]; + + subcc counter,1,counter + bneg,a .begin + mov %i1,%o4 + + faddd %f38,K1,%f38 ! (3_1) res += K1; + + faddd %f32,%f22,%f8 ! (2_1) res += dexp_lo; + + add %l6,stridex,%l6 ! px += stridex + + add %i1,stridey,%i0 ! px += stridey + ldd [%i5],%f22 ! (3_1) dexp_hi = ((double*)addr)[0]; + faddd %f28,K2,%f36 ! (4_1) res += K2; + + fmuld %f38,%f40,%f38 ! (3_1) res *= xx; + + faddd %f8,%f24,%f24 ! (2_1) res += dexp_hi; + + ldd [%fp+tmp4],%f62 ! (2_1) dlexp = *(double*)lexp; + + fmuld %f36,%f60,%f36 ! (4_1) res *= xx; + + fmuld %f22,%f38,%f38 ! (3_1) res = dexp_hi * res; + + fmuld %f24,%f62,%f2 ! (2_1) res *= dlexp; + ldd [%i5+8],%f24 ! (3_1) dexp_lo = ((double*)addr)[1]; + + st %f2,[%i1] ! (2_1) ((float*)py)[0] = ((float*)res)[0]; + + st %f3,[%i1+4] ! (2_1) ((float*)py)[1] = ((float*)res)[1]; + + subcc counter,1,counter + bneg,a .begin + mov %i0,%o4 + + faddd %f36,K1,%f36 ! (4_1) res += K1; + + faddd %f38,%f24,%f8 ! (3_1) res += dexp_lo; + + add %i0,stridey,%i1 ! 
px += stridey + + add %l6,stridex,%l6 ! px += stridex + ldd [%l1],%f30 ! (4_1) dexp_hi = ((double*)addr)[0]; + + fmuld %f36,%f60,%f36 ! (4_1) res *= xx; + + faddd %f8,%f22,%f8 ! (3_1) res += dexp_hi; + + ldd [%fp+tmp5],%f62 ! (3_1) dlexp = *(double*)lexp; + + fmuld %f30,%f36,%f36 ! (4_1) res = dexp_hi * res; + + fmuld %f8,%f62,%f8 ! (3_1) res *= dlexp; + ldd [%l1+8],%f34 ! (4_1) dexp_lo = ((double*)addr)[1]; + + st %f8,[%i0] ! (3_1) ((float*)py)[0] = ((float*)res)[0]; + + st %f9,[%i0+4] ! (3_1) ((float*)py)[1] = ((float*)res)[1]; + + subcc counter,1,counter + bneg,a .begin + mov %i1,%o4 + + faddd %f36,%f34,%f8 ! (4_1) res += dexp_lo; + + add %l6,stridex,%i0 ! px += stridex + + add %i1,stridey,%l6 ! px += stridey + + faddd %f8,%f30,%f30 ! (4_1) res += dexp_hi; + + ldd [%fp+tmp6],%f18 ! (4_1) dlexp = *(double*)lexp; + + fmuld %f30,%f18,%f6 ! (4_1) res *= dlexp; + + st %f6,[%i1] ! (4_1) ((float*)py)[0] = ((float*)res)[0]; + + st %f7,[%i1+4] ! (4_1) ((float*)py)[1] = ((float*)res)[1]; + + ba .begin + add %i1,stridey,%o4 + + .align 16 +.spec0: + fdivd DONE,%f0,%f0 ! res = DONE / res; + add %i1,stridex,%i1 ! px += stridex + st %f0,[%o4] ! ((float*)py)[0] = ((float*)&res)[0]; + st %f1,[%o4+4] ! ((float*)py)[1] = ((float*)&res)[1]; + add %o4,stridey,%o4 ! py += stridey + ba .begin1 + sub counter,1,counter + + .align 16 +.spec1: + orcc %i2,%l4,%g0 + bz,a 2f + fdivd DONE,%f0,%f0 ! res = DONE / res; + + cmp %g1,0 + bl,a 2f + fsqrtd %f0,%f0 ! res = sqrt(res); + + cmp %g1,%i4 + bge,a 1f + ldd [%o3+0x50],%f18 + + fxtod %f0,%f0 ! res = *(long long*)&res; + st %f0,[%fp+tmp0] + + fand %f0,DC0,%f16 ! (6_0) res = vis_fand(res,DC0); + ld [%fp+tmp0],%g1 + + sra %g1,21,%o7 ! (6_1) iexp = hx >> 21; + for %f16,DC1,%f44 ! (6_1) res = vis_for(res,DC1); + + sra %g1,10,%o2 ! (6_1) hx >>= 10; + sub %o7,537,%o7 + + and %o2,2040,%o2 ! (6_1) hx &= 0x7f8; + ba .cont_spec + sub %g0,%o7,%o7 ! (6_1) iexp = -iexp; + +1: + fand %f0,%f18,%f0 ! 
res = vis_fand(res,DC4); + + ldd [%o3+0x58],%f28 + fxtod %f0,%f0 ! res = *(long long*)&res; + + faddd %f0,%f28,%f0 ! res += D2ON51; + st %f0,[%fp+tmp0] + + fand %f0,DC0,%f16 ! (6_0) res = vis_fand(res,DC0); + ld [%fp+tmp0],%g1 + + sra %g1,21,%o7 ! (6_1) iexp = hx >> 21; + for %f16,DC1,%f44 ! (6_1) res = vis_for(res,DC1); + + sra %g1,10,%o2 ! (6_1) hx >>= 10; + sub %o7,537,%o7 + + and %o2,2040,%o2 ! (6_1) hx &= 0x7f8; + ba .cont_spec + sub %g0,%o7,%o7 ! (6_1) iexp = -iexp; + +2: + add %i1,stridex,%i1 ! px += stridex + st %f0,[%o4] ! ((float*)py)[0] = ((float*)&res)[0]; + st %f1,[%o4+4] ! ((float*)py)[1] = ((float*)&res)[1]; + add %o4,stridey,%o4 ! py += stridey + ba .begin1 + sub counter,1,counter + + .align 16 +.update0: + cmp counter,1 + ble .cont0 + nop + + sub %l6,stridex,tmp_px + sub counter,1,tmp_counter + + ba .cont0 + mov 1,counter + + .align 16 +.update1: + cmp counter,1 + ble .cont1 + sub %l6,stridex,%i1 + + ld [%i1+4],%i2 + cmp %g1,0 + bl 1f + + orcc %g1,%i2,%g0 + bz 1f + sethi %hi(0x00080000),%i3 + + cmp %g1,%i3 + bge,a 2f + ldd [%o3+0x50],%f18 + + fxtod %f8,%f8 ! res = *(long long*)&res; + st %f8,[%fp+tmp7] + + fand %f8,DC0,%f16 ! (0_0) res = vis_fand(res,DC0); + ld [%fp+tmp7],%g1 + + sra %g1,21,%o7 ! (0_0) iexp = hx >> 21; + sra %g1,10,%o2 ! (0_0) hx >>= 10; + for %f16,DC1,%f28 ! (0_0) res = vis_for(res,DC1); + + sub %o7,537,%o7 + + sub %g0,%o7,%o7 ! (0_0) iexp = -iexp; + + and %o2,2040,%o2 ! (0_0) hx &= 0x7f8; + ba .cont1 + add %o7,1534,%o7 ! (0_0) iexp += 0x5fe; +2: + fand %f8,%f18,%f8 + fxtod %f8,%f8 ! res = *(long long*)&res; + ldd [%o3+0x58],%f18 + faddd %f8,%f18,%f8 + st %f8,[%fp+tmp7] + + fand %f8,DC0,%f16 ! (0_0) res = vis_fand(res,DC0); + ld [%fp+tmp7],%g1 + + sra %g1,21,%o7 ! (0_0) iexp = hx >> 21; + sra %g1,10,%o2 ! (0_0) hx >>= 10; + for %f16,DC1,%f28 ! (0_0) res = vis_for(res,DC1); + + sub %o7,537,%o7 + + sub %g0,%o7,%o7 ! (0_0) iexp = -iexp; + + and %o2,2040,%o2 ! (0_0) hx &= 0x7f8; + ba .cont1 + add %o7,1534,%o7 ! 
(0_0) iexp += 0x5fe; +1: + sub %l6,stridex,tmp_px + sub counter,1,tmp_counter + + ba .cont1 + mov 1,counter + + .align 16 +.update2: + cmp counter,2 + ble .cont2 + nop + + sub %l6,stridex,tmp_px + sub counter,2,tmp_counter + + ba .cont2 + mov 2,counter + + .align 16 +.update3: + cmp counter,2 + ble .cont3 + sub %l6,stridex,%i1 + + ld [%i1+4],%i2 + cmp %g1,0 + bl 1f + + orcc %g1,%i2,%g0 + bz 1f + sethi %hi(0x00080000),%i3 + + cmp %g1,%i3 + bge,a 2f + ldd [%o3+0x50],%f18 + + fxtod %f0,%f0 ! res = *(long long*)&res; + st %f0,[%fp+tmp7] + + fand %f0,DC0,%f16 ! (1_0) res = vis_fand(res,DC0); + ld [%fp+tmp7],%g1 + + sra %g1,21,%o7 ! (1_0) iexp = hx >> 21; + for %f16,DC1,%f44 ! (1_0) res = vis_for(res,DC1); + + sra %g1,10,%o2 ! (1_0) hx >>= 10; + sub %o7,537,%o7 + ba .cont3 + and %o2,2040,%o2 ! (1_0) hx &= 0x7f8; +2: + fand %f0,%f18,%f0 + fxtod %f0,%f0 ! res = *(long long*)&res; + ldd [%o3+0x58],%f18 + faddd %f0,%f18,%f0 + st %f0,[%fp+tmp7] + + fand %f0,DC0,%f16 ! (1_0) res = vis_fand(res,DC0); + ld [%fp+tmp7],%g1 + + sra %g1,21,%o7 ! (1_0) iexp = hx >> 21; + for %f16,DC1,%f44 ! (1_0) res = vis_for(res,DC1); + + sra %g1,10,%o2 ! (1_0) hx >>= 10; + sub %o7,537,%o7 + ba .cont3 + and %o2,2040,%o2 ! (1_0) hx &= 0x7f8; +1: + sub %l6,stridex,tmp_px + sub counter,2,tmp_counter + + ba .cont3 + mov 2,counter + + .align 16 +.update4: + cmp counter,3 + ble .cont4 + nop + + sub %l6,stridex,tmp_px + sub counter,3,tmp_counter + + ba .cont4 + mov 3,counter + + .align 16 +.update5: + cmp counter,3 + ble .cont5 + sub %l6,stridex,%i1 + + ld [%i1+4],%i3 + cmp %g1,0 + bl 1f + + orcc %g1,%i3,%g0 + bz 1f + sethi %hi(0x00080000),%i4 + + cmp %g1,%i4 + bge,a 2f + ldd [%o3+0x50],%f18 + + fxtod %f6,%f6 ! res = *(long long*)&res; + st %f6,[%fp+tmp7] + + fand %f6,DC0,%f16 ! (2_0) res = vis_fand(res,DC0); + ld [%fp+tmp7],%g1 + + sra %g1,21,%o7 ! (2_0) iexp = hx >> 21; + sra %g1,10,%o2 ! (2_0) hx >>= 10; + + sub %o7,537,%o7 + and %o2,2040,%o2 ! (2_0) hx &= 0x7f8; + ba .cont5 + for %f16,DC1,%f28 ! 
(2_0) res = vis_for(res,DC1); +2: + fand %f6,%f18,%f6 + fxtod %f6,%f6 ! res = *(long long*)&res; + ldd [%o3+0x58],%f18 + faddd %f6,%f18,%f6 + st %f6,[%fp+tmp7] + + fand %f6,DC0,%f16 ! (2_0) res = vis_fand(res,DC0); + ld [%fp+tmp7],%g1 + + sra %g1,21,%o7 ! (2_0) iexp = hx >> 21; + sra %g1,10,%o2 ! (2_0) hx >>= 10; + + sub %o7,537,%o7 + and %o2,2040,%o2 ! (2_0) hx &= 0x7f8; + ba .cont5 + for %f16,DC1,%f28 ! (2_0) res = vis_for(res,DC1); +1: + sub %l6,stridex,tmp_px + sub counter,3,tmp_counter + + ba .cont5 + mov 3,counter + + .align 16 +.update6: + cmp counter,4 + ble .cont6 + nop + + sub %l6,stridex,tmp_px + sub counter,4,tmp_counter + + ba .cont6 + mov 4,counter + + .align 16 +.update7: + sub %l6,stridex,%i1 + cmp counter,4 + ble .cont7 + faddd %f34,K3,%f6 ! (6_1) res += K3; + + ld [%i1+4],%i3 + cmp %g1,0 + bl 1f + + orcc %g1,%i3,%g0 + bz 1f + sethi %hi(0x00080000),%i5 + + cmp %g1,%i5 + bge,a 2f + ldd [%o3+0x50],%f18 + + fxtod %f0,%f0 ! res = *(long long*)&res; + st %f0,[%fp+tmp7] + + fand %f0,DC0,%f16 ! (3_0) res = vis_fand(res,DC0); + ld [%fp+tmp7],%g1 + + sra %g1,21,%o7 ! (3_0) iexp = hx >> 21; + sra %g1,10,%o2 ! (3_0) hx >>= 10; + + sub %o7,537,%o7 + and %o2,2040,%o2 ! (3_0) hx &= 0x7f8; + ba .cont7 + for %f16,DC1,%f44 ! (3_0) res = vis_for(res,DC1); +2: + fand %f0,%f18,%f0 + fxtod %f0,%f0 ! res = *(long long*)&res; + ldd [%o3+0x58],%f18 + faddd %f0,%f18,%f0 + st %f0,[%fp+tmp7] + + fand %f0,DC0,%f16 ! (3_0) res = vis_fand(res,DC0); + ld [%fp+tmp7],%g1 + + sra %g1,21,%o7 ! (3_0) iexp = hx >> 21; + sra %g1,10,%o2 ! (3_0) hx >>= 10; + + sub %o7,537,%o7 + and %o2,2040,%o2 ! (3_0) hx &= 0x7f8; + ba .cont7 + for %f16,DC1,%f44 ! 
(3_0) res = vis_for(res,DC1); +1: + sub %l6,stridex,tmp_px + sub counter,4,tmp_counter + + ba .cont7 + mov 4,counter + + .align 16 +.update8: + cmp counter,5 + ble .cont8 + nop + + mov %l6,tmp_px + sub counter,5,tmp_counter + + ba .cont8 + mov 5,counter + + .align 16 +.update9: + ld [%l6+4],%i3 + cmp counter,5 + ble .cont9 + fand %f0,DC0,%f16 ! (5_0) res = vis_fand(res,DC0); + + cmp %g1,0 + bl 1f + + orcc %g1,%i3,%g0 + bz 1f + sethi %hi(0x00080000),%i1 + + cmp %g1,%i1 + bge,a 2f + ldd [%o3+0x50],%f18 + + fxtod %f8,%f8 ! res = *(long long*)&res; + st %f8,[%fp+tmp7] + + fand %f8,DC0,%f24 ! (4_0) res = vis_fand(res,DC0); + ld [%fp+tmp7],%g1 + + sra %g1,21,%o7 ! (4_0) iexp = hx >> 21; + sra %g1,10,%o2 ! (4_0) hx >>= 10; + + sub %o7,537,%o7 + + and %o2,2040,%o2 ! (4_0) hx &= 0x7f8; + sub %g0,%o7,%o7 ! (4_0) iexp = -iexp; + ba .cont9 + for %f24,DC1,%f24 ! (4_0) res = vis_for(res,DC1); +2: + fand %f8,%f18,%f8 + fxtod %f8,%f8 ! res = *(long long*)&res; + ldd [%o3+0x58],%f18 + faddd %f8,%f18,%f8 + st %f8,[%fp+tmp7] + + fand %f8,DC0,%f24 ! (4_0) res = vis_fand(res,DC0); + ld [%fp+tmp7],%g1 + + sra %g1,21,%o7 ! (4_0) iexp = hx >> 21; + sra %g1,10,%o2 ! (4_0) hx >>= 10; + + sub %o7,537,%o7 + + and %o2,2040,%o2 ! (4_0) hx &= 0x7f8; + sub %g0,%o7,%o7 ! (4_0) iexp = -iexp; + ba .cont9 + for %f24,DC1,%f24 ! (4_0) res = vis_for(res,DC1); +1: + mov %l6,tmp_px + sub counter,5,tmp_counter + + ba .cont9 + mov 5,counter + + .align 16 +.update10: + cmp counter,6 + ble .cont10 + nop + + mov %i0,tmp_px + sub counter,6,tmp_counter + + ba .cont10 + mov 6,counter + + .align 16 +.update11: + ld [%i0+4],%i3 + cmp counter,6 + ble .cont11 + fand %f6,DC0,%f16 ! (6_0) res = vis_fand(res,DC0); + + cmp %g1,0 + bl 1f + + orcc %g1,%i3,%g0 + bz 1f + sethi %hi(0x00080000),%i3 + + cmp %g1,%i3 + bge,a 2f + ldd [%o3+0x50],%f18 + + fxtod %f0,%f0 ! res = *(long long*)&res; + st %f0,[%fp+tmp7] + + fand %f0,DC0,%f28 ! (5_0) res = vis_fand(res,DC0); + ld [%fp+tmp7],%g1 + + sra %g1,21,%o7 ! 
(5_0) iexp = hx >> 21; + sra %g1,10,%o2 ! (5_0) hx >>= 10; + + sub %o7,537,%o7 + + sub %g0,%o7,%o7 ! (5_0) iexp = -iexp; + + and %o2,2040,%o2 ! (5_0) hx &= 0x7f8; + ba .cont11 + for %f28,DC1,%f28 ! (5_0) res = vis_for(res,DC1); +2: + fand %f0,%f18,%f0 + fxtod %f0,%f0 ! res = *(long long*)&res; + ldd [%o3+0x58],%f18 + faddd %f0,%f18,%f0 + st %f0,[%fp+tmp7] + + fand %f0,DC0,%f28 ! (5_0) res = vis_fand(res,DC0); + ld [%fp+tmp7],%g1 + + sra %g1,21,%o7 ! (5_0) iexp = hx >> 21; + sra %g1,10,%o2 ! (5_0) hx >>= 10; + + sub %o7,537,%o7 + + sub %g0,%o7,%o7 ! (5_0) iexp = -iexp; + + and %o2,2040,%o2 ! (5_0) hx &= 0x7f8; + ba .cont11 + for %f28,DC1,%f28 ! (5_0) res = vis_for(res,DC1); +1: + mov %i0,tmp_px + sub counter,6,tmp_counter + + ba .cont11 + mov 6,counter + + .align 16 +.update12: + cmp counter,0 + ble .cont12 + faddd %f34,K3,%f34 ! (2_1) res += K3; + + sub %l6,stridex,tmp_px + sub counter,0,tmp_counter + + ba .cont12 + mov 0,counter + + .align 16 +.update13: + sub %l6,stridex,%l4 + cmp counter,0 + ble .cont13 + fpadd32 %f44,DC2,%f18 ! (6_1) res_c = vis_fpadd32(res,DC2); + + ld [%l4+4],%l4 + cmp %g1,0 + bl 1f + + orcc %g1,%l4,%g0 + bz 1f + sethi %hi(0x00080000),%l4 + + cmp %g1,%l4 + bge,a 2f + ldd [%o3+0x50],%f62 + + fxtod %f6,%f6 ! res = *(long long*)&res; + st %f6,[%fp+tmp7] + + fand %f6,DC0,%f44 ! (6_0) res = vis_fand(res,DC0); + ld [%fp+tmp7],%g1 + + sra %g1,21,%o7 ! (6_1) iexp = hx >> 21; + sra %g1,10,%o2 ! (6_1) hx >>= 10; + + sub %o7,537,%o7 + and %o2,2040,%o2 ! (6_1) hx &= 0x7f8; + for %f44,DC1,%f44 ! (6_1) res = vis_for(res,DC1); + + sub %g0,%o7,%o7 ! (6_1) iexp = -iexp; + ba .cont13 + fpadd32 %f44,DC2,%f18 ! (6_1) res_c = vis_fpadd32(res,DC2); +2: + fand %f6,%f62,%f6 + fxtod %f6,%f6 ! res = *(long long*)&res; + ldd [%o3+0x58],%f62 + faddd %f6,%f62,%f6 + st %f6,[%fp+tmp7] + + fand %f6,DC0,%f44 ! (6_0) res = vis_fand(res,DC0); + ld [%fp+tmp7],%g1 + + sra %g1,21,%o7 ! (6_1) iexp = hx >> 21; + sra %g1,10,%o2 ! (6_1) hx >>= 10; + for %f44,DC1,%f44 ! 
(6_1) res = vis_for(res,DC1); + + sub %o7,537,%o7 + + and %o2,2040,%o2 ! (6_1) hx &= 0x7f8; + sub %g0,%o7,%o7 ! (6_1) iexp = -iexp; + ba .cont13 + fpadd32 %f44,DC2,%f18 ! (6_1) res_c = vis_fpadd32(res,DC2); +1: + sub %l6,stridex,tmp_px + sub counter,0,tmp_counter + + ba .cont13 + mov 0,counter + + .align 16 +.update14: + cmp counter,1 + ble .cont14 + faddd %f34,K3,%f34 ! (3_1) res += K3; + + sub %l6,stridex,tmp_px + sub counter,1,tmp_counter + + ba .cont14 + mov 1,counter + + .align 16 +.update15: + sub %l6,stridex,%l2 + cmp counter,1 + ble .cont15 + fpadd32 %f28,DC2,%f18 ! (0_0) res_c = vis_fpadd32(res,DC2); + + ld [%l2+4],%l2 + cmp %g1,0 + bl 1f + + orcc %g1,%l2,%g0 + bz 1f + sethi %hi(0x00080000),%l2 + + cmp %g1,%l2 + bge,a 2f + ldd [%o3+0x50],%f62 + + fxtod %f0,%f0 ! res = *(long long*)&res; + st %f0,[%fp+tmp7] + + fand %f0,DC0,%f18 ! (0_0) res = vis_fand(res,DC0); + ld [%fp+tmp7],%g1 + + sra %g1,21,%o7 ! (0_0) iexp = hx >> 21; + sra %g1,10,%o2 ! (0_0) hx >>= 10; + + sub %o7,537,%o7 + for %f18,DC1,%f28 ! (0_0) res = vis_for(res,DC1); + + sub %g0,%o7,%o7 ! (0_0) iexp = -iexp; + + and %o2,2040,%o2 ! (0_0) hx &= 0x7f8; + add %o7,1534,%o7 ! (0_0) iexp += 0x5fe; + ba .cont15 + fpadd32 %f28,DC2,%f18 ! (0_0) res_c = vis_fpadd32(res,DC2); +2: + fand %f0,%f62,%f0 + fxtod %f0,%f0 ! res = *(long long*)&res; + ldd [%o3+0x58],%f62 + faddd %f0,%f62,%f0 + st %f0,[%fp+tmp7] + + fand %f0,DC0,%f18 ! (0_0) res = vis_fand(res,DC0); + ld [%fp+tmp7],%g1 + + sra %g1,21,%o7 ! (0_0) iexp = hx >> 21; + sra %g1,10,%o2 ! (0_0) hx >>= 10; + for %f18,DC1,%f28 ! (0_0) res = vis_for(res,DC1); + + sub %o7,537,%o7 + + sub %g0,%o7,%o7 ! (0_0) iexp = -iexp; + + and %o2,2040,%o2 ! (0_0) hx &= 0x7f8; + add %o7,1534,%o7 ! (0_0) iexp += 0x5fe; + ba .cont15 + fpadd32 %f28,DC2,%f18 ! (0_0) res_c = vis_fpadd32(res,DC2); +1: + sub %l6,stridex,tmp_px + sub counter,1,tmp_counter + + ba .cont15 + mov 1,counter + + .align 16 +.update16: + cmp counter,2 + ble .cont16 + fand %f18,DC3,%f8 ! 
(0_0) res_c = vis_fand(res_c,DC3); + + sub %l6,stridex,tmp_px + sub counter,2,tmp_counter + + ba .cont16 + mov 2,counter + + .align 16 +.update17: + sub %l6,stridex,%i2 + cmp counter,2 + ble .cont17 + fand %f0,DC0,%f16 ! (2_0) res = vis_fand(res,DC0); + + ld [%i2+4],%i2 + cmp %g1,0 + bl 1f + + orcc %g1,%i2,%g0 + bz 1f + sethi %hi(0x00080000),%i2 + + cmp %g1,%i2 + bge,a 2f + ldd [%o3+0x50],%f2 + + fxtod %f6,%f6 ! res = *(long long*)&res; + st %f6,[%fp+tmp7] + + fand %f6,DC0,%f44 ! (1_0) res = vis_fand(res,DC0); + ld [%fp+tmp7],%g1 + + sra %g1,21,%o7 ! (1_0) iexp = hx >> 21; + sra %g1,10,%o2 ! (1_0) hx >>= 10; + + sub %o7,537,%o7 + + and %o2,2040,%o2 ! (1_0) hx &= 0x7f8; + sub %g0,%o7,%o7 ! (1_0) iexp = -iexp; + ba .cont17 + for %f44,DC1,%f44 ! (1_0) res = vis_for(res,DC1); +2: + fand %f6,%f2,%f6 + fxtod %f6,%f6 ! res = *(long long*)&res; + ldd [%o3+0x58],%f2 + faddd %f6,%f2,%f6 + st %f6,[%fp+tmp7] + + fand %f6,DC0,%f44 ! (1_0) res = vis_fand(res,DC0); + ld [%fp+tmp7],%g1 + + sra %g1,21,%o7 ! (1_0) iexp = hx >> 21; + sra %g1,10,%o2 ! (1_0) hx >>= 10; + + sub %o7,537,%o7 + + and %o2,2040,%o2 ! (1_0) hx &= 0x7f8; + sub %g0,%o7,%o7 ! (1_0) iexp = -iexp; + ba .cont17 + for %f44,DC1,%f44 ! (1_0) res = vis_for(res,DC1); +1: + sub %l6,stridex,tmp_px + sub counter,2,tmp_counter + + ba .cont17 + mov 2,counter + + .align 16 +.update18: + cmp counter,3 + ble .cont18 + fand %f18,DC3,%f8 ! (1_0) res_c = vis_fand(res_c,DC3); + + sub %l6,stridex,tmp_px + sub counter,3,tmp_counter + + ba .cont18 + mov 3,counter + + .align 16 +.update19: + sub %l6,stridex,%i4 + cmp counter,3 + ble .cont19 + fand %f6,DC0,%f16 ! (3_0) res = vis_fand(res,DC0); + + ld [%i4+4],%i4 + cmp %g1,0 + bl 1f + + orcc %g1,%i4,%g0 + bz 1f + sethi %hi(0x00080000),%i4 + + cmp %g1,%i4 + bge,a 2f + ldd [%o3+0x50],%f2 + + fxtod %f0,%f0 ! res = *(long long*)&res; + st %f0,[%fp+tmp7] + + fand %f0,DC0,%f28 ! (2_0) res = vis_fand(res,DC0); + ld [%fp+tmp7],%g1 + + sra %g1,21,%o7 ! (2_0) iexp = hx >> 21; + + sra %g1,10,%o2 ! 
(2_0) hx >>= 10; + sub %o7,537,%o7 + + and %o2,2040,%o2 ! (2_0) hx &= 0x7f8; + sub %g0,%o7,%o7 ! (2_0) iexp = -iexp; + ba .cont19 + for %f28,DC1,%f28 ! (2_0) res = vis_for(res,DC1); +2: + fand %f0,%f2,%f0 + fxtod %f0,%f0 ! res = *(long long*)&res; + ldd [%o3+0x58],%f2 + faddd %f0,%f2,%f0 + st %f0,[%fp+tmp7] + + fand %f0,DC0,%f28 ! (2_0) res = vis_fand(res,DC0); + ld [%fp+tmp7],%g1 + + sra %g1,21,%o7 ! (2_0) iexp = hx >> 21; + + sra %g1,10,%o2 ! (2_0) hx >>= 10; + sub %o7,537,%o7 + + and %o2,2040,%o2 ! (2_0) hx &= 0x7f8; + sub %g0,%o7,%o7 ! (2_0) iexp = -iexp; + ba .cont19 + for %f28,DC1,%f28 ! (2_0) res = vis_for(res,DC1); +1: + sub %l6,stridex,tmp_px + sub counter,3,tmp_counter + + ba .cont19 + mov 3,counter + + .align 16 +.update20: + cmp counter,4 + ble .cont20 + fand %f18,DC3,%f4 ! (2_0) res_c = vis_fand(res_c,DC3); + + sub %l6,stridex,tmp_px + sub counter,4,tmp_counter + + ba .cont20 + mov 4,counter + + .align 16 +.update21: + sub %l6,stridex,%i5 + cmp counter,4 + ble .cont21 + fand %f0,DC0,%f16 ! (4_0) res = vis_fand(res,DC0); + + ld [%i5+4],%i5 + cmp %g1,0 + bl 1f + + orcc %g1,%i5,%g0 + bz 1f + sethi %hi(0x00080000),%i5 + + cmp %g1,%i5 + bge,a 2f + ldd [%o3+0x50],%f34 + + fxtod %f6,%f6 ! res = *(long long*)&res; + st %f6,[%fp+tmp7] + + fand %f6,DC0,%f44 ! (3_0) res = vis_fand(res,DC0); + ld [%fp+tmp7],%g1 + + sra %g1,21,%o7 ! (3_0) iexp = hx >> 21; + sra %g1,10,%o2 ! (3_0) hx >>= 10; + + sub %o7,537,%o7 + and %o2,2040,%o2 ! (3_0) hx &= 0x7f8; + + sub %g0,%o7,%o7 ! (3_0) iexp = -iexp; + ba .cont21 + for %f44,DC1,%f44 ! (3_0) res = vis_for(res,DC1); +2: + fand %f6,%f34,%f6 + fxtod %f6,%f6 ! res = *(long long*)&res; + ldd [%o3+0x58],%f34 + faddd %f6,%f34,%f6 + st %f6,[%fp+tmp7] + + fand %f6,DC0,%f44 ! (3_0) res = vis_fand(res,DC0); + ld [%fp+tmp7],%g1 + + sra %g1,21,%o7 ! (3_0) iexp = hx >> 21; + sra %g1,10,%o2 ! (3_0) hx >>= 10; + + sub %o7,537,%o7 + and %o2,2040,%o2 ! (3_0) hx &= 0x7f8; + + sub %g0,%o7,%o7 ! 
(3_0) iexp = -iexp; + ba .cont21 + for %f44,DC1,%f44 ! (3_0) res = vis_for(res,DC1); +1: + sub %l6,stridex,tmp_px + sub counter,4,tmp_counter + + ba .cont21 + mov 4,counter + + .align 16 +.update22: + cmp counter,5 + ble .cont22 + fmuld %f62,%f38,%f62 ! (1_0) res *= xx; + + sub %i0,stridex,tmp_px + sub counter,5,tmp_counter + + ba .cont22 + mov 5,counter + + .align 16 +.update23: + sub %i0,stridex,%l1 + cmp counter,5 + ble .cont23 + fand %f6,DC0,%f16 ! (5_0) res = vis_fand(res,DC0); + + ld [%l1+4],%l1 + cmp %g1,0 + bl 1f + + orcc %g1,%l1,%g0 + bz 1f + sethi %hi(0x00080000),%l1 + + cmp %g1,%l1 + bge,a 2f + ldd [%o3+0x50],%f34 + + fxtod %f0,%f0 ! res = *(long long*)&res; + st %f0,[%fp+tmp7] + + fand %f0,DC0,%f24 ! (4_0) res = vis_fand(res,DC0); + ld [%fp+tmp7],%g1 + + sra %g1,21,%o7 ! (4_0) iexp = hx >> 21; + + sra %g1,10,%o2 ! (4_0) hx >>= 10; + sub %o7,537,%o7 + + and %o2,2040,%o2 ! (4_0) hx &= 0x7f8; + sub %g0,%o7,%o7 ! (4_0) iexp = -iexp; + ba .cont23 + for %f24,DC1,%f24 ! (4_0) res = vis_for(res,DC1); +2: + fand %f0,%f34,%f0 + fxtod %f0,%f0 ! res = *(long long*)&res; + ldd [%o3+0x58],%f34 + faddd %f0,%f34,%f0 + st %f0,[%fp+tmp7] + + fand %f0,DC0,%f24 ! (4_0) res = vis_fand(res,DC0); + ld [%fp+tmp7],%g1 + + sra %g1,21,%o7 ! (4_0) iexp = hx >> 21; + + sra %g1,10,%o2 ! (4_0) hx >>= 10; + sub %o7,537,%o7 + + and %o2,2040,%o2 ! (4_0) hx &= 0x7f8; + sub %g0,%o7,%o7 ! (4_0) iexp = -iexp; + ba .cont23 + for %f24,DC1,%f24 ! (4_0) res = vis_for(res,DC1); +1: + sub %i0,stridex,tmp_px + sub counter,5,tmp_counter + + ba .cont23 + mov 5,counter + + .align 16 +.update24: + cmp counter,6 + ble .cont24 + fmuld %f62,%f36,%f62 ! (2_0) res *= xx; + + sub %i1,stridex,tmp_px + sub counter,6,tmp_counter + + ba .cont24 + mov 6,counter + + .align 16 +.update25: + sub %i1,stridex,%i3 + cmp counter,6 + ble .cont25 + fand %f6,DC0,%f16 ! 
(6_0) res = vis_fand(res,DC0); + + ld [%i3+4],%i3 + cmp %g1,0 + bl 1f + + orcc %g1,%i3,%g0 + bz 1f + nop + + sub %i1,stridex,%i3 + ld [%i3],%f10 + ld [%i3+4],%f11 + + sethi %hi(0x00080000),%i3 + + cmp %g1,%i3 + bge,a 2f + ldd [%o3+0x50],%f60 + + fxtod %f10,%f10 ! res = *(long long*)&res; + st %f10,[%fp+tmp7] + + fand %f10,DC0,%f28 ! (5_0) res = vis_fand(res,DC0); + ld [%fp+tmp7],%g1 + + sra %g1,21,%o7 ! (5_0) iexp = hx >> 21; + + sra %g1,10,%o2 ! (5_0) hx >>= 10; + sub %o7,537,%o7 + + and %o2,2040,%o2 ! (5_0) hx &= 0x7f8; + sub %g0,%o7,%o7 ! (5_0) iexp = -iexp; + + ba .cont25 + for %f28,DC1,%f28 ! (5_0) res = vis_for(res,DC1); +2: + fand %f10,%f60,%f10 + fxtod %f10,%f10 ! res = *(long long*)&res; + ldd [%o3+0x58],%f60 + faddd %f10,%f60,%f10 + st %f10,[%fp+tmp7] + + fand %f10,DC0,%f28 ! (5_0) res = vis_fand(res,DC0); + ld [%fp+tmp7],%g1 + + sra %g1,21,%o7 ! (5_0) iexp = hx >> 21; + + sra %g1,10,%o2 ! (5_0) hx >>= 10; + sub %o7,537,%o7 + + and %o2,2040,%o2 ! (5_0) hx &= 0x7f8; + sub %g0,%o7,%o7 ! (5_0) iexp = -iexp; + + ba .cont25 + for %f28,DC1,%f28 ! (5_0) res = vis_for(res,DC1); +1: + sub %i1,stridex,tmp_px + sub counter,6,tmp_counter + + ba .cont25 + mov 6,counter + +.exit: + ret + restore + SET_SIZE(__vrsqrt) + diff --git a/usr/src/lib/libmvec/common/vis/__vrsqrtf.S b/usr/src/lib/libmvec/common/vis/__vrsqrtf.S new file mode 100644 index 0000000000..3a8225f7af --- /dev/null +++ b/usr/src/lib/libmvec/common/vis/__vrsqrtf.S @@ -0,0 +1,1719 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .file "__vrsqrtf.S" + +#include "libm.h" + + RO_DATA + .align 64 + +! i = [0,63] +! TBL[2*i ] = 1 / (*(double*)&(0x3fe0000000000000ULL + (i << 46))) * 2**-24; +! TBL[2*i+1] = 1 / sqrtl(*(double*)&(0x3fe0000000000000ULL + (i << 46))); +! i = [64,127] +! TBL[2*i ] = 1 / (*(double*)&(0x3fe0000000000000ULL + (i << 46))) * 2**-23; +! TBL[2*i+1] = 1 / sqrtl(*(double*)&(0x3fe0000000000000ULL + (i << 46))); + +.CONST_TBL: + .word 0x3e800000, 0x00000000, 0x3ff6a09e, 0x667f3bcd, + .word 0x3e7f81f8, 0x1f81f820, 0x3ff673e3, 0x2ef63a03, + .word 0x3e7f07c1, 0xf07c1f08, 0x3ff6482d, 0x37a5a3d2, + .word 0x3e7e9131, 0xabf0b767, 0x3ff61d72, 0xb7978671, + .word 0x3e7e1e1e, 0x1e1e1e1e, 0x3ff5f3aa, 0x673fa911, + .word 0x3e7dae60, 0x76b981db, 0x3ff5cacb, 0x7802f342, + .word 0x3e7d41d4, 0x1d41d41d, 0x3ff5a2cd, 0x8c69d61a, + .word 0x3e7cd856, 0x89039b0b, 0x3ff57ba8, 0xb0ee01b9, + .word 0x3e7c71c7, 0x1c71c71c, 0x3ff55555, 0x55555555, + .word 0x3e7c0e07, 0x0381c0e0, 0x3ff52fcc, 0x468d6b54, + .word 0x3e7bacf9, 0x14c1bad0, 0x3ff50b06, 0xa8fc6b70, + .word 0x3e7b4e81, 0xb4e81b4f, 0x3ff4e6fd, 0xf33cf032, + .word 0x3e7af286, 0xbca1af28, 0x3ff4c3ab, 0xe93bcf74, + .word 0x3e7a98ef, 0x606a63be, 0x3ff4a10a, 0x97af7b92, + .word 0x3e7a41a4, 0x1a41a41a, 0x3ff47f14, 0x4fe17f9f, + .word 0x3e79ec8e, 0x951033d9, 0x3ff45dc3, 0xa3c34fa3, + .word 0x3e799999, 0x9999999a, 0x3ff43d13, 0x6248490f, + .word 0x3e7948b0, 0xfcd6e9e0, 0x3ff41cfe, 0x93ff5199, + 
.word 0x3e78f9c1, 0x8f9c18fa, 0x3ff3fd80, 0x77e70577, + .word 0x3e78acb9, 0x0f6bf3aa, 0x3ff3de94, 0x8077db58, + .word 0x3e786186, 0x18618618, 0x3ff3c036, 0x50e00e03, + .word 0x3e781818, 0x18181818, 0x3ff3a261, 0xba6d7a37, + .word 0x3e77d05f, 0x417d05f4, 0x3ff38512, 0xba21f51e, + .word 0x3e778a4c, 0x8178a4c8, 0x3ff36845, 0x766eec92, + .word 0x3e7745d1, 0x745d1746, 0x3ff34bf6, 0x3d156826, + .word 0x3e7702e0, 0x5c0b8170, 0x3ff33021, 0x8127c0e0, + .word 0x3e76c16c, 0x16c16c17, 0x3ff314c3, 0xd92a9e91, + .word 0x3e768168, 0x16816817, 0x3ff2f9d9, 0xfd52fd50, + .word 0x3e7642c8, 0x590b2164, 0x3ff2df60, 0xc5df2c9e, + .word 0x3e760581, 0x60581606, 0x3ff2c555, 0x2988e428, + .word 0x3e75c988, 0x2b931057, 0x3ff2abb4, 0x3c0eb0f4, + .word 0x3e758ed2, 0x308158ed, 0x3ff2927b, 0x2cd320f5, + .word 0x3e755555, 0x55555555, 0x3ff279a7, 0x4590331c, + .word 0x3e751d07, 0xeae2f815, 0x3ff26135, 0xe91daf55, + .word 0x3e74e5e0, 0xa72f0539, 0x3ff24924, 0x92492492, + .word 0x3e74afd6, 0xa052bf5b, 0x3ff23170, 0xd2be638a, + .word 0x3e747ae1, 0x47ae147b, 0x3ff21a18, 0x51ff630a, + .word 0x3e7446f8, 0x6562d9fb, 0x3ff20318, 0xcc6a8f5d, + .word 0x3e741414, 0x14141414, 0x3ff1ec70, 0x124e98f9, + .word 0x3e73e22c, 0xbce4a902, 0x3ff1d61c, 0x070ae7d3, + .word 0x3e73b13b, 0x13b13b14, 0x3ff1c01a, 0xa03be896, + .word 0x3e738138, 0x13813814, 0x3ff1aa69, 0xe4f2777f, + .word 0x3e73521c, 0xfb2b78c1, 0x3ff19507, 0xecf5b9e9, + .word 0x3e7323e3, 0x4a2b10bf, 0x3ff17ff2, 0xe00ec3ee, + .word 0x3e72f684, 0xbda12f68, 0x3ff16b28, 0xf55d72d4, + .word 0x3e72c9fb, 0x4d812ca0, 0x3ff156a8, 0x72b5ef62, + .word 0x3e729e41, 0x29e4129e, 0x3ff1426f, 0xac0654db, + .word 0x3e727350, 0xb8812735, 0x3ff12e7d, 0x02c40253, + .word 0x3e724924, 0x92492492, 0x3ff11ace, 0xe560242a, + .word 0x3e721fb7, 0x8121fb78, 0x3ff10763, 0xcec30b26, + .word 0x3e71f704, 0x7dc11f70, 0x3ff0f43a, 0x45cdedad, + .word 0x3e71cf06, 0xada2811d, 0x3ff0e150, 0xdce2b60c, + .word 0x3e71a7b9, 0x611a7b96, 0x3ff0cea6, 0x317186dc, + .word 0x3e718118, 0x11811812, 
0x3ff0bc38, 0xeb8ba412, + .word 0x3e715b1e, 0x5f75270d, 0x3ff0aa07, 0xbd7b7488, + .word 0x3e7135c8, 0x1135c811, 0x3ff09811, 0x63615499, + .word 0x3e711111, 0x11111111, 0x3ff08654, 0xa2d4f6db, + .word 0x3e70ecf5, 0x6be69c90, 0x3ff074d0, 0x4a8b1438, + .word 0x3e70c971, 0x4fbcda3b, 0x3ff06383, 0x31ff307a, + .word 0x3e70a681, 0x0a6810a7, 0x3ff0526c, 0x39213bfa, + .word 0x3e708421, 0x08421084, 0x3ff0418a, 0x4806de7d, + .word 0x3e70624d, 0xd2f1a9fc, 0x3ff030dc, 0x4ea03a72, + .word 0x3e704104, 0x10410410, 0x3ff02061, 0x446ffa9a, + .word 0x3e702040, 0x81020408, 0x3ff01018, 0x28467ee9, + .word 0x3e800000, 0x00000000, 0x3ff00000, 0x00000000, + .word 0x3e7f81f8, 0x1f81f820, 0x3fefc0bd, 0x88a0f1d9, + .word 0x3e7f07c1, 0xf07c1f08, 0x3fef82ec, 0x882c0f9b, + .word 0x3e7e9131, 0xabf0b767, 0x3fef467f, 0x2814b0cc, + .word 0x3e7e1e1e, 0x1e1e1e1e, 0x3fef0b68, 0x48d2af1c, + .word 0x3e7dae60, 0x76b981db, 0x3feed19b, 0x75e78957, + .word 0x3e7d41d4, 0x1d41d41d, 0x3fee990c, 0xdad55ed2, + .word 0x3e7cd856, 0x89039b0b, 0x3fee61b1, 0x38f18adc, + .word 0x3e7c71c7, 0x1c71c71c, 0x3fee2b7d, 0xddfefa66, + .word 0x3e7c0e07, 0x0381c0e0, 0x3fedf668, 0x9b7e6350, + .word 0x3e7bacf9, 0x14c1bad0, 0x3fedc267, 0xbea45549, + .word 0x3e7b4e81, 0xb4e81b4f, 0x3fed8f72, 0x08e6b82d, + .word 0x3e7af286, 0xbca1af28, 0x3fed5d7e, 0xa914b937, + .word 0x3e7a98ef, 0x606a63be, 0x3fed2c85, 0x34ed6d86, + .word 0x3e7a41a4, 0x1a41a41a, 0x3fecfc7d, 0xa32a9213, + .word 0x3e79ec8e, 0x951033d9, 0x3feccd60, 0x45f5d358, + .word 0x3e799999, 0x9999999a, 0x3fec9f25, 0xc5bfedd9, + .word 0x3e7948b0, 0xfcd6e9e0, 0x3fec71c7, 0x1c71c71c, + .word 0x3e78f9c1, 0x8f9c18fa, 0x3fec453d, 0x90f057a2, + .word 0x3e78acb9, 0x0f6bf3aa, 0x3fec1982, 0xb2ece47b, + .word 0x3e786186, 0x18618618, 0x3febee90, 0x56fb9c39, + .word 0x3e781818, 0x18181818, 0x3febc460, 0x92eb3118, + .word 0x3e77d05f, 0x417d05f4, 0x3feb9aed, 0xba588347, + .word 0x3e778a4c, 0x8178a4c8, 0x3feb7232, 0x5b79db11, + .word 0x3e7745d1, 0x745d1746, 0x3feb4a29, 0x3c1d9550, + .word 
0x3e7702e0, 0x5c0b8170, 0x3feb22cd, 0x56d87d7e, + .word 0x3e76c16c, 0x16c16c17, 0x3feafc19, 0xd8606169, + .word 0x3e768168, 0x16816817, 0x3fead60a, 0x1d0fb394, + .word 0x3e7642c8, 0x590b2164, 0x3feab099, 0xae8f539a, + .word 0x3e760581, 0x60581606, 0x3fea8bc4, 0x41a3d02c, + .word 0x3e75c988, 0x2b931057, 0x3fea6785, 0xb41bacf7, + .word 0x3e758ed2, 0x308158ed, 0x3fea43da, 0x0adc6899, + .word 0x3e755555, 0x55555555, 0x3fea20bd, 0x700c2c3e, + .word 0x3e751d07, 0xeae2f815, 0x3fe9fe2c, 0x315637ee, + .word 0x3e74e5e0, 0xa72f0539, 0x3fe9dc22, 0xbe484458, + .word 0x3e74afd6, 0xa052bf5b, 0x3fe9ba9d, 0xa6c73588, + .word 0x3e747ae1, 0x47ae147b, 0x3fe99999, 0x9999999a, + .word 0x3e7446f8, 0x6562d9fb, 0x3fe97913, 0x63068b54, + .word 0x3e741414, 0x14141414, 0x3fe95907, 0xeb87ab44, + .word 0x3e73e22c, 0xbce4a902, 0x3fe93974, 0x368cfa31, + .word 0x3e73b13b, 0x13b13b14, 0x3fe91a55, 0x6151761c, + .word 0x3e738138, 0x13813814, 0x3fe8fba8, 0xa1bf6f96, + .word 0x3e73521c, 0xfb2b78c1, 0x3fe8dd6b, 0x4563a009, + .word 0x3e7323e3, 0x4a2b10bf, 0x3fe8bf9a, 0xb06e1af3, + .word 0x3e72f684, 0xbda12f68, 0x3fe8a234, 0x5cc04426, + .word 0x3e72c9fb, 0x4d812ca0, 0x3fe88535, 0xd90703c6, + .word 0x3e729e41, 0x29e4129e, 0x3fe8689c, 0xc7e07e7d, + .word 0x3e727350, 0xb8812735, 0x3fe84c66, 0xdf0ca4c2, + .word 0x3e724924, 0x92492492, 0x3fe83091, 0xe6a7f7e7, + .word 0x3e721fb7, 0x8121fb78, 0x3fe8151b, 0xb86fee1d, + .word 0x3e71f704, 0x7dc11f70, 0x3fe7fa02, 0x3f1068d1, + .word 0x3e71cf06, 0xada2811d, 0x3fe7df43, 0x7579b9b5, + .word 0x3e71a7b9, 0x611a7b96, 0x3fe7c4dd, 0x663ebb88, + .word 0x3e718118, 0x11811812, 0x3fe7aace, 0x2afa8b72, + .word 0x3e715b1e, 0x5f75270d, 0x3fe79113, 0xebbd7729, + .word 0x3e7135c8, 0x1135c811, 0x3fe777ac, 0xde80baea, + .word 0x3e711111, 0x11111111, 0x3fe75e97, 0x46a0b098, + .word 0x3e70ecf5, 0x6be69c90, 0x3fe745d1, 0x745d1746, + .word 0x3e70c971, 0x4fbcda3b, 0x3fe72d59, 0xc45f1fc5, + .word 0x3e70a681, 0x0a6810a7, 0x3fe7152e, 0x9f44f01f, + .word 0x3e708421, 0x08421084, 0x3fe6fd4e, 
0x79325467, + .word 0x3e70624d, 0xd2f1a9fc, 0x3fe6e5b7, 0xd16657e1, + .word 0x3e704104, 0x10410410, 0x3fe6ce69, 0x31d5858d, + .word 0x3e702040, 0x81020408, 0x3fe6b761, 0x2ec892f6, + + .word 0x3fefffff, 0xfee7f18f ! K0 = 9.99999997962321453275e-01 + .word 0xbfdfffff, 0xfe07e52f ! K1 = -4.99999998166077580600e-01 + .word 0x3fd80118, 0x0ca296d9 ! K2 = 3.75066768969515586277e-01 + .word 0xbfd400fc, 0x0bbb8e78 ! K3 = -3.12560092408808548438e-01 + .word 0x7ffe0000, 0x7ffe0000 ! DC0 + .word 0x3f800000, 0x40000000 ! FTWO + +#define stridex %l4 +#define stridex2 %l1 +#define stridey %l3 +#define stridey2 %i2 +#define TBL %l2 +#define counter %i5 + +#define K3 %f38 +#define K2 %f36 +#define K1 %f34 +#define K0 %f32 +#define DC0 %f4 +#define FONE %f2 +#define FTWO %f3 + +#define _0x00800000 %o2 +#define _0x7f800000 %o4 + +#define tmp0 STACK_BIAS-0x30 +#define tmp1 STACK_BIAS-0x28 +#define tmp2 STACK_BIAS-0x20 +#define tmp3 STACK_BIAS-0x18 +#define tmp_counter STACK_BIAS-0x10 +#define tmp_px STACK_BIAS-0x08 + +! sizeof temp storage - must be a multiple of 16 for V9 +#define tmps 0x30 + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +! !!!!! algorithm !!!!! +! ((float*)&ddx0)[0] = *px; +! ax0 = *(int*)px; +! +! ((float*)&ddx0)[1] = *(px + stridex); +! ax1 = *(int*)(px + stridex); +! +! px += stridex2; +! +! if ( ax0 >= 0x7f800000 ) +! { +! RETURN ( FONE / ((float*)&dres0)[0] ); +! } +! if ( ax0 < 0x00800000 ) +! { +! float res = ((float*)&dres0)[0]; +! +! if ( (ax0 & 0x7fffffff) == 0 ) /* |X| = zero */ +! { +! RETURN ( FONE / res ) +! } +! else if ( ax0 >= 0 ) /* X = denormal */ +! { +! double res0, xx0, tbl_div0, tbl_sqrt0; +! float fres0; +! int iax0, si0, iexp0; +! +! res = *(int*)&res; +! res *= FTWO; +! ax0 = *(int*)&res; +! iexp0 = ax0 >> 24; +! iexp0 = 0x3f + 0x4b - iexp0; +! iexp0 = iexp0 << 23; +! +! si0 = (ax0 >> 13) & 0x7f0; +! +! tbl_div0 = ((double*)((char*)__TBL_rsqrtf + si0))[0]; +! 
tbl_sqrt0 = ((double*)((char*)__TBL_rsqrtf + si0))[1]; +! iax0 = ax0 & 0x7ffe0000; +! iax0 = ax0 - iax0; +! xx0 = iax0 * tbl_div0; +! res0 = tbl_sqrt0 * (((A3 * xx0 + A2) * xx0 + A1) * xx0 + A0); +! +! fres0 = res0; +! iexp0 += *(int*)&fres0; +! RETURN(*(float*)&iexp0) +! } +! else /* X = negative */ +! { +! RETURN ( sqrtf(res) ) +! } +! } +! if ( ax1 >= 0x7f800000 ) +! { +! RETURN ( FONE / ((float*)&dres0)[1] ) +! } +! if ( ax1 < 0x00800000 ) +! { +! float res = ((float*)&dres0)[1]; +! if ( (ax1 & 0x7fffffff) == 0 ) /* |X| = zero */ +! { +! RETURN ( FONE / res ) +! } +! else if ( ax1 >= 0 ) /* X = denormal */ +! { +! double res0, xx0, tbl_div0, tbl_sqrt0; +! float fres0; +! int iax1, si0, iexp0; +! +! res = *(int*)&res; +! res *= FTWO; +! ax1 = *(int*)&res; +! iexp0 = ax1 >> 24; +! iexp0 = 0x3f + 0x4b - iexp0; +! iexp0 = iexp0 << 23; +! +! si0 = (ax1 >> 13) & 0x7f0; +! +! tbl_div0 = ((double*)((char*)__TBL_rsqrtf + si0))[0]; +! tbl_sqrt0 = ((double*)((char*)__TBL_rsqrtf + si0))[1]; +! iax1 = ax1 & 0x7ffe0000; +! iax1 = ax1 - iax1; +! xx0 = iax1 * tbl_div0; +! res0 = tbl_sqrt0 * (((A3 * xx0 + A2) * xx0 + A1) * xx0 + A0); +! +! fres0 = res0; +! iexp0 += *(int*)&fres0; +! RETURN(*(float*)&iexp0) +! } +! else /* X = negative */ +! { +! RETURN ( sqrtf(res) ) +! } +! } +! +! iexp0 = ax0 >> 24; +! iexp1 = ax1 >> 24; +! iexp0 = 0x3f - iexp0; +! iexp1 = 0x3f - iexp1; +! iexp1 &= 0x1ff; +! lexp0 = iexp0 << 55; +! lexp1 = iexp1 << 23; +! +! lexp0 |= lexp1; +! +! fdx0 = *((double*)&lexp0); +! +! si0 = ax0 >> 13; +! si1 = ax1 >> 13; +! si0 &= 0x7f0; +! si1 &= 0x7f0; +! +! addr0 = (char*)TBL + si0; +! addr1 = (char*)TBL + si1; +! tbl_div0 = ((double*)((char*)TBL + si0))[0]; +! tbl_div1 = ((double*)((char*)TBL + si1))[0]; +! tbl_sqrt0 = ((double*)addr0)[1]; +! tbl_sqrt1 = ((double*)addr1)[1]; +! dfx0 = vis_fand(ddx0,DC0); +! dfx0 = vis_fpsub32(ddx0,dfx0); +! dtmp0 = (double)(((int*)&dfx0)[0]); +! dtmp1 = (double)(((int*)&dfx0)[1]); +! xx0 = dtmp0 * tbl_div0; +! 
xx1 = dtmp1 * tbl_div1; +! res0 = K3 * xx0; +! res1 = K3 * xx1; +! res0 += K2; +! res1 += K2; +! res0 *= xx0; +! res1 *= xx1; +! res0 += K1; +! res1 += K1; +! res0 *= xx0; +! res1 *= xx1; +! res0 += K0; +! res1 += K0; +! res0 = tbl_sqrt0 * res0; +! res1 = tbl_sqrt1 * res1; +! ((float*)&dres0)[0] = (float)res0; +! ((float*)&dres0)[1] = (float)res1; +! dres0 = vis_fpadd32(dres0,fdx0); +! *py = ((float*)&dres0)[0]; +! *(py + stridey) = ((float*)&dres0)[1]; +! py += stridey2; +! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + ENTRY(__vrsqrtf) + save %sp,-SA(MINFRAME)-tmps,%sp + PIC_SETUP(l7) + PIC_SET(l7,.CONST_TBL,l2) + + st %i0,[%fp+tmp_counter] + stx %i1,[%fp+tmp_px] + + ldd [TBL+2048],K0 + sll %i2,2,stridex + + ldd [TBL+2048+8],K1 + sll %i4,2,stridey + mov %i3,%i2 + + ldd [TBL+2048+16],K2 + sethi %hi(0x7f800000),_0x7f800000 + sll stridex,1,stridex2 + + ldd [TBL+2048+24],K3 + sethi %hi(0x00800000),_0x00800000 + + ldd [TBL+2048+32],DC0 + add %g0,0x3f,%l0 + + ldd [TBL+2048+40],FONE +! ld [TBL+2048+44],FTWO +.begin: + ld [%fp+tmp_counter],counter + ldx [%fp+tmp_px],%l7 + st %g0,[%fp+tmp_counter] +.begin1: + cmp counter,0 + ble,pn %icc,.exit + + lda [%l7]0x82,%f14 ! (4_0) ((float*)&ddx0)[0] = *px; + + lda [stridex+%l7]0x82,%f15 ! (5_0) ((float*)&ddx0)[1] = *(px + stridex); + sethi %hi(0x7ffffc00),%o0 + + lda [%l7]0x82,%g1 ! (4_0) ax0 = *(int*)px; + add %l7,stridex2,%i1 ! px += stridex2 + add %o0,0x3ff,%o0 + + lda [stridex+%l7]0x82,%g5 ! (5_0) ax1 = *(int*)(px + stridex); + fand %f14,DC0,%f16 ! (4_0) dfx0 = vis_fand(ddx0,DC0); + + sra %g1,13,%l5 ! (4_0) si0 = ax0 >> 13; + add %i1,stridex2,%o5 ! px += stridex2 + + cmp %g1,_0x7f800000 ! (4_1) ax0 ? 0x7f800000 + bge,pn %icc,.spec0 ! (4_1) if ( ax0 >= 0x7f800000 ) + nop + + cmp %g1,_0x00800000 ! (4_1) ax0 ? 0x00800000 + bl,pn %icc,.spec1 ! (4_1) if ( ax0 < 0x00800000 ) + sra %g5,13,%l6 ! (5_0) si1 = ax1 >> 13; +.cont_spec: + and %l5,2032,%l5 ! (4_0) si0 &= 0x7f0; + + ldd [%l5+TBL],%f54 ! 
(4_0) tbl_div0 = ((double*)((char*)TBL + si0))[0]; + sra %g5,24,%l7 ! (5_0) iexp1 = ax1 >> 24; + and %l6,2032,%l6 ! (5_0) si1 &= 0x7f0; + fpsub32 %f14,%f16,%f16 ! (4_0) dfx0 = vis_fpsub32(ddx0,dfx0); + + ldd [%l6+TBL],%f46 ! (5_0) tbl_div1 = ((double*)((char*)TBL + si1))[0]; + sra %g1,24,%i3 ! (4_0) iexp0 = ax0 >> 24; + sub %l0,%l7,%l7 ! (5_0) iexp1 = 0x3f - iexp1; + + and %l7,511,%l1 ! (5_0) iexp1 = 0x1ff; + add %l6,TBL,%l6 ! (5_0) addr1 = (char*)TBL + si1; + + sllx %l1,23,%l1 ! (5_0) lexp1 = iexp1 << 23; + sub %l0,%i3,%o0 ! (4_0) iexp0 = 0x3f - iexp0; + fitod %f16,%f56 ! (4_0) dtmp0 = (double)(((int*)dfx0)[0]); + + sllx %o0,55,%o0 ! (4_0) lexp0 = iexp0 << 55; + fitod %f17,%f44 ! (5_0) dtmp1 = (double)(((int*)dfx0)[1]); + + or %o0,%l1,%o0 ! (4_0) lexp0 |= lexp1; + + stx %o0,[%fp+tmp0] ! (4_0) fdx0 = *((double*)lexp0); + + fmuld %f56,%f54,%f40 ! (4_0) xx0 = dtmp0 * tbl_div0; + + lda [%i1]0x82,%f18 ! (0_0) ((float*)&ddx0)[0] = *px; + fmuld %f44,%f46,%f46 ! (5_1) xx1 = dtmp1 * tbl_div1; + + lda [stridex+%i1]0x82,%f19 ! (1_0) ((float*)&ddx0)[1] = *(px + stridex); + + lda [%i1]0x82,%g1 ! (0_0) ax0 = *(int*)px; + + lda [stridex+%i1]0x82,%i4 ! (1_0) ax1 = *(int*)(px + stridex); + cmp %g5,_0x7f800000 ! (5_1) ax1 ? 0x7f800000 + bge,pn %icc,.update0 ! (5_1) if ( ax1 >= 0x7f800000 ) + fmuld K3,%f40,%f52 ! (4_1) res0 = K3 * xx0; +.cont0: + fmuld K3,%f46,%f50 ! (5_1) res1 = K3 * xx1; + cmp %g5,_0x00800000 ! (5_1) ax1 ? 0x00800000 + bl,pn %icc,.update1 ! (5_1) if ( ax1 < 0x00800000 ) + fand %f18,DC0,%f56 ! (0_0) dfx0 = vis_fand(ddx0,DC0); +.cont1: + sra %g1,13,%o0 ! (0_0) si0 = ax0 >> 13; + cmp %g1,_0x7f800000 ! (0_0) ax0 ? 0x7f800000 + + sra %i4,13,%g5 ! (1_0) si1 = ax1 >> 13; + and %o0,2032,%o0 ! (0_0) si0 &= 0x7f0; + + ldd [%o0+TBL],%f54 ! (0_0) tbl_div0 = ((double*)((char*)TBL + si0))[0]; + sra %i4,24,%i1 ! (1_0) iexp1 = ax1 >> 24; + and %g5,2032,%o7 ! (1_0) si1 &= 0x7f0; + fpsub32 %f18,%f56,%f30 ! (0_0) dfx0 = vis_fpsub32(ddx0,dfx0); + + ldd [%o7+TBL],%f44 ! 
(1_0) tbl_div1 = ((double*)((char*)TBL + si1))[0]; + sra %g1,24,%i3 ! (0_0) iexp0 = ax0 >> 24; + sub %l0,%i1,%i1 ! (1_0) iexp1 = 0x3f - iexp1; + faddd %f52,K2,%f62 ! (4_1) res0 += K2; + + sub %l0,%i3,%g5 ! (0_0) iexp0 = 0x3f - iexp0; + bge,pn %icc,.update2 ! (0_0) if ( ax0 >= 0x7f800000 ) + faddd %f50,K2,%f60 ! (5_1) res1 += K2; +.cont2: + cmp %g1,_0x00800000 ! (0_0) ax0 ? 0x00800000 + and %i1,511,%i0 ! (1_0) iexp1 = 0x1ff; + fitod %f30,%f56 ! (0_0) dtmp0 = (double)(((int*)dfx0)[0]); + + sllx %i0,23,%i0 ! (1_0) lexp1 = iexp1 << 23; + bl,pn %icc,.update3 ! (0_0) if ( ax0 < 0x00800000 ) + fitod %f31,%f50 ! (1_0) dtmp0 = (double)(((int*)dfx0)[0]); +.cont3: + fmuld %f62,%f40,%f30 ! (4_1) res0 *= xx0; + sllx %g5,55,%g5 ! (0_0) lexp0 = iexp0 << 55; + + fmuld %f60,%f46,%f48 ! (5_1) res1 *= xx1; + or %g5,%i0,%g5 ! (0_0) lexp0 |= lexp1; + stx %g5,[%fp+tmp1] ! (0_0) fdx0 = *((double*)lexp0); + + fmuld %f56,%f54,%f26 ! (0_0) xx0 = dtmp0 * tbl_div0; + sll stridex,1,stridex2 ! stridex2 = stridex * 2; + + lda [%o5]0x82,%f24 ! (2_0) ((float*)&ddx0)[0] = *px; + add %o7,TBL,%o7 ! (1_0) addr0 = (char*)TBL + si0; + fmuld %f50,%f44,%f44 ! (1_0) xx0 = dtmp0 * tbl_div0; + + lda [stridex+%o5]0x82,%f25 ! (3_0) ((float*)&ddx0)[1] = *(px + stridex); + add %l5,TBL,%l5 ! (4_1) addr0 = (char*)TBL + si0; + faddd %f30,K1,%f62 ! (4_1) res0 += K1; + + lda [%o5]0x82,%g1 ! (2_0) ax0 = *(int*)px; + add %o5,stridex2,%l7 ! px += stridex2 + faddd %f48,K1,%f42 ! (5_1) res1 += K1; + + lda [stridex+%o5]0x82,%o5 ! (3_0) ax1 = *(int*)(px + stridex); + cmp %i4,_0x7f800000 ! (1_0) ax1 ? 0x7f800000 + bge,pn %icc,.update4 ! (1_0) if ( ax1 >= 0x7f800000 ) + fmuld K3,%f26,%f52 ! (0_0) res0 = K3 * xx0; +.cont4: + fmuld K3,%f44,%f50 ! (1_0) res1 = K3 * xx1; + cmp %i4,_0x00800000 ! (1_0) ax1 ? 0x00800000 + bl,pn %icc,.update5 ! (1_0) if ( ax1 < 0x00800000 ) + fand %f24,DC0,%f54 ! (2_0) dfx0 = vis_fand(ddx0,DC0); +.cont5: + fmuld %f62,%f40,%f48 ! (4_1) res0 *= xx0; + sra %g1,13,%i0 ! 
(2_0) si0 = ax0 >> 13; + cmp %g1,_0x7f800000 ! (2_0) ax0 ? 0x7f800000 + + fmuld %f42,%f46,%f58 ! (5_1) res1 *= xx1; + sra %o5,13,%o1 ! (3_0) si1 = ax1 >> 13; + and %i0,2032,%i0 ! (2_0) si0 &= 0x7f0; + + ldd [%i0+TBL],%f30 ! (2_0) tbl_div0 = ((double*)((char*)TBL + si0))[0]; + sra %o5,24,%o3 ! (3_0) iexp1 = ax1 >> 24; + and %o1,2032,%o1 ! (3_0) si1 &= 0x7f0; + fpsub32 %f24,%f54,%f12 ! (2_0) dfx0 = vis_fpsub32(ddx0,dfx0); + + ldd [%o1+TBL],%f46 ! (3_0) tbl_div1 = ((double*)((char*)TBL + si1))[0]; + sra %g1,24,%i3 ! (2_0) iexp0 = ax0 >> 24; + sub %l0,%o3,%o3 ! (3_0) iexp1 = 0x3f - iexp1; + faddd %f52,K2,%f40 ! (0_0) res0 += K2; + + ldd [%l5+8],%f42 ! (4_1) tbl_sqrt0 = ((double*)addr0)[1]; + sub %l0,%i3,%g5 ! (2_0) iexp0 = 0x3f - iexp0; + and %o3,511,%i3 ! (3_0) iexp1 &= 0x1ff; + faddd %f50,K2,%f60 ! (1_0) res0 += K2; + + ldd [%l6+8],%f28 ! (5_1) tbl_sqrt1 = ((double*)addr1)[1]; + sllx %g5,55,%g5 ! (2_0) lexp0 = iexp0 << 55; + add %i0,TBL,%i0 ! (2_0) addr0 = (char*)TBL + si0; + fitod %f12,%f56 ! (2_0) dtmp0 = (double)(((int*)dfx0)[0]); + + sllx %i3,23,%i3 ! (3_0) lexp1 = iexp1 << 23; + fitod %f13,%f50 ! (3_0) dtmp1 = (double)(((int*)dfx0)[1]); + + fmuld %f40,%f26,%f40 ! (0_0) res0 *= xx0; + or %g5,%i3,%g5 ! (2_0) lexp0 |= lexp1; + faddd %f48,K0,%f62 ! (4_1) res0 += K0; + + fmuld %f60,%f44,%f48 ! (1_0) res1 *= xx1; + add %o1,TBL,%o1 ! (3_0) addr1 = (char*)TBL + si1; + stx %g5,[%fp+tmp2] ! (2_0) fdx0 = *((double*)lexp0); + faddd %f58,K0,%f60 ! (5_1) res1 += K0; + + fmuld %f56,%f30,%f30 ! (2_0) xx0 = dtmp0 * tbl_div0; + bge,pn %icc,.update6 ! (2_0) if ( ax0 >= 0x7f800000 ) + lda [%l7]0x82,%f14 ! (4_0) ((float*)&ddx0)[0] = *px; +.cont6: + cmp %g1,_0x00800000 ! (2_0) ax0 ? 0x00800000 + bl,pn %icc,.update7 ! (2_0) if ( ax0 < 0x00800000 ) + nop +.cont7: + fmuld %f50,%f46,%f24 ! (3_0) xx1 = dtmp1 * tbl_div1; + + lda [stridex+%l7]0x82,%f15 ! (5_0) ((float*)&ddx0)[1] = *(px + stridex); + cmp %o5,_0x7f800000 ! (3_0) ax1 ? 0x7f800000 + fmuld %f42,%f62,%f58 ! 
(4_1) res0 = tbl_sqrt0 * res0; + faddd %f40,K1,%f46 ! (0_0) res0 += K1; + + lda [%l7]0x82,%g1 ! (4_0) ax0 = *(int*)px; + add %l7,stridex2,%i1 ! px += stridex2 + fmuld %f28,%f60,%f56 ! (5_1) res1 = tbl_sqrt1 * res1; + faddd %f48,K1,%f62 ! (1_0) res1 += K1; + + lda [stridex+%l7]0x82,%g5 ! (5_0) ax1 = *(int*)(px + stridex); + add %o0,TBL,%o0 ! (0_0) addr0 = (char*)TBL + si0; + bge,pn %icc,.update8 ! (3_0) if ( ax1 >= 0x7f800000 ) + fmuld K3,%f30,%f52 ! (2_0) res0 = K3 * xx0; +.cont8: + fmuld K3,%f24,%f50 ! (3_0) res1 = K3 * xx1; + cmp %o5,_0x00800000 ! (3_0) ax1 ? 0x00800000 + bl,pn %icc,.update9 ! (3_0) if ( ax1 < 0x00800000 ) + fand %f14,DC0,%f16 ! (4_0) dfx0 = vis_fand(ddx0,DC0); +.cont9: + fmuld %f46,%f26,%f48 ! (0_0) res0 *= xx0; + sra %g1,13,%l5 ! (4_0) si0 = ax0 >> 13; + add %i1,stridex2,%o5 ! px += stridex2 + fdtos %f58,%f6 ! (4_1) ((float*)&dres0)[0] = (float)res0; + + fmuld %f62,%f44,%f40 ! (1_0) res1 *= xx1; + sra %g5,13,%l6 ! (5_0) si1 = ax1 >> 13; + and %l5,2032,%l5 ! (4_0) si0 &= 0x7f0; + fdtos %f56,%f7 ! (5_1) ((float*)&dres0)[1] = (float)res1; + + ldd [%l5+TBL],%f54 ! (4_0) tbl_div0 = ((double*)((char*)TBL + si0))[0]; + sra %g5,24,%l7 ! (5_0) iexp1 = ax1 >> 24; + and %l6,2032,%l6 ! (5_0) si1 &= 0x7f0; + fpsub32 %f14,%f16,%f16 ! (4_0) dfx0 = vis_fpsub32(ddx0,dfx0); + + ldd [%l6+TBL],%f46 ! (5_0) tbl_div1 = ((double*)((char*)TBL + si1))[0]; + sra %g1,24,%i3 ! (4_0) iexp0 = ax0 >> 24; + sub %l0,%l7,%l7 ! (5_0) iexp1 = 0x3f - iexp1; + faddd %f52,K2,%f58 ! (2_0) res0 += K2; + + ldd [%o0+8],%f42 ! (0_0) tbl_sqrt0 = ((double*)addr0)[1]; + and %l7,511,%l1 ! (5_0) iexp1 = 0x1ff; + add %l6,TBL,%l6 ! (5_0) addr1 = (char*)TBL + si1; + faddd %f50,K2,%f60 ! (3_0) res1 += K2; + + ldd [%o7+8],%f28 ! (1_0) tbl_sqrt1 = ((double*)addr1)[1]; + sllx %l1,23,%l1 ! (5_0) lexp1 = iexp1 << 23; + sub %l0,%i3,%o0 ! (4_0) iexp0 = 0x3f - iexp0; + fitod %f16,%f56 ! (4_0) dtmp0 = (double)(((int*)dfx0)[0]); + + ldd [%fp+tmp0],%f52 ! (4_1) fdx0 = *((double*)lexp0); + sllx %o0,55,%o0 ! 
(4_0) lexp0 = iexp0 << 55; + fitod %f17,%f44 ! (5_0) dtmp1 = (double)(((int*)dfx0)[1]); + + fmuld %f58,%f30,%f62 ! (2_0) res0 *= xx0; + or %o0,%l1,%o0 ! (4_0) lexp0 |= lexp1; + faddd %f48,K0,%f22 ! (0_0) res0 += K0; + + fmuld %f60,%f24,%f58 ! (3_0) res1 *= xx1; + stx %o0,[%fp+tmp0] ! (4_0) fdx0 = *((double*)lexp0); + faddd %f40,K0,%f26 ! (1_0) res1 += K0; + + fmuld %f56,%f54,%f40 ! (4_0) xx0 = dtmp0 * tbl_div0; + fpadd32 %f6,%f52,%f10 ! (4_1) dres0 = vis_fpadd32(dres0,fdx0); + + or %g0,%i2,%l7 + add stridey,stridey,stridey2 + + cmp counter,6 + bl,pn %icc,.tail + nop + + ba .main_loop + sub counter,6,counter ! counter + + .align 16 +.main_loop: + lda [%i1]0x82,%f18 ! (0_0) ((float*)&ddx0)[0] = *px; + cmp %g1,_0x7f800000 ! (4_1) ax0 ? 0x7f800000 + bge,pn %icc,.update10 ! (4_1) if ( ax0 >= 0x7f800000 ) + fmuld %f44,%f46,%f46 ! (5_1) xx1 = dtmp1 * tbl_div1; +.cont10: + lda [stridex+%i1]0x82,%f19 ! (1_0) ((float*)&ddx0)[1] = *(px + stridex); + cmp %g1,_0x00800000 ! (4_1) ax0 ? 0x00800000 + fmuld %f42,%f22,%f44 ! (0_1) res0 = tbl_sqrt0 * res0; + faddd %f62,K1,%f42 ! (2_1) res0 += K1; + + lda [%i1]0x82,%g1 ! (0_0) ax0 = *(int*)px; + fmuld %f28,%f26,%f60 ! (1_1) res1 = tbl_sqrt1 * res1; + bl,pn %icc,.update11 ! (4_1) if ( ax0 < 0x00800000 ) + faddd %f58,K1,%f62 ! (3_1) res1 += K1; +.cont11: + lda [stridex+%i1]0x82,%i4 ! (1_0) ax1 = *(int*)(px + stridex); + cmp %g5,_0x7f800000 ! (5_1) ax1 ? 0x7f800000 + bge,pn %icc,.update12 ! (5_1) if ( ax1 >= 0x7f800000 ) + fmuld K3,%f40,%f52 ! (4_1) res0 = K3 * xx0; +.cont12: + fmuld K3,%f46,%f50 ! (5_1) res1 = K3 * xx1; + cmp %g5,_0x00800000 ! (5_1) ax1 ? 0x00800000 + bl,pn %icc,.update13 ! (5_1) if ( ax1 < 0x00800000 ) + fand %f18,DC0,%f56 ! (0_0) dfx0 = vis_fand(ddx0,DC0); +.cont13: + fmuld %f42,%f30,%f48 ! (2_1) res0 *= xx0; + sra %g1,13,%o0 ! (0_0) si0 = ax0 >> 13; + cmp %g1,_0x7f800000 ! (0_0) ax0 ? 0x7f800000 + fdtos %f44,%f8 ! (0_1) ((float*)&dres0)[0] = (float)res0; + + fmuld %f62,%f24,%f58 ! 
(3_1) res1 *= xx1; + sra %i4,13,%g5 ! (1_0) si1 = ax1 >> 13; + and %o0,2032,%o0 ! (0_0) si0 &= 0x7f0; + fdtos %f60,%f9 ! (1_1) ((float*)&dres0)[1] = (float)res1; + + ldd [%o0+TBL],%f54 ! (0_0) tbl_div0 = ((double*)((char*)TBL + si0))[0]; + sra %i4,24,%i1 ! (1_0) iexp1 = ax1 >> 24; + and %g5,2032,%o7 ! (1_0) si1 &= 0x7f0; + fpsub32 %f18,%f56,%f30 ! (0_0) dfx0 = vis_fpsub32(ddx0,dfx0); + + ldd [%o7+TBL],%f44 ! (1_0) tbl_div1 = ((double*)((char*)TBL + si1))[0]; + sra %g1,24,%i3 ! (0_0) iexp0 = ax0 >> 24; + sub %l0,%i1,%i1 ! (1_0) iexp1 = 0x3f - iexp1; + faddd %f52,K2,%f62 ! (4_1) res0 += K2; + + ldd [%i0+8],%f42 ! (2_1) tbl_sqrt0 = ((double*)addr0)[1]; + sub %l0,%i3,%g5 ! (0_0) iexp0 = 0x3f - iexp0; + bge,pn %icc,.update14 ! (0_0) if ( ax0 >= 0x7f800000 ) + faddd %f50,K2,%f60 ! (5_1) res1 += K2; +.cont14: + ldd [%o1+8],%f28 ! (3_1) tbl_sqrt1 = ((double*)addr0)[1]; + cmp %g1,_0x00800000 ! (0_0) ax0 ? 0x00800000 + and %i1,511,%i0 ! (1_0) iexp1 = 0x1ff; + fitod %f30,%f56 ! (0_0) dtmp0 = (double)(((int*)dfx0)[0]); + + ldd [%fp+tmp1],%f52 ! (0_1) fdx0 = *((double*)lexp0); + sllx %i0,23,%i0 ! (1_0) lexp1 = iexp1 << 23; + bl,pn %icc,.update15 ! (0_0) if ( ax0 < 0x00800000 ) + fitod %f31,%f50 ! (1_0) dtmp0 = (double)(((int*)dfx0)[0]); +.cont15: + fmuld %f62,%f40,%f30 ! (4_1) res0 *= xx0; + sllx %g5,55,%g5 ! (0_0) lexp0 = iexp0 << 55; + st %f10,[%l7] ! (4_2) *py = ((float*)&dres0)[0]; + faddd %f48,K0,%f62 ! (2_1) res0 += K0; + + fmuld %f60,%f46,%f48 ! (5_1) res1 *= xx1; + or %g5,%i0,%g5 ! (0_0) lexp0 |= lexp1; + stx %g5,[%fp+tmp1] ! (0_0) fdx0 = *((double*)lexp0); + faddd %f58,K0,%f60 ! (3_1) res1 += K0; + + fmuld %f56,%f54,%f26 ! (0_0) xx0 = dtmp0 * tbl_div0; + sll stridex,1,stridex2 ! stridex2 = stridex * 2; + st %f11,[stridey+%l7] ! (5_2) *(py + stridey) = ((float*)&dres0)[1]; + fpadd32 %f8,%f52,%f10 ! (0_1) dres0 = vis_fpadd32(dres0,fdx0); + + lda [%o5]0x82,%f24 ! (2_0) ((float*)&ddx0)[0] = *px; + add %l7,stridey2,%i1 ! py += stridey2 + add %o7,TBL,%o7 ! 
(1_0) addr0 = (char*)TBL + si0; + fmuld %f50,%f44,%f44 ! (1_0) xx0 = dtmp0 * tbl_div0; + + lda [stridex+%o5]0x82,%f25 ! (3_0) ((float*)&ddx0)[1] = *(px + stridex); + add %l5,TBL,%l5 ! (4_1) addr0 = (char*)TBL + si0; + fmuld %f42,%f62,%f58 ! (2_1) res0 = tbl_sqrt0 * res0; + faddd %f30,K1,%f62 ! (4_1) res0 += K1; + + lda [%o5]0x82,%g1 ! (2_0) ax0 = *(int*)px; + add %o5,stridex2,%l7 ! px += stridex2 + fmuld %f28,%f60,%f56 ! (3_1) res1 = tbl_sqrt1 * res1; + faddd %f48,K1,%f42 ! (5_1) res1 += K1; + + lda [stridex+%o5]0x82,%o5 ! (3_0) ax1 = *(int*)(px + stridex); + cmp %i4,_0x7f800000 ! (1_0) ax1 ? 0x7f800000 + bge,pn %icc,.update16 ! (1_0) if ( ax1 >= 0x7f800000 ) + fmuld K3,%f26,%f52 ! (0_0) res0 = K3 * xx0; +.cont16: + fmuld K3,%f44,%f50 ! (1_0) res1 = K3 * xx1; + cmp %i4,_0x00800000 ! (1_0) ax1 ? 0x00800000 + bl,pn %icc,.update17 ! (1_0) if ( ax1 < 0x00800000 ) + fand %f24,DC0,%f54 ! (2_0) dfx0 = vis_fand(ddx0,DC0); +.cont17: + fmuld %f62,%f40,%f48 ! (4_1) res0 *= xx0; + sra %g1,13,%i0 ! (2_0) si0 = ax0 >> 13; + cmp %g1,_0x7f800000 ! (2_0) ax0 ? 0x7f800000 + fdtos %f58,%f20 ! (2_1) ((float*)&dres0)[0] = (float)res0; + + fmuld %f42,%f46,%f58 ! (5_1) res1 *= xx1; + sra %o5,13,%o1 ! (3_0) si1 = ax1 >> 13; + and %i0,2032,%i0 ! (2_0) si0 &= 0x7f0; + fdtos %f56,%f21 ! (3_1) ((float*)&dres0)[0] = (float)res0; + + ldd [%i0+TBL],%f30 ! (2_0) tbl_div0 = ((double*)((char*)TBL + si0))[0]; + sra %o5,24,%o3 ! (3_0) iexp1 = ax1 >> 24; + and %o1,2032,%o1 ! (3_0) si1 &= 0x7f0; + fpsub32 %f24,%f54,%f12 ! (2_0) dfx0 = vis_fpsub32(ddx0,dfx0); + + ldd [%o1+TBL],%f46 ! (3_0) tbl_div1 = ((double*)((char*)TBL + si1))[0]; + sra %g1,24,%i3 ! (2_0) iexp0 = ax0 >> 24; + sub %l0,%o3,%o3 ! (3_0) iexp1 = 0x3f - iexp1; + faddd %f52,K2,%f40 ! (0_0) res0 += K2; + + ldd [%l5+8],%f42 ! (4_1) tbl_sqrt0 = ((double*)addr0)[1]; + sub %l0,%i3,%g5 ! (2_0) iexp0 = 0x3f - iexp0; + and %o3,511,%i3 ! (3_0) iexp1 &= 0x1ff; + faddd %f50,K2,%f60 ! (1_0) res0 += K2; + + ldd [%l6+8],%f28 ! 
(5_1) tbl_sqrt1 = ((double*)addr1)[1]; + sllx %g5,55,%g5 ! (2_0) lexp0 = iexp0 << 55; + add %i0,TBL,%i0 ! (2_0) addr0 = (char*)TBL + si0; + fitod %f12,%f56 ! (2_0) dtmp0 = (double)(((int*)dfx0)[0]); + + ldd [%fp+tmp2],%f52 ! (2_1) fdx0 = *((double*)lexp0); + sllx %i3,23,%i3 ! (3_0) lexp1 = iexp1 << 23; + add %i1,stridey2,%o3 ! py += stridey2 + fitod %f13,%f50 ! (3_0) dtmp1 = (double)(((int*)dfx0)[1]); + + fmuld %f40,%f26,%f40 ! (0_0) res0 *= xx0; + or %g5,%i3,%g5 ! (2_0) lexp0 |= lexp1; + st %f10,[%i1] ! (0_1) *py = ((float*)&dres0)[0]; + faddd %f48,K0,%f62 ! (4_1) res0 += K0; + + fmuld %f60,%f44,%f48 ! (1_0) res1 *= xx1; + add %o1,TBL,%o1 ! (3_0) addr1 = (char*)TBL + si1; + stx %g5,[%fp+tmp2] ! (2_0) fdx0 = *((double*)lexp0); + faddd %f58,K0,%f60 ! (5_1) res1 += K0; + + fmuld %f56,%f30,%f30 ! (2_0) xx0 = dtmp0 * tbl_div0; + bge,pn %icc,.update18 ! (2_0) if ( ax0 >= 0x7f800000 ) + st %f11,[stridey+%i1] ! (1_1) *(py + stridey) = ((float*)&dres0)[1]; + fpadd32 %f20,%f52,%f0 ! (2_1) dres0 = vis_fpadd32(dres0,fdx0); +.cont18: + cmp %g1,_0x00800000 ! (2_0) ax0 ? 0x00800000 + bl,pn %icc,.update19 ! (2_0) if ( ax0 < 0x00800000 ) + lda [%l7]0x82,%f14 ! (4_0) ((float*)&ddx0)[0] = *px; + fmuld %f50,%f46,%f24 ! (3_0) xx1 = dtmp1 * tbl_div1; +.cont19: + lda [stridex+%l7]0x82,%f15 ! (5_0) ((float*)&ddx0)[1] = *(px + stridex); + cmp %o5,_0x7f800000 ! (3_0) ax1 ? 0x7f800000 + fmuld %f42,%f62,%f58 ! (4_1) res0 = tbl_sqrt0 * res0; + faddd %f40,K1,%f46 ! (0_0) res0 += K1; + + lda [%l7]0x82,%g1 ! (4_0) ax0 = *(int*)px; + add %l7,stridex2,%i1 ! px += stridex2 + fmuld %f28,%f60,%f56 ! (5_1) res1 = tbl_sqrt1 * res1; + faddd %f48,K1,%f62 ! (1_0) res1 += K1; + + lda [stridex+%l7]0x82,%g5 ! (5_0) ax1 = *(int*)(px + stridex); + add %o0,TBL,%o0 ! (0_0) addr0 = (char*)TBL + si0; + bge,pn %icc,.update20 ! (3_0) if ( ax1 >= 0x7f800000 ) + fmuld K3,%f30,%f52 ! (2_0) res0 = K3 * xx0; +.cont20: + fmuld K3,%f24,%f50 ! (3_0) res1 = K3 * xx1; + cmp %o5,_0x00800000 ! (3_0) ax1 ? 
0x00800000 + bl,pn %icc,.update21 ! (3_0) if ( ax1 < 0x00800000 ) + fand %f14,DC0,%f16 ! (4_0) dfx0 = vis_fand(ddx0,DC0); +.cont21: + fmuld %f46,%f26,%f48 ! (0_0) res0 *= xx0; + sra %g1,13,%l5 ! (4_0) si0 = ax0 >> 13; + add %i1,stridex2,%o5 ! px += stridex2 + fdtos %f58,%f6 ! (4_1) ((float*)&dres0)[0] = (float)res0; + + fmuld %f62,%f44,%f40 ! (1_0) res1 *= xx1; + sra %g5,13,%l6 ! (5_0) si1 = ax1 >> 13; + and %l5,2032,%l5 ! (4_0) si0 &= 0x7f0; + fdtos %f56,%f7 ! (5_1) ((float*)&dres0)[1] = (float)res1; + + ldd [%l5+TBL],%f54 ! (4_0) tbl_div0 = ((double*)((char*)TBL + si0))[0]; + sra %g5,24,%l7 ! (5_0) iexp1 = ax1 >> 24; + and %l6,2032,%l6 ! (5_0) si1 &= 0x7f0; + fpsub32 %f14,%f16,%f16 ! (4_0) dfx0 = vis_fpsub32(ddx0,dfx0); + + ldd [%l6+TBL],%f46 ! (5_0) tbl_div1 = ((double*)((char*)TBL + si1))[0]; + sra %g1,24,%i3 ! (4_0) iexp0 = ax0 >> 24; + sub %l0,%l7,%l7 ! (5_0) iexp1 = 0x3f - iexp1; + faddd %f52,K2,%f58 ! (2_0) res0 += K2; + + ldd [%o0+8],%f42 ! (0_0) tbl_sqrt0 = ((double*)addr0)[1]; + and %l7,511,%l1 ! (5_0) iexp1 = 0x1ff; + add %l6,TBL,%l6 ! (5_0) addr1 = (char*)TBL + si1; + faddd %f50,K2,%f60 ! (3_0) res1 += K2; + + ldd [%o7+8],%f28 ! (1_0) tbl_sqrt1 = ((double*)addr1)[1]; + sllx %l1,23,%l1 ! (5_0) lexp1 = iexp1 << 23; + sub %l0,%i3,%o0 ! (4_0) iexp0 = 0x3f - iexp0; + fitod %f16,%f56 ! (4_0) dtmp0 = (double)(((int*)dfx0)[0]); + + ldd [%fp+tmp0],%f52 ! (4_1) fdx0 = *((double*)lexp0); + sllx %o0,55,%o0 ! (4_0) lexp0 = iexp0 << 55; + add %o3,stridey2,%l7 ! py += stridey2 + fitod %f17,%f44 ! (5_0) dtmp1 = (double)(((int*)dfx0)[1]); + + fmuld %f58,%f30,%f62 ! (2_0) res0 *= xx0; + or %o0,%l1,%o0 ! (4_0) lexp0 |= lexp1; + st %f0,[%o3] ! (2_1) *py = ((float*)&dres0)[0]; + faddd %f48,K0,%f22 ! (0_0) res0 += K0; + + fmuld %f60,%f24,%f58 ! (3_0) res1 *= xx1; + subcc counter,6,counter ! counter -= 6; + stx %o0,[%fp+tmp0] ! (4_0) fdx0 = *((double*)lexp0); + faddd %f40,K0,%f26 ! (1_0) res1 += K0; + + fmuld %f56,%f54,%f40 ! 
(4_0) xx0 = dtmp0 * tbl_div0; + st %f1,[stridey+%o3] ! (3_1) *(py + stridey) = ((float*)&dres0)[1]; + bpos,pt %icc,.main_loop + fpadd32 %f6,%f52,%f10 ! (4_1) dres0 = vis_fpadd32(dres0,fdx0); + + add counter,6,counter +.tail: + sll stridex,1,stridex2 + subcc counter,1,counter + bneg,a .begin + mov %l7,%i2 + + fmuld %f42,%f22,%f44 ! (0_1) res0 = tbl_sqrt0 * res0; + faddd %f62,K1,%f42 ! (2_1) res0 += K1; + + fmuld %f28,%f26,%f60 ! (1_1) res1 = tbl_sqrt1 * res1; + + fmuld %f42,%f30,%f48 ! (2_1) res0 *= xx0; + fdtos %f44,%f8 ! (0_1) ((float*)&dres0)[0] = (float)res0; + + fdtos %f60,%f9 ! (1_1) ((float*)&dres0)[1] = (float)res1; + + ldd [%i0+8],%f42 ! (2_1) tbl_sqrt0 = ((double*)addr0)[1]; + + ldd [%fp+tmp1],%f52 ! (0_1) fdx0 = *((double*)lexp0); + + st %f10,[%l7] ! (4_2) *py = ((float*)&dres0)[0]; + subcc counter,1,counter + bneg,a .begin + add %l7,stridey,%i2 + + faddd %f48,K0,%f62 ! (2_1) res0 += K0; + st %f11,[stridey+%l7] ! (5_2) *(py + stridey) = ((float*)&dres0)[1]; + subcc counter,1,counter + bneg,a .begin + add %l7,stridey2,%i2 + fpadd32 %f8,%f52,%f10 ! (0_1) dres0 = vis_fpadd32(dres0,fdx0); + + add %l7,stridey2,%i1 ! py += stridey2 + + fmuld %f42,%f62,%f58 ! (2_1) res0 = tbl_sqrt0 * res0; + + fdtos %f58,%f20 ! (2_1) ((float*)&dres0)[0] = (float)res0; + + ldd [%fp+tmp2],%f52 ! (2_1) fdx0 = *((double*)lexp0); + add %i1,stridey2,%o3 ! py += stridey2 + + st %f10,[%i1] ! (0_1) *py = ((float*)&dres0)[0]; + subcc counter,1,counter + bneg,a .begin + add %i1,stridey,%i2 + + st %f11,[stridey+%i1] ! (1_1) *(py + stridey) = ((float*)&dres0)[1]; + subcc counter,1,counter + bneg,a .begin + mov %o3,%i2 + fpadd32 %f20,%f52,%f0 ! (2_1) dres0 = vis_fpadd32(dres0,fdx0); + + st %f0,[%o3] ! (2_1) *py = ((float*)&dres0)[0]; + ba .begin + add %o3,stridey,%i2 + + .align 16 +.spec0: + fdivs FONE,%f14,%f14 ! x0 = FONE / x0; + add %l7,stridex,%l7 ! px += stridex + st %f14,[%i2] ! *py = x0; + sub counter,1,counter + ba .begin1 + add %i2,stridey,%i2 ! 
py += stridey + + .align 16 +.spec1: + andcc %g1,%o0,%g0 + bz,a 1f + fdivs FONE,%f14,%f14 ! x0 = DONE / x0; + + cmp %g1,0 + bl,a 1f + fsqrts %f14,%f14 ! x0 = sqrtf(x0); + + fitod %f14,%f0 + fdtos %f0,%f14 + fmuls %f14,FTWO,%f14 + st %f14,[%fp+tmp3] + ld [%fp+tmp3],%g1 + sethi %hi(0x4b000000),%o0 + sra %g1,13,%l5 ! (4_0) si0 = ax0 >> 13; + fands %f14,DC0,%f16 ! (4_0) dfx0 = vis_fand(ddx0,DC0); + ba .cont_spec + sub %g1,%o0,%g1 +1: + add %l7,stridex,%l7 ! px += stridex + sub counter,1,counter + st %f14,[%i2] ! *py = x0; + ba .begin1 + add %i2,stridey,%i2 ! py += stridey + + .align 16 +.update0: + cmp counter,1 + ble .cont0 + nop + + sub %i1,stridex,%o1 + stx %o1,[%fp+tmp_px] + + sub counter,1,counter + st counter,[%fp+tmp_counter] + + ba .cont0 + mov 1,counter + + .align 16 +.update1: + sethi %hi(0x7ffffc00),%o0 + cmp counter,1 + ble .cont1 + + add %o0,0x3ff,%o0 + + andcc %g5,%o0,%g0 + bz,a 1f + nop + + cmp %g5,0 + bl,a 1f + nop + + fitod %f15,%f0 + fdtos %f0,%f15 + fmuls %f15,FTWO,%f15 + st %f15,[%fp+tmp3] + ld [%fp+tmp3],%g5 + sethi %hi(0x4b000000),%o0 + sub %g5,%o0,%g5 + + fands %f15,DC0,%f17 ! (4_0) dfx0 = vis_fand(ddx0,DC0); + + sra %g5,13,%l6 ! (5_0) si1 = ax1 >> 13; + + sra %g5,24,%l7 ! (5_0) iexp1 = ax1 >> 24; + and %l6,2032,%l6 ! (5_0) si1 &= 0x7f0; + + fpsub32s %f15,%f17,%f17 ! (4_0) dfx0 = vis_fpsub32(ddx0,dfx0); + + ldd [%l6+TBL],%f46 ! (5_0) tbl_div1 = ((double*)((char*)TBL + si1))[0]; + sub %l0,%l7,%l1 ! (5_0) iexp1 = 0x3f - iexp1; + + sll %l1,23,%l1 ! (5_0) lexp1 = iexp1 << 23; + add %l6,TBL,%l6 ! (5_0) addr1 = (char*)TBL + si1; + st %l1,[%fp+tmp0+4] ! (4_0) fdx0 = *((double*)lexp0); + fitod %f17,%f44 ! (5_0) dtmp1 = (double)(((int*)dfx0)[1]); + + fmuld %f44,%f46,%f46 ! (5_1) xx1 = dtmp1 * tbl_div1; + + ba .cont1 + fmuld K3,%f46,%f50 ! 
(5_1) res1 = K3 * xx1; +1: + sub %i1,stridex,%o1 + stx %o1,[%fp+tmp_px] + + sub counter,1,counter + st counter,[%fp+tmp_counter] + + ba .cont1 + mov 1,counter + + .align 16 +.update2: + cmp counter,2 + ble .cont2 + sub %o5,stridex,%o1 + + sub %o1,stridex,%o1 + stx %o1,[%fp+tmp_px] + + sub counter,2,counter + st counter,[%fp+tmp_counter] + + ba .cont2 + mov 2,counter + + .align 16 +.update3: + sethi %hi(0x7ffffc00),%o1 + cmp counter,2 + ble .cont3 + + add %o1,0x3ff,%o1 + + andcc %g1,%o1,%g0 + bz,a 1f + sub %o5,stridex,%o1 + + cmp %g1,0 + bl,a 1f + sub %o5,stridex,%o1 + + fitod %f18,%f0 + fdtos %f0,%f18 + fmuls %f18,FTWO,%f18 + st %f18,[%fp+tmp3] + ld [%fp+tmp3],%g1 + sethi %hi(0x4b000000),%o1 + sub %g1,%o1,%g1 + + fand %f18,DC0,%f56 ! (0_0) dfx0 = vis_fand(ddx0,DC0); + sra %g1,13,%o0 ! (0_0) si0 = ax0 >> 13; + + and %o0,2032,%o0 ! (0_0) si0 &= 0x7f0; + + ldd [%o0+TBL],%f54 ! (0_0) tbl_div0 = ((double*)((char*)TBL + si0))[0]; + fpsub32 %f18,%f56,%f30 ! (0_0) dfx0 = vis_fpsub32(ddx0,dfx0); + + sra %g1,24,%i3 ! (0_0) iexp0 = ax0 >> 24; + sub %l0,%i3,%g5 ! (0_0) iexp0 = 0x3f - iexp0; + ba .cont3 + fitod %f30,%f56 ! (0_0) dtmp0 = (double)(((int*)dfx0)[0]); +1: + sub %o1,stridex,%o1 + stx %o1,[%fp+tmp_px] + + sub counter,2,counter + st counter,[%fp+tmp_counter] + + ba .cont3 + mov 2,counter + + .align 16 +.update4: + cmp counter,3 + ble .cont4 + sub %l7,stridex2,%o1 + + sub %o1,stridex,%o1 + stx %o1,[%fp+tmp_px] + + sub counter,3,counter + st counter,[%fp+tmp_counter] + + ba .cont4 + mov 3,counter + + .align 16 +.update5: + sethi %hi(0x7ffffc00),%o1 + cmp counter,3 + ble .cont5 + + add %o1,0x3ff,%o1 + + andcc %i4,%o1,%g0 + bz,a 1f + sub %l7,stridex2,%o1 + + cmp %i4,0 + bl,a 1f + sub %l7,stridex2,%o1 + + fitod %f19,%f0 + fdtos %f0,%f19 + fmuls %f19,FTWO,%f19 + st %f19,[%fp+tmp3] + ld [%fp+tmp3],%i4 + sethi %hi(0x4b000000),%o1 + sub %i4,%o1,%i4 + + fands %f19,DC0,%f0 ! (0_0) dfx0 = vis_fand(ddx0,DC0); + + sra %i4,13,%g5 ! (1_0) si1 = ax1 >> 13; + + sra %i4,24,%i1 ! 
(1_0) iexp1 = ax1 >> 24; + and %g5,2032,%o7 ! (1_0) si1 &= 0x7f0; + fpsub32s %f19,%f0,%f31 ! (0_0) dfx0 = vis_fpsub32(ddx0,dfx0); + + ldd [%o7+TBL],%f44 ! (1_0) tbl_div1 = ((double*)((char*)TBL + si1))[0]; + sub %l0,%i1,%i0 ! (1_0) iexp1 = 0x3f - iexp1; + + sll %i0,23,%i0 ! (1_0) lexp1 = iexp1 << 23; + fitod %f31,%f50 ! (1_0) dtmp0 = (double)(((int*)dfx0)[0]); + + st %i0,[%fp+tmp1+4] ! (0_0) fdx0 = *((double*)lexp0); + + add %o7,TBL,%o7 ! (1_0) addr0 = (char*)TBL + si0; + fmuld %f50,%f44,%f44 ! (1_0) xx0 = dtmp0 * tbl_div0; + + ba .cont5 + fmuld K3,%f44,%f50 ! (1_0) res1 = K3 * xx1; +1: + sub %o1,stridex,%o1 + stx %o1,[%fp+tmp_px] + + sub counter,3,counter + st counter,[%fp+tmp_counter] + + ba .cont5 + mov 3,counter + + .align 16 +.update6: + cmp counter,4 + ble .cont6 + sub %l7,stridex,%o3 + + sub %o3,stridex,%o3 + stx %o3,[%fp+tmp_px] + + sub counter,4,counter + st counter,[%fp+tmp_counter] + + ba .cont6 + mov 4,counter + + .align 16 +.update7: + sethi %hi(0x7ffffc00),%o3 + cmp counter,4 + ble .cont7 + + add %o3,0x3ff,%o3 + + andcc %g1,%o3,%g0 + bz,a 1f + sub %l7,stridex,%o3 + + cmp %g1,0 + bl,a 1f + sub %l7,stridex,%o3 + + fitod %f24,%f0 + fdtos %f0,%f24 + fmuls %f24,FTWO,%f24 + st %f24,[%fp+tmp3] + ld [%fp+tmp3],%g1 + sethi %hi(0x4b000000),%o3 + sub %g1,%o3,%g1 + + fands %f24,DC0,%f0 ! (2_0) dfx0 = vis_fand(ddx0,DC0); + sra %g1,13,%i0 ! (2_0) si0 = ax0 >> 13; + + and %i0,2032,%i0 ! (2_0) si0 &= 0x7f0; + + ldd [%i0+TBL],%f30 ! (2_0) tbl_div0 = ((double*)((char*)TBL + si0))[0]; + fpsub32s %f24,%f0,%f12 ! (2_0) dfx0 = vis_fpsub32(ddx0,dfx0); + + sra %g1,24,%i3 ! (2_0) iexp0 = ax0 >> 24; + + sub %l0,%i3,%g5 ! (2_0) iexp0 = 0x3f - iexp0; + + sll %g5,23,%g5 ! (2_0) lexp0 = iexp0 << 55; + add %i0,TBL,%i0 ! (2_0) addr0 = (char*)TBL + si0; + fitod %f12,%f56 ! (2_0) dtmp0 = (double)(((int*)dfx0)[0]); + + st %g5,[%fp+tmp2] ! (2_0) fdx0 = *((double*)lexp0); + ba .cont7 + fmuld %f56,%f30,%f30 ! 
(2_0) xx0 = dtmp0 * tbl_div0; +1: + sub %o3,stridex,%o3 + stx %o3,[%fp+tmp_px] + + sub counter,4,counter + st counter,[%fp+tmp_counter] + + ba .cont7 + mov 4,counter + + .align 16 +.update8: + cmp counter,5 + ble .cont8 + nop + + sub %l7,stridex,%o3 + stx %o3,[%fp+tmp_px] + + sub counter,5,counter + st counter,[%fp+tmp_counter] + + ba .cont8 + mov 5,counter + + .align 16 +.update9: + sethi %hi(0x7ffffc00),%o3 + cmp counter,5 + ble .cont9 + sub %l7,stridex,%i3 + + add %o3,0x3ff,%o3 + + andcc %o5,%o3,%g0 + bz 1f + ld [%i3],%f0 + + cmp %o5,0 + bl,a 1f + nop + + fitod %f0,%f0 + fdtos %f0,%f0 + fmuls %f0,FTWO,%f0 + st %f0,[%fp+tmp3] + ld [%fp+tmp3],%o5 + sethi %hi(0x4b000000),%o3 + sub %o5,%o3,%o5 + + fands %f0,DC0,%f8 ! (2_0) dfx0 = vis_fand(ddx0,DC0); + + sra %o5,13,%o1 ! (3_0) si1 = ax1 >> 13; + + sra %o5,24,%o3 ! (3_0) iexp1 = ax1 >> 24; + and %o1,2032,%o1 ! (3_0) si1 &= 0x7f0; + fpsub32s %f0,%f8,%f0 ! (2_0) dfx0 = vis_fpsub32(ddx0,dfx0); + + ldd [%o1+TBL],%f8 ! (3_0) tbl_div1 = ((double*)((char*)TBL + si1))[0]; + sub %l0,%o3,%i3 ! (3_0) iexp1 = 0x3f - iexp1; + + sllx %i3,23,%i3 ! (3_0) lexp1 = iexp1 << 23; + fitod %f0,%f50 ! (3_0) dtmp1 = (double)(((int*)dfx0)[1]); + + add %o1,TBL,%o1 ! (3_0) addr1 = (char*)TBL + si1; + st %i3,[%fp+tmp2+4] ! (2_0) fdx0 = *((double*)lexp0); + + fmuld %f50,%f8,%f24 ! (3_0) xx1 = dtmp1 * tbl_div1; + + ba .cont9 + fmuld K3,%f24,%f50 ! 
(3_0) res1 = K3 * xx1; +1: + stx %i3,[%fp+tmp_px] + + sub counter,5,counter + st counter,[%fp+tmp_counter] + + ba .cont9 + mov 5,counter + + .align 16 +.update10: + cmp counter,0 + ble .cont10 + sub %i1,stridex,%o3 + + sub %o3,stridex,%o3 + stx %o3,[%fp+tmp_px] + + st counter,[%fp+tmp_counter] + + ba .cont10 + mov 0,counter + + .align 16 +.update11: + sethi %hi(0x7ffffc00),%i4 + cmp counter,0 + ble .cont11 + sub %i1,stridex,%o3 + + sub %o3,stridex,%o3 + add %i4,0x3ff,%i4 + ld [%o3],%i3 + + andcc %i3,%i4,%g0 + bz 1f + + cmp %i3,0 + bl,a 1f + nop + + fitod %f14,%f0 + fdtos %f0,%f14 + fmuls %f14,FTWO,%f14 + st %f14,[%fp+tmp3] + ld [%fp+tmp3],%i3 + sethi %hi(0x4b000000),%o3 + sub %i3,%o3,%i3 + + fands %f14,DC0,%f16 ! (4_0) dfx0 = vis_fand(ddx0,DC0); + sra %i3,13,%l5 ! (4_0) si0 = ax0 >> 13; + + and %l5,2032,%l5 ! (4_0) si0 &= 0x7f0; + + ldd [%l5+TBL],%f54 ! (4_0) tbl_div0 = ((double*)((char*)TBL + si0))[0]; + fpsub32s %f14,%f16,%f16 ! (4_0) dfx0 = vis_fpsub32(ddx0,dfx0); + + sra %i3,24,%i3 ! (4_0) iexp0 = ax0 >> 24; + + sub %l0,%i3,%o0 ! (4_0) iexp0 = 0x3f - iexp0; + fitod %f16,%f56 ! (4_0) dtmp0 = (double)(((int*)dfx0)[0]); + + sllx %o0,23,%o0 ! (4_0) lexp0 = iexp0 << 55; + + st %o0,[%fp+tmp0] ! (4_0) fdx0 = *((double*)lexp0); + + ba .cont11 + fmuld %f56,%f54,%f40 ! (4_0) xx0 = dtmp0 * tbl_div0; +1: + stx %o3,[%fp+tmp_px] + + st counter,[%fp+tmp_counter] + + ba .cont11 + mov 0,counter + + .align 16 +.update12: + cmp counter,1 + ble .cont12 + nop + + sub %i1,stridex,%i1 + stx %i1,[%fp+tmp_px] + + sub counter,1,counter + st counter,[%fp+tmp_counter] + + ba .cont12 + mov 1,counter + + .align 16 +.update13: + sethi %hi(0x7ffffc00),%o3 + cmp counter,1 + ble .cont13 + + add %o3,0x3ff,%o3 + + andcc %g5,%o3,%g0 + bz 1f + + cmp %g5,0 + bl,a 1f + nop + + fitod %f15,%f0 + fdtos %f0,%f15 + fmuls %f15,FTWO,%f15 + st %f15,[%fp+tmp3] + ld [%fp+tmp3],%g5 + sethi %hi(0x4b000000),%o3 + sub %g5,%o3,%g5 + + fands %f15,DC0,%f17 ! (4_0) dfx0 = vis_fand(ddx0,DC0); + + sra %g5,13,%l6 ! 
(5_0) si1 = ax1 >> 13; + sra %g5,24,%o3 ! (5_0) iexp1 = ax1 >> 24; + and %l6,2032,%l6 ! (5_0) si1 &= 0x7f0; + fpsub32s %f15,%f17,%f17 ! (4_0) dfx0 = vis_fpsub32(ddx0,dfx0); + + ldd [%l6+TBL],%f46 ! (5_0) tbl_div1 = ((double*)((char*)TBL + si1))[0]; + sub %l0,%o3,%l1 ! (5_0) iexp1 = 0x3f - iexp1; + + add %l6,TBL,%l6 ! (5_0) addr1 = (char*)TBL + si1; + + sllx %l1,23,%l1 ! (5_0) lexp1 = iexp1 << 23; + st %l1,[%fp+tmp0+4] ! (4_0) fdx0 = *((double*)lexp0); + + fitod %f17,%f0 ! (5_0) dtmp1 = (double)(((int*)dfx0)[1]); + + fmuld %f0,%f46,%f46 ! (5_1) xx1 = dtmp1 * tbl_div1; + ba .cont13 + fmuld K3,%f46,%f50 ! (5_1) res1 = K3 * xx1; +1: + sub %i1,stridex,%i1 + stx %i1,[%fp+tmp_px] + + sub counter,1,counter + st counter,[%fp+tmp_counter] + + ba .cont13 + mov 1,counter + + .align 16 +.update14: + cmp counter,2 + ble .cont14 + sub %o5,stridex,%o3 + + sub %o3,stridex,%o3 + stx %o3,[%fp+tmp_px] + + sub counter,2,counter + st counter,[%fp+tmp_counter] + + ba .cont14 + mov 2,counter + + .align 16 +.update15: + sethi %hi(0x7ffffc00),%i3 + cmp counter,2 + ble .cont15 + sub %o5,stridex,%o3 + + add %i3,0x3ff,%i3 + + andcc %g1,%i3,%g0 + bz 1f + sub %o3,stridex,%o3 + + cmp %g1,0 + bl,a 1f + nop + + fitod %f18,%f0 + fdtos %f0,%f18 + fmuls %f18,FTWO,%f18 + st %f18,[%fp+tmp3] + ld [%fp+tmp3],%g1 + sethi %hi(0x4b000000),%o3 + sub %g1,%o3,%g1 + + fands %f18,DC0,%f0 ! (0_0) dfx0 = vis_fand(ddx0,DC0); + sra %g1,13,%o0 ! (0_0) si0 = ax0 >> 13; + and %o0,2032,%o0 ! (0_0) si0 &= 0x7f0; + + ldd [%o0+TBL],%f54 ! (0_0) tbl_div0 = ((double*)((char*)TBL + si0))[0]; + fpsub32s %f18,%f0,%f30 ! (0_0) dfx0 = vis_fpsub32(ddx0,dfx0); + + sra %g1,24,%i3 ! (0_0) iexp0 = ax0 >> 24; + + sub %l0,%i3,%g5 ! (0_0) iexp0 = 0x3f - iexp0; + + ba .cont15 + fitod %f30,%f56 ! 
(0_0) dtmp0 = (double)(((int*)dfx0)[0]); +1: + stx %o3,[%fp+tmp_px] + + sub counter,2,counter + st counter,[%fp+tmp_counter] + + ba .cont15 + mov 2,counter + + .align 16 +.update16: + cmp counter,3 + ble .cont16 + sub %l7,stridex2,%o3 + + sub %o3,stridex,%o3 + stx %o3,[%fp+tmp_px] + + sub counter,3,counter + st counter,[%fp+tmp_counter] + + ba .cont16 + mov 3,counter + + .align 16 +.update17: + sethi %hi(0x7ffffc00),%i3 + cmp counter,3 + ble .cont17 + sub %l7,stridex2,%o3 + + add %i3,0x3ff,%i3 + + andcc %i4,%i3,%g0 + bz 1f + sub %o3,stridex,%o3 + + cmp %i4,0 + bl,a 1f + nop + + fitod %f19,%f0 + fdtos %f0,%f19 + fmuls %f19,FTWO,%f19 + st %f19,[%fp+tmp3] + ld [%fp+tmp3],%i4 + sethi %hi(0x4b000000),%o3 + sub %i4,%o3,%i4 + + fands %f19,DC0,%f0 ! (0_0) dfx0 = vis_fand(ddx0,DC0); + + sra %i4,13,%g5 ! (1_0) si1 = ax1 >> 13; + + sra %i4,24,%i0 ! (1_0) iexp1 = ax1 >> 24; + and %g5,2032,%o7 ! (1_0) si1 &= 0x7f0; + fpsub32s %f19,%f0,%f31 ! (0_0) dfx0 = vis_fpsub32(ddx0,dfx0); + + ldd [%o7+TBL],%f44 ! (1_0) tbl_div1 = ((double*)((char*)TBL + si1))[0]; + sub %l0,%i0,%i0 ! (1_0) iexp1 = 0x3f - iexp1; + + sllx %i0,23,%i0 ! (1_0) lexp1 = iexp1 << 23; + fitod %f31,%f50 ! (1_0) dtmp0 = (double)(((int*)dfx0)[0]); + + st %i0,[%fp+tmp1+4] ! (0_0) fdx0 = *((double*)lexp0); + + add %o7,TBL,%o7 ! (1_0) addr0 = (char*)TBL + si0; + fmuld %f50,%f44,%f44 ! (1_0) xx0 = dtmp0 * tbl_div0; + + ba .cont17 + fmuld K3,%f44,%f50 ! (1_0) res1 = K3 * xx1; +1: + stx %o3,[%fp+tmp_px] + + sub counter,3,counter + st counter,[%fp+tmp_counter] + + ba .cont17 + mov 3,counter + + .align 16 +.update18: + cmp counter,4 + ble .cont18 + fpadd32 %f20,%f52,%f0 ! (2_1) dres0 = vis_fpadd32(dres0,fdx0); + + sub %l7,stridex2,%i3 + stx %i3,[%fp+tmp_px] + + sub counter,4,counter + st counter,[%fp+tmp_counter] + + ba .cont18 + mov 4,counter + + .align 16 +.update19: + sethi %hi(0x7ffffc00),%i3 + cmp counter,4 + ble,a .cont19 + fmuld %f50,%f46,%f24 ! 
(3_0) xx1 = dtmp1 * tbl_div1; + + add %i3,0x3ff,%i3 + + andcc %g1,%i3,%g0 + bz 1f + nop + + cmp %g1,0 + bl,a 1f + nop + + fitod %f24,%f24 + fdtos %f24,%f24 + fmuls %f24,FTWO,%f24 + st %f24,[%fp+tmp3] + ld [%fp+tmp3],%g1 + sethi %hi(0x4b000000),%i3 + sub %g1,%i3,%g1 + + fands %f24,DC0,%f8 ! (2_0) dfx0 = vis_fand(ddx0,DC0); + sra %g1,13,%i0 ! (2_0) si0 = ax0 >> 13; + + and %i0,2032,%i0 ! (2_0) si0 &= 0x7f0; + + ldd [%i0+TBL],%f30 ! (2_0) tbl_div0 = ((double*)((char*)TBL + si0))[0]; + fpsub32s %f24,%f8,%f12 ! (2_0) dfx0 = vis_fpsub32(ddx0,dfx0); + + sra %g1,24,%i3 ! (2_0) iexp0 = ax0 >> 24; + + sub %l0,%i3,%g5 ! (2_0) iexp0 = 0x3f - iexp0; + + sllx %g5,23,%g5 ! (2_0) lexp0 = iexp0 << 55; + add %i0,TBL,%i0 ! (2_0) addr0 = (char*)TBL + si0; + fitod %f12,%f56 ! (2_0) dtmp0 = (double)(((int*)dfx0)[0]); + + st %g5,[%fp+tmp2] ! (2_0) fdx0 = *((double*)lexp0); + fmuld %f56,%f30,%f30 ! (2_0) xx0 = dtmp0 * tbl_div0; + + ba .cont19 + fmuld %f50,%f46,%f24 ! (3_0) xx1 = dtmp1 * tbl_div1; +1: + sub %l7,stridex2,%i3 + stx %i3,[%fp+tmp_px] + + sub counter,4,counter + st counter,[%fp+tmp_counter] + + mov 4,counter + ba .cont19 + fmuld %f50,%f46,%f24 ! (3_0) xx1 = dtmp1 * tbl_div1; + + .align 16 +.update20: + cmp counter,5 + ble .cont20 + nop + + sub %l7,stridex,%i3 + stx %i3,[%fp+tmp_px] + + sub counter,5,counter + st counter,[%fp+tmp_counter] + + ba .cont20 + mov 5,counter + + .align 16 +.update21: + sethi %hi(0x7ffffc00),%i3 + cmp counter,5 + ble,a .cont21 + nop + + sub %l7,stridex,%i4 + add %i3,0x3ff,%i3 + + andcc %o5,%i3,%g0 + bz 1f + ld [%i4],%f8 + + cmp %o5,0 + bl,a 1f + nop + + fitod %f8,%f8 + fdtos %f8,%f8 + fmuls %f8,FTWO,%f8 + st %f8,[%fp+tmp3] + ld [%fp+tmp3],%o5 + sethi %hi(0x4b000000),%i3 + sub %o5,%i3,%o5 + + fands %f8,DC0,%f24 ! (2_0) dfx0 = vis_fand(ddx0,DC0); + + sra %o5,13,%o1 ! (3_0) si1 = ax1 >> 13; + + sra %o5,24,%i3 ! (3_0) iexp1 = ax1 >> 24; + and %o1,2032,%o1 ! (3_0) si1 &= 0x7f0; + fpsub32s %f8,%f24,%f24 ! 
(2_0) dfx0 = vis_fpsub32(ddx0,dfx0); + + ldd [%o1+TBL],%f8 ! (3_0) tbl_div1 = ((double*)((char*)TBL + si1))[0]; + sub %l0,%i3,%i3 ! (3_0) iexp1 = 0x3f - iexp1; + + sllx %i3,23,%i3 ! (3_0) lexp1 = iexp1 << 23; + fitod %f24,%f50 ! (3_0) dtmp1 = (double)(((int*)dfx0)[1]); + + add %o1,TBL,%o1 ! (3_0) addr1 = (char*)TBL + si1; + st %i3,[%fp+tmp2+4] ! (2_0) fdx0 = *((double*)lexp0); + + fmuld %f50,%f8,%f24 ! (3_0) xx1 = dtmp1 * tbl_div1; + + ba .cont21 + fmuld K3,%f24,%f50 ! (3_0) res1 = K3 * xx1; +1: + sub %l7,stridex,%i3 + stx %i3,[%fp+tmp_px] + + sub counter,5,counter + st counter,[%fp+tmp_counter] + + ba .cont21 + mov 5,counter + + .align 16 +.exit: + ret + restore + + SET_SIZE(__vrsqrtf) + diff --git a/usr/src/lib/libmvec/common/vis/__vsin.S b/usr/src/lib/libmvec/common/vis/__vsin.S new file mode 100644 index 0000000000..50f3279de6 --- /dev/null +++ b/usr/src/lib/libmvec/common/vis/__vsin.S @@ -0,0 +1,3003 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + + .file "__vsin.S" + +#include "libm.h" + /* Constant pool: 23 eight-byte entries, each emitted as two 32-bit words with the high-order word first. The #define names that follow the table are byte offsets into it. */ + RO_DATA + .align 64 +constants: + .word 0x3ec718e3,0xa6972785 + .word 0x3ef9fd39,0x94293940 + .word 0xbf2a019f,0x75ee4be1 + .word 0xbf56c16b,0xba552569 + .word 0x3f811111,0x1108c703 + .word 0x3fa55555,0x554f5b35 + .word 0xbfc55555,0x555554d0 + .word 0xbfdfffff,0xffffff85 + .word 0x3ff00000,0x00000000 /* 1.0 */ + .word 0xbfc55555,0x5551fc28 + .word 0x3f811107,0x62eacc9d + .word 0xbfdfffff,0xffff6328 + .word 0x3fa55551,0x5f7acf0c + .word 0x3fe45f30,0x6dc9c883 /* 2/pi */ + .word 0x43380000,0x00000000 /* 1.5 * 2^52 */ + .word 0x3ff921fb,0x54400000 /* pi/2, leading part; the next three entries are progressively smaller correction terms */ + .word 0x3dd0b461,0x1a600000 + .word 0x3ba3198a,0x2e000000 + .word 0x397b839a,0x252049c1 + .word 0x80000000,0x00004000 + .word 0xffff8000,0x00000000 ! N.B.: low-order words used + .word 0x3fc90000,0x80000000 ! for sign bit hacking; see + .word 0x3fc40000,0x00000000 ! references to "thresh" below + /* Byte offsets into constants. Offsets 0x0-0x38 alternate p and q entries, so a p offset plus 8 names the q entry with the same index. thresh names the first of the final two table entries; the second one sits at thresh+8. */ +#define p4 0x0 +#define q4 0x08 +#define p3 0x10 +#define q3 0x18 +#define p2 0x20 +#define q2 0x28 +#define p1 0x30 +#define q1 0x38 +#define one 0x40 +#define pp1 0x48 +#define pp2 0x50 +#define qq1 0x58 +#define qq2 0x60 +#define invpio2 0x68 +#define round 0x70 +#define pio2_1 0x78 +#define pio2_2 0x80 +#define pio2_3 0x88 +#define pio2_3t 0x90 +#define f30val 0x98 +#define mask 0xa0 +#define thresh 0xa8 + +! local storage indices + /* These are used as displacements from %fp, e.g. [%fp+nsave]. tmps is the amount subtracted from %sp (on top of the minimum frame) by the function's save instruction. */ +#define xsave STACK_BIAS-0x8 +#define ysave STACK_BIAS-0x10 +#define nsave STACK_BIAS-0x14 +#define sxsave STACK_BIAS-0x18 +#define sysave STACK_BIAS-0x1c +#define biguns STACK_BIAS-0x20 +#define n2 STACK_BIAS-0x24 +#define n1 STACK_BIAS-0x28 +#define n0 STACK_BIAS-0x2c +#define x2_1 STACK_BIAS-0x40 +#define x1_1 STACK_BIAS-0x50 +#define x0_1 STACK_BIAS-0x60 +#define y2_0 STACK_BIAS-0x70 +#define y1_0 STACK_BIAS-0x80 +#define y0_0 STACK_BIAS-0x90 +! sizeof temp storage - must be a multiple of 16 for V9 +#define tmps 0x90 + +!-------------------------------------------------------------- +! Some defines to keep code more readable +#define LIM_l6 %l6 +! 
in primary range, contains |x| upper limit when cos(x)=1. +! in transferring to medium range, denotes what loop was active. +!-------------------------------------------------------------- + + ENTRY(__vsin) + save %sp,-SA(MINFRAME)-tmps,%sp + PIC_SETUP(g5) + PIC_SET(g5,__vlibm_TBL_sincos_hi,l3) + PIC_SET(g5,__vlibm_TBL_sincos_lo,l4) + PIC_SET(g5,constants,l5) + mov %l5,%g1 + wr %g0,0x82,%asi ! set %asi for non-faulting loads + +! ========== primary range ========== + +! register use + +! i0 n +! i1 x +! i2 stridex +! i3 y +! i4 stridey +! i5 0x80000000 + +! l0 hx0 +! l1 hx1 +! l2 hx2 +! l3 __vlibm_TBL_sincos_hi +! l4 __vlibm_TBL_sincos_lo +! l5 0x3fc90000 +! l6 0x3e400000 +! l7 0x3fe921fb + +! the following are 64-bit registers in both V8+ and V9 + +! g1 scratch +! g5 + +! o0 py0 +! o1 py1 +! o2 py2 +! o3 oy0 +! o4 oy1 +! o5 oy2 +! o7 scratch + +! f0 x0 +! f2 +! f4 +! f6 +! f8 scratch for table base +! f9 signbit0 +! f10 x1 +! f12 +! f14 +! f16 +! f18 scratch for table base +! f19 signbit1 +! f20 x2 +! f22 +! f24 +! f26 +! f28 scratch for table base +! f29 signbit2 +! f30 0x80000000 +! f31 0x4000 +! f32 +! f34 +! f36 +! f38 +! f40 +! f42 +! f44 0xffff800000000000 +! f46 p1 +! f48 p2 +! f50 p3 +! f52 p4 +! f54 one +! f56 pp1 +! f58 pp2 +! f60 qq1 +! f62 qq2 + +#ifdef __sparcv9 + stx %i1,[%fp+xsave] ! save arguments + stx %i3,[%fp+ysave] +#else + st %i1,[%fp+xsave] ! save arguments + st %i3,[%fp+ysave] +#endif + st %i0,[%fp+nsave] + st %i2,[%fp+sxsave] + st %i4,[%fp+sysave] + sethi %hi(0x80000000),%i5 ! load/set up constants + sethi %hi(0x3fc90000),%l5 + sethi %hi(0x3e400000),LIM_l6 + sethi %hi(0x3fe921fb),%l7 + or %l7,%lo(0x3fe921fb),%l7 + ldd [%g1+f30val],%f30 + ldd [%g1+mask],%f44 + ldd [%g1+p1],%f46 + ldd [%g1+p2],%f48 + ldd [%g1+p3],%f50 + ldd [%g1+p4],%f52 + ldd [%g1+one],%f54 + ldd [%g1+pp1],%f56 + ldd [%g1+pp2],%f58 + ldd [%g1+qq1],%f60 + ldd [%g1+qq2],%f62 + sll %i2,3,%i2 ! scale strides + sll %i4,3,%i4 + add %fp,x0_1,%o3 ! 
precondition loop + add %fp,x0_1,%o4 + add %fp,x0_1,%o5 + ld [%i1],%l0 ! hx = *x + ld [%i1],%f0 + ld [%i1+4],%f1 + andn %l0,%i5,%l0 ! hx &= ~0x80000000 + add %i1,%i2,%i1 ! x += stridex + + ba,pt %icc,.loop0 +! delay slot + nop + + .align 32 +.loop0: + lda [%i1]%asi,%l1 ! preload next argument + sub %l0,LIM_l6,%g1 + sub %l7,%l0,%o7 + fands %f0,%f30,%f9 ! save signbit + + lda [%i1]%asi,%f10 + orcc %o7,%g1,%g0 + mov %i3,%o0 ! py0 = y + bl,pn %icc,.range0 ! if hx < 0x3e400000 or > 0x3fe921fb + +! delay slot + lda [%i1+4]%asi,%f11 + addcc %i0,-1,%i0 + add %i3,%i4,%i3 ! y += stridey + ble,pn %icc,.endloop1 + +! delay slot + andn %l1,%i5,%l1 + add %i1,%i2,%i1 ! x += stridex + fabsd %f0,%f0 + fmuld %f54,%f54,%f54 ! one*one; a nop for alignment only + +.loop1: + lda [%i1]%asi,%l2 ! preload next argument + sub %l1,LIM_l6,%g1 + sub %l7,%l1,%o7 + fands %f10,%f30,%f19 ! save signbit + + lda [%i1]%asi,%f20 + orcc %o7,%g1,%g0 + mov %i3,%o1 ! py1 = y + bl,pn %icc,.range1 ! if hx < 0x3e400000 or > 0x3fe921fb + +! delay slot + lda [%i1+4]%asi,%f21 + addcc %i0,-1,%i0 + add %i3,%i4,%i3 ! y += stridey + ble,pn %icc,.endloop2 + +! delay slot + andn %l2,%i5,%l2 + add %i1,%i2,%i1 ! x += stridex + fabsd %f10,%f10 + fmuld %f54,%f54,%f54 ! one*one; a nop for alignment only + +.loop2: + st %f6,[%o3] + sub %l2,LIM_l6,%g1 + sub %l7,%l2,%o7 + fands %f20,%f30,%f29 ! save signbit + + st %f7,[%o3+4] + orcc %g1,%o7,%g0 + mov %i3,%o2 ! py2 = y + bl,pn %icc,.range2 ! if hx < 0x3e400000 or > 0x3fe921fb + +! delay slot + add %i3,%i4,%i3 ! y += stridey + cmp %l0,%l5 + fabsd %f20,%f20 + bl,pn %icc,.case4 + +! delay slot + st %f16,[%o4] + cmp %l1,%l5 + fpadd32s %f0,%f31,%f8 + bl,pn %icc,.case2 + +! delay slot + st %f17,[%o4+4] + cmp %l2,%l5 + fpadd32s %f10,%f31,%f18 + bl,pn %icc,.case1 + +! 
delay slot + st %f26,[%o5] + mov %o0,%o3 + sethi %hi(0x3fc3c000),%o7 + fpadd32s %f20,%f31,%f28 + + st %f27,[%o5+4] + fand %f8,%f44,%f2 + mov %o1,%o4 + + fand %f18,%f44,%f12 + mov %o2,%o5 + sub %l0,%o7,%l0 + + fand %f28,%f44,%f22 + sub %l1,%o7,%l1 + sub %l2,%o7,%l2 + + fsubd %f0,%f2,%f0 + srl %l0,10,%l0 + add %l3,8,%g1 + + fsubd %f10,%f12,%f10 + srl %l1,10,%l1 + + fsubd %f20,%f22,%f20 + srl %l2,10,%l2 + + fmuld %f0,%f0,%f2 + andn %l0,0x1f,%l0 + + fmuld %f10,%f10,%f12 + andn %l1,0x1f,%l1 + + fmuld %f20,%f20,%f22 + andn %l2,0x1f,%l2 + + fmuld %f2,%f58,%f6 + ldd [%l3+%l0],%f32 + + fmuld %f12,%f58,%f16 + ldd [%l3+%l1],%f36 + + fmuld %f22,%f58,%f26 + ldd [%l3+%l2],%f40 + + faddd %f6,%f56,%f6 + fmuld %f2,%f62,%f4 + ldd [%g1+%l0],%f34 + + faddd %f16,%f56,%f16 + fmuld %f12,%f62,%f14 + ldd [%g1+%l1],%f38 + + faddd %f26,%f56,%f26 + fmuld %f22,%f62,%f24 + ldd [%g1+%l2],%f42 + + fmuld %f2,%f6,%f6 + faddd %f4,%f60,%f4 + + fmuld %f12,%f16,%f16 + faddd %f14,%f60,%f14 + + fmuld %f22,%f26,%f26 + faddd %f24,%f60,%f24 + + faddd %f6,%f54,%f6 + fmuld %f2,%f4,%f4 + + faddd %f16,%f54,%f16 + fmuld %f12,%f14,%f14 + + faddd %f26,%f54,%f26 + fmuld %f22,%f24,%f24 + + fmuld %f0,%f6,%f6 + ldd [%l4+%l0],%f2 + + fmuld %f10,%f16,%f16 + ldd [%l4+%l1],%f12 + + fmuld %f20,%f26,%f26 + ldd [%l4+%l2],%f22 + + fmuld %f4,%f32,%f4 + lda [%i1]%asi,%l0 ! preload next argument + + fmuld %f14,%f36,%f14 + lda [%i1]%asi,%f0 + + fmuld %f24,%f40,%f24 + lda [%i1+4]%asi,%f1 + + fmuld %f6,%f34,%f6 + add %i1,%i2,%i1 ! x += stridex + + fmuld %f16,%f38,%f16 + + fmuld %f26,%f42,%f26 + + faddd %f6,%f4,%f6 + + faddd %f16,%f14,%f16 + + faddd %f26,%f24,%f26 + + faddd %f6,%f2,%f6 + + faddd %f16,%f12,%f16 + + faddd %f26,%f22,%f26 + + faddd %f6,%f32,%f6 + + faddd %f16,%f36,%f16 + + faddd %f26,%f40,%f26 + andn %l0,%i5,%l0 ! hx &= ~0x80000000 + + fors %f6,%f9,%f6 + addcc %i0,-1,%i0 + + fors %f16,%f19,%f16 + bg,pt %icc,.loop0 + +! delay slot + fors %f26,%f29,%f26 + + ba,pt %icc,.endloop0 +! 
delay slot + nop + + .align 32 +.case1: + st %f27,[%o5+4] + sethi %hi(0x3fc3c000),%o7 + add %l3,8,%g1 + fand %f8,%f44,%f2 + + sub %l0,%o7,%l0 + sub %l1,%o7,%l1 + fand %f18,%f44,%f12 + fmuld %f20,%f20,%f22 + + fsubd %f0,%f2,%f0 + srl %l0,10,%l0 + mov %o0,%o3 + + fsubd %f10,%f12,%f10 + srl %l1,10,%l1 + mov %o1,%o4 + + fmuld %f22,%f52,%f24 + mov %o2,%o5 + + fmuld %f0,%f0,%f2 + andn %l0,0x1f,%l0 + + fmuld %f10,%f10,%f12 + andn %l1,0x1f,%l1 + + faddd %f24,%f50,%f24 + + fmuld %f2,%f58,%f6 + ldd [%l3+%l0],%f32 + + fmuld %f12,%f58,%f16 + ldd [%l3+%l1],%f36 + + fmuld %f22,%f24,%f24 + + faddd %f6,%f56,%f6 + fmuld %f2,%f62,%f4 + ldd [%g1+%l0],%f34 + + faddd %f16,%f56,%f16 + fmuld %f12,%f62,%f14 + ldd [%g1+%l1],%f38 + + faddd %f24,%f48,%f24 + + fmuld %f2,%f6,%f6 + faddd %f4,%f60,%f4 + + fmuld %f12,%f16,%f16 + faddd %f14,%f60,%f14 + + fmuld %f22,%f24,%f24 + + faddd %f6,%f54,%f6 + fmuld %f2,%f4,%f4 + + faddd %f16,%f54,%f16 + fmuld %f12,%f14,%f14 + + faddd %f24,%f46,%f24 + + fmuld %f0,%f6,%f6 + ldd [%l4+%l0],%f2 + + fmuld %f10,%f16,%f16 + ldd [%l4+%l1],%f12 + + fmuld %f4,%f32,%f4 + lda [%i1]%asi,%l0 ! preload next argument + + fmuld %f14,%f36,%f14 + lda [%i1]%asi,%f0 + + fmuld %f6,%f34,%f6 + lda [%i1+4]%asi,%f1 + + fmuld %f16,%f38,%f16 + add %i1,%i2,%i1 ! x += stridex + + fmuld %f22,%f24,%f24 + + faddd %f6,%f4,%f6 + + faddd %f16,%f14,%f16 + + fmuld %f20,%f24,%f24 + + faddd %f6,%f2,%f6 + + faddd %f16,%f12,%f16 + + faddd %f20,%f24,%f26 + + faddd %f6,%f32,%f6 + + faddd %f16,%f36,%f16 + andn %l0,%i5,%l0 ! hx &= ~0x80000000 + + fors %f26,%f29,%f26 + addcc %i0,-1,%i0 + + fors %f6,%f9,%f6 + bg,pt %icc,.loop0 + +! delay slot + fors %f16,%f19,%f16 + + ba,pt %icc,.endloop0 +! delay slot + nop + + .align 32 +.case2: + st %f26,[%o5] + cmp %l2,%l5 + fpadd32s %f20,%f31,%f28 + bl,pn %icc,.case3 + +! 
delay slot + st %f27,[%o5+4] + sethi %hi(0x3fc3c000),%o7 + add %l3,8,%g1 + fand %f8,%f44,%f2 + + sub %l0,%o7,%l0 + sub %l2,%o7,%l2 + fand %f28,%f44,%f22 + fmuld %f10,%f10,%f12 + + fsubd %f0,%f2,%f0 + srl %l0,10,%l0 + mov %o0,%o3 + + fsubd %f20,%f22,%f20 + srl %l2,10,%l2 + mov %o2,%o5 + + fmuld %f12,%f52,%f14 + mov %o1,%o4 + + fmuld %f0,%f0,%f2 + andn %l0,0x1f,%l0 + + fmuld %f20,%f20,%f22 + andn %l2,0x1f,%l2 + + faddd %f14,%f50,%f14 + + fmuld %f2,%f58,%f6 + ldd [%l3+%l0],%f32 + + fmuld %f22,%f58,%f26 + ldd [%l3+%l2],%f40 + + fmuld %f12,%f14,%f14 + + faddd %f6,%f56,%f6 + fmuld %f2,%f62,%f4 + ldd [%g1+%l0],%f34 + + faddd %f26,%f56,%f26 + fmuld %f22,%f62,%f24 + ldd [%g1+%l2],%f42 + + faddd %f14,%f48,%f14 + + fmuld %f2,%f6,%f6 + faddd %f4,%f60,%f4 + + fmuld %f22,%f26,%f26 + faddd %f24,%f60,%f24 + + fmuld %f12,%f14,%f14 + + faddd %f6,%f54,%f6 + fmuld %f2,%f4,%f4 + + faddd %f26,%f54,%f26 + fmuld %f22,%f24,%f24 + + faddd %f14,%f46,%f14 + + fmuld %f0,%f6,%f6 + ldd [%l4+%l0],%f2 + + fmuld %f20,%f26,%f26 + ldd [%l4+%l2],%f22 + + fmuld %f4,%f32,%f4 + lda [%i1]%asi,%l0 ! preload next argument + + fmuld %f24,%f40,%f24 + lda [%i1]%asi,%f0 + + fmuld %f6,%f34,%f6 + lda [%i1+4]%asi,%f1 + + fmuld %f26,%f42,%f26 + add %i1,%i2,%i1 ! x += stridex + + fmuld %f12,%f14,%f14 + + faddd %f6,%f4,%f6 + + faddd %f26,%f24,%f26 + + fmuld %f10,%f14,%f14 + + faddd %f6,%f2,%f6 + + faddd %f26,%f22,%f26 + + faddd %f10,%f14,%f16 + + faddd %f6,%f32,%f6 + + faddd %f26,%f40,%f26 + andn %l0,%i5,%l0 ! hx &= ~0x80000000 + + fors %f16,%f19,%f16 + addcc %i0,-1,%i0 + + fors %f6,%f9,%f6 + bg,pt %icc,.loop0 + +! delay slot + fors %f26,%f29,%f26 + + ba,pt %icc,.endloop0 +! 
delay slot + nop + + .align 32 +.case3: + sethi %hi(0x3fc3c000),%o7 + add %l3,8,%g1 + fand %f8,%f44,%f2 + fmuld %f10,%f10,%f12 + + sub %l0,%o7,%l0 + fmuld %f20,%f20,%f22 + + fsubd %f0,%f2,%f0 + srl %l0,10,%l0 + mov %o0,%o3 + + fmuld %f12,%f52,%f14 + mov %o1,%o4 + + fmuld %f22,%f52,%f24 + mov %o2,%o5 + + fmuld %f0,%f0,%f2 + andn %l0,0x1f,%l0 + + faddd %f14,%f50,%f14 + + faddd %f24,%f50,%f24 + + fmuld %f2,%f58,%f6 + ldd [%l3+%l0],%f32 + + fmuld %f12,%f14,%f14 + + fmuld %f22,%f24,%f24 + + faddd %f6,%f56,%f6 + fmuld %f2,%f62,%f4 + ldd [%g1+%l0],%f34 + + faddd %f14,%f48,%f14 + + faddd %f24,%f48,%f24 + + fmuld %f2,%f6,%f6 + faddd %f4,%f60,%f4 + + fmuld %f12,%f14,%f14 + + fmuld %f22,%f24,%f24 + + faddd %f6,%f54,%f6 + fmuld %f2,%f4,%f4 + + faddd %f14,%f46,%f14 + + faddd %f24,%f46,%f24 + + fmuld %f0,%f6,%f6 + ldd [%l4+%l0],%f2 + + fmuld %f4,%f32,%f4 + lda [%i1]%asi,%l0 ! preload next argument + + fmuld %f12,%f14,%f14 + lda [%i1]%asi,%f0 + + fmuld %f6,%f34,%f6 + lda [%i1+4]%asi,%f1 + + fmuld %f22,%f24,%f24 + add %i1,%i2,%i1 ! x += stridex + + fmuld %f10,%f14,%f14 + + faddd %f6,%f4,%f6 + + fmuld %f20,%f24,%f24 + + faddd %f10,%f14,%f16 + + faddd %f6,%f2,%f6 + + faddd %f20,%f24,%f26 + + fors %f16,%f19,%f16 + andn %l0,%i5,%l0 ! hx &= ~0x80000000 + + faddd %f6,%f32,%f6 + addcc %i0,-1,%i0 + + fors %f26,%f29,%f26 + bg,pt %icc,.loop0 + +! delay slot + fors %f6,%f9,%f6 + + ba,pt %icc,.endloop0 +! delay slot + nop + + .align 32 +.case4: + st %f17,[%o4+4] + cmp %l1,%l5 + fpadd32s %f10,%f31,%f18 + bl,pn %icc,.case6 + +! delay slot + st %f26,[%o5] + cmp %l2,%l5 + fpadd32s %f20,%f31,%f28 + bl,pn %icc,.case5 + +! 
delay slot + st %f27,[%o5+4] + sethi %hi(0x3fc3c000),%o7 + add %l3,8,%g1 + fand %f18,%f44,%f12 + + sub %l1,%o7,%l1 + sub %l2,%o7,%l2 + fand %f28,%f44,%f22 + fmuld %f0,%f0,%f2 + + fsubd %f10,%f12,%f10 + srl %l1,10,%l1 + mov %o1,%o4 + + fsubd %f20,%f22,%f20 + srl %l2,10,%l2 + mov %o2,%o5 + + fmovd %f0,%f6 + fmuld %f2,%f52,%f4 + mov %o0,%o3 + + fmuld %f10,%f10,%f12 + andn %l1,0x1f,%l1 + + fmuld %f20,%f20,%f22 + andn %l2,0x1f,%l2 + + faddd %f4,%f50,%f4 + + fmuld %f12,%f58,%f16 + ldd [%l3+%l1],%f36 + + fmuld %f22,%f58,%f26 + ldd [%l3+%l2],%f40 + + fmuld %f2,%f4,%f4 + + faddd %f16,%f56,%f16 + fmuld %f12,%f62,%f14 + ldd [%g1+%l1],%f38 + + faddd %f26,%f56,%f26 + fmuld %f22,%f62,%f24 + ldd [%g1+%l2],%f42 + + faddd %f4,%f48,%f4 + + fmuld %f12,%f16,%f16 + faddd %f14,%f60,%f14 + + fmuld %f22,%f26,%f26 + faddd %f24,%f60,%f24 + + fmuld %f2,%f4,%f4 + + faddd %f16,%f54,%f16 + fmuld %f12,%f14,%f14 + + faddd %f26,%f54,%f26 + fmuld %f22,%f24,%f24 + + faddd %f4,%f46,%f4 + + fmuld %f10,%f16,%f16 + ldd [%l4+%l1],%f12 + + fmuld %f20,%f26,%f26 + ldd [%l4+%l2],%f22 + + fmuld %f14,%f36,%f14 + lda [%i1]%asi,%l0 ! preload next argument + + fmuld %f24,%f40,%f24 + lda [%i1]%asi,%f0 + + fmuld %f16,%f38,%f16 + lda [%i1+4]%asi,%f1 + + fmuld %f26,%f42,%f26 + add %i1,%i2,%i1 ! x += stridex + + fmuld %f2,%f4,%f4 + + faddd %f16,%f14,%f16 + + faddd %f26,%f24,%f26 + + fmuld %f6,%f4,%f4 + + faddd %f16,%f12,%f16 + + faddd %f26,%f22,%f26 + + faddd %f6,%f4,%f6 + + faddd %f16,%f36,%f16 + + faddd %f26,%f40,%f26 + andn %l0,%i5,%l0 ! hx &= ~0x80000000 + + fors %f6,%f9,%f6 + addcc %i0,-1,%i0 + + fors %f16,%f19,%f16 + bg,pt %icc,.loop0 + +! delay slot + fors %f26,%f29,%f26 + + ba,pt %icc,.endloop0 +! 
delay slot + nop + + .align 32 +.case5: + sethi %hi(0x3fc3c000),%o7 + add %l3,8,%g1 + fand %f18,%f44,%f12 + fmuld %f0,%f0,%f2 + + sub %l1,%o7,%l1 + fmuld %f20,%f20,%f22 + + fsubd %f10,%f12,%f10 + srl %l1,10,%l1 + mov %o1,%o4 + + fmovd %f0,%f6 + fmuld %f2,%f52,%f4 + mov %o0,%o3 + + fmuld %f22,%f52,%f24 + mov %o2,%o5 + + fmuld %f10,%f10,%f12 + andn %l1,0x1f,%l1 + + faddd %f4,%f50,%f4 + + faddd %f24,%f50,%f24 + + fmuld %f12,%f58,%f16 + ldd [%l3+%l1],%f36 + + fmuld %f2,%f4,%f4 + + fmuld %f22,%f24,%f24 + + faddd %f16,%f56,%f16 + fmuld %f12,%f62,%f14 + ldd [%g1+%l1],%f38 + + faddd %f4,%f48,%f4 + + faddd %f24,%f48,%f24 + + fmuld %f12,%f16,%f16 + faddd %f14,%f60,%f14 + + fmuld %f2,%f4,%f4 + + fmuld %f22,%f24,%f24 + + faddd %f16,%f54,%f16 + fmuld %f12,%f14,%f14 + + faddd %f4,%f46,%f4 + + faddd %f24,%f46,%f24 + + fmuld %f10,%f16,%f16 + ldd [%l4+%l1],%f12 + + fmuld %f14,%f36,%f14 + lda [%i1]%asi,%l0 ! preload next argument + + fmuld %f2,%f4,%f4 + lda [%i1]%asi,%f0 + + fmuld %f16,%f38,%f16 + lda [%i1+4]%asi,%f1 + + fmuld %f22,%f24,%f24 + add %i1,%i2,%i1 ! x += stridex + + fmuld %f6,%f4,%f4 + + faddd %f16,%f14,%f16 + + fmuld %f20,%f24,%f24 + + faddd %f6,%f4,%f6 + + faddd %f16,%f12,%f16 + + faddd %f20,%f24,%f26 + + fors %f6,%f9,%f6 + andn %l0,%i5,%l0 ! hx &= ~0x80000000 + + faddd %f16,%f36,%f16 + addcc %i0,-1,%i0 + + fors %f26,%f29,%f26 + bg,pt %icc,.loop0 + +! delay slot + fors %f16,%f19,%f16 + + ba,pt %icc,.endloop0 +! delay slot + nop + + .align 32 +.case6: + st %f27,[%o5+4] + cmp %l2,%l5 + fpadd32s %f20,%f31,%f28 + bl,pn %icc,.case7 + +! 
delay slot + sethi %hi(0x3fc3c000),%o7 + add %l3,8,%g1 + fand %f28,%f44,%f22 + fmuld %f0,%f0,%f2 + + sub %l2,%o7,%l2 + fmuld %f10,%f10,%f12 + + fsubd %f20,%f22,%f20 + srl %l2,10,%l2 + mov %o2,%o5 + + fmovd %f0,%f6 + fmuld %f2,%f52,%f4 + mov %o0,%o3 + + fmuld %f12,%f52,%f14 + mov %o1,%o4 + + fmuld %f20,%f20,%f22 + andn %l2,0x1f,%l2 + + faddd %f4,%f50,%f4 + + faddd %f14,%f50,%f14 + + fmuld %f22,%f58,%f26 + ldd [%l3+%l2],%f40 + + fmuld %f2,%f4,%f4 + + fmuld %f12,%f14,%f14 + + faddd %f26,%f56,%f26 + fmuld %f22,%f62,%f24 + ldd [%g1+%l2],%f42 + + faddd %f4,%f48,%f4 + + faddd %f14,%f48,%f14 + + fmuld %f22,%f26,%f26 + faddd %f24,%f60,%f24 + + fmuld %f2,%f4,%f4 + + fmuld %f12,%f14,%f14 + + faddd %f26,%f54,%f26 + fmuld %f22,%f24,%f24 + + faddd %f4,%f46,%f4 + + faddd %f14,%f46,%f14 + + fmuld %f20,%f26,%f26 + ldd [%l4+%l2],%f22 + + fmuld %f24,%f40,%f24 + lda [%i1]%asi,%l0 ! preload next argument + + fmuld %f2,%f4,%f4 + lda [%i1]%asi,%f0 + + fmuld %f26,%f42,%f26 + lda [%i1+4]%asi,%f1 + + fmuld %f12,%f14,%f14 + add %i1,%i2,%i1 ! x += stridex + + fmuld %f6,%f4,%f4 + + faddd %f26,%f24,%f26 + + fmuld %f10,%f14,%f14 + + faddd %f6,%f4,%f6 + + faddd %f26,%f22,%f26 + + faddd %f10,%f14,%f16 + + fors %f6,%f9,%f6 + andn %l0,%i5,%l0 ! hx &= ~0x80000000 + + faddd %f26,%f40,%f26 + addcc %i0,-1,%i0 + + fors %f16,%f19,%f16 + bg,pt %icc,.loop0 + +! delay slot + fors %f26,%f29,%f26 + + ba,pt %icc,.endloop0 +! delay slot + nop + + .align 32 +.case7: + fmuld %f0,%f0,%f2 + fmovd %f0,%f6 + mov %o0,%o3 + + fmuld %f10,%f10,%f12 + mov %o1,%o4 + + fmuld %f20,%f20,%f22 + mov %o2,%o5 + + fmuld %f2,%f52,%f4 + lda [%i1]%asi,%l0 ! preload next argument + + fmuld %f12,%f52,%f14 + lda [%i1]%asi,%f0 + + fmuld %f22,%f52,%f24 + lda [%i1+4]%asi,%f1 + + faddd %f4,%f50,%f4 + add %i1,%i2,%i1 ! 
x += stridex + + faddd %f14,%f50,%f14 + + faddd %f24,%f50,%f24 + + fmuld %f2,%f4,%f4 + + fmuld %f12,%f14,%f14 + + fmuld %f22,%f24,%f24 + + faddd %f4,%f48,%f4 + + faddd %f14,%f48,%f14 + + faddd %f24,%f48,%f24 + + fmuld %f2,%f4,%f4 + + fmuld %f12,%f14,%f14 + + fmuld %f22,%f24,%f24 + + faddd %f4,%f46,%f4 + + faddd %f14,%f46,%f14 + + faddd %f24,%f46,%f24 + + fmuld %f2,%f4,%f4 + + fmuld %f12,%f14,%f14 + + fmuld %f22,%f24,%f24 + + fmuld %f6,%f4,%f4 + + fmuld %f10,%f14,%f14 + + fmuld %f20,%f24,%f24 + + faddd %f6,%f4,%f6 + + faddd %f10,%f14,%f16 + + faddd %f20,%f24,%f26 + andn %l0,%i5,%l0 ! hx &= ~0x80000000 + + fors %f6,%f9,%f6 + addcc %i0,-1,%i0 + + fors %f16,%f19,%f16 + bg,pt %icc,.loop0 + +! delay slot + fors %f26,%f29,%f26 + + ba,pt %icc,.endloop0 +! delay slot + nop + + + .align 32 +.endloop2: + cmp %l1,%l5 + bl,pn %icc,1f +! delay slot + fabsd %f10,%f10 + sethi %hi(0x3fc3c000),%o7 + fpadd32s %f10,%f31,%f18 + add %l3,8,%g1 + fand %f18,%f44,%f12 + sub %l1,%o7,%l1 + fsubd %f10,%f12,%f10 + srl %l1,10,%l1 + fmuld %f10,%f10,%f12 + andn %l1,0x1f,%l1 + fmuld %f12,%f58,%f20 + ldd [%l3+%l1],%f36 + faddd %f20,%f56,%f20 + fmuld %f12,%f62,%f14 + ldd [%g1+%l1],%f38 + fmuld %f12,%f20,%f20 + faddd %f14,%f60,%f14 + faddd %f20,%f54,%f20 + fmuld %f12,%f14,%f14 + fmuld %f10,%f20,%f20 + ldd [%l4+%l1],%f12 + fmuld %f14,%f36,%f14 + fmuld %f20,%f38,%f20 + faddd %f20,%f14,%f20 + faddd %f20,%f12,%f20 + ba,pt %icc,2f +! delay slot + faddd %f20,%f36,%f20 +1: + fmuld %f10,%f10,%f12 + fmuld %f12,%f52,%f14 + faddd %f14,%f50,%f14 + fmuld %f12,%f14,%f14 + faddd %f14,%f48,%f14 + fmuld %f12,%f14,%f14 + faddd %f14,%f46,%f14 + fmuld %f12,%f14,%f14 + fmuld %f10,%f14,%f14 + faddd %f10,%f14,%f20 +2: + fors %f20,%f19,%f20 + st %f20,[%o1] + st %f21,[%o1+4] + +.endloop1: + cmp %l0,%l5 + bl,pn %icc,1f +! 
delay slot + fabsd %f0,%f0 + sethi %hi(0x3fc3c000),%o7 + fpadd32s %f0,%f31,%f8 + add %l3,8,%g1 + fand %f8,%f44,%f2 + sub %l0,%o7,%l0 + fsubd %f0,%f2,%f0 + srl %l0,10,%l0 + fmuld %f0,%f0,%f2 + andn %l0,0x1f,%l0 + fmuld %f2,%f58,%f20 + ldd [%l3+%l0],%f32 + faddd %f20,%f56,%f20 + fmuld %f2,%f62,%f4 + ldd [%g1+%l0],%f34 + fmuld %f2,%f20,%f20 + faddd %f4,%f60,%f4 + faddd %f20,%f54,%f20 + fmuld %f2,%f4,%f4 + fmuld %f0,%f20,%f20 + ldd [%l4+%l0],%f2 + fmuld %f4,%f32,%f4 + fmuld %f20,%f34,%f20 + faddd %f20,%f4,%f20 + faddd %f20,%f2,%f20 + ba,pt %icc,2f +! delay slot + faddd %f20,%f32,%f20 +1: + fmuld %f0,%f0,%f2 + fmuld %f2,%f52,%f4 + faddd %f4,%f50,%f4 + fmuld %f2,%f4,%f4 + faddd %f4,%f48,%f4 + fmuld %f2,%f4,%f4 + faddd %f4,%f46,%f4 + fmuld %f2,%f4,%f4 + fmuld %f0,%f4,%f4 + faddd %f0,%f4,%f20 +2: + fors %f20,%f9,%f20 + st %f20,[%o0] + st %f21,[%o0+4] + +.endloop0: + st %f6,[%o3] + st %f7,[%o3+4] + st %f16,[%o4] + st %f17,[%o4+4] + st %f26,[%o5] + st %f27,[%o5+4] + +! return. finished off with only primary range arguments. + + ret + restore + + + .align 32 +.range0: + cmp %l0,LIM_l6 + bg,a,pt %icc,.MEDIUM ! branch if x is not tiny +! delay slot, annulled if branch not taken + mov 0x1,LIM_l6 ! set "processing loop0" + st %f0,[%o0] ! *y = *x with inexact if x nonzero + st %f1,[%o0+4] + fdtoi %f0,%f2 + addcc %i0,-1,%i0 + ble,pn %icc,.endloop0 +! delay slot, harmless if branch taken + add %i3,%i4,%i3 ! y += stridey + andn %l1,%i5,%l0 ! hx &= ~0x80000000 + fmovd %f10,%f0 + ba,pt %icc,.loop0 +! delay slot + add %i1,%i2,%i1 ! x += stridex + + + .align 32 +.range1: + cmp %l1,LIM_l6 + bg,a,pt %icc,.MEDIUM ! branch if x is not tiny +! delay slot, annulled if branch not taken + mov 0x2,LIM_l6 ! set "processing loop1" + st %f10,[%o1] ! *y = *x with inexact if x nonzero + st %f11,[%o1+4] + fdtoi %f10,%f12 + addcc %i0,-1,%i0 + ble,pn %icc,.endloop1 +! delay slot, harmless if branch taken + add %i3,%i4,%i3 ! y += stridey + andn %l2,%i5,%l1 ! 
hx &= ~0x80000000 + fmovd %f20,%f10 + ba,pt %icc,.loop1 +! delay slot + add %i1,%i2,%i1 ! x += stridex + + + .align 32 +.range2: + cmp %l2,LIM_l6 + bg,a,pt %icc,.MEDIUM ! branch if x is not tiny +! delay slot, annulled if branch not taken + mov 0x3,LIM_l6 ! set "processing loop2" + st %f20,[%o2] ! *y = *x with inexact if x nonzero + st %f21,[%o2+4] + fdtoi %f20,%f22 +1: + addcc %i0,-1,%i0 + ble,pn %icc,.endloop2 +! delay slot + nop + ld [%i1],%l2 + ld [%i1],%f20 + ld [%i1+4],%f21 + andn %l2,%i5,%l2 ! hx &= ~0x80000000 + ba,pt %icc,.loop2 +! delay slot + add %i1,%i2,%i1 ! x += stridex + + + .align 32 +.MEDIUM: + +! ========== medium range ========== + +! register use + +! i0 n +! i1 x +! i2 stridex +! i3 y +! i4 stridey +! i5 0x80000000 + +! l0 hx0 +! l1 hx1 +! l2 hx2 +! l3 __vlibm_TBL_sincos_hi +! l4 __vlibm_TBL_sincos_lo +! l5 constants +! l6 in transition from pri-range and here, use for biguns +! l7 0x413921fb + +! the following are 64-bit registers in both V8+ and V9 + +! g1 scratch +! g5 + +! o0 py0 +! o1 py1 +! o2 py2 +! o3 n0 +! o4 n1 +! o5 n2 +! o7 scratch + +! f0 x0 +! f2 n0,y0 +! f4 +! f6 +! f8 scratch for table base +! f9 signbit0 +! f10 x1 +! f12 n1,y1 +! f14 +! f16 +! f18 scratch for table base +! f19 signbit1 +! f20 x2 +! f22 n2,y2 +! f24 +! f26 +! f28 scratch for table base +! f29 signbit2 +! f30 0x80000000 +! f31 0x4000 +! f32 +! f34 +! f36 +! f38 +! f40 invpio2 +! f42 round +! f44 0xffff800000000000 +! f46 pio2_1 +! f48 pio2_2 +! f50 pio2_3 +! f52 pio2_3t +! f54 one +! f56 pp1 +! f58 pp2 +! f60 qq1 +! f62 qq2 + + PIC_SET(g5,constants,l5) + + ! %o3,%o4,%o5 need to be stored + st %f6,[%o3] + sethi %hi(0x413921fb),%l7 + st %f7,[%o3+4] + or %l7,%lo(0x413921fb),%l7 + st %f16,[%o4] + st %f17,[%o4+4] + st %f26,[%o5] + st %f27,[%o5+4] + ldd [%l5+invpio2],%f40 + ldd [%l5+round],%f42 + ldd [%l5+pio2_1],%f46 + ldd [%l5+pio2_2],%f48 + ldd [%l5+pio2_3],%f50 + ldd [%l5+pio2_3t],%f52 + std %f54,[%fp+x0_1+8] ! 
set up stack data + std %f54,[%fp+x1_1+8] + std %f54,[%fp+x2_1+8] + stx %g0,[%fp+y0_0+8] + stx %g0,[%fp+y1_0+8] + stx %g0,[%fp+y2_0+8] + +! branched here in the middle of the array. Need to adjust +! for the members of the triple that were selected in the primary +! loop. + +! no adjustment since all three selected here + subcc LIM_l6,0x1,%g0 ! continue in LOOP0? + bz,a %icc,.LOOP0 + mov 0x0,LIM_l6 ! delay slot set biguns=0 + +! ajust 1st triple since 2d and 3d done here + subcc LIM_l6,0x2,%g0 ! continue in LOOP1? + fors %f0,%f9,%f0 ! restore sign bit + fmuld %f0,%f40,%f2 ! adj LOOP0 + bz,a %icc,.LOOP1 + mov 0x0,LIM_l6 ! delay slot set biguns=0 + +! ajust 1st and 2d triple since 3d done here + subcc LIM_l6,0x3,%g0 ! continue in LOOP2? + !done fmuld %f0,%f40,%f2 ! adj LOOP0 + sub %i3,%i4,%i3 ! adjust to not double increment + fors %f10,%f19,%f10 ! restore sign bit + fmuld %f10,%f40,%f12 ! adj LOOP1 + faddd %f2,%f42,%f2 ! adj LOOP1 + bz,a %icc,.LOOP2 + mov 0x0,LIM_l6 ! delay slot set biguns=0 + + .align 32 +.LOOP0: + lda [%i1]%asi,%l1 ! preload next argument + mov %i3,%o0 ! py0 = y + lda [%i1]%asi,%f10 + cmp %l0,%l7 + add %i3,%i4,%i3 ! y += stridey + bg,pn %icc,.BIG0 ! if hx > 0x413921fb + +! delay slot + lda [%i1+4]%asi,%f11 + addcc %i0,-1,%i0 + add %i1,%i2,%i1 ! x += stridex + ble,pn %icc,.ENDLOOP1 + +! delay slot + andn %l1,%i5,%l1 + nop + fmuld %f0,%f40,%f2 + fabsd %f54,%f54 ! a nop for alignment only + +.LOOP1: + lda [%i1]%asi,%l2 ! preload next argument + mov %i3,%o1 ! py1 = y + + lda [%i1]%asi,%f20 + cmp %l1,%l7 + add %i3,%i4,%i3 ! y += stridey + bg,pn %icc,.BIG1 ! if hx > 0x413921fb + +! delay slot + lda [%i1+4]%asi,%f21 + addcc %i0,-1,%i0 + add %i1,%i2,%i1 ! x += stridex + ble,pn %icc,.ENDLOOP2 + +! delay slot + andn %l2,%i5,%l2 + nop + fmuld %f10,%f40,%f12 + faddd %f2,%f42,%f2 + +.LOOP2: + st %f3,[%fp+n0] + mov %i3,%o2 ! py2 = y + + cmp %l2,%l7 + add %i3,%i4,%i3 ! y += stridey + fmuld %f20,%f40,%f22 + bg,pn %icc,.BIG2 ! if hx > 0x413921fb + +! 
delay slot + add %l5,thresh+4,%o7 + faddd %f12,%f42,%f12 + st %f13,[%fp+n1] + +! - + + add %l5,thresh,%g1 + faddd %f22,%f42,%f22 + st %f23,[%fp+n2] + + fsubd %f2,%f42,%f2 ! n + + fsubd %f12,%f42,%f12 ! n + + fsubd %f22,%f42,%f22 ! n + + fmuld %f2,%f46,%f4 + + fmuld %f12,%f46,%f14 + + fmuld %f22,%f46,%f24 + + fsubd %f0,%f4,%f4 + fmuld %f2,%f48,%f6 + + fsubd %f10,%f14,%f14 + fmuld %f12,%f48,%f16 + + fsubd %f20,%f24,%f24 + fmuld %f22,%f48,%f26 + + fsubd %f4,%f6,%f0 + ld [%fp+n0],%o3 + + fsubd %f14,%f16,%f10 + ld [%fp+n1],%o4 + + fsubd %f24,%f26,%f20 + ld [%fp+n2],%o5 + + fsubd %f4,%f0,%f32 + and %o3,1,%o3 + + fsubd %f14,%f10,%f34 + and %o4,1,%o4 + + fsubd %f24,%f20,%f36 + and %o5,1,%o5 + + fsubd %f32,%f6,%f32 + fmuld %f2,%f50,%f8 + sll %o3,3,%o3 + + fsubd %f34,%f16,%f34 + fmuld %f12,%f50,%f18 + sll %o4,3,%o4 + + fsubd %f36,%f26,%f36 + fmuld %f22,%f50,%f28 + sll %o5,3,%o5 + + fsubd %f8,%f32,%f8 + ld [%g1+%o3],%f6 + + fsubd %f18,%f34,%f18 + ld [%g1+%o4],%f16 + + fsubd %f28,%f36,%f28 + ld [%g1+%o5],%f26 + + fsubd %f0,%f8,%f4 + + fsubd %f10,%f18,%f14 + + fsubd %f20,%f28,%f24 + + fsubd %f0,%f4,%f32 + + fsubd %f10,%f14,%f34 + + fsubd %f20,%f24,%f36 + + fsubd %f32,%f8,%f32 + fmuld %f2,%f52,%f2 + + fsubd %f34,%f18,%f34 + fmuld %f12,%f52,%f12 + + fsubd %f36,%f28,%f36 + fmuld %f22,%f52,%f22 + + fsubd %f2,%f32,%f2 + ld [%o7+%o3],%f8 + + fsubd %f12,%f34,%f12 + ld [%o7+%o4],%f18 + + fsubd %f22,%f36,%f22 + ld [%o7+%o5],%f28 + + fsubd %f4,%f2,%f0 ! x + + fsubd %f14,%f12,%f10 ! x + + fsubd %f24,%f22,%f20 ! x + + fsubd %f4,%f0,%f4 + + fsubd %f14,%f10,%f14 + + fsubd %f24,%f20,%f24 + + fands %f0,%f30,%f9 ! save signbit + + fands %f10,%f30,%f19 ! save signbit + + fands %f20,%f30,%f29 ! save signbit + + fabsd %f0,%f0 + std %f0,[%fp+x0_1] + + fabsd %f10,%f10 + std %f10,[%fp+x1_1] + + fabsd %f20,%f20 + std %f20,[%fp+x2_1] + + fsubd %f4,%f2,%f2 ! y + + fsubd %f14,%f12,%f12 ! y + + fsubd %f24,%f22,%f22 ! y + + fcmpgt32 %f6,%f0,%l0 + + fcmpgt32 %f16,%f10,%l1 + + fcmpgt32 %f26,%f20,%l2 + +! 
-- 16 byte aligned + fxors %f2,%f9,%f2 + + fxors %f12,%f19,%f12 + + fxors %f22,%f29,%f22 + + fands %f9,%f8,%f9 ! if (n & 1) clear sign bit + andcc %l0,2,%g0 + bne,pn %icc,.CASE4 + +! delay slot + fands %f19,%f18,%f19 ! if (n & 1) clear sign bit + andcc %l1,2,%g0 + bne,pn %icc,.CASE2 + +! delay slot + fands %f29,%f28,%f29 ! if (n & 1) clear sign bit + andcc %l2,2,%g0 + bne,pn %icc,.CASE1 + +! delay slot + fpadd32s %f0,%f31,%f8 + sethi %hi(0x3fc3c000),%o7 + ld [%fp+x0_1],%l0 + + fpadd32s %f10,%f31,%f18 + add %l3,8,%g1 + ld [%fp+x1_1],%l1 + + fpadd32s %f20,%f31,%f28 + ld [%fp+x2_1],%l2 + + fand %f8,%f44,%f4 + sub %l0,%o7,%l0 + + fand %f18,%f44,%f14 + sub %l1,%o7,%l1 + + fand %f28,%f44,%f24 + sub %l2,%o7,%l2 + + fsubd %f0,%f4,%f0 + srl %l0,10,%l0 + + fsubd %f10,%f14,%f10 + srl %l1,10,%l1 + + fsubd %f20,%f24,%f20 + srl %l2,10,%l2 + + faddd %f0,%f2,%f0 + andn %l0,0x1f,%l0 + + faddd %f10,%f12,%f10 + andn %l1,0x1f,%l1 + + faddd %f20,%f22,%f20 + andn %l2,0x1f,%l2 + + fmuld %f0,%f0,%f2 + add %l0,%o3,%l0 + + fmuld %f10,%f10,%f12 + add %l1,%o4,%l1 + + fmuld %f20,%f20,%f22 + add %l2,%o5,%l2 + + fmuld %f2,%f58,%f6 + ldd [%l3+%l0],%f32 + + fmuld %f12,%f58,%f16 + ldd [%l3+%l1],%f34 + + fmuld %f22,%f58,%f26 + ldd [%l3+%l2],%f36 + + faddd %f6,%f56,%f6 + fmuld %f2,%f62,%f4 + + faddd %f16,%f56,%f16 + fmuld %f12,%f62,%f14 + + faddd %f26,%f56,%f26 + fmuld %f22,%f62,%f24 + + fmuld %f2,%f6,%f6 + faddd %f4,%f60,%f4 + + fmuld %f12,%f16,%f16 + faddd %f14,%f60,%f14 + + fmuld %f22,%f26,%f26 + faddd %f24,%f60,%f24 + + faddd %f6,%f54,%f6 + fmuld %f2,%f4,%f4 + + faddd %f16,%f54,%f16 + fmuld %f12,%f14,%f14 + + faddd %f26,%f54,%f26 + fmuld %f22,%f24,%f24 + + fmuld %f0,%f6,%f6 + ldd [%g1+%l0],%f2 + + fmuld %f10,%f16,%f16 + ldd [%g1+%l1],%f12 + + fmuld %f20,%f26,%f26 + ldd [%g1+%l2],%f22 + + fmuld %f4,%f32,%f4 + ldd [%l4+%l0],%f0 + + fmuld %f14,%f34,%f14 + ldd [%l4+%l1],%f10 + + fmuld %f24,%f36,%f24 + ldd [%l4+%l2],%f20 + + fmuld %f6,%f2,%f6 + + fmuld %f16,%f12,%f16 + + fmuld %f26,%f22,%f26 + + faddd 
%f6,%f4,%f6 + + faddd %f16,%f14,%f16 + + faddd %f26,%f24,%f26 + + faddd %f6,%f0,%f6 + + faddd %f16,%f10,%f16 + + faddd %f26,%f20,%f26 + + faddd %f6,%f32,%f6 + + faddd %f16,%f34,%f16 + + faddd %f26,%f36,%f26 + +.FIXSIGN: + ld [%fp+n0],%o3 + add %l5,thresh-4,%g1 + + ld [%fp+n1],%o4 + + ld [%fp+n2],%o5 + and %o3,2,%o3 + + sll %o3,2,%o3 + and %o4,2,%o4 + lda [%i1]%asi,%l0 ! preload next argument + + sll %o4,2,%o4 + and %o5,2,%o5 + ld [%g1+%o3],%f8 + + sll %o5,2,%o5 + ld [%g1+%o4],%f18 + + ld [%g1+%o5],%f28 + fxors %f9,%f8,%f9 + + lda [%i1]%asi,%f0 + fxors %f29,%f28,%f29 + + lda [%i1+4]%asi,%f1 + fxors %f19,%f18,%f19 + + fors %f6,%f9,%f6 ! tack on sign + add %i1,%i2,%i1 ! x += stridex + st %f6,[%o0] + + fors %f26,%f29,%f26 ! tack on sign + st %f7,[%o0+4] + + fors %f16,%f19,%f16 ! tack on sign + st %f26,[%o2] + + st %f27,[%o2+4] + addcc %i0,-1,%i0 + + st %f16,[%o1] + andn %l0,%i5,%l0 ! hx &= ~0x80000000 + bg,pt %icc,.LOOP0 + +! delay slot + st %f17,[%o1+4] + + ba,pt %icc,.ENDLOOP0 +! delay slot + nop + + .align 32 +.CASE1: + fpadd32s %f10,%f31,%f18 + sethi %hi(0x3fc3c000),%o7 + ld [%fp+x0_1],%l0 + + fand %f8,%f44,%f4 + add %l3,8,%g1 + ld [%fp+x1_1],%l1 + + fand %f18,%f44,%f14 + sub %l0,%o7,%l0 + + fsubd %f0,%f4,%f0 + srl %l0,10,%l0 + sub %l1,%o7,%l1 + + fsubd %f10,%f14,%f10 + srl %l1,10,%l1 + + fmuld %f20,%f20,%f20 + ldd [%l5+%o5],%f36 + add %l5,%o5,%l2 + + faddd %f0,%f2,%f0 + andn %l0,0x1f,%l0 + + faddd %f10,%f12,%f10 + andn %l1,0x1f,%l1 + + fmuld %f20,%f36,%f24 + ldd [%l2+0x10],%f26 + add %fp,%o5,%o5 + + fmuld %f0,%f0,%f2 + add %l0,%o3,%l0 + + fmuld %f10,%f10,%f12 + add %l1,%o4,%l1 + + faddd %f24,%f26,%f24 + ldd [%l2+0x20],%f36 + + fmuld %f2,%f58,%f6 + ldd [%l3+%l0],%f32 + + fmuld %f12,%f58,%f16 + ldd [%l3+%l1],%f34 + + fmuld %f20,%f24,%f24 + ldd [%l2+0x30],%f26 + + faddd %f6,%f56,%f6 + fmuld %f2,%f62,%f4 + + faddd %f16,%f56,%f16 + fmuld %f12,%f62,%f14 + + faddd %f24,%f36,%f24 + ldd [%o5+x2_1],%f36 + + fmuld %f2,%f6,%f6 + faddd %f4,%f60,%f4 + + fmuld %f12,%f16,%f16 + 
faddd %f14,%f60,%f14 + + fmuld %f20,%f24,%f24 + + faddd %f6,%f54,%f6 + fmuld %f2,%f4,%f4 + ldd [%g1+%l0],%f2 + + faddd %f16,%f54,%f16 + fmuld %f12,%f14,%f14 + ldd [%g1+%l1],%f12 + + faddd %f24,%f26,%f24 + + fmuld %f0,%f6,%f6 + ldd [%l4+%l0],%f0 + + fmuld %f10,%f16,%f16 + ldd [%l4+%l1],%f10 + + fmuld %f4,%f32,%f4 + std %f22,[%fp+y2_0] + + fmuld %f14,%f34,%f14 + + fmuld %f6,%f2,%f6 + + fmuld %f16,%f12,%f16 + + fmuld %f20,%f24,%f24 + + faddd %f6,%f4,%f6 + + faddd %f16,%f14,%f16 + + fmuld %f36,%f24,%f24 + ldd [%o5+y2_0],%f22 + + faddd %f6,%f0,%f6 + + faddd %f16,%f10,%f16 + + faddd %f24,%f22,%f24 + + faddd %f6,%f32,%f6 + + faddd %f16,%f34,%f16 + ba,pt %icc,.FIXSIGN + +! delay slot + faddd %f36,%f24,%f26 + + .align 32 +.CASE2: + fpadd32s %f0,%f31,%f8 + ld [%fp+x0_1],%l0 + andcc %l2,2,%g0 + bne,pn %icc,.CASE3 + +! delay slot + sethi %hi(0x3fc3c000),%o7 + fpadd32s %f20,%f31,%f28 + ld [%fp+x2_1],%l2 + + fand %f8,%f44,%f4 + sub %l0,%o7,%l0 + add %l3,8,%g1 + + fand %f28,%f44,%f24 + sub %l2,%o7,%l2 + + fsubd %f0,%f4,%f0 + srl %l0,10,%l0 + + fsubd %f20,%f24,%f20 + srl %l2,10,%l2 + + fmuld %f10,%f10,%f10 + ldd [%l5+%o4],%f34 + add %l5,%o4,%l1 + + faddd %f0,%f2,%f0 + andn %l0,0x1f,%l0 + + faddd %f20,%f22,%f20 + andn %l2,0x1f,%l2 + + fmuld %f10,%f34,%f14 + ldd [%l1+0x10],%f16 + add %fp,%o4,%o4 + + fmuld %f0,%f0,%f2 + add %l0,%o3,%l0 + + fmuld %f20,%f20,%f22 + add %l2,%o5,%l2 + + faddd %f14,%f16,%f14 + ldd [%l1+0x20],%f34 + + fmuld %f2,%f58,%f6 + ldd [%l3+%l0],%f32 + + fmuld %f22,%f58,%f26 + ldd [%l3+%l2],%f36 + + fmuld %f10,%f14,%f14 + ldd [%l1+0x30],%f16 + + faddd %f6,%f56,%f6 + fmuld %f2,%f62,%f4 + + faddd %f26,%f56,%f26 + fmuld %f22,%f62,%f24 + + faddd %f14,%f34,%f14 + ldd [%o4+x1_1],%f34 + + fmuld %f2,%f6,%f6 + faddd %f4,%f60,%f4 + + fmuld %f22,%f26,%f26 + faddd %f24,%f60,%f24 + + fmuld %f10,%f14,%f14 + + faddd %f6,%f54,%f6 + fmuld %f2,%f4,%f4 + ldd [%g1+%l0],%f2 + + faddd %f26,%f54,%f26 + fmuld %f22,%f24,%f24 + ldd [%g1+%l2],%f22 + + faddd %f14,%f16,%f14 + + fmuld %f0,%f6,%f6 
+ ldd [%l4+%l0],%f0 + + fmuld %f20,%f26,%f26 + ldd [%l4+%l2],%f20 + + fmuld %f4,%f32,%f4 + std %f12,[%fp+y1_0] + + fmuld %f24,%f36,%f24 + + fmuld %f6,%f2,%f6 + + fmuld %f26,%f22,%f26 + + fmuld %f10,%f14,%f14 + + faddd %f6,%f4,%f6 + + faddd %f26,%f24,%f26 + + fmuld %f34,%f14,%f14 + ldd [%o4+y1_0],%f12 + + faddd %f6,%f0,%f6 + + faddd %f26,%f20,%f26 + + faddd %f14,%f12,%f14 + + faddd %f6,%f32,%f6 + + faddd %f26,%f36,%f26 + ba,pt %icc,.FIXSIGN + +! delay slot + faddd %f34,%f14,%f16 + + .align 32 +.CASE3: + fand %f8,%f44,%f4 + add %l3,8,%g1 + sub %l0,%o7,%l0 + + fmuld %f10,%f10,%f10 + ldd [%l5+%o4],%f34 + add %l5,%o4,%l1 + + fsubd %f0,%f4,%f0 + srl %l0,10,%l0 + + fmuld %f20,%f20,%f20 + ldd [%l5+%o5],%f36 + add %l5,%o5,%l2 + + fmuld %f10,%f34,%f14 + ldd [%l1+0x10],%f16 + add %fp,%o4,%o4 + + faddd %f0,%f2,%f0 + andn %l0,0x1f,%l0 + + fmuld %f20,%f36,%f24 + ldd [%l2+0x10],%f26 + add %fp,%o5,%o5 + + faddd %f14,%f16,%f14 + ldd [%l1+0x20],%f34 + + fmuld %f0,%f0,%f2 + add %l0,%o3,%l0 + + faddd %f24,%f26,%f24 + ldd [%l2+0x20],%f36 + + fmuld %f10,%f14,%f14 + ldd [%l1+0x30],%f16 + + fmuld %f2,%f58,%f6 + ldd [%l3+%l0],%f32 + + fmuld %f20,%f24,%f24 + ldd [%l2+0x30],%f26 + + faddd %f14,%f34,%f14 + ldd [%o4+x1_1],%f34 + + faddd %f6,%f56,%f6 + fmuld %f2,%f62,%f4 + + faddd %f24,%f36,%f24 + ldd [%o5+x2_1],%f36 + + fmuld %f10,%f14,%f14 + std %f12,[%fp+y1_0] + + fmuld %f2,%f6,%f6 + faddd %f4,%f60,%f4 + + fmuld %f20,%f24,%f24 + std %f22,[%fp+y2_0] + + faddd %f14,%f16,%f14 + + faddd %f6,%f54,%f6 + fmuld %f2,%f4,%f4 + ldd [%g1+%l0],%f2 + + faddd %f24,%f26,%f24 + + fmuld %f10,%f14,%f14 + + fmuld %f0,%f6,%f6 + ldd [%l4+%l0],%f0 + + fmuld %f4,%f32,%f4 + + fmuld %f20,%f24,%f24 + + fmuld %f6,%f2,%f6 + + fmuld %f34,%f14,%f14 + ldd [%o4+y1_0],%f12 + + fmuld %f36,%f24,%f24 + ldd [%o5+y2_0],%f22 + + faddd %f6,%f4,%f6 + + faddd %f14,%f12,%f14 + + faddd %f24,%f22,%f24 + + faddd %f6,%f0,%f6 + + faddd %f34,%f14,%f16 + + faddd %f36,%f24,%f26 + ba,pt %icc,.FIXSIGN + +! 
delay slot + faddd %f6,%f32,%f6 + + .align 32 +.CASE4: + fands %f29,%f28,%f29 ! if (n & 1) clear sign bit + sethi %hi(0x3fc3c000),%o7 + andcc %l1,2,%g0 + bne,pn %icc,.CASE6 + +! delay slot + andcc %l2,2,%g0 + fpadd32s %f10,%f31,%f18 + ld [%fp+x1_1],%l1 + bne,pn %icc,.CASE5 + +! delay slot + add %l3,8,%g1 + ld [%fp+x2_1],%l2 + fpadd32s %f20,%f31,%f28 + + fand %f18,%f44,%f14 + sub %l1,%o7,%l1 + + fand %f28,%f44,%f24 + sub %l2,%o7,%l2 + + fsubd %f10,%f14,%f10 + srl %l1,10,%l1 + + fsubd %f20,%f24,%f20 + srl %l2,10,%l2 + + fmuld %f0,%f0,%f0 + ldd [%l5+%o3],%f32 + add %l5,%o3,%l0 + + faddd %f10,%f12,%f10 + andn %l1,0x1f,%l1 + + faddd %f20,%f22,%f20 + andn %l2,0x1f,%l2 + + fmuld %f0,%f32,%f4 + ldd [%l0+0x10],%f6 + add %fp,%o3,%o3 + + fmuld %f10,%f10,%f12 + add %l1,%o4,%l1 + + fmuld %f20,%f20,%f22 + add %l2,%o5,%l2 + + faddd %f4,%f6,%f4 + ldd [%l0+0x20],%f32 + + fmuld %f12,%f58,%f16 + ldd [%l3+%l1],%f34 + + fmuld %f22,%f58,%f26 + ldd [%l3+%l2],%f36 + + fmuld %f0,%f4,%f4 + ldd [%l0+0x30],%f6 + + faddd %f16,%f56,%f16 + fmuld %f12,%f62,%f14 + + faddd %f26,%f56,%f26 + fmuld %f22,%f62,%f24 + + faddd %f4,%f32,%f4 + ldd [%o3+x0_1],%f32 + + fmuld %f12,%f16,%f16 + faddd %f14,%f60,%f14 + + fmuld %f22,%f26,%f26 + faddd %f24,%f60,%f24 + + fmuld %f0,%f4,%f4 + + faddd %f16,%f54,%f16 + fmuld %f12,%f14,%f14 + ldd [%g1+%l1],%f12 + + faddd %f26,%f54,%f26 + fmuld %f22,%f24,%f24 + ldd [%g1+%l2],%f22 + + faddd %f4,%f6,%f4 + + fmuld %f10,%f16,%f16 + ldd [%l4+%l1],%f10 + + fmuld %f20,%f26,%f26 + ldd [%l4+%l2],%f20 + + fmuld %f14,%f34,%f14 + std %f2,[%fp+y0_0] + + fmuld %f24,%f36,%f24 + + fmuld %f0,%f4,%f4 + + fmuld %f16,%f12,%f16 + + fmuld %f26,%f22,%f26 + + fmuld %f32,%f4,%f4 + ldd [%o3+y0_0],%f2 + + faddd %f16,%f14,%f16 + + faddd %f26,%f24,%f26 + + faddd %f4,%f2,%f4 + + faddd %f16,%f10,%f16 + + faddd %f26,%f20,%f26 + + faddd %f32,%f4,%f6 + + faddd %f16,%f34,%f16 + ba,pt %icc,.FIXSIGN + +! 
delay slot + faddd %f26,%f36,%f26 + + .align 32 +.CASE5: + fand %f18,%f44,%f14 + sub %l1,%o7,%l1 + + fmuld %f0,%f0,%f0 + ldd [%l5+%o3],%f32 + add %l5,%o3,%l0 + + fsubd %f10,%f14,%f10 + srl %l1,10,%l1 + + fmuld %f20,%f20,%f20 + ldd [%l5+%o5],%f36 + add %l5,%o5,%l2 + + fmuld %f0,%f32,%f4 + ldd [%l0+0x10],%f6 + add %fp,%o3,%o3 + + faddd %f10,%f12,%f10 + andn %l1,0x1f,%l1 + + fmuld %f20,%f36,%f24 + ldd [%l2+0x10],%f26 + add %fp,%o5,%o5 + + faddd %f4,%f6,%f4 + ldd [%l0+0x20],%f32 + + fmuld %f10,%f10,%f12 + add %l1,%o4,%l1 + + faddd %f24,%f26,%f24 + ldd [%l2+0x20],%f36 + + fmuld %f0,%f4,%f4 + ldd [%l0+0x30],%f6 + + fmuld %f12,%f58,%f16 + ldd [%l3+%l1],%f34 + + fmuld %f20,%f24,%f24 + ldd [%l2+0x30],%f26 + + faddd %f4,%f32,%f4 + ldd [%o3+x0_1],%f32 + + faddd %f16,%f56,%f16 + fmuld %f12,%f62,%f14 + + faddd %f24,%f36,%f24 + ldd [%o5+x2_1],%f36 + + fmuld %f0,%f4,%f4 + std %f2,[%fp+y0_0] + + fmuld %f12,%f16,%f16 + faddd %f14,%f60,%f14 + + fmuld %f20,%f24,%f24 + std %f22,[%fp+y2_0] + + faddd %f4,%f6,%f4 + + faddd %f16,%f54,%f16 + fmuld %f12,%f14,%f14 + ldd [%g1+%l1],%f12 + + faddd %f24,%f26,%f24 + + fmuld %f0,%f4,%f4 + + fmuld %f10,%f16,%f16 + ldd [%l4+%l1],%f10 + + fmuld %f14,%f34,%f14 + + fmuld %f20,%f24,%f24 + + fmuld %f16,%f12,%f16 + + fmuld %f32,%f4,%f4 + ldd [%o3+y0_0],%f2 + + fmuld %f36,%f24,%f24 + ldd [%o5+y2_0],%f22 + + faddd %f16,%f14,%f16 + + faddd %f4,%f2,%f4 + + faddd %f24,%f22,%f24 + + faddd %f16,%f10,%f16 + + faddd %f32,%f4,%f6 + + faddd %f36,%f24,%f26 + ba,pt %icc,.FIXSIGN + +! delay slot + faddd %f16,%f34,%f16 + + .align 32 +.CASE6: + ld [%fp+x2_1],%l2 + add %l3,8,%g1 + bne,pn %icc,.CASE7 +! 
delay slot + fpadd32s %f20,%f31,%f28 + + fand %f28,%f44,%f24 + ldd [%l5+%o3],%f32 + add %l5,%o3,%l0 + + fmuld %f0,%f0,%f0 + sub %l2,%o7,%l2 + + fsubd %f20,%f24,%f20 + srl %l2,10,%l2 + + fmuld %f10,%f10,%f10 + ldd [%l5+%o4],%f34 + add %l5,%o4,%l1 + + fmuld %f0,%f32,%f4 + ldd [%l0+0x10],%f6 + add %fp,%o3,%o3 + + faddd %f20,%f22,%f20 + andn %l2,0x1f,%l2 + + fmuld %f10,%f34,%f14 + ldd [%l1+0x10],%f16 + add %fp,%o4,%o4 + + faddd %f4,%f6,%f4 + ldd [%l0+0x20],%f32 + + fmuld %f20,%f20,%f22 + add %l2,%o5,%l2 + + faddd %f14,%f16,%f14 + ldd [%l1+0x20],%f34 + + fmuld %f0,%f4,%f4 + ldd [%l0+0x30],%f6 + + fmuld %f22,%f58,%f26 + ldd [%l3+%l2],%f36 + + fmuld %f10,%f14,%f14 + ldd [%l1+0x30],%f16 + + faddd %f4,%f32,%f4 + ldd [%o3+x0_1],%f32 + + faddd %f26,%f56,%f26 + fmuld %f22,%f62,%f24 + + faddd %f14,%f34,%f14 + ldd [%o4+x1_1],%f34 + + fmuld %f0,%f4,%f4 + std %f2,[%fp+y0_0] + + fmuld %f22,%f26,%f26 + faddd %f24,%f60,%f24 + + fmuld %f10,%f14,%f14 + std %f12,[%fp+y1_0] + + faddd %f4,%f6,%f4 + + faddd %f26,%f54,%f26 + fmuld %f22,%f24,%f24 + ldd [%g1+%l2],%f22 + + faddd %f14,%f16,%f14 + + fmuld %f0,%f4,%f4 + + fmuld %f20,%f26,%f26 + ldd [%l4+%l2],%f20 + + fmuld %f24,%f36,%f24 + + fmuld %f10,%f14,%f14 + + fmuld %f26,%f22,%f26 + + fmuld %f32,%f4,%f4 + ldd [%o3+y0_0],%f2 + + fmuld %f34,%f14,%f14 + ldd [%o4+y1_0],%f12 + + faddd %f26,%f24,%f26 + + faddd %f4,%f2,%f4 + + faddd %f14,%f12,%f14 + + faddd %f26,%f20,%f26 + + faddd %f32,%f4,%f6 + + faddd %f34,%f14,%f16 + ba,pt %icc,.FIXSIGN + +! 
delay slot + faddd %f26,%f36,%f26 + + .align 32 +.CASE7: + fmuld %f0,%f0,%f0 + ldd [%l5+%o3],%f32 + add %l5,%o3,%l0 + + fmuld %f10,%f10,%f10 + ldd [%l5+%o4],%f34 + add %l5,%o4,%l1 + + fmuld %f20,%f20,%f20 + ldd [%l5+%o5],%f36 + add %l5,%o5,%l2 + + fmuld %f0,%f32,%f4 + ldd [%l0+0x10],%f6 + add %fp,%o3,%o3 + + fmuld %f10,%f34,%f14 + ldd [%l1+0x10],%f16 + add %fp,%o4,%o4 + + fmuld %f20,%f36,%f24 + ldd [%l2+0x10],%f26 + add %fp,%o5,%o5 + + faddd %f4,%f6,%f4 + ldd [%l0+0x20],%f32 + + faddd %f14,%f16,%f14 + ldd [%l1+0x20],%f34 + + faddd %f24,%f26,%f24 + ldd [%l2+0x20],%f36 + + fmuld %f0,%f4,%f4 + ldd [%l0+0x30],%f6 + + fmuld %f10,%f14,%f14 + ldd [%l1+0x30],%f16 + + fmuld %f20,%f24,%f24 + ldd [%l2+0x30],%f26 + + faddd %f4,%f32,%f4 + ldd [%o3+x0_1],%f32 + + faddd %f14,%f34,%f14 + ldd [%o4+x1_1],%f34 + + faddd %f24,%f36,%f24 + ldd [%o5+x2_1],%f36 + + fmuld %f0,%f4,%f4 + std %f2,[%fp+y0_0] + + fmuld %f10,%f14,%f14 + std %f12,[%fp+y1_0] + + fmuld %f20,%f24,%f24 + std %f22,[%fp+y2_0] + + faddd %f4,%f6,%f4 + + faddd %f14,%f16,%f14 + + faddd %f24,%f26,%f24 + + fmuld %f0,%f4,%f4 + + fmuld %f10,%f14,%f14 + + fmuld %f20,%f24,%f24 + + fmuld %f32,%f4,%f4 + ldd [%o3+y0_0],%f2 + + fmuld %f34,%f14,%f14 + ldd [%o4+y1_0],%f12 + + fmuld %f36,%f24,%f24 + ldd [%o5+y2_0],%f22 + + faddd %f4,%f2,%f4 + + faddd %f14,%f12,%f14 + + faddd %f24,%f22,%f24 + + faddd %f32,%f4,%f6 + + faddd %f34,%f14,%f16 + ba,pt %icc,.FIXSIGN + +! delay slot + faddd %f36,%f24,%f26 + + + .align 32 +.ENDLOOP2: + fmuld %f10,%f40,%f12 + add %l5,thresh,%g1 + faddd %f12,%f42,%f12 + st %f13,[%fp+n1] + fsubd %f12,%f42,%f12 ! 
n + fmuld %f12,%f46,%f14 + fsubd %f10,%f14,%f14 + fmuld %f12,%f48,%f16 + fsubd %f14,%f16,%f10 + ld [%fp+n1],%o4 + fsubd %f14,%f10,%f34 + and %o4,1,%o4 + fsubd %f34,%f16,%f34 + fmuld %f12,%f50,%f18 + sll %o4,3,%o4 + fsubd %f18,%f34,%f18 + ld [%g1+%o4],%f16 + fsubd %f10,%f18,%f14 + fsubd %f10,%f14,%f34 + add %l5,thresh+4,%o7 + fsubd %f34,%f18,%f34 + fmuld %f12,%f52,%f12 + fsubd %f12,%f34,%f12 + ld [%o7+%o4],%f18 + fsubd %f14,%f12,%f10 ! x + fsubd %f14,%f10,%f14 + fands %f10,%f30,%f19 ! save signbit + fabsd %f10,%f10 + std %f10,[%fp+x1_1] + fsubd %f14,%f12,%f12 ! y + fcmpgt32 %f16,%f10,%l1 + fxors %f12,%f19,%f12 + fands %f19,%f18,%f19 ! if (n & 1) clear sign bit + andcc %l1,2,%g0 + bne,pn %icc,1f +! delay slot + nop + fpadd32s %f10,%f31,%f18 + ld [%fp+x1_1],%l1 + fand %f18,%f44,%f14 + sethi %hi(0x3fc3c000),%o7 + add %l3,8,%g1 + fsubd %f10,%f14,%f10 + sub %l1,%o7,%l1 + srl %l1,10,%l1 + faddd %f10,%f12,%f10 + andn %l1,0x1f,%l1 + fmuld %f10,%f10,%f12 + add %l1,%o4,%l1 + fmuld %f12,%f58,%f16 + ldd [%l3+%l1],%f34 + faddd %f16,%f56,%f16 + fmuld %f12,%f62,%f14 + fmuld %f12,%f16,%f16 + faddd %f14,%f60,%f14 + faddd %f16,%f54,%f16 + fmuld %f12,%f14,%f14 + ldd [%g1+%l1],%f12 + fmuld %f10,%f16,%f16 + ldd [%l4+%l1],%f10 + fmuld %f14,%f34,%f14 + fmuld %f16,%f12,%f16 + faddd %f16,%f14,%f16 + faddd %f16,%f10,%f16 + ba,pt %icc,2f + faddd %f16,%f34,%f16 +1: + fmuld %f10,%f10,%f10 + ldd [%l5+%o4],%f34 + add %l5,%o4,%l1 + fmuld %f10,%f34,%f14 + ldd [%l1+0x10],%f16 + add %fp,%o4,%o4 + faddd %f14,%f16,%f14 + ldd [%l1+0x20],%f34 + fmuld %f10,%f14,%f14 + ldd [%l1+0x30],%f16 + faddd %f14,%f34,%f14 + ldd [%o4+x1_1],%f34 + fmuld %f10,%f14,%f14 + std %f12,[%fp+y1_0] + faddd %f14,%f16,%f14 + fmuld %f10,%f14,%f14 + fmuld %f34,%f14,%f14 + ldd [%o4+y1_0],%f12 + faddd %f14,%f12,%f14 + faddd %f34,%f14,%f16 +2: + add %l5,thresh-4,%g1 + ld [%fp+n1],%o4 + and %o4,2,%o4 + sll %o4,2,%o4 + ld [%g1+%o4],%f18 + fxors %f19,%f18,%f19 + fors %f16,%f19,%f16 ! 
tack on sign + st %f16,[%o1] + st %f17,[%o1+4] + +.ENDLOOP1: + fmuld %f0,%f40,%f2 + add %l5,thresh,%g1 + faddd %f2,%f42,%f2 + st %f3,[%fp+n0] + fsubd %f2,%f42,%f2 ! n + fmuld %f2,%f46,%f4 + fsubd %f0,%f4,%f4 + fmuld %f2,%f48,%f6 + fsubd %f4,%f6,%f0 + ld [%fp+n0],%o3 + fsubd %f4,%f0,%f32 + and %o3,1,%o3 + fsubd %f32,%f6,%f32 + fmuld %f2,%f50,%f8 + sll %o3,3,%o3 + fsubd %f8,%f32,%f8 + ld [%g1+%o3],%f6 + fsubd %f0,%f8,%f4 + fsubd %f0,%f4,%f32 + add %l5,thresh+4,%o7 + fsubd %f32,%f8,%f32 + fmuld %f2,%f52,%f2 + fsubd %f2,%f32,%f2 + ld [%o7+%o3],%f8 + fsubd %f4,%f2,%f0 ! x + fsubd %f4,%f0,%f4 + fands %f0,%f30,%f9 ! save signbit + fabsd %f0,%f0 + std %f0,[%fp+x0_1] + fsubd %f4,%f2,%f2 ! y + fcmpgt32 %f6,%f0,%l0 + fxors %f2,%f9,%f2 + fands %f9,%f8,%f9 ! if (n & 1) clear sign bit + andcc %l0,2,%g0 + bne,pn %icc,1f +! delay slot + nop + fpadd32s %f0,%f31,%f8 + ld [%fp+x0_1],%l0 + fand %f8,%f44,%f4 + sethi %hi(0x3fc3c000),%o7 + add %l3,8,%g1 + fsubd %f0,%f4,%f0 + sub %l0,%o7,%l0 + srl %l0,10,%l0 + faddd %f0,%f2,%f0 + andn %l0,0x1f,%l0 + fmuld %f0,%f0,%f2 + add %l0,%o3,%l0 + fmuld %f2,%f58,%f6 + ldd [%l3+%l0],%f32 + faddd %f6,%f56,%f6 + fmuld %f2,%f62,%f4 + fmuld %f2,%f6,%f6 + faddd %f4,%f60,%f4 + faddd %f6,%f54,%f6 + fmuld %f2,%f4,%f4 + ldd [%g1+%l0],%f2 + fmuld %f0,%f6,%f6 + ldd [%l4+%l0],%f0 + fmuld %f4,%f32,%f4 + fmuld %f6,%f2,%f6 + faddd %f6,%f4,%f6 + faddd %f6,%f0,%f6 + ba,pt %icc,2f + faddd %f6,%f32,%f6 +1: + fmuld %f0,%f0,%f0 + ldd [%l5+%o3],%f32 + add %l5,%o3,%l0 + fmuld %f0,%f32,%f4 + ldd [%l0+0x10],%f6 + add %fp,%o3,%o3 + faddd %f4,%f6,%f4 + ldd [%l0+0x20],%f32 + fmuld %f0,%f4,%f4 + ldd [%l0+0x30],%f6 + faddd %f4,%f32,%f4 + ldd [%o3+x0_1],%f32 + fmuld %f0,%f4,%f4 + std %f2,[%fp+y0_0] + faddd %f4,%f6,%f4 + fmuld %f0,%f4,%f4 + fmuld %f32,%f4,%f4 + ldd [%o3+y0_0],%f2 + faddd %f4,%f2,%f4 + faddd %f32,%f4,%f6 +2: + add %l5,thresh-4,%g1 + ld [%fp+n0],%o3 + and %o3,2,%o3 + sll %o3,2,%o3 + ld [%g1+%o3],%f8 + fxors %f9,%f8,%f9 + fors %f6,%f9,%f6 ! 
tack on sign + st %f6,[%o0] + st %f7,[%o0+4] + +.ENDLOOP0: + +! check for huge arguments remaining + + tst LIM_l6 + be,pt %icc,.exit +! delay slot + nop + +! ========== huge range (use C code) ========== + +#ifdef __sparcv9 + ldx [%fp+xsave],%o1 + ldx [%fp+ysave],%o3 +#else + ld [%fp+xsave],%o1 + ld [%fp+ysave],%o3 +#endif + ld [%fp+nsave],%o0 + ld [%fp+sxsave],%o2 + ld [%fp+sysave],%o4 + sra %o2,0,%o2 ! sign-extend for V9 + sra %o4,0,%o4 + call __vlibm_vsin_big + mov %l7,%o5 ! delay slot + +.exit: + ret + restore + + + .align 32 +.SKIP0: + addcc %i0,-1,%i0 + ble,pn %icc,.ENDLOOP0 +! delay slot, harmless if branch taken + add %i3,%i4,%i3 ! y += stridey + andn %l1,%i5,%l0 ! hx &= ~0x80000000 + fmovs %f10,%f0 + ld [%i1+4],%f1 + ba,pt %icc,.LOOP0 +! delay slot + add %i1,%i2,%i1 ! x += stridex + + + .align 32 +.SKIP1: + addcc %i0,-1,%i0 + ble,pn %icc,.ENDLOOP1 +! delay slot, harmless if branch taken + add %i3,%i4,%i3 ! y += stridey + andn %l2,%i5,%l1 ! hx &= ~0x80000000 + fmovs %f20,%f10 + ld [%i1+4],%f11 + ba,pt %icc,.LOOP1 +! delay slot + add %i1,%i2,%i1 ! x += stridex + + + .align 32 +.SKIP2: + addcc %i0,-1,%i0 + ble,pn %icc,.ENDLOOP2 +! delay slot, harmless if branch taken + add %i3,%i4,%i3 ! y += stridey + ld [%i1],%l2 + ld [%i1],%f20 + ld [%i1+4],%f21 + andn %l2,%i5,%l2 ! hx &= ~0x80000000 + ba,pt %icc,.LOOP2 +! delay slot + add %i1,%i2,%i1 ! x += stridex + + + .align 32 +.BIG0: + sethi %hi(0x7ff00000),%o7 + cmp %l0,%o7 + bl,a,pt %icc,1f ! if hx < 0x7ff00000 +! delay slot, annulled if branch not taken + mov %l7,LIM_l6 ! set biguns flag or + fsubd %f0,%f0,%f0 ! y = x - x + st %f0,[%o0] + st %f1,[%o0+4] +1: + addcc %i0,-1,%i0 + ble,pn %icc,.ENDLOOP0 +! delay slot, harmless if branch taken + andn %l1,%i5,%l0 ! hx &= ~0x80000000 + fmovd %f10,%f0 + ba,pt %icc,.LOOP0 +! delay slot + add %i1,%i2,%i1 ! x += stridex + + + .align 32 +.BIG1: + sethi %hi(0x7ff00000),%o7 + cmp %l1,%o7 + bl,a,pt %icc,1f ! if hx < 0x7ff00000 +! 
delay slot, annulled if branch not taken + mov %l7,LIM_l6 ! set biguns flag or + fsubd %f10,%f10,%f10 ! y = x - x + st %f10,[%o1] + st %f11,[%o1+4] +1: + addcc %i0,-1,%i0 + ble,pn %icc,.ENDLOOP1 +! delay slot, harmless if branch taken + andn %l2,%i5,%l1 ! hx &= ~0x80000000 + fmovd %f20,%f10 + ba,pt %icc,.LOOP1 +! delay slot + add %i1,%i2,%i1 ! x += stridex + + + .align 32 +.BIG2: + sethi %hi(0x7ff00000),%o7 + cmp %l2,%o7 + bl,a,pt %icc,1f ! if hx < 0x7ff00000 +! delay slot, annulled if branch not taken + mov %l7,LIM_l6 ! set biguns flag or + fsubd %f20,%f20,%f20 ! y = x - x + st %f20,[%o2] + st %f21,[%o2+4] +1: + addcc %i0,-1,%i0 + ble,pn %icc,.ENDLOOP2 +! delay slot + nop + ld [%i1],%l2 + ld [%i1],%f20 + ld [%i1+4],%f21 + andn %l2,%i5,%l2 ! hx &= ~0x80000000 + ba,pt %icc,.LOOP2 +! delay slot + add %i1,%i2,%i1 ! x += stridex + + SET_SIZE(__vsin) + diff --git a/usr/src/lib/libmvec/common/vis/__vsin_ultra3.S b/usr/src/lib/libmvec/common/vis/__vsin_ultra3.S new file mode 100644 index 0000000000..bf441ca6ea --- /dev/null +++ b/usr/src/lib/libmvec/common/vis/__vsin_ultra3.S @@ -0,0 +1,3432 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. 
All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .file "__vsin_ultra3.S" + +#include "libm.h" +#if defined(LIBMVEC_SO_BUILD) + .weak __vsin + .type __vsin,#function + __vsin = __vsin_ultra3 +#endif + + RO_DATA + .align 64 +constants: + .word 0x42c80000,0x00000000 ! 3 * 2^44 + .word 0x43380000,0x00000000 ! 3 * 2^51 + .word 0x3fe45f30,0x6dc9c883 ! invpio2 + .word 0x3ff921fb,0x54442c00 ! pio2_1 + .word 0x3d318469,0x898cc400 ! pio2_2 + .word 0x3a71701b,0x839a2520 ! pio2_3 + .word 0xbfc55555,0x55555533 ! pp1 + .word 0x3f811111,0x10e7d53b ! pp2 + .word 0xbf2a0167,0xe6b3cf9b ! pp3 + .word 0xbfdfffff,0xffffff65 ! qq1 + .word 0x3fa55555,0x54f88ed0 ! qq2 + .word 0xbf56c12c,0xdd185f60 ! qq3 + +! local storage indices + +#define xsave STACK_BIAS-0x8 +#define ysave STACK_BIAS-0x10 +#define nsave STACK_BIAS-0x14 +#define sxsave STACK_BIAS-0x18 +#define sysave STACK_BIAS-0x1c +#define biguns STACK_BIAS-0x20 +#define nk3 STACK_BIAS-0x24 +#define nk2 STACK_BIAS-0x28 +#define nk1 STACK_BIAS-0x2c +#define nk0 STACK_BIAS-0x30 +#define junk STACK_BIAS-0x38 +! sizeof temp storage - must be a multiple of 16 for V9 +#define tmps 0x40 + +! register use + +! i0 n +! i1 x +! i2 stridex +! i3 y +! i4 stridey +! i5 0x80000000 + +! l0 hx0 +! l1 hx1 +! l2 hx2 +! l3 hx3 +! l4 k0 +! l5 k1 +! l6 k2 +! l7 k3 + +! the following are 64-bit registers in both V8+ and V9 + +! g1 __vlibm_TBL_sincos2 +! g5 scratch + +! o0 py0 +! o1 py1 +! o2 py2 +! o3 py3 +! o4 0x3e400000 +! o5 0x3fe921fb,0x4099251e +! o7 scratch + +! f0 hx0 +! f2 +! f4 +! f6 +! f8 hx1 +! f10 +! f12 +! f14 +! f16 hx2 +! f18 +! f20 +! f22 +! f24 hx3 +! f26 +! f28 +! f30 +! f32 +! f34 +! f36 +! 
f38 + +#define c3two44 %f40 +#define c3two51 %f42 +#define invpio2 %f44 +#define pio2_1 %f46 +#define pio2_2 %f48 +#define pio2_3 %f50 +#define pp1 %f52 +#define pp2 %f54 +#define pp3 %f56 +#define qq1 %f58 +#define qq2 %f60 +#define qq3 %f62 + + ENTRY(__vsin_ultra3) + save %sp,-SA(MINFRAME)-tmps,%sp + PIC_SETUP(l7) + PIC_SET(l7,constants,o0) + PIC_SET(l7,__vlibm_TBL_sincos2,o1) + mov %o1,%g1 + wr %g0,0x82,%asi ! set %asi for non-faulting loads +#ifdef __sparcv9 + stx %i1,[%fp+xsave] ! save arguments + stx %i3,[%fp+ysave] +#else + st %i1,[%fp+xsave] ! save arguments + st %i3,[%fp+ysave] +#endif + st %i0,[%fp+nsave] + st %i2,[%fp+sxsave] + st %i4,[%fp+sysave] + st %g0,[%fp+biguns] ! biguns = 0 + ldd [%o0+0x00],c3two44 ! load/set up constants + ldd [%o0+0x08],c3two51 + ldd [%o0+0x10],invpio2 + ldd [%o0+0x18],pio2_1 + ldd [%o0+0x20],pio2_2 + ldd [%o0+0x28],pio2_3 + ldd [%o0+0x30],pp1 + ldd [%o0+0x38],pp2 + ldd [%o0+0x40],pp3 + ldd [%o0+0x48],qq1 + ldd [%o0+0x50],qq2 + ldd [%o0+0x58],qq3 + sethi %hi(0x80000000),%i5 + sethi %hi(0x3e400000),%o4 + sethi %hi(0x3fe921fb),%o5 + or %o5,%lo(0x3fe921fb),%o5 + sllx %o5,32,%o5 + sethi %hi(0x4099251e),%o7 + or %o7,%lo(0x4099251e),%o7 + or %o5,%o7,%o5 + sll %i2,3,%i2 ! scale strides + sll %i4,3,%i4 + add %fp,junk,%o1 ! loop prologue + add %fp,junk,%o2 + add %fp,junk,%o3 + ld [%i1],%l0 ! *x + ld [%i1],%f0 + ld [%i1+4],%f3 + andn %l0,%i5,%l0 ! mask off sign + ba .loop0 + add %i1,%i2,%i1 ! x += stridex + +! 16-byte aligned + .align 16 +.loop0: + lda [%i1]%asi,%l1 ! preload next argument + sub %l0,%o4,%g5 + sub %o5,%l0,%o7 + fabss %f0,%f2 + + lda [%i1]%asi,%f8 + orcc %o7,%g5,%g0 + mov %i3,%o0 ! py0 = y + bl,pn %icc,.range0 ! hx < 0x3e400000 or hx > 0x4099251e + +! delay slot + lda [%i1+4]%asi,%f11 + addcc %i0,-1,%i0 + add %i3,%i4,%i3 ! y += stridey + ble,pn %icc,.last1 + +! delay slot + andn %l1,%i5,%l1 + add %i1,%i2,%i1 ! x += stridex + faddd %f2,c3two44,%f4 + st %f15,[%o1+4] + +.loop1: + lda [%i1]%asi,%l2 ! 
preload next argument + sub %l1,%o4,%g5 + sub %o5,%l1,%o7 + fabss %f8,%f10 + + lda [%i1]%asi,%f16 + orcc %o7,%g5,%g0 + mov %i3,%o1 ! py1 = y + bl,pn %icc,.range1 ! hx < 0x3e400000 or hx > 0x4099251e + +! delay slot + lda [%i1+4]%asi,%f19 + addcc %i0,-1,%i0 + add %i3,%i4,%i3 ! y += stridey + ble,pn %icc,.last2 + +! delay slot + andn %l2,%i5,%l2 + add %i1,%i2,%i1 ! x += stridex + faddd %f10,c3two44,%f12 + st %f23,[%o2+4] + +.loop2: + lda [%i1]%asi,%l3 ! preload next argument + sub %l2,%o4,%g5 + sub %o5,%l2,%o7 + fabss %f16,%f18 + + lda [%i1]%asi,%f24 + orcc %o7,%g5,%g0 + mov %i3,%o2 ! py2 = y + bl,pn %icc,.range2 ! hx < 0x3e400000 or hx > 0x4099251e + +! delay slot + lda [%i1+4]%asi,%f27 + addcc %i0,-1,%i0 + add %i3,%i4,%i3 ! y += stridey + ble,pn %icc,.last3 + +! delay slot + andn %l3,%i5,%l3 + add %i1,%i2,%i1 ! x += stridex + faddd %f18,c3two44,%f20 + st %f31,[%o3+4] + +.loop3: + sub %l3,%o4,%g5 + sub %o5,%l3,%o7 + fabss %f24,%f26 + st %f5,[%fp+nk0] + + orcc %o7,%g5,%g0 + mov %i3,%o3 ! py3 = y + bl,pn %icc,.range3 ! hx < 0x3e400000 or hx > 0x4099251e +! delay slot + st %f13,[%fp+nk1] + +!!! DONE? +.cont: + srlx %o5,32,%o7 + add %i3,%i4,%i3 ! y += stridey + fmovs %f3,%f1 + st %f21,[%fp+nk2] + + sub %o7,%l0,%l0 + sub %o7,%l1,%l1 + faddd %f26,c3two44,%f28 + st %f29,[%fp+nk3] + + sub %o7,%l2,%l2 + sub %o7,%l3,%l3 + fmovs %f11,%f9 + + or %l0,%l1,%l0 + or %l2,%l3,%l2 + fmovs %f19,%f17 + + fmovs %f27,%f25 + fmuld %f0,invpio2,%f6 ! x * invpio2, for medium range + + fmuld %f8,invpio2,%f14 + ld [%fp+nk0],%l4 + + fmuld %f16,invpio2,%f22 + ld [%fp+nk1],%l5 + + orcc %l0,%l2,%g0 + bl,pn %icc,.medium +! delay slot + fmuld %f24,invpio2,%f30 + ld [%fp+nk2],%l6 + + ld [%fp+nk3],%l7 + sll %l4,5,%l4 ! k + fcmpd %fcc0,%f0,pio2_3 ! x < pio2_3 iff x < 0 + + sll %l5,5,%l5 + ldd [%l4+%g1],%f4 + fcmpd %fcc1,%f8,pio2_3 + + sll %l6,5,%l6 + ldd [%l5+%g1],%f12 + fcmpd %fcc2,%f16,pio2_3 + + sll %l7,5,%l7 + ldd [%l6+%g1],%f20 + fcmpd %fcc3,%f24,pio2_3 + + ldd [%l7+%g1],%f28 + fsubd %f2,%f4,%f2 ! 
x -= __vlibm_TBL_sincos2[k] + + fsubd %f10,%f12,%f10 + + fsubd %f18,%f20,%f18 + + fsubd %f26,%f28,%f26 + + fmuld %f2,%f2,%f0 ! z = x * x + + fmuld %f10,%f10,%f8 + + fmuld %f18,%f18,%f16 + + fmuld %f26,%f26,%f24 + + fmuld %f0,pp3,%f6 + + fmuld %f8,pp3,%f14 + + fmuld %f16,pp3,%f22 + + fmuld %f24,pp3,%f30 + + faddd %f6,pp2,%f6 + fmuld %f0,qq2,%f4 + + faddd %f14,pp2,%f14 + fmuld %f8,qq2,%f12 + + faddd %f22,pp2,%f22 + fmuld %f16,qq2,%f20 + + faddd %f30,pp2,%f30 + fmuld %f24,qq2,%f28 + + fmuld %f0,%f6,%f6 + faddd %f4,qq1,%f4 + + fmuld %f8,%f14,%f14 + faddd %f12,qq1,%f12 + + fmuld %f16,%f22,%f22 + faddd %f20,qq1,%f20 + + fmuld %f24,%f30,%f30 + faddd %f28,qq1,%f28 + + faddd %f6,pp1,%f6 + fmuld %f0,%f4,%f4 + add %l4,%g1,%l4 + + faddd %f14,pp1,%f14 + fmuld %f8,%f12,%f12 + add %l5,%g1,%l5 + + faddd %f22,pp1,%f22 + fmuld %f16,%f20,%f20 + add %l6,%g1,%l6 + + faddd %f30,pp1,%f30 + fmuld %f24,%f28,%f28 + add %l7,%g1,%l7 + + fmuld %f0,%f6,%f6 + ldd [%l4+8],%f0 + + fmuld %f8,%f14,%f14 + ldd [%l5+8],%f8 + + fmuld %f16,%f22,%f22 + ldd [%l6+8],%f16 + + fmuld %f24,%f30,%f30 + ldd [%l7+8],%f24 + + fmuld %f2,%f6,%f6 + + fmuld %f10,%f14,%f14 + + fmuld %f18,%f22,%f22 + + fmuld %f26,%f30,%f30 + + faddd %f6,%f2,%f6 + fmuld %f0,%f4,%f4 + ldd [%l4+16],%f2 + + faddd %f14,%f10,%f14 + fmuld %f8,%f12,%f12 + ldd [%l5+16],%f10 + + faddd %f22,%f18,%f22 + fmuld %f16,%f20,%f20 + ldd [%l6+16],%f18 + + faddd %f30,%f26,%f30 + fmuld %f24,%f28,%f28 + ldd [%l7+16],%f26 + + fmuld %f2,%f6,%f6 + + fmuld %f10,%f14,%f14 + + fmuld %f18,%f22,%f22 + + fmuld %f26,%f30,%f30 + + faddd %f6,%f4,%f6 + + faddd %f14,%f12,%f14 + + faddd %f22,%f20,%f22 + + faddd %f30,%f28,%f30 + + faddd %f6,%f0,%f6 + + faddd %f14,%f8,%f14 + + faddd %f22,%f16,%f22 + + faddd %f30,%f24,%f30 + + fnegd %f6,%f4 + lda [%i1]%asi,%l0 ! preload next argument + + fnegd %f14,%f12 + lda [%i1]%asi,%f0 + + fnegd %f22,%f20 + lda [%i1+4]%asi,%f3 + + fnegd %f30,%f28 + andn %l0,%i5,%l0 + add %i1,%i2,%i1 + + fmovdl %fcc0,%f4,%f6 ! (hx < -0)? 
-s : s + st %f6,[%o0] + + fmovdl %fcc1,%f12,%f14 + st %f14,[%o1] + + fmovdl %fcc2,%f20,%f22 + st %f22,[%o2] + + fmovdl %fcc3,%f28,%f30 + st %f30,[%o3] + addcc %i0,-1,%i0 + + bg,pt %icc,.loop0 +! delay slot + st %f7,[%o0+4] + + ba,pt %icc,.end +! delay slot + nop + + + .align 16 +.medium: + faddd %f6,c3two51,%f4 + st %f5,[%fp+nk0] + + faddd %f14,c3two51,%f12 + st %f13,[%fp+nk1] + + faddd %f22,c3two51,%f20 + st %f21,[%fp+nk2] + + faddd %f30,c3two51,%f28 + st %f29,[%fp+nk3] + + fsubd %f4,c3two51,%f6 + + fsubd %f12,c3two51,%f14 + + fsubd %f20,c3two51,%f22 + + fsubd %f28,c3two51,%f30 + + fmuld %f6,pio2_1,%f2 + ld [%fp+nk0],%l0 ! n + + fmuld %f14,pio2_1,%f10 + ld [%fp+nk1],%l1 + + fmuld %f22,pio2_1,%f18 + ld [%fp+nk2],%l2 + + fmuld %f30,pio2_1,%f26 + ld [%fp+nk3],%l3 + + fsubd %f0,%f2,%f0 + fmuld %f6,pio2_2,%f4 + + fsubd %f8,%f10,%f8 + fmuld %f14,pio2_2,%f12 + + fsubd %f16,%f18,%f16 + fmuld %f22,pio2_2,%f20 + + fsubd %f24,%f26,%f24 + fmuld %f30,pio2_2,%f28 + + fsubd %f0,%f4,%f32 + + fsubd %f8,%f12,%f34 + + fsubd %f16,%f20,%f36 + + fsubd %f24,%f28,%f38 + + fsubd %f0,%f32,%f0 + fcmple32 %f32,pio2_3,%l4 ! x <= pio2_3 iff x < 0 + + fsubd %f8,%f34,%f8 + fcmple32 %f34,pio2_3,%l5 + + fsubd %f16,%f36,%f16 + fcmple32 %f36,pio2_3,%l6 + + fsubd %f24,%f38,%f24 + fcmple32 %f38,pio2_3,%l7 + + fsubd %f0,%f4,%f0 + fmuld %f6,pio2_3,%f6 + sll %l4,30,%l4 ! if (x < 0) n = -n ^ 2 + + fsubd %f8,%f12,%f8 + fmuld %f14,pio2_3,%f14 + sll %l5,30,%l5 + + fsubd %f16,%f20,%f16 + fmuld %f22,pio2_3,%f22 + sll %l6,30,%l6 + + fsubd %f24,%f28,%f24 + fmuld %f30,pio2_3,%f30 + sll %l7,30,%l7 + + fsubd %f6,%f0,%f6 + sra %l4,31,%l4 + + fsubd %f14,%f8,%f14 + sra %l5,31,%l5 + + fsubd %f22,%f16,%f22 + sra %l6,31,%l6 + + fsubd %f30,%f24,%f30 + sra %l7,31,%l7 + + fsubd %f32,%f6,%f0 ! 
reduced x + xor %l0,%l4,%l0 + + fsubd %f34,%f14,%f8 + xor %l1,%l5,%l1 + + fsubd %f36,%f22,%f16 + xor %l2,%l6,%l2 + + fsubd %f38,%f30,%f24 + xor %l3,%l7,%l3 + + fabsd %f0,%f2 + sub %l0,%l4,%l0 + + fabsd %f8,%f10 + sub %l1,%l5,%l1 + + fabsd %f16,%f18 + sub %l2,%l6,%l2 + + fabsd %f24,%f26 + sub %l3,%l7,%l3 + + faddd %f2,c3two44,%f4 + st %f5,[%fp+nk0] + and %l4,2,%l4 + + faddd %f10,c3two44,%f12 + st %f13,[%fp+nk1] + and %l5,2,%l5 + + faddd %f18,c3two44,%f20 + st %f21,[%fp+nk2] + and %l6,2,%l6 + + faddd %f26,c3two44,%f28 + st %f29,[%fp+nk3] + and %l7,2,%l7 + + fsubd %f32,%f0,%f4 + xor %l0,%l4,%l0 + + fsubd %f34,%f8,%f12 + xor %l1,%l5,%l1 + + fsubd %f36,%f16,%f20 + xor %l2,%l6,%l2 + + fsubd %f38,%f24,%f28 + xor %l3,%l7,%l3 + + fzero %f38 + ld [%fp+nk0],%l4 + + fsubd %f4,%f6,%f6 ! w + ld [%fp+nk1],%l5 + + fsubd %f12,%f14,%f14 + ld [%fp+nk2],%l6 + + fnegd %f38,%f38 + ld [%fp+nk3],%l7 + sll %l4,5,%l4 ! k + + fsubd %f20,%f22,%f22 + sll %l5,5,%l5 + + fsubd %f28,%f30,%f30 + sll %l6,5,%l6 + + fand %f0,%f38,%f32 ! sign bit of x + ldd [%l4+%g1],%f4 + sll %l7,5,%l7 + + fand %f8,%f38,%f34 + ldd [%l5+%g1],%f12 + + fand %f16,%f38,%f36 + ldd [%l6+%g1],%f20 + + fand %f24,%f38,%f38 + ldd [%l7+%g1],%f28 + + fsubd %f2,%f4,%f2 ! x -= __vlibm_TBL_sincos2[k] + + fsubd %f10,%f12,%f10 + + fsubd %f18,%f20,%f18 + nop + + fsubd %f26,%f28,%f26 + nop + +! 16-byte aligned + fmuld %f2,%f2,%f0 ! z = x * x + andcc %l0,1,%g0 + bz,pn %icc,.case8 +! delay slot + fxor %f6,%f32,%f32 + + fmuld %f10,%f10,%f8 + andcc %l1,1,%g0 + bz,pn %icc,.case4 +! delay slot + fxor %f14,%f34,%f34 + + fmuld %f18,%f18,%f16 + andcc %l2,1,%g0 + bz,pn %icc,.case2 +! delay slot + fxor %f22,%f36,%f36 + + fmuld %f26,%f26,%f24 + andcc %l3,1,%g0 + bz,pn %icc,.case1 +! delay slot + fxor %f30,%f38,%f38 + +!.case0: + fmuld %f0,qq3,%f6 ! cos(x0) + + fmuld %f8,qq3,%f14 ! cos(x1) + + fmuld %f16,qq3,%f22 ! cos(x2) + + fmuld %f24,qq3,%f30 ! 
cos(x3) + + faddd %f6,qq2,%f6 + fmuld %f0,pp2,%f4 + + faddd %f14,qq2,%f14 + fmuld %f8,pp2,%f12 + + faddd %f22,qq2,%f22 + fmuld %f16,pp2,%f20 + + faddd %f30,qq2,%f30 + fmuld %f24,pp2,%f28 + + fmuld %f0,%f6,%f6 + faddd %f4,pp1,%f4 + + fmuld %f8,%f14,%f14 + faddd %f12,pp1,%f12 + + fmuld %f16,%f22,%f22 + faddd %f20,pp1,%f20 + + fmuld %f24,%f30,%f30 + faddd %f28,pp1,%f28 + + faddd %f6,qq1,%f6 + fmuld %f0,%f4,%f4 + add %l4,%g1,%l4 + + faddd %f14,qq1,%f14 + fmuld %f8,%f12,%f12 + add %l5,%g1,%l5 + + faddd %f22,qq1,%f22 + fmuld %f16,%f20,%f20 + add %l6,%g1,%l6 + + faddd %f30,qq1,%f30 + fmuld %f24,%f28,%f28 + add %l7,%g1,%l7 + + fmuld %f2,%f4,%f4 + + fmuld %f10,%f12,%f12 + + fmuld %f18,%f20,%f20 + + fmuld %f26,%f28,%f28 + + fmuld %f0,%f6,%f6 + faddd %f4,%f32,%f4 + ldd [%l4+16],%f0 + + fmuld %f8,%f14,%f14 + faddd %f12,%f34,%f12 + ldd [%l5+16],%f8 + + fmuld %f16,%f22,%f22 + faddd %f20,%f36,%f20 + ldd [%l6+16],%f16 + + fmuld %f24,%f30,%f30 + faddd %f28,%f38,%f28 + ldd [%l7+16],%f24 + + fmuld %f0,%f6,%f6 + faddd %f4,%f2,%f4 + ldd [%l4+8],%f32 + + fmuld %f8,%f14,%f14 + faddd %f12,%f10,%f12 + ldd [%l5+8],%f34 + + fmuld %f16,%f22,%f22 + faddd %f20,%f18,%f20 + ldd [%l6+8],%f36 + + fmuld %f24,%f30,%f30 + faddd %f28,%f26,%f28 + ldd [%l7+8],%f38 + + fmuld %f32,%f4,%f4 + + fmuld %f34,%f12,%f12 + + fmuld %f36,%f20,%f20 + + fmuld %f38,%f28,%f28 + + fsubd %f6,%f4,%f6 + + fsubd %f14,%f12,%f14 + + fsubd %f22,%f20,%f22 + + fsubd %f30,%f28,%f30 + + faddd %f6,%f0,%f6 + + faddd %f14,%f8,%f14 + + faddd %f22,%f16,%f22 + + faddd %f30,%f24,%f30 + mov %l0,%l4 + + fnegd %f6,%f4 + lda [%i1]%asi,%l0 ! 
preload next argument + + fnegd %f14,%f12 + lda [%i1]%asi,%f0 + + fnegd %f22,%f20 + lda [%i1+4]%asi,%f3 + + fnegd %f30,%f28 + andn %l0,%i5,%l0 + add %i1,%i2,%i1 + + andcc %l4,2,%g0 + fmovdnz %icc,%f4,%f6 + st %f6,[%o0] + + andcc %l1,2,%g0 + fmovdnz %icc,%f12,%f14 + st %f14,[%o1] + + andcc %l2,2,%g0 + fmovdnz %icc,%f20,%f22 + st %f22,[%o2] + + andcc %l3,2,%g0 + fmovdnz %icc,%f28,%f30 + st %f30,[%o3] + + addcc %i0,-1,%i0 + bg,pt %icc,.loop0 +! delay slot + st %f7,[%o0+4] + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case1: + fmuld %f24,pp3,%f30 ! sin(x3) + + fmuld %f0,qq3,%f6 ! cos(x0) + + fmuld %f8,qq3,%f14 ! cos(x1) + + fmuld %f16,qq3,%f22 ! cos(x2) + + faddd %f30,pp2,%f30 + fmuld %f24,qq2,%f28 + + faddd %f6,qq2,%f6 + fmuld %f0,pp2,%f4 + + faddd %f14,qq2,%f14 + fmuld %f8,pp2,%f12 + + faddd %f22,qq2,%f22 + fmuld %f16,pp2,%f20 + + fmuld %f24,%f30,%f30 + faddd %f28,qq1,%f28 + + fmuld %f0,%f6,%f6 + faddd %f4,pp1,%f4 + + fmuld %f8,%f14,%f14 + faddd %f12,pp1,%f12 + + fmuld %f16,%f22,%f22 + faddd %f20,pp1,%f20 + + faddd %f30,pp1,%f30 + fmuld %f24,%f28,%f28 + add %l7,%g1,%l7 + + faddd %f6,qq1,%f6 + fmuld %f0,%f4,%f4 + add %l4,%g1,%l4 + + faddd %f14,qq1,%f14 + fmuld %f8,%f12,%f12 + add %l5,%g1,%l5 + + faddd %f22,qq1,%f22 + fmuld %f16,%f20,%f20 + add %l6,%g1,%l6 + + fmuld %f24,%f30,%f30 + + fmuld %f2,%f4,%f4 + + fmuld %f10,%f12,%f12 + + fmuld %f18,%f20,%f20 + + fmuld %f26,%f30,%f30 + ldd [%l7+8],%f24 + + fmuld %f0,%f6,%f6 + faddd %f4,%f32,%f4 + ldd [%l4+16],%f0 + + fmuld %f8,%f14,%f14 + faddd %f12,%f34,%f12 + ldd [%l5+16],%f8 + + fmuld %f16,%f22,%f22 + faddd %f20,%f36,%f20 + ldd [%l6+16],%f16 + + fmuld %f24,%f28,%f28 + faddd %f38,%f30,%f30 + + fmuld %f0,%f6,%f6 + faddd %f4,%f2,%f4 + ldd [%l4+8],%f32 + + fmuld %f8,%f14,%f14 + faddd %f12,%f10,%f12 + ldd [%l5+8],%f34 + + fmuld %f16,%f22,%f22 + faddd %f20,%f18,%f20 + ldd [%l6+8],%f36 + + faddd %f26,%f30,%f30 + ldd [%l7+16],%f38 + + fmuld %f32,%f4,%f4 + + fmuld %f34,%f12,%f12 + + fmuld %f36,%f20,%f20 + + fmuld 
%f38,%f30,%f30 + + fsubd %f6,%f4,%f6 + + fsubd %f14,%f12,%f14 + + fsubd %f22,%f20,%f22 + + faddd %f30,%f28,%f30 + + faddd %f6,%f0,%f6 + + faddd %f14,%f8,%f14 + + faddd %f22,%f16,%f22 + + faddd %f30,%f24,%f30 + mov %l0,%l4 + + fnegd %f6,%f4 + lda [%i1]%asi,%l0 ! preload next argument + + fnegd %f14,%f12 + lda [%i1]%asi,%f0 + + fnegd %f22,%f20 + lda [%i1+4]%asi,%f3 + + fnegd %f30,%f28 + andn %l0,%i5,%l0 + add %i1,%i2,%i1 + + andcc %l4,2,%g0 + fmovdnz %icc,%f4,%f6 + st %f6,[%o0] + + andcc %l1,2,%g0 + fmovdnz %icc,%f12,%f14 + st %f14,[%o1] + + andcc %l2,2,%g0 + fmovdnz %icc,%f20,%f22 + st %f22,[%o2] + + andcc %l3,2,%g0 + fmovdnz %icc,%f28,%f30 + st %f30,[%o3] + + addcc %i0,-1,%i0 + bg,pt %icc,.loop0 +! delay slot + st %f7,[%o0+4] + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case2: + fmuld %f26,%f26,%f24 + andcc %l3,1,%g0 + bz,pn %icc,.case3 +! delay slot + fxor %f30,%f38,%f38 + + fmuld %f16,pp3,%f22 ! sin(x2) + + fmuld %f0,qq3,%f6 ! cos(x0) + + fmuld %f8,qq3,%f14 ! cos(x1) + + faddd %f22,pp2,%f22 + fmuld %f16,qq2,%f20 + + fmuld %f24,qq3,%f30 ! 
cos(x3) + + faddd %f6,qq2,%f6 + fmuld %f0,pp2,%f4 + + faddd %f14,qq2,%f14 + fmuld %f8,pp2,%f12 + + fmuld %f16,%f22,%f22 + faddd %f20,qq1,%f20 + + faddd %f30,qq2,%f30 + fmuld %f24,pp2,%f28 + + fmuld %f0,%f6,%f6 + faddd %f4,pp1,%f4 + + fmuld %f8,%f14,%f14 + faddd %f12,pp1,%f12 + + faddd %f22,pp1,%f22 + fmuld %f16,%f20,%f20 + add %l6,%g1,%l6 + + fmuld %f24,%f30,%f30 + faddd %f28,pp1,%f28 + + faddd %f6,qq1,%f6 + fmuld %f0,%f4,%f4 + add %l4,%g1,%l4 + + faddd %f14,qq1,%f14 + fmuld %f8,%f12,%f12 + add %l5,%g1,%l5 + + fmuld %f16,%f22,%f22 + + faddd %f30,qq1,%f30 + fmuld %f24,%f28,%f28 + add %l7,%g1,%l7 + + fmuld %f2,%f4,%f4 + + fmuld %f10,%f12,%f12 + + fmuld %f18,%f22,%f22 + ldd [%l6+8],%f16 + + fmuld %f26,%f28,%f28 + + fmuld %f0,%f6,%f6 + faddd %f4,%f32,%f4 + ldd [%l4+16],%f0 + + fmuld %f8,%f14,%f14 + faddd %f12,%f34,%f12 + ldd [%l5+16],%f8 + + fmuld %f16,%f20,%f20 + faddd %f36,%f22,%f22 + + fmuld %f24,%f30,%f30 + faddd %f28,%f38,%f28 + ldd [%l7+16],%f24 + + fmuld %f0,%f6,%f6 + faddd %f4,%f2,%f4 + ldd [%l4+8],%f32 + + fmuld %f8,%f14,%f14 + faddd %f12,%f10,%f12 + ldd [%l5+8],%f34 + + faddd %f18,%f22,%f22 + ldd [%l6+16],%f36 + + fmuld %f24,%f30,%f30 + faddd %f28,%f26,%f28 + ldd [%l7+8],%f38 + + fmuld %f32,%f4,%f4 + + fmuld %f34,%f12,%f12 + + fmuld %f36,%f22,%f22 + + fmuld %f38,%f28,%f28 + + fsubd %f6,%f4,%f6 + + fsubd %f14,%f12,%f14 + + faddd %f22,%f20,%f22 + + fsubd %f30,%f28,%f30 + + faddd %f6,%f0,%f6 + + faddd %f14,%f8,%f14 + + faddd %f22,%f16,%f22 + + faddd %f30,%f24,%f30 + mov %l0,%l4 + + fnegd %f6,%f4 + lda [%i1]%asi,%l0 ! preload next argument + + fnegd %f14,%f12 + lda [%i1]%asi,%f0 + + fnegd %f22,%f20 + lda [%i1+4]%asi,%f3 + + fnegd %f30,%f28 + andn %l0,%i5,%l0 + add %i1,%i2,%i1 + + andcc %l4,2,%g0 + fmovdnz %icc,%f4,%f6 + st %f6,[%o0] + + andcc %l1,2,%g0 + fmovdnz %icc,%f12,%f14 + st %f14,[%o1] + + andcc %l2,2,%g0 + fmovdnz %icc,%f20,%f22 + st %f22,[%o2] + + andcc %l3,2,%g0 + fmovdnz %icc,%f28,%f30 + st %f30,[%o3] + + addcc %i0,-1,%i0 + bg,pt %icc,.loop0 +! 
delay slot + st %f7,[%o0+4] + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case3: + fmuld %f16,pp3,%f22 ! sin(x2) + + fmuld %f24,pp3,%f30 ! sin(x3) + + fmuld %f0,qq3,%f6 ! cos(x0) + + fmuld %f8,qq3,%f14 ! cos(x1) + + faddd %f22,pp2,%f22 + fmuld %f16,qq2,%f20 + + faddd %f30,pp2,%f30 + fmuld %f24,qq2,%f28 + + faddd %f6,qq2,%f6 + fmuld %f0,pp2,%f4 + + faddd %f14,qq2,%f14 + fmuld %f8,pp2,%f12 + + fmuld %f16,%f22,%f22 + faddd %f20,qq1,%f20 + + fmuld %f24,%f30,%f30 + faddd %f28,qq1,%f28 + + fmuld %f0,%f6,%f6 + faddd %f4,pp1,%f4 + + fmuld %f8,%f14,%f14 + faddd %f12,pp1,%f12 + + faddd %f22,pp1,%f22 + fmuld %f16,%f20,%f20 + add %l6,%g1,%l6 + + faddd %f30,pp1,%f30 + fmuld %f24,%f28,%f28 + add %l7,%g1,%l7 + + faddd %f6,qq1,%f6 + fmuld %f0,%f4,%f4 + add %l4,%g1,%l4 + + faddd %f14,qq1,%f14 + fmuld %f8,%f12,%f12 + add %l5,%g1,%l5 + + fmuld %f16,%f22,%f22 + + fmuld %f24,%f30,%f30 + + fmuld %f2,%f4,%f4 + + fmuld %f10,%f12,%f12 + + fmuld %f18,%f22,%f22 + ldd [%l6+8],%f16 + + fmuld %f26,%f30,%f30 + ldd [%l7+8],%f24 + + fmuld %f0,%f6,%f6 + faddd %f4,%f32,%f4 + ldd [%l4+16],%f0 + + fmuld %f8,%f14,%f14 + faddd %f12,%f34,%f12 + ldd [%l5+16],%f8 + + fmuld %f16,%f20,%f20 + faddd %f36,%f22,%f22 + + fmuld %f24,%f28,%f28 + faddd %f38,%f30,%f30 + + fmuld %f0,%f6,%f6 + faddd %f4,%f2,%f4 + ldd [%l4+8],%f32 + + fmuld %f8,%f14,%f14 + faddd %f12,%f10,%f12 + ldd [%l5+8],%f34 + + faddd %f18,%f22,%f22 + ldd [%l6+16],%f36 + + faddd %f26,%f30,%f30 + ldd [%l7+16],%f38 + + fmuld %f32,%f4,%f4 + + fmuld %f34,%f12,%f12 + + fmuld %f36,%f22,%f22 + + fmuld %f38,%f30,%f30 + + fsubd %f6,%f4,%f6 + + fsubd %f14,%f12,%f14 + + faddd %f22,%f20,%f22 + + faddd %f30,%f28,%f30 + + faddd %f6,%f0,%f6 + + faddd %f14,%f8,%f14 + + faddd %f22,%f16,%f22 + + faddd %f30,%f24,%f30 + mov %l0,%l4 + + fnegd %f6,%f4 + lda [%i1]%asi,%l0 ! 
preload next argument + + fnegd %f14,%f12 + lda [%i1]%asi,%f0 + + fnegd %f22,%f20 + lda [%i1+4]%asi,%f3 + + fnegd %f30,%f28 + andn %l0,%i5,%l0 + add %i1,%i2,%i1 + + andcc %l4,2,%g0 + fmovdnz %icc,%f4,%f6 + st %f6,[%o0] + + andcc %l1,2,%g0 + fmovdnz %icc,%f12,%f14 + st %f14,[%o1] + + andcc %l2,2,%g0 + fmovdnz %icc,%f20,%f22 + st %f22,[%o2] + + andcc %l3,2,%g0 + fmovdnz %icc,%f28,%f30 + st %f30,[%o3] + + addcc %i0,-1,%i0 + bg,pt %icc,.loop0 +! delay slot + st %f7,[%o0+4] + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case4: + fmuld %f18,%f18,%f16 + andcc %l2,1,%g0 + bz,pn %icc,.case6 +! delay slot + fxor %f22,%f36,%f36 + + fmuld %f26,%f26,%f24 + andcc %l3,1,%g0 + bz,pn %icc,.case5 +! delay slot + fxor %f30,%f38,%f38 + + fmuld %f8,pp3,%f14 ! sin(x1) + + fmuld %f0,qq3,%f6 ! cos(x0) + + faddd %f14,pp2,%f14 + fmuld %f8,qq2,%f12 + + fmuld %f16,qq3,%f22 ! cos(x2) + + fmuld %f24,qq3,%f30 ! cos(x3) + + faddd %f6,qq2,%f6 + fmuld %f0,pp2,%f4 + + fmuld %f8,%f14,%f14 + faddd %f12,qq1,%f12 + + faddd %f22,qq2,%f22 + fmuld %f16,pp2,%f20 + + faddd %f30,qq2,%f30 + fmuld %f24,pp2,%f28 + + fmuld %f0,%f6,%f6 + faddd %f4,pp1,%f4 + + faddd %f14,pp1,%f14 + fmuld %f8,%f12,%f12 + add %l5,%g1,%l5 + + fmuld %f16,%f22,%f22 + faddd %f20,pp1,%f20 + + fmuld %f24,%f30,%f30 + faddd %f28,pp1,%f28 + + faddd %f6,qq1,%f6 + fmuld %f0,%f4,%f4 + add %l4,%g1,%l4 + + fmuld %f8,%f14,%f14 + + faddd %f22,qq1,%f22 + fmuld %f16,%f20,%f20 + add %l6,%g1,%l6 + + faddd %f30,qq1,%f30 + fmuld %f24,%f28,%f28 + add %l7,%g1,%l7 + + fmuld %f2,%f4,%f4 + + fmuld %f10,%f14,%f14 + ldd [%l5+8],%f8 + + fmuld %f18,%f20,%f20 + + fmuld %f26,%f28,%f28 + + fmuld %f0,%f6,%f6 + faddd %f4,%f32,%f4 + ldd [%l4+16],%f0 + + fmuld %f8,%f12,%f12 + faddd %f34,%f14,%f14 + + fmuld %f16,%f22,%f22 + faddd %f20,%f36,%f20 + ldd [%l6+16],%f16 + + fmuld %f24,%f30,%f30 + faddd %f28,%f38,%f28 + ldd [%l7+16],%f24 + + fmuld %f0,%f6,%f6 + faddd %f4,%f2,%f4 + ldd [%l4+8],%f32 + + faddd %f10,%f14,%f14 + ldd [%l5+16],%f34 + + fmuld %f16,%f22,%f22 + 
faddd %f20,%f18,%f20 + ldd [%l6+8],%f36 + + fmuld %f24,%f30,%f30 + faddd %f28,%f26,%f28 + ldd [%l7+8],%f38 + + fmuld %f32,%f4,%f4 + + fmuld %f34,%f14,%f14 + + fmuld %f36,%f20,%f20 + + fmuld %f38,%f28,%f28 + + fsubd %f6,%f4,%f6 + + faddd %f14,%f12,%f14 + + fsubd %f22,%f20,%f22 + + fsubd %f30,%f28,%f30 + + faddd %f6,%f0,%f6 + + faddd %f14,%f8,%f14 + + faddd %f22,%f16,%f22 + + faddd %f30,%f24,%f30 + mov %l0,%l4 + + fnegd %f6,%f4 + lda [%i1]%asi,%l0 ! preload next argument + + fnegd %f14,%f12 + lda [%i1]%asi,%f0 + + fnegd %f22,%f20 + lda [%i1+4]%asi,%f3 + + fnegd %f30,%f28 + andn %l0,%i5,%l0 + add %i1,%i2,%i1 + + andcc %l4,2,%g0 + fmovdnz %icc,%f4,%f6 + st %f6,[%o0] + + andcc %l1,2,%g0 + fmovdnz %icc,%f12,%f14 + st %f14,[%o1] + + andcc %l2,2,%g0 + fmovdnz %icc,%f20,%f22 + st %f22,[%o2] + + andcc %l3,2,%g0 + fmovdnz %icc,%f28,%f30 + st %f30,[%o3] + + addcc %i0,-1,%i0 + bg,pt %icc,.loop0 +! delay slot + st %f7,[%o0+4] + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case5: + fmuld %f8,pp3,%f14 ! sin(x1) + + fmuld %f24,pp3,%f30 ! sin(x3) + + fmuld %f0,qq3,%f6 ! cos(x0) + + faddd %f14,pp2,%f14 + fmuld %f8,qq2,%f12 + + fmuld %f16,qq3,%f22 ! 
cos(x2) + + faddd %f30,pp2,%f30 + fmuld %f24,qq2,%f28 + + faddd %f6,qq2,%f6 + fmuld %f0,pp2,%f4 + + fmuld %f8,%f14,%f14 + faddd %f12,qq1,%f12 + + faddd %f22,qq2,%f22 + fmuld %f16,pp2,%f20 + + fmuld %f24,%f30,%f30 + faddd %f28,qq1,%f28 + + fmuld %f0,%f6,%f6 + faddd %f4,pp1,%f4 + + faddd %f14,pp1,%f14 + fmuld %f8,%f12,%f12 + add %l5,%g1,%l5 + + fmuld %f16,%f22,%f22 + faddd %f20,pp1,%f20 + + faddd %f30,pp1,%f30 + fmuld %f24,%f28,%f28 + add %l7,%g1,%l7 + + faddd %f6,qq1,%f6 + fmuld %f0,%f4,%f4 + add %l4,%g1,%l4 + + fmuld %f8,%f14,%f14 + + faddd %f22,qq1,%f22 + fmuld %f16,%f20,%f20 + add %l6,%g1,%l6 + + fmuld %f24,%f30,%f30 + + fmuld %f2,%f4,%f4 + + fmuld %f10,%f14,%f14 + ldd [%l5+8],%f8 + + fmuld %f18,%f20,%f20 + + fmuld %f26,%f30,%f30 + ldd [%l7+8],%f24 + + fmuld %f0,%f6,%f6 + faddd %f4,%f32,%f4 + ldd [%l4+16],%f0 + + fmuld %f8,%f12,%f12 + faddd %f34,%f14,%f14 + + fmuld %f16,%f22,%f22 + faddd %f20,%f36,%f20 + ldd [%l6+16],%f16 + + fmuld %f24,%f28,%f28 + faddd %f38,%f30,%f30 + + fmuld %f0,%f6,%f6 + faddd %f4,%f2,%f4 + ldd [%l4+8],%f32 + + faddd %f10,%f14,%f14 + ldd [%l5+16],%f34 + + fmuld %f16,%f22,%f22 + faddd %f20,%f18,%f20 + ldd [%l6+8],%f36 + + faddd %f26,%f30,%f30 + ldd [%l7+16],%f38 + + fmuld %f32,%f4,%f4 + + fmuld %f34,%f14,%f14 + + fmuld %f36,%f20,%f20 + + fmuld %f38,%f30,%f30 + + fsubd %f6,%f4,%f6 + + faddd %f14,%f12,%f14 + + fsubd %f22,%f20,%f22 + + faddd %f30,%f28,%f30 + + faddd %f6,%f0,%f6 + + faddd %f14,%f8,%f14 + + faddd %f22,%f16,%f22 + + faddd %f30,%f24,%f30 + mov %l0,%l4 + + fnegd %f6,%f4 + lda [%i1]%asi,%l0 ! preload next argument + + fnegd %f14,%f12 + lda [%i1]%asi,%f0 + + fnegd %f22,%f20 + lda [%i1+4]%asi,%f3 + + fnegd %f30,%f28 + andn %l0,%i5,%l0 + add %i1,%i2,%i1 + + andcc %l4,2,%g0 + fmovdnz %icc,%f4,%f6 + st %f6,[%o0] + + andcc %l1,2,%g0 + fmovdnz %icc,%f12,%f14 + st %f14,[%o1] + + andcc %l2,2,%g0 + fmovdnz %icc,%f20,%f22 + st %f22,[%o2] + + andcc %l3,2,%g0 + fmovdnz %icc,%f28,%f30 + st %f30,[%o3] + + addcc %i0,-1,%i0 + bg,pt %icc,.loop0 +! 
delay slot + st %f7,[%o0+4] + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case6: + fmuld %f26,%f26,%f24 + andcc %l3,1,%g0 + bz,pn %icc,.case7 +! delay slot + fxor %f30,%f38,%f38 + + fmuld %f8,pp3,%f14 ! sin(x1) + + fmuld %f16,pp3,%f22 ! sin(x2) + + fmuld %f0,qq3,%f6 ! cos(x0) + + faddd %f14,pp2,%f14 + fmuld %f8,qq2,%f12 + + faddd %f22,pp2,%f22 + fmuld %f16,qq2,%f20 + + fmuld %f24,qq3,%f30 ! cos(x3) + + faddd %f6,qq2,%f6 + fmuld %f0,pp2,%f4 + + fmuld %f8,%f14,%f14 + faddd %f12,qq1,%f12 + + fmuld %f16,%f22,%f22 + faddd %f20,qq1,%f20 + + faddd %f30,qq2,%f30 + fmuld %f24,pp2,%f28 + + fmuld %f0,%f6,%f6 + faddd %f4,pp1,%f4 + + faddd %f14,pp1,%f14 + fmuld %f8,%f12,%f12 + add %l5,%g1,%l5 + + faddd %f22,pp1,%f22 + fmuld %f16,%f20,%f20 + add %l6,%g1,%l6 + + fmuld %f24,%f30,%f30 + faddd %f28,pp1,%f28 + + faddd %f6,qq1,%f6 + fmuld %f0,%f4,%f4 + add %l4,%g1,%l4 + + fmuld %f8,%f14,%f14 + + fmuld %f16,%f22,%f22 + + faddd %f30,qq1,%f30 + fmuld %f24,%f28,%f28 + add %l7,%g1,%l7 + + fmuld %f2,%f4,%f4 + + fmuld %f10,%f14,%f14 + ldd [%l5+8],%f8 + + fmuld %f18,%f22,%f22 + ldd [%l6+8],%f16 + + fmuld %f26,%f28,%f28 + + fmuld %f0,%f6,%f6 + faddd %f4,%f32,%f4 + ldd [%l4+16],%f0 + + fmuld %f8,%f12,%f12 + faddd %f34,%f14,%f14 + + fmuld %f16,%f20,%f20 + faddd %f36,%f22,%f22 + + fmuld %f24,%f30,%f30 + faddd %f28,%f38,%f28 + ldd [%l7+16],%f24 + + fmuld %f0,%f6,%f6 + faddd %f4,%f2,%f4 + ldd [%l4+8],%f32 + + faddd %f10,%f14,%f14 + ldd [%l5+16],%f34 + + faddd %f18,%f22,%f22 + ldd [%l6+16],%f36 + + fmuld %f24,%f30,%f30 + faddd %f28,%f26,%f28 + ldd [%l7+8],%f38 + + fmuld %f32,%f4,%f4 + + fmuld %f34,%f14,%f14 + + fmuld %f36,%f22,%f22 + + fmuld %f38,%f28,%f28 + + fsubd %f6,%f4,%f6 + + faddd %f14,%f12,%f14 + + faddd %f22,%f20,%f22 + + fsubd %f30,%f28,%f30 + + faddd %f6,%f0,%f6 + + faddd %f14,%f8,%f14 + + faddd %f22,%f16,%f22 + + faddd %f30,%f24,%f30 + mov %l0,%l4 + + fnegd %f6,%f4 + lda [%i1]%asi,%l0 ! 
preload next argument + + fnegd %f14,%f12 + lda [%i1]%asi,%f0 + + fnegd %f22,%f20 + lda [%i1+4]%asi,%f3 + + fnegd %f30,%f28 + andn %l0,%i5,%l0 + add %i1,%i2,%i1 + + andcc %l4,2,%g0 + fmovdnz %icc,%f4,%f6 + st %f6,[%o0] + + andcc %l1,2,%g0 + fmovdnz %icc,%f12,%f14 + st %f14,[%o1] + + andcc %l2,2,%g0 + fmovdnz %icc,%f20,%f22 + st %f22,[%o2] + + andcc %l3,2,%g0 + fmovdnz %icc,%f28,%f30 + st %f30,[%o3] + + addcc %i0,-1,%i0 + bg,pt %icc,.loop0 +! delay slot + st %f7,[%o0+4] + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case7: + fmuld %f8,pp3,%f14 ! sin(x1) + + fmuld %f16,pp3,%f22 ! sin(x2) + + fmuld %f24,pp3,%f30 ! sin(x3) + + fmuld %f0,qq3,%f6 ! cos(x0) + + faddd %f14,pp2,%f14 + fmuld %f8,qq2,%f12 + + faddd %f22,pp2,%f22 + fmuld %f16,qq2,%f20 + + faddd %f30,pp2,%f30 + fmuld %f24,qq2,%f28 + + faddd %f6,qq2,%f6 + fmuld %f0,pp2,%f4 + + fmuld %f8,%f14,%f14 + faddd %f12,qq1,%f12 + + fmuld %f16,%f22,%f22 + faddd %f20,qq1,%f20 + + fmuld %f24,%f30,%f30 + faddd %f28,qq1,%f28 + + fmuld %f0,%f6,%f6 + faddd %f4,pp1,%f4 + + faddd %f14,pp1,%f14 + fmuld %f8,%f12,%f12 + add %l5,%g1,%l5 + + faddd %f22,pp1,%f22 + fmuld %f16,%f20,%f20 + add %l6,%g1,%l6 + + faddd %f30,pp1,%f30 + fmuld %f24,%f28,%f28 + add %l7,%g1,%l7 + + faddd %f6,qq1,%f6 + fmuld %f0,%f4,%f4 + add %l4,%g1,%l4 + + fmuld %f8,%f14,%f14 + + fmuld %f16,%f22,%f22 + + fmuld %f24,%f30,%f30 + + fmuld %f2,%f4,%f4 + + fmuld %f10,%f14,%f14 + ldd [%l5+8],%f8 + + fmuld %f18,%f22,%f22 + ldd [%l6+8],%f16 + + fmuld %f26,%f30,%f30 + ldd [%l7+8],%f24 + + fmuld %f0,%f6,%f6 + faddd %f4,%f32,%f4 + ldd [%l4+16],%f0 + + fmuld %f8,%f12,%f12 + faddd %f34,%f14,%f14 + + fmuld %f16,%f20,%f20 + faddd %f36,%f22,%f22 + + fmuld %f24,%f28,%f28 + faddd %f38,%f30,%f30 + + fmuld %f0,%f6,%f6 + faddd %f4,%f2,%f4 + ldd [%l4+8],%f32 + + faddd %f10,%f14,%f14 + ldd [%l5+16],%f34 + + faddd %f18,%f22,%f22 + ldd [%l6+16],%f36 + + faddd %f26,%f30,%f30 + ldd [%l7+16],%f38 + + fmuld %f32,%f4,%f4 + + fmuld %f34,%f14,%f14 + + fmuld %f36,%f22,%f22 + + fmuld 
%f38,%f30,%f30 + + fsubd %f6,%f4,%f6 + + faddd %f14,%f12,%f14 + + faddd %f22,%f20,%f22 + + faddd %f30,%f28,%f30 + + faddd %f6,%f0,%f6 + + faddd %f14,%f8,%f14 + + faddd %f22,%f16,%f22 + + faddd %f30,%f24,%f30 + mov %l0,%l4 + + fnegd %f6,%f4 + lda [%i1]%asi,%l0 ! preload next argument + + fnegd %f14,%f12 + lda [%i1]%asi,%f0 + + fnegd %f22,%f20 + lda [%i1+4]%asi,%f3 + + fnegd %f30,%f28 + andn %l0,%i5,%l0 + add %i1,%i2,%i1 + + andcc %l4,2,%g0 + fmovdnz %icc,%f4,%f6 + st %f6,[%o0] + + andcc %l1,2,%g0 + fmovdnz %icc,%f12,%f14 + st %f14,[%o1] + + andcc %l2,2,%g0 + fmovdnz %icc,%f20,%f22 + st %f22,[%o2] + + andcc %l3,2,%g0 + fmovdnz %icc,%f28,%f30 + st %f30,[%o3] + + addcc %i0,-1,%i0 + bg,pt %icc,.loop0 +! delay slot + st %f7,[%o0+4] + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case8: + fmuld %f10,%f10,%f8 + andcc %l1,1,%g0 + bz,pn %icc,.case12 +! delay slot + fxor %f14,%f34,%f34 + + fmuld %f18,%f18,%f16 + andcc %l2,1,%g0 + bz,pn %icc,.case10 +! delay slot + fxor %f22,%f36,%f36 + + fmuld %f26,%f26,%f24 + andcc %l3,1,%g0 + bz,pn %icc,.case9 +! delay slot + fxor %f30,%f38,%f38 + + fmuld %f0,pp3,%f6 ! sin(x0) + + faddd %f6,pp2,%f6 + fmuld %f0,qq2,%f4 + + fmuld %f8,qq3,%f14 ! cos(x1) + + fmuld %f16,qq3,%f22 ! cos(x2) + + fmuld %f24,qq3,%f30 ! 
cos(x3) + + fmuld %f0,%f6,%f6 + faddd %f4,qq1,%f4 + + faddd %f14,qq2,%f14 + fmuld %f8,pp2,%f12 + + faddd %f22,qq2,%f22 + fmuld %f16,pp2,%f20 + + faddd %f30,qq2,%f30 + fmuld %f24,pp2,%f28 + + faddd %f6,pp1,%f6 + fmuld %f0,%f4,%f4 + add %l4,%g1,%l4 + + fmuld %f8,%f14,%f14 + faddd %f12,pp1,%f12 + + fmuld %f16,%f22,%f22 + faddd %f20,pp1,%f20 + + fmuld %f24,%f30,%f30 + faddd %f28,pp1,%f28 + + fmuld %f0,%f6,%f6 + + faddd %f14,qq1,%f14 + fmuld %f8,%f12,%f12 + add %l5,%g1,%l5 + + faddd %f22,qq1,%f22 + fmuld %f16,%f20,%f20 + add %l6,%g1,%l6 + + faddd %f30,qq1,%f30 + fmuld %f24,%f28,%f28 + add %l7,%g1,%l7 + + fmuld %f2,%f6,%f6 + ldd [%l4+8],%f0 + + fmuld %f10,%f12,%f12 + + fmuld %f18,%f20,%f20 + + fmuld %f26,%f28,%f28 + + fmuld %f0,%f4,%f4 + faddd %f32,%f6,%f6 + + fmuld %f8,%f14,%f14 + faddd %f12,%f34,%f12 + ldd [%l5+16],%f8 + + fmuld %f16,%f22,%f22 + faddd %f20,%f36,%f20 + ldd [%l6+16],%f16 + + fmuld %f24,%f30,%f30 + faddd %f28,%f38,%f28 + ldd [%l7+16],%f24 + + faddd %f2,%f6,%f6 + ldd [%l4+16],%f32 + + fmuld %f8,%f14,%f14 + faddd %f12,%f10,%f12 + ldd [%l5+8],%f34 + + fmuld %f16,%f22,%f22 + faddd %f20,%f18,%f20 + ldd [%l6+8],%f36 + + fmuld %f24,%f30,%f30 + faddd %f28,%f26,%f28 + ldd [%l7+8],%f38 + + fmuld %f32,%f6,%f6 + + fmuld %f34,%f12,%f12 + + fmuld %f36,%f20,%f20 + + fmuld %f38,%f28,%f28 + + faddd %f6,%f4,%f6 + + fsubd %f14,%f12,%f14 + + fsubd %f22,%f20,%f22 + + fsubd %f30,%f28,%f30 + + faddd %f6,%f0,%f6 + + faddd %f14,%f8,%f14 + + faddd %f22,%f16,%f22 + + faddd %f30,%f24,%f30 + mov %l0,%l4 + + fnegd %f6,%f4 + lda [%i1]%asi,%l0 ! preload next argument + + fnegd %f14,%f12 + lda [%i1]%asi,%f0 + + fnegd %f22,%f20 + lda [%i1+4]%asi,%f3 + + fnegd %f30,%f28 + andn %l0,%i5,%l0 + add %i1,%i2,%i1 + + andcc %l4,2,%g0 + fmovdnz %icc,%f4,%f6 + st %f6,[%o0] + + andcc %l1,2,%g0 + fmovdnz %icc,%f12,%f14 + st %f14,[%o1] + + andcc %l2,2,%g0 + fmovdnz %icc,%f20,%f22 + st %f22,[%o2] + + andcc %l3,2,%g0 + fmovdnz %icc,%f28,%f30 + st %f30,[%o3] + + addcc %i0,-1,%i0 + bg,pt %icc,.loop0 +! 
delay slot + st %f7,[%o0+4] + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case9: + fmuld %f0,pp3,%f6 ! sin(x0) + + fmuld %f24,pp3,%f30 ! sin(x3) + + faddd %f6,pp2,%f6 + fmuld %f0,qq2,%f4 + + fmuld %f8,qq3,%f14 ! cos(x1) + + fmuld %f16,qq3,%f22 ! cos(x2) + + faddd %f30,pp2,%f30 + fmuld %f24,qq2,%f28 + + fmuld %f0,%f6,%f6 + faddd %f4,qq1,%f4 + + faddd %f14,qq2,%f14 + fmuld %f8,pp2,%f12 + + faddd %f22,qq2,%f22 + fmuld %f16,pp2,%f20 + + fmuld %f24,%f30,%f30 + faddd %f28,qq1,%f28 + + faddd %f6,pp1,%f6 + fmuld %f0,%f4,%f4 + add %l4,%g1,%l4 + + fmuld %f8,%f14,%f14 + faddd %f12,pp1,%f12 + + fmuld %f16,%f22,%f22 + faddd %f20,pp1,%f20 + + faddd %f30,pp1,%f30 + fmuld %f24,%f28,%f28 + add %l7,%g1,%l7 + + fmuld %f0,%f6,%f6 + + faddd %f14,qq1,%f14 + fmuld %f8,%f12,%f12 + add %l5,%g1,%l5 + + faddd %f22,qq1,%f22 + fmuld %f16,%f20,%f20 + add %l6,%g1,%l6 + + fmuld %f24,%f30,%f30 + + fmuld %f2,%f6,%f6 + ldd [%l4+8],%f0 + + fmuld %f10,%f12,%f12 + + fmuld %f18,%f20,%f20 + + fmuld %f26,%f30,%f30 + ldd [%l7+8],%f24 + + fmuld %f0,%f4,%f4 + faddd %f32,%f6,%f6 + + fmuld %f8,%f14,%f14 + faddd %f12,%f34,%f12 + ldd [%l5+16],%f8 + + fmuld %f16,%f22,%f22 + faddd %f20,%f36,%f20 + ldd [%l6+16],%f16 + + fmuld %f24,%f28,%f28 + faddd %f38,%f30,%f30 + + faddd %f2,%f6,%f6 + ldd [%l4+16],%f32 + + fmuld %f8,%f14,%f14 + faddd %f12,%f10,%f12 + ldd [%l5+8],%f34 + + fmuld %f16,%f22,%f22 + faddd %f20,%f18,%f20 + ldd [%l6+8],%f36 + + faddd %f26,%f30,%f30 + ldd [%l7+16],%f38 + + fmuld %f32,%f6,%f6 + + fmuld %f34,%f12,%f12 + + fmuld %f36,%f20,%f20 + + fmuld %f38,%f30,%f30 + + faddd %f6,%f4,%f6 + + fsubd %f14,%f12,%f14 + + fsubd %f22,%f20,%f22 + + faddd %f30,%f28,%f30 + + faddd %f6,%f0,%f6 + + faddd %f14,%f8,%f14 + + faddd %f22,%f16,%f22 + + faddd %f30,%f24,%f30 + mov %l0,%l4 + + fnegd %f6,%f4 + lda [%i1]%asi,%l0 ! 
preload next argument + + fnegd %f14,%f12 + lda [%i1]%asi,%f0 + + fnegd %f22,%f20 + lda [%i1+4]%asi,%f3 + + fnegd %f30,%f28 + andn %l0,%i5,%l0 + add %i1,%i2,%i1 + + andcc %l4,2,%g0 + fmovdnz %icc,%f4,%f6 + st %f6,[%o0] + + andcc %l1,2,%g0 + fmovdnz %icc,%f12,%f14 + st %f14,[%o1] + + andcc %l2,2,%g0 + fmovdnz %icc,%f20,%f22 + st %f22,[%o2] + + andcc %l3,2,%g0 + fmovdnz %icc,%f28,%f30 + st %f30,[%o3] + + addcc %i0,-1,%i0 + bg,pt %icc,.loop0 +! delay slot + st %f7,[%o0+4] + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case10: + fmuld %f26,%f26,%f24 + andcc %l3,1,%g0 + bz,pn %icc,.case11 +! delay slot + fxor %f30,%f38,%f38 + + fmuld %f0,pp3,%f6 ! sin(x0) + + fmuld %f16,pp3,%f22 ! sin(x2) + + faddd %f6,pp2,%f6 + fmuld %f0,qq2,%f4 + + fmuld %f8,qq3,%f14 ! cos(x1) + + faddd %f22,pp2,%f22 + fmuld %f16,qq2,%f20 + + fmuld %f24,qq3,%f30 ! cos(x3) + + fmuld %f0,%f6,%f6 + faddd %f4,qq1,%f4 + + faddd %f14,qq2,%f14 + fmuld %f8,pp2,%f12 + + fmuld %f16,%f22,%f22 + faddd %f20,qq1,%f20 + + faddd %f30,qq2,%f30 + fmuld %f24,pp2,%f28 + + faddd %f6,pp1,%f6 + fmuld %f0,%f4,%f4 + add %l4,%g1,%l4 + + fmuld %f8,%f14,%f14 + faddd %f12,pp1,%f12 + + faddd %f22,pp1,%f22 + fmuld %f16,%f20,%f20 + add %l6,%g1,%l6 + + fmuld %f24,%f30,%f30 + faddd %f28,pp1,%f28 + + fmuld %f0,%f6,%f6 + + faddd %f14,qq1,%f14 + fmuld %f8,%f12,%f12 + add %l5,%g1,%l5 + + fmuld %f16,%f22,%f22 + + faddd %f30,qq1,%f30 + fmuld %f24,%f28,%f28 + add %l7,%g1,%l7 + + fmuld %f2,%f6,%f6 + ldd [%l4+8],%f0 + + fmuld %f10,%f12,%f12 + + fmuld %f18,%f22,%f22 + ldd [%l6+8],%f16 + + fmuld %f26,%f28,%f28 + + fmuld %f0,%f4,%f4 + faddd %f32,%f6,%f6 + + fmuld %f8,%f14,%f14 + faddd %f12,%f34,%f12 + ldd [%l5+16],%f8 + + fmuld %f16,%f20,%f20 + faddd %f36,%f22,%f22 + + fmuld %f24,%f30,%f30 + faddd %f28,%f38,%f28 + ldd [%l7+16],%f24 + + faddd %f2,%f6,%f6 + ldd [%l4+16],%f32 + + fmuld %f8,%f14,%f14 + faddd %f12,%f10,%f12 + ldd [%l5+8],%f34 + + faddd %f18,%f22,%f22 + ldd [%l6+16],%f36 + + fmuld %f24,%f30,%f30 + faddd %f28,%f26,%f28 + ldd 
[%l7+8],%f38 + + fmuld %f32,%f6,%f6 + + fmuld %f34,%f12,%f12 + + fmuld %f36,%f22,%f22 + + fmuld %f38,%f28,%f28 + + faddd %f6,%f4,%f6 + + fsubd %f14,%f12,%f14 + + faddd %f22,%f20,%f22 + + fsubd %f30,%f28,%f30 + + faddd %f6,%f0,%f6 + + faddd %f14,%f8,%f14 + + faddd %f22,%f16,%f22 + + faddd %f30,%f24,%f30 + mov %l0,%l4 + + fnegd %f6,%f4 + lda [%i1]%asi,%l0 ! preload next argument + + fnegd %f14,%f12 + lda [%i1]%asi,%f0 + + fnegd %f22,%f20 + lda [%i1+4]%asi,%f3 + + fnegd %f30,%f28 + andn %l0,%i5,%l0 + add %i1,%i2,%i1 + + andcc %l4,2,%g0 + fmovdnz %icc,%f4,%f6 + st %f6,[%o0] + + andcc %l1,2,%g0 + fmovdnz %icc,%f12,%f14 + st %f14,[%o1] + + andcc %l2,2,%g0 + fmovdnz %icc,%f20,%f22 + st %f22,[%o2] + + andcc %l3,2,%g0 + fmovdnz %icc,%f28,%f30 + st %f30,[%o3] + + addcc %i0,-1,%i0 + bg,pt %icc,.loop0 +! delay slot + st %f7,[%o0+4] + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case11: + fmuld %f0,pp3,%f6 ! sin(x0) + + fmuld %f16,pp3,%f22 ! sin(x2) + + fmuld %f24,pp3,%f30 ! sin(x3) + + faddd %f6,pp2,%f6 + fmuld %f0,qq2,%f4 + + fmuld %f8,qq3,%f14 ! 
cos(x1) + + faddd %f22,pp2,%f22 + fmuld %f16,qq2,%f20 + + faddd %f30,pp2,%f30 + fmuld %f24,qq2,%f28 + + fmuld %f0,%f6,%f6 + faddd %f4,qq1,%f4 + + faddd %f14,qq2,%f14 + fmuld %f8,pp2,%f12 + + fmuld %f16,%f22,%f22 + faddd %f20,qq1,%f20 + + fmuld %f24,%f30,%f30 + faddd %f28,qq1,%f28 + + faddd %f6,pp1,%f6 + fmuld %f0,%f4,%f4 + add %l4,%g1,%l4 + + fmuld %f8,%f14,%f14 + faddd %f12,pp1,%f12 + + faddd %f22,pp1,%f22 + fmuld %f16,%f20,%f20 + add %l6,%g1,%l6 + + faddd %f30,pp1,%f30 + fmuld %f24,%f28,%f28 + add %l7,%g1,%l7 + + fmuld %f0,%f6,%f6 + + faddd %f14,qq1,%f14 + fmuld %f8,%f12,%f12 + add %l5,%g1,%l5 + + fmuld %f16,%f22,%f22 + + fmuld %f24,%f30,%f30 + + fmuld %f2,%f6,%f6 + ldd [%l4+8],%f0 + + fmuld %f10,%f12,%f12 + + fmuld %f18,%f22,%f22 + ldd [%l6+8],%f16 + + fmuld %f26,%f30,%f30 + ldd [%l7+8],%f24 + + fmuld %f0,%f4,%f4 + faddd %f32,%f6,%f6 + + fmuld %f8,%f14,%f14 + faddd %f12,%f34,%f12 + ldd [%l5+16],%f8 + + fmuld %f16,%f20,%f20 + faddd %f36,%f22,%f22 + + fmuld %f24,%f28,%f28 + faddd %f38,%f30,%f30 + + faddd %f2,%f6,%f6 + ldd [%l4+16],%f32 + + fmuld %f8,%f14,%f14 + faddd %f12,%f10,%f12 + ldd [%l5+8],%f34 + + faddd %f18,%f22,%f22 + ldd [%l6+16],%f36 + + faddd %f26,%f30,%f30 + ldd [%l7+16],%f38 + + fmuld %f32,%f6,%f6 + + fmuld %f34,%f12,%f12 + + fmuld %f36,%f22,%f22 + + fmuld %f38,%f30,%f30 + + faddd %f6,%f4,%f6 + + fsubd %f14,%f12,%f14 + + faddd %f22,%f20,%f22 + + faddd %f30,%f28,%f30 + + faddd %f6,%f0,%f6 + + faddd %f14,%f8,%f14 + + faddd %f22,%f16,%f22 + + faddd %f30,%f24,%f30 + mov %l0,%l4 + + fnegd %f6,%f4 + lda [%i1]%asi,%l0 ! preload next argument + + fnegd %f14,%f12 + lda [%i1]%asi,%f0 + + fnegd %f22,%f20 + lda [%i1+4]%asi,%f3 + + fnegd %f30,%f28 + andn %l0,%i5,%l0 + add %i1,%i2,%i1 + + andcc %l4,2,%g0 + fmovdnz %icc,%f4,%f6 + st %f6,[%o0] + + andcc %l1,2,%g0 + fmovdnz %icc,%f12,%f14 + st %f14,[%o1] + + andcc %l2,2,%g0 + fmovdnz %icc,%f20,%f22 + st %f22,[%o2] + + andcc %l3,2,%g0 + fmovdnz %icc,%f28,%f30 + st %f30,[%o3] + + addcc %i0,-1,%i0 + bg,pt %icc,.loop0 +! 
delay slot + st %f7,[%o0+4] + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case12: + fmuld %f18,%f18,%f16 + andcc %l2,1,%g0 + bz,pn %icc,.case14 +! delay slot + fxor %f22,%f36,%f36 + + fmuld %f26,%f26,%f24 + andcc %l3,1,%g0 + bz,pn %icc,.case13 +! delay slot + fxor %f30,%f38,%f38 + + fmuld %f0,pp3,%f6 ! sin(x0) + + fmuld %f8,pp3,%f14 ! sin(x1) + + faddd %f6,pp2,%f6 + fmuld %f0,qq2,%f4 + + faddd %f14,pp2,%f14 + fmuld %f8,qq2,%f12 + + fmuld %f16,qq3,%f22 ! cos(x2) + + fmuld %f24,qq3,%f30 ! cos(x3) + + fmuld %f0,%f6,%f6 + faddd %f4,qq1,%f4 + + fmuld %f8,%f14,%f14 + faddd %f12,qq1,%f12 + + faddd %f22,qq2,%f22 + fmuld %f16,pp2,%f20 + + faddd %f30,qq2,%f30 + fmuld %f24,pp2,%f28 + + faddd %f6,pp1,%f6 + fmuld %f0,%f4,%f4 + add %l4,%g1,%l4 + + faddd %f14,pp1,%f14 + fmuld %f8,%f12,%f12 + add %l5,%g1,%l5 + + fmuld %f16,%f22,%f22 + faddd %f20,pp1,%f20 + + fmuld %f24,%f30,%f30 + faddd %f28,pp1,%f28 + + fmuld %f0,%f6,%f6 + + fmuld %f8,%f14,%f14 + + faddd %f22,qq1,%f22 + fmuld %f16,%f20,%f20 + add %l6,%g1,%l6 + + faddd %f30,qq1,%f30 + fmuld %f24,%f28,%f28 + add %l7,%g1,%l7 + + fmuld %f2,%f6,%f6 + ldd [%l4+8],%f0 + + fmuld %f10,%f14,%f14 + ldd [%l5+8],%f8 + + fmuld %f18,%f20,%f20 + + fmuld %f26,%f28,%f28 + + fmuld %f0,%f4,%f4 + faddd %f32,%f6,%f6 + + fmuld %f8,%f12,%f12 + faddd %f34,%f14,%f14 + + fmuld %f16,%f22,%f22 + faddd %f20,%f36,%f20 + ldd [%l6+16],%f16 + + fmuld %f24,%f30,%f30 + faddd %f28,%f38,%f28 + ldd [%l7+16],%f24 + + faddd %f2,%f6,%f6 + ldd [%l4+16],%f32 + + faddd %f10,%f14,%f14 + ldd [%l5+16],%f34 + + fmuld %f16,%f22,%f22 + faddd %f20,%f18,%f20 + ldd [%l6+8],%f36 + + fmuld %f24,%f30,%f30 + faddd %f28,%f26,%f28 + ldd [%l7+8],%f38 + + fmuld %f32,%f6,%f6 + + fmuld %f34,%f14,%f14 + + fmuld %f36,%f20,%f20 + + fmuld %f38,%f28,%f28 + + faddd %f6,%f4,%f6 + + faddd %f14,%f12,%f14 + + fsubd %f22,%f20,%f22 + + fsubd %f30,%f28,%f30 + + faddd %f6,%f0,%f6 + + faddd %f14,%f8,%f14 + + faddd %f22,%f16,%f22 + + faddd %f30,%f24,%f30 + mov %l0,%l4 + + fnegd %f6,%f4 + lda 
[%i1]%asi,%l0 ! preload next argument + + fnegd %f14,%f12 + lda [%i1]%asi,%f0 + + fnegd %f22,%f20 + lda [%i1+4]%asi,%f3 + + fnegd %f30,%f28 + andn %l0,%i5,%l0 + add %i1,%i2,%i1 + + andcc %l4,2,%g0 + fmovdnz %icc,%f4,%f6 + st %f6,[%o0] + + andcc %l1,2,%g0 + fmovdnz %icc,%f12,%f14 + st %f14,[%o1] + + andcc %l2,2,%g0 + fmovdnz %icc,%f20,%f22 + st %f22,[%o2] + + andcc %l3,2,%g0 + fmovdnz %icc,%f28,%f30 + st %f30,[%o3] + + addcc %i0,-1,%i0 + bg,pt %icc,.loop0 +! delay slot + st %f7,[%o0+4] + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case13: + fmuld %f0,pp3,%f6 ! sin(x0) + + fmuld %f8,pp3,%f14 ! sin(x1) + + fmuld %f24,pp3,%f30 ! sin(x3) + + faddd %f6,pp2,%f6 + fmuld %f0,qq2,%f4 + + faddd %f14,pp2,%f14 + fmuld %f8,qq2,%f12 + + fmuld %f16,qq3,%f22 ! cos(x2) + + faddd %f30,pp2,%f30 + fmuld %f24,qq2,%f28 + + fmuld %f0,%f6,%f6 + faddd %f4,qq1,%f4 + + fmuld %f8,%f14,%f14 + faddd %f12,qq1,%f12 + + faddd %f22,qq2,%f22 + fmuld %f16,pp2,%f20 + + fmuld %f24,%f30,%f30 + faddd %f28,qq1,%f28 + + faddd %f6,pp1,%f6 + fmuld %f0,%f4,%f4 + add %l4,%g1,%l4 + + faddd %f14,pp1,%f14 + fmuld %f8,%f12,%f12 + add %l5,%g1,%l5 + + fmuld %f16,%f22,%f22 + faddd %f20,pp1,%f20 + + faddd %f30,pp1,%f30 + fmuld %f24,%f28,%f28 + add %l7,%g1,%l7 + + fmuld %f0,%f6,%f6 + + fmuld %f8,%f14,%f14 + + faddd %f22,qq1,%f22 + fmuld %f16,%f20,%f20 + add %l6,%g1,%l6 + + fmuld %f24,%f30,%f30 + + fmuld %f2,%f6,%f6 + ldd [%l4+8],%f0 + + fmuld %f10,%f14,%f14 + ldd [%l5+8],%f8 + + fmuld %f18,%f20,%f20 + + fmuld %f26,%f30,%f30 + ldd [%l7+8],%f24 + + fmuld %f0,%f4,%f4 + faddd %f32,%f6,%f6 + + fmuld %f8,%f12,%f12 + faddd %f34,%f14,%f14 + + fmuld %f16,%f22,%f22 + faddd %f20,%f36,%f20 + ldd [%l6+16],%f16 + + fmuld %f24,%f28,%f28 + faddd %f38,%f30,%f30 + + faddd %f2,%f6,%f6 + ldd [%l4+16],%f32 + + faddd %f10,%f14,%f14 + ldd [%l5+16],%f34 + + fmuld %f16,%f22,%f22 + faddd %f20,%f18,%f20 + ldd [%l6+8],%f36 + + faddd %f26,%f30,%f30 + ldd [%l7+16],%f38 + + fmuld %f32,%f6,%f6 + + fmuld %f34,%f14,%f14 + + fmuld 
%f36,%f20,%f20 + + fmuld %f38,%f30,%f30 + + faddd %f6,%f4,%f6 + + faddd %f14,%f12,%f14 + + fsubd %f22,%f20,%f22 + + faddd %f30,%f28,%f30 + + faddd %f6,%f0,%f6 + + faddd %f14,%f8,%f14 + + faddd %f22,%f16,%f22 + + faddd %f30,%f24,%f30 + mov %l0,%l4 + + fnegd %f6,%f4 + lda [%i1]%asi,%l0 ! preload next argument + + fnegd %f14,%f12 + lda [%i1]%asi,%f0 + + fnegd %f22,%f20 + lda [%i1+4]%asi,%f3 + + fnegd %f30,%f28 + andn %l0,%i5,%l0 + add %i1,%i2,%i1 + + andcc %l4,2,%g0 + fmovdnz %icc,%f4,%f6 + st %f6,[%o0] + + andcc %l1,2,%g0 + fmovdnz %icc,%f12,%f14 + st %f14,[%o1] + + andcc %l2,2,%g0 + fmovdnz %icc,%f20,%f22 + st %f22,[%o2] + + andcc %l3,2,%g0 + fmovdnz %icc,%f28,%f30 + st %f30,[%o3] + + addcc %i0,-1,%i0 + bg,pt %icc,.loop0 +! delay slot + st %f7,[%o0+4] + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case14: + fmuld %f26,%f26,%f24 + andcc %l3,1,%g0 + bz,pn %icc,.case15 +! delay slot + fxor %f30,%f38,%f38 + + fmuld %f0,pp3,%f6 ! sin(x0) + + fmuld %f8,pp3,%f14 ! sin(x1) + + fmuld %f16,pp3,%f22 ! sin(x2) + + faddd %f6,pp2,%f6 + fmuld %f0,qq2,%f4 + + faddd %f14,pp2,%f14 + fmuld %f8,qq2,%f12 + + faddd %f22,pp2,%f22 + fmuld %f16,qq2,%f20 + + fmuld %f24,qq3,%f30 ! 
cos(x3) + + fmuld %f0,%f6,%f6 + faddd %f4,qq1,%f4 + + fmuld %f8,%f14,%f14 + faddd %f12,qq1,%f12 + + fmuld %f16,%f22,%f22 + faddd %f20,qq1,%f20 + + faddd %f30,qq2,%f30 + fmuld %f24,pp2,%f28 + + faddd %f6,pp1,%f6 + fmuld %f0,%f4,%f4 + add %l4,%g1,%l4 + + faddd %f14,pp1,%f14 + fmuld %f8,%f12,%f12 + add %l5,%g1,%l5 + + faddd %f22,pp1,%f22 + fmuld %f16,%f20,%f20 + add %l6,%g1,%l6 + + fmuld %f24,%f30,%f30 + faddd %f28,pp1,%f28 + + fmuld %f0,%f6,%f6 + + fmuld %f8,%f14,%f14 + + fmuld %f16,%f22,%f22 + + faddd %f30,qq1,%f30 + fmuld %f24,%f28,%f28 + add %l7,%g1,%l7 + + fmuld %f2,%f6,%f6 + ldd [%l4+8],%f0 + + fmuld %f10,%f14,%f14 + ldd [%l5+8],%f8 + + fmuld %f18,%f22,%f22 + ldd [%l6+8],%f16 + + fmuld %f26,%f28,%f28 + + fmuld %f0,%f4,%f4 + faddd %f32,%f6,%f6 + + fmuld %f8,%f12,%f12 + faddd %f34,%f14,%f14 + + fmuld %f16,%f20,%f20 + faddd %f36,%f22,%f22 + + fmuld %f24,%f30,%f30 + faddd %f28,%f38,%f28 + ldd [%l7+16],%f24 + + faddd %f2,%f6,%f6 + ldd [%l4+16],%f32 + + faddd %f10,%f14,%f14 + ldd [%l5+16],%f34 + + faddd %f18,%f22,%f22 + ldd [%l6+16],%f36 + + fmuld %f24,%f30,%f30 + faddd %f28,%f26,%f28 + ldd [%l7+8],%f38 + + fmuld %f32,%f6,%f6 + + fmuld %f34,%f14,%f14 + + fmuld %f36,%f22,%f22 + + fmuld %f38,%f28,%f28 + + faddd %f6,%f4,%f6 + + faddd %f14,%f12,%f14 + + faddd %f22,%f20,%f22 + + fsubd %f30,%f28,%f30 + + faddd %f6,%f0,%f6 + + faddd %f14,%f8,%f14 + + faddd %f22,%f16,%f22 + + faddd %f30,%f24,%f30 + mov %l0,%l4 + + fnegd %f6,%f4 + lda [%i1]%asi,%l0 ! preload next argument + + fnegd %f14,%f12 + lda [%i1]%asi,%f0 + + fnegd %f22,%f20 + lda [%i1+4]%asi,%f3 + + fnegd %f30,%f28 + andn %l0,%i5,%l0 + add %i1,%i2,%i1 + + andcc %l4,2,%g0 + fmovdnz %icc,%f4,%f6 + st %f6,[%o0] + + andcc %l1,2,%g0 + fmovdnz %icc,%f12,%f14 + st %f14,[%o1] + + andcc %l2,2,%g0 + fmovdnz %icc,%f20,%f22 + st %f22,[%o2] + + andcc %l3,2,%g0 + fmovdnz %icc,%f28,%f30 + st %f30,[%o3] + + addcc %i0,-1,%i0 + bg,pt %icc,.loop0 +! delay slot + st %f7,[%o0+4] + + ba,pt %icc,.end +! 
delay slot + nop + + .align 16 +.case15: + fmuld %f0,pp3,%f6 ! sin(x0) + + fmuld %f8,pp3,%f14 ! sin(x1) + + fmuld %f16,pp3,%f22 ! sin(x2) + + fmuld %f24,pp3,%f30 ! sin(x3) + + faddd %f6,pp2,%f6 + fmuld %f0,qq2,%f4 + + faddd %f14,pp2,%f14 + fmuld %f8,qq2,%f12 + + faddd %f22,pp2,%f22 + fmuld %f16,qq2,%f20 + + faddd %f30,pp2,%f30 + fmuld %f24,qq2,%f28 + + fmuld %f0,%f6,%f6 + faddd %f4,qq1,%f4 + + fmuld %f8,%f14,%f14 + faddd %f12,qq1,%f12 + + fmuld %f16,%f22,%f22 + faddd %f20,qq1,%f20 + + fmuld %f24,%f30,%f30 + faddd %f28,qq1,%f28 + + faddd %f6,pp1,%f6 + fmuld %f0,%f4,%f4 + add %l4,%g1,%l4 + + faddd %f14,pp1,%f14 + fmuld %f8,%f12,%f12 + add %l5,%g1,%l5 + + faddd %f22,pp1,%f22 + fmuld %f16,%f20,%f20 + add %l6,%g1,%l6 + + faddd %f30,pp1,%f30 + fmuld %f24,%f28,%f28 + add %l7,%g1,%l7 + + fmuld %f0,%f6,%f6 + + fmuld %f8,%f14,%f14 + + fmuld %f16,%f22,%f22 + + fmuld %f24,%f30,%f30 + + fmuld %f2,%f6,%f6 + ldd [%l4+8],%f0 + + fmuld %f10,%f14,%f14 + ldd [%l5+8],%f8 + + fmuld %f18,%f22,%f22 + ldd [%l6+8],%f16 + + fmuld %f26,%f30,%f30 + ldd [%l7+8],%f24 + + fmuld %f0,%f4,%f4 + faddd %f32,%f6,%f6 + + fmuld %f8,%f12,%f12 + faddd %f34,%f14,%f14 + + fmuld %f16,%f20,%f20 + faddd %f36,%f22,%f22 + + fmuld %f24,%f28,%f28 + faddd %f38,%f30,%f30 + + faddd %f2,%f6,%f6 + ldd [%l4+16],%f32 + + faddd %f10,%f14,%f14 + ldd [%l5+16],%f34 + + faddd %f18,%f22,%f22 + ldd [%l6+16],%f36 + + faddd %f26,%f30,%f30 + ldd [%l7+16],%f38 + + fmuld %f32,%f6,%f6 + + fmuld %f34,%f14,%f14 + + fmuld %f36,%f22,%f22 + + fmuld %f38,%f30,%f30 + + faddd %f6,%f4,%f6 + + faddd %f14,%f12,%f14 + + faddd %f22,%f20,%f22 + + faddd %f30,%f28,%f30 + + faddd %f6,%f0,%f6 + + faddd %f14,%f8,%f14 + + faddd %f22,%f16,%f22 + + faddd %f30,%f24,%f30 + mov %l0,%l4 + + fnegd %f6,%f4 + lda [%i1]%asi,%l0 ! 
preload next argument + + fnegd %f14,%f12 + lda [%i1]%asi,%f0 + + fnegd %f22,%f20 + lda [%i1+4]%asi,%f3 + + fnegd %f30,%f28 + andn %l0,%i5,%l0 + add %i1,%i2,%i1 + + andcc %l4,2,%g0 + fmovdnz %icc,%f4,%f6 + st %f6,[%o0] + + andcc %l1,2,%g0 + fmovdnz %icc,%f12,%f14 + st %f14,[%o1] + + andcc %l2,2,%g0 + fmovdnz %icc,%f20,%f22 + st %f22,[%o2] + + andcc %l3,2,%g0 + fmovdnz %icc,%f28,%f30 + st %f30,[%o3] + + addcc %i0,-1,%i0 + bg,pt %icc,.loop0 +! delay slot + st %f7,[%o0+4] + + ba,pt %icc,.end +! delay slot + nop + + + .align 16 +.end: + st %f15,[%o1+4] + st %f23,[%o2+4] + st %f31,[%o3+4] + ld [%fp+biguns],%i5 + tst %i5 ! check for huge arguments remaining + be,pt %icc,.exit +! delay slot + nop +#ifdef __sparcv9 + ldx [%fp+xsave],%o1 + ldx [%fp+ysave],%o3 +#else + ld [%fp+xsave],%o1 + ld [%fp+ysave],%o3 +#endif + ld [%fp+nsave],%o0 + ld [%fp+sxsave],%o2 + ld [%fp+sysave],%o4 + sra %o2,0,%o2 ! sign-extend for V9 + sra %o4,0,%o4 + call __vlibm_vsin_big_ultra3 + sra %o5,0,%o5 ! delay slot + +.exit: + ret + restore + + + .align 16 +.last1: + faddd %f2,c3two44,%f4 + st %f15,[%o1+4] +.last1_from_range1: + mov 0,%l1 + fzeros %f8 + fzero %f10 + add %fp,junk,%o1 +.last2: + faddd %f10,c3two44,%f12 + st %f23,[%o2+4] +.last2_from_range2: + mov 0,%l2 + fzeros %f16 + fzero %f18 + add %fp,junk,%o2 +.last3: + faddd %f18,c3two44,%f20 + st %f31,[%o3+4] + st %f5,[%fp+nk0] + st %f13,[%fp+nk1] +.last3_from_range3: + mov 0,%l3 + fzeros %f24 + fzero %f26 + ba,pt %icc,.cont +! delay slot + add %fp,junk,%o3 + + + .align 16 +.range0: + cmp %l0,%o4 + bl,pt %icc,1f ! hx < 0x3e400000 +! delay slot, harmless if branch taken + sethi %hi(0x7ff00000),%o7 + cmp %l0,%o7 + bl,a,pt %icc,2f ! branch if finite +! delay slot, squashed if branch not taken + st %o4,[%fp+biguns] ! set biguns + fzero %f0 + fmuld %f2,%f0,%f2 + st %f2,[%o0] + ba,pt %icc,2f +! delay slot + st %f3,[%o0+4] +1: + fdtoi %f2,%f4 ! raise inexact if not zero + st %f0,[%o0] + st %f3,[%o0+4] +2: + addcc %i0,-1,%i0 + ble,pn %icc,.end +! 
delay slot, harmless if branch taken + add %i3,%i4,%i3 ! y += stridey + andn %l1,%i5,%l0 ! hx &= ~0x80000000 + fmovs %f8,%f0 + fmovs %f11,%f3 + ba,pt %icc,.loop0 +! delay slot + add %i1,%i2,%i1 ! x += stridex + + + .align 16 +.range1: + cmp %l1,%o4 + bl,pt %icc,1f ! hx < 0x3e400000 +! delay slot, harmless if branch taken + sethi %hi(0x7ff00000),%o7 + cmp %l1,%o7 + bl,a,pt %icc,2f ! branch if finite +! delay slot, squashed if branch not taken + st %o4,[%fp+biguns] ! set biguns + fzero %f8 + fmuld %f10,%f8,%f10 + st %f10,[%o1] + ba,pt %icc,2f +! delay slot + st %f11,[%o1+4] +1: + fdtoi %f10,%f12 ! raise inexact if not zero + st %f8,[%o1] + st %f11,[%o1+4] +2: + addcc %i0,-1,%i0 + ble,pn %icc,.last1_from_range1 +! delay slot, harmless if branch taken + add %i3,%i4,%i3 ! y += stridey + andn %l2,%i5,%l1 ! hx &= ~0x80000000 + fmovs %f16,%f8 + fmovs %f19,%f11 + ba,pt %icc,.loop1 +! delay slot + add %i1,%i2,%i1 ! x += stridex + + + .align 16 +.range2: + cmp %l2,%o4 + bl,pt %icc,1f ! hx < 0x3e400000 +! delay slot, harmless if branch taken + sethi %hi(0x7ff00000),%o7 + cmp %l2,%o7 + bl,a,pt %icc,2f ! branch if finite +! delay slot, squashed if branch not taken + st %o4,[%fp+biguns] ! set biguns + fzero %f16 + fmuld %f18,%f16,%f18 + st %f18,[%o2] + ba,pt %icc,2f +! delay slot + st %f19,[%o2+4] +1: + fdtoi %f18,%f20 ! raise inexact if not zero + st %f16,[%o2] + st %f19,[%o2+4] +2: + addcc %i0,-1,%i0 + ble,pn %icc,.last2_from_range2 +! delay slot, harmless if branch taken + add %i3,%i4,%i3 ! y += stridey + andn %l3,%i5,%l2 ! hx &= ~0x80000000 + fmovs %f24,%f16 + fmovs %f27,%f19 + ba,pt %icc,.loop2 +! delay slot + add %i1,%i2,%i1 ! x += stridex + + + .align 16 +.range3: + cmp %l3,%o4 + bl,pt %icc,1f ! hx < 0x3e400000 +! delay slot, harmless if branch taken + sethi %hi(0x7ff00000),%o7 + cmp %l3,%o7 + bl,a,pt %icc,2f ! branch if finite +! delay slot, squashed if branch not taken + st %o4,[%fp+biguns] ! 
set biguns + fzero %f24 + fmuld %f26,%f24,%f26 + st %f26,[%o3] + ba,pt %icc,2f +! delay slot + st %f27,[%o3+4] +1: + fdtoi %f26,%f28 ! raise inexact if not zero + st %f24,[%o3] + st %f27,[%o3+4] +2: + addcc %i0,-1,%i0 + ble,pn %icc,.last3_from_range3 +! delay slot, harmless if branch taken + add %i3,%i4,%i3 ! y += stridey + ld [%i1],%l3 + ld [%i1],%f24 + ld [%i1+4],%f27 + andn %l3,%i5,%l3 ! hx &= ~0x80000000 + ba,pt %icc,.loop3 +! delay slot + add %i1,%i2,%i1 ! x += stridex + + SET_SIZE(__vsin_ultra3) + diff --git a/usr/src/lib/libmvec/common/vis/__vsincos.S b/usr/src/lib/libmvec/common/vis/__vsincos.S new file mode 100644 index 0000000000..0a856047db --- /dev/null +++ b/usr/src/lib/libmvec/common/vis/__vsincos.S @@ -0,0 +1,959 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .file "__vsincos.S" + +#include "libm.h" + + RO_DATA + .align 64 +constants: + .word 0x42c80000,0x00000000 ! 3 * 2^44 + .word 0x43380000,0x00000000 ! 3 * 2^51 + .word 0x3fe45f30,0x6dc9c883 ! invpio2 + .word 0x3ff921fb,0x54442c00 ! 
pio2_1 + .word 0x3d318469,0x898cc400 ! pio2_2 + .word 0x3a71701b,0x839a2520 ! pio2_3 + .word 0xbfc55555,0x55555533 ! pp1 + .word 0x3f811111,0x10e7d53b ! pp2 + .word 0xbf2a0167,0xe6b3cf9b ! pp3 + .word 0xbfdfffff,0xffffff65 ! qq1 + .word 0x3fa55555,0x54f88ed0 ! qq2 + .word 0xbf56c12c,0xdd185f60 ! qq3 + +! local storage indices + +#define xsave STACK_BIAS-0x8 +#define ssave STACK_BIAS-0x10 +#define csave STACK_BIAS-0x18 +#define nsave STACK_BIAS-0x1c +#define sxsave STACK_BIAS-0x20 +#define sssave STACK_BIAS-0x24 +#define biguns STACK_BIAS-0x28 +#define junk STACK_BIAS-0x30 +#define nk2 STACK_BIAS-0x38 +#define nk1 STACK_BIAS-0x3c +#define nk0 STACK_BIAS-0x40 +! sizeof temp storage - must be a multiple of 16 for V9 +#define tmps 0x40 + +! register use + +! i0 n +! i1 x +! i2 stridex +! i3 s +! i4 strides +! i5 0x80000000,n0 + +! l0 hx0,k0 +! l1 hx1,k1 +! l2 hx2,k2 +! l3 c +! l4 pc0 +! l5 pc1 +! l6 pc2 +! l7 stridec + +! the following are 64-bit registers in both V8+ and V9 + +! g1 __vlibm_TBL_sincos2 +! g5 scratch,n1 + +! o0 ps0 +! o1 ps1 +! o2 ps2 +! o3 0x3fe921fb +! o4 0x3e400000 +! o5 0x4099251e +! o7 scratch,n2 + +! f0 x0,z0 +! f2 abs(x0) +! f4 +! f6 +! f8 +! f10 x1,z1 +! f12 abs(x1) +! f14 +! f16 +! f18 +! f20 x2,z2 +! f22 abs(x2) +! f24 +! f26 +! f28 +! f30 +! f32 +! f34 +! f36 +! f38 + +#define c3two44 %f40 +#define c3two51 %f42 +#define invpio2 %f44 +#define pio2_1 %f46 +#define pio2_2 %f48 +#define pio2_3 %f50 +#define pp1 %f52 +#define pp2 %f54 +#define pp3 %f56 +#define qq1 %f58 +#define qq2 %f60 +#define qq3 %f62 + +! __vsincos: on entry %i0 = n, %i1 = x, %i2 = stridex, %i3 = s, %i4 = strides +! and %i5 = c (saved to csave and copied to %l3 before %i5 is reused for the +! sign mask); stridec is fetched from the caller's frame into %l7. +! The main loop (.loop0/.loop1/.loop2/.cont) handles three elements per pass. +! Arguments with hx < 0x3e400000 or hx > 0x4099251e are diverted to .rangeN; +! when biguns has been set, .end calls __vlibm_vsincos_big. + ENTRY(__vsincos) + save %sp,-SA(MINFRAME)-tmps,%sp + PIC_SETUP(l7) + PIC_SET(l7,constants,o0) + PIC_SET(l7,__vlibm_TBL_sincos2,o1) + mov %o1,%g1 + wr %g0,0x82,%asi ! set %asi for non-faulting loads +#ifdef __sparcv9 + stx %i1,[%fp+xsave] ! save arguments + stx %i3,[%fp+ssave] + stx %i5,[%fp+csave] + ldx [%fp+STACK_BIAS+0xb0],%l7 +#else + st %i1,[%fp+xsave] ! 
save arguments + st %i3,[%fp+ssave] + st %i5,[%fp+csave] + ld [%fp+0x5c],%l7 +#endif + st %i0,[%fp+nsave] + st %i2,[%fp+sxsave] + st %i4,[%fp+sssave] + mov %i5,%l3 + st %g0,[%fp+biguns] ! biguns = 0 + ldd [%o0+0x00],c3two44 ! load/set up constants + ldd [%o0+0x08],c3two51 + ldd [%o0+0x10],invpio2 + ldd [%o0+0x18],pio2_1 + ldd [%o0+0x20],pio2_2 + ldd [%o0+0x28],pio2_3 + ldd [%o0+0x30],pp1 + ldd [%o0+0x38],pp2 + ldd [%o0+0x40],pp3 + ldd [%o0+0x48],qq1 + ldd [%o0+0x50],qq2 + ldd [%o0+0x58],qq3 + sethi %hi(0x80000000),%i5 + sethi %hi(0x3e400000),%o4 + sethi %hi(0x3fe921fb),%o3 + or %o3,%lo(0x3fe921fb),%o3 + sethi %hi(0x4099251e),%o5 + or %o5,%lo(0x4099251e),%o5 + sll %i2,3,%i2 ! scale strides + sll %i4,3,%i4 + sll %l7,3,%l7 + add %fp,junk,%o0 ! loop prologue + add %fp,junk,%o1 + add %fp,junk,%o2 + ld [%i1],%l0 ! *x + ld [%i1],%f0 + ld [%i1+4],%f3 + andn %l0,%i5,%l0 ! mask off sign + ba .loop0 + add %i1,%i2,%i1 ! x += stridex + +! 16-byte aligned + .align 16 +.loop0: + lda [%i1]%asi,%l1 ! preload next argument + sub %l0,%o4,%g5 + sub %o5,%l0,%o7 + fabss %f0,%f2 + + lda [%i1]%asi,%f10 + orcc %o7,%g5,%g0 + mov %i3,%o0 ! ps0 = s + bl,pn %icc,.range0 ! hx < 0x3e400000 or hx > 0x4099251e + +! delay slot + lda [%i1+4]%asi,%f13 + addcc %i0,-1,%i0 + add %i3,%i4,%i3 ! s += strides + + mov %l3,%l4 ! pc0 = c + add %l3,%l7,%l3 ! c += stridec + ble,pn %icc,.last1 + +! delay slot + andn %l1,%i5,%l1 + add %i1,%i2,%i1 ! x += stridex + faddd %f2,c3two44,%f4 + st %f17,[%o1+4] + +.loop1: + lda [%i1]%asi,%l2 ! preload next argument + sub %l1,%o4,%g5 + sub %o5,%l1,%o7 + fabss %f10,%f12 + + lda [%i1]%asi,%f20 + orcc %o7,%g5,%g0 + mov %i3,%o1 ! ps1 = s + bl,pn %icc,.range1 ! hx < 0x3e400000 or hx > 0x4099251e + +! delay slot + lda [%i1+4]%asi,%f23 + addcc %i0,-1,%i0 + add %i3,%i4,%i3 ! s += strides + + mov %l3,%l5 ! pc1 = c + add %l3,%l7,%l3 ! c += stridec + ble,pn %icc,.last2 + +! delay slot + andn %l2,%i5,%l2 + add %i1,%i2,%i1 ! 
x += stridex + faddd %f12,c3two44,%f14 + st %f27,[%o2+4] + +.loop2: + sub %l2,%o4,%g5 + sub %o5,%l2,%o7 + fabss %f20,%f22 + st %f5,[%fp+nk0] + + orcc %o7,%g5,%g0 + mov %i3,%o2 ! ps2 = s + bl,pn %icc,.range2 ! hx < 0x3e400000 or hx > 0x4099251e +! delay slot + st %f15,[%fp+nk1] + + mov %l3,%l6 ! pc2 = c + +.cont: + add %i3,%i4,%i3 ! s += strides + add %l3,%l7,%l3 ! c += stridec + faddd %f22,c3two44,%f24 + st %f25,[%fp+nk2] + + sub %o3,%l0,%l0 + sub %o3,%l1,%l1 + fmovs %f3,%f1 + + sub %o3,%l2,%l2 + fmovs %f13,%f11 + + or %l0,%l1,%l0 + orcc %l0,%l2,%g0 + fmovs %f23,%f21 + + fmuld %f0,invpio2,%f6 ! x * invpio2, for medium range + + fmuld %f10,invpio2,%f16 + ld [%fp+nk0],%l0 + + fmuld %f20,invpio2,%f26 + ld [%fp+nk1],%l1 + + bl,pn %icc,.medium +! delay slot + ld [%fp+nk2],%l2 + + sll %l0,5,%l0 ! k + fcmpd %fcc0,%f0,pio2_3 ! x < pio2_3 iff x < 0 + + sll %l1,5,%l1 + ldd [%l0+%g1],%f4 + fcmpd %fcc1,%f10,pio2_3 + + sll %l2,5,%l2 + ldd [%l1+%g1],%f14 + fcmpd %fcc2,%f20,pio2_3 + + ldd [%l2+%g1],%f24 + + fsubd %f2,%f4,%f2 ! x -= __vlibm_TBL_sincos2[k] + + fsubd %f12,%f14,%f12 + + fsubd %f22,%f24,%f22 + + fmuld %f2,%f2,%f0 ! 
z = x * x + + fmuld %f12,%f12,%f10 + + fmuld %f22,%f22,%f20 + + fmuld %f0,pp3,%f6 + + fmuld %f10,pp3,%f16 + + fmuld %f20,pp3,%f26 + + faddd %f6,pp2,%f6 + fmuld %f0,qq3,%f4 + + faddd %f16,pp2,%f16 + fmuld %f10,qq3,%f14 + + faddd %f26,pp2,%f26 + fmuld %f20,qq3,%f24 + + fmuld %f0,%f6,%f6 + faddd %f4,qq2,%f4 + + fmuld %f10,%f16,%f16 + faddd %f14,qq2,%f14 + + fmuld %f20,%f26,%f26 + faddd %f24,qq2,%f24 + + faddd %f6,pp1,%f6 + fmuld %f0,%f4,%f4 + add %l0,%g1,%l0 + + faddd %f16,pp1,%f16 + fmuld %f10,%f14,%f14 + add %l1,%g1,%l1 + + faddd %f26,pp1,%f26 + fmuld %f20,%f24,%f24 + add %l2,%g1,%l2 + + fmuld %f0,%f6,%f6 + faddd %f4,qq1,%f4 + + fmuld %f10,%f16,%f16 + faddd %f14,qq1,%f14 + + fmuld %f20,%f26,%f26 + faddd %f24,qq1,%f24 + + fmuld %f2,%f6,%f6 + ldd [%l0+8],%f8 + + fmuld %f12,%f16,%f16 + ldd [%l1+8],%f18 + + fmuld %f22,%f26,%f26 + ldd [%l2+8],%f28 + + faddd %f6,%f2,%f6 + fmuld %f0,%f4,%f4 + ldd [%l0+16],%f30 + + faddd %f16,%f12,%f16 + fmuld %f10,%f14,%f14 + ldd [%l1+16],%f32 + + faddd %f26,%f22,%f26 + fmuld %f20,%f24,%f24 + ldd [%l2+16],%f34 + + fmuld %f8,%f6,%f0 ! s * spoly + + fmuld %f18,%f16,%f10 + + fmuld %f28,%f26,%f20 + + fmuld %f30,%f4,%f2 ! c * cpoly + + fmuld %f32,%f14,%f12 + + fmuld %f34,%f24,%f22 + + fmuld %f30,%f6,%f6 ! c * spoly + fsubd %f2,%f0,%f2 + + fmuld %f32,%f16,%f16 + fsubd %f12,%f10,%f12 + + fmuld %f34,%f26,%f26 + fsubd %f22,%f20,%f22 + + fmuld %f8,%f4,%f4 ! s * cpoly + faddd %f2,%f30,%f2 + st %f2,[%l4] + + fmuld %f18,%f14,%f14 + faddd %f12,%f32,%f12 + st %f3,[%l4+4] + + fmuld %f28,%f24,%f24 + faddd %f22,%f34,%f22 + st %f12,[%l5] + + faddd %f6,%f4,%f6 + st %f13,[%l5+4] + + faddd %f16,%f14,%f16 + st %f22,[%l6] + + faddd %f26,%f24,%f26 + st %f23,[%l6+4] + + faddd %f6,%f8,%f6 + + faddd %f16,%f18,%f16 + + faddd %f26,%f28,%f26 + + fnegd %f6,%f4 + lda [%i1]%asi,%l0 ! preload next argument + + fnegd %f16,%f14 + lda [%i1]%asi,%f0 + + fnegd %f26,%f24 + lda [%i1+4]%asi,%f3 + andn %l0,%i5,%l0 + add %i1,%i2,%i1 + + fmovdl %fcc0,%f4,%f6 ! (hx < -0)? 
-s : s + st %f6,[%o0] + + fmovdl %fcc1,%f14,%f16 + st %f16,[%o1] + + fmovdl %fcc2,%f24,%f26 + st %f26,[%o2] + addcc %i0,-1,%i0 + + bg,pt %icc,.loop0 +! delay slot + st %f7,[%o0+4] + + ba,pt %icc,.end +! delay slot + nop + + + .align 16 +.medium: +! Medium range: reached from .cont when (0x3fe921fb - hx) is negative for any +! of the three arguments. n is x * invpio2 rounded by adding and then +! subtracting 3 * 2^51; x is then reduced using pio2_1, pio2_2 and pio2_3 in +! turn before the same table lookup as the primary path. %i5, %g5, %o7 hold +! n0, n1, n2 here and %o4 is scratch; %o4 and %i5 are reloaded with their +! loop constants further down. + faddd %f6,c3two51,%f4 + st %f5,[%fp+nk0] + + faddd %f16,c3two51,%f14 + st %f15,[%fp+nk1] + + faddd %f26,c3two51,%f24 + st %f25,[%fp+nk2] + + fsubd %f4,c3two51,%f6 + + fsubd %f14,c3two51,%f16 + + fsubd %f24,c3two51,%f26 + + fmuld %f6,pio2_1,%f2 + ld [%fp+nk0],%i5 ! n + + fmuld %f16,pio2_1,%f12 + ld [%fp+nk1],%g5 + + fmuld %f26,pio2_1,%f22 + ld [%fp+nk2],%o7 + + fsubd %f0,%f2,%f0 + fmuld %f6,pio2_2,%f4 + mov %o0,%o4 ! if (n & 1) swap ps, pc + andcc %i5,1,%g0 + + fsubd %f10,%f12,%f10 + fmuld %f16,pio2_2,%f14 + movnz %icc,%l4,%o0 + and %i5,3,%i5 + + fsubd %f20,%f22,%f20 + fmuld %f26,pio2_2,%f24 + movnz %icc,%o4,%l4 + + fsubd %f0,%f4,%f30 + mov %o1,%o4 + andcc %g5,1,%g0 + + fsubd %f10,%f14,%f32 + movnz %icc,%l5,%o1 + and %g5,3,%g5 + + fsubd %f20,%f24,%f34 + movnz %icc,%o4,%l5 + + fsubd %f0,%f30,%f0 + fcmple32 %f30,pio2_3,%l0 ! x <= pio2_3 iff x < 0 + mov %o2,%o4 + andcc %o7,1,%g0 + + fsubd %f10,%f32,%f10 + fcmple32 %f32,pio2_3,%l1 + movnz %icc,%l6,%o2 + and %o7,3,%o7 + + fsubd %f20,%f34,%f20 + fcmple32 %f34,pio2_3,%l2 + movnz %icc,%o4,%l6 + + fsubd %f0,%f4,%f0 + fmuld %f6,pio2_3,%f6 + add %i5,1,%o4 ! n = (n >> 1) | (((n + 1) ^ l) & 2) + srl %i5,1,%i5 + + fsubd %f10,%f14,%f10 + fmuld %f16,pio2_3,%f16 + xor %o4,%l0,%o4 + + fsubd %f20,%f24,%f20 + fmuld %f26,pio2_3,%f26 + and %o4,2,%o4 + + fsubd %f6,%f0,%f6 + or %i5,%o4,%i5 + + fsubd %f16,%f10,%f16 + add %g5,1,%o4 + srl %g5,1,%g5 + + fsubd %f26,%f20,%f26 + xor %o4,%l1,%o4 + + fsubd %f30,%f6,%f0 ! 
reduced x + and %o4,2,%o4 + + fsubd %f32,%f16,%f10 + or %g5,%o4,%g5 + + fsubd %f34,%f26,%f20 + add %o7,1,%o4 + srl %o7,1,%o7 + + fzero %f38 + xor %o4,%l2,%o4 + + fabsd %f0,%f2 + and %o4,2,%o4 + + fabsd %f10,%f12 + or %o7,%o4,%o7 + + fabsd %f20,%f22 + sethi %hi(0x3e400000),%o4 + + fnegd %f38,%f38 + + faddd %f2,c3two44,%f4 + st %f5,[%fp+nk0] + + faddd %f12,c3two44,%f14 + st %f15,[%fp+nk1] + + faddd %f22,c3two44,%f24 + st %f25,[%fp+nk2] + + fsubd %f30,%f0,%f4 + + fsubd %f32,%f10,%f14 + + fsubd %f34,%f20,%f24 + + fsubd %f4,%f6,%f6 ! w + ld [%fp+nk0],%l0 + + fsubd %f14,%f16,%f16 + ld [%fp+nk1],%l1 + + fsubd %f24,%f26,%f26 + ld [%fp+nk2],%l2 + sll %l0,5,%l0 ! k + + fand %f0,%f38,%f30 ! sign bit of x + ldd [%l0+%g1],%f4 + sll %l1,5,%l1 + + fand %f10,%f38,%f32 + ldd [%l1+%g1],%f14 + sll %l2,5,%l2 + + fand %f20,%f38,%f34 + ldd [%l2+%g1],%f24 + + fsubd %f2,%f4,%f2 ! x -= __vlibm_TBL_sincos2[k] + + fsubd %f12,%f14,%f12 + + fsubd %f22,%f24,%f22 + + fmuld %f2,%f2,%f0 ! z = x * x + fxor %f6,%f30,%f30 + + fmuld %f12,%f12,%f10 + fxor %f16,%f32,%f32 + + fmuld %f22,%f22,%f20 + fxor %f26,%f34,%f34 + + fmuld %f0,pp3,%f6 + + fmuld %f10,pp3,%f16 + + fmuld %f20,pp3,%f26 + + faddd %f6,pp2,%f6 + fmuld %f0,qq3,%f4 + + faddd %f16,pp2,%f16 + fmuld %f10,qq3,%f14 + + faddd %f26,pp2,%f26 + fmuld %f20,qq3,%f24 + + fmuld %f0,%f6,%f6 + faddd %f4,qq2,%f4 + + fmuld %f10,%f16,%f16 + faddd %f14,qq2,%f14 + + fmuld %f20,%f26,%f26 + faddd %f24,qq2,%f24 + + faddd %f6,pp1,%f6 + fmuld %f0,%f4,%f4 + add %l0,%g1,%l0 + + faddd %f16,pp1,%f16 + fmuld %f10,%f14,%f14 + add %l1,%g1,%l1 + + faddd %f26,pp1,%f26 + fmuld %f20,%f24,%f24 + add %l2,%g1,%l2 + + fmuld %f0,%f6,%f6 + faddd %f4,qq1,%f4 + + fmuld %f10,%f16,%f16 + faddd %f14,qq1,%f14 + + fmuld %f20,%f26,%f26 + faddd %f24,qq1,%f24 + + fmuld %f2,%f6,%f6 + ldd [%l0+16],%f8 + + fmuld %f12,%f16,%f16 + ldd [%l1+16],%f18 + + fmuld %f22,%f26,%f26 + ldd [%l2+16],%f28 + + faddd %f6,%f30,%f6 + fmuld %f0,%f4,%f4 + ldd [%l0+8],%f30 + + faddd %f16,%f32,%f16 + fmuld 
%f10,%f14,%f14 + ldd [%l1+8],%f32 + + faddd %f26,%f34,%f26 + fmuld %f20,%f24,%f24 + ldd [%l2+8],%f34 + + fmuld %f8,%f4,%f0 ! c * cpoly + faddd %f6,%f2,%f6 + + fmuld %f18,%f14,%f10 + faddd %f16,%f12,%f16 + + fmuld %f28,%f24,%f20 + faddd %f26,%f22,%f26 + + fmuld %f30,%f6,%f2 ! s * spoly + + fmuld %f32,%f16,%f12 + + fmuld %f34,%f26,%f22 + + fmuld %f8,%f6,%f6 ! c * spoly + fsubd %f0,%f2,%f2 + + fmuld %f18,%f16,%f16 + fsubd %f10,%f12,%f12 + + fmuld %f28,%f26,%f26 + fsubd %f20,%f22,%f22 + + fmuld %f30,%f4,%f4 ! s * cpoly + faddd %f8,%f2,%f8 + + fmuld %f32,%f14,%f14 + faddd %f18,%f12,%f18 + + fmuld %f34,%f24,%f24 + faddd %f28,%f22,%f28 + + faddd %f4,%f6,%f6 + + faddd %f14,%f16,%f16 + + faddd %f24,%f26,%f26 + + faddd %f30,%f6,%f6 ! now %f6 = sin |x|, %f8 = cos |x| + + faddd %f32,%f16,%f16 + + faddd %f34,%f26,%f26 + + fnegd %f8,%f4 ! if (n & 1) c = -c + lda [%i1]%asi,%l0 ! preload next argument + mov %i5,%l1 + + fnegd %f18,%f14 + lda [%i1]%asi,%f0 + sethi %hi(0x80000000),%i5 + + fnegd %f28,%f24 + lda [%i1+4]%asi,%f3 + + andcc %l1,1,%g0 + fmovdnz %icc,%f4,%f8 + st %f8,[%l4] + + andcc %g5,1,%g0 + fmovdnz %icc,%f14,%f18 + st %f9,[%l4+4] + + andcc %o7,1,%g0 + fmovdnz %icc,%f24,%f28 + st %f18,[%l5] + + fnegd %f6,%f4 ! if (n & 2) s = -s + st %f19,[%l5+4] + andn %l0,%i5,%l0 + + fnegd %f16,%f14 + st %f28,[%l6] + add %i1,%i2,%i1 + + fnegd %f26,%f24 + st %f29,[%l6+4] + + andcc %l1,2,%g0 + fmovdnz %icc,%f4,%f6 + st %f6,[%o0] + + andcc %g5,2,%g0 + fmovdnz %icc,%f14,%f16 + st %f16,[%o1] + + andcc %o7,2,%g0 + fmovdnz %icc,%f24,%f26 + st %f26,[%o2] + + addcc %i0,-1,%i0 + bg,pt %icc,.loop0 +! delay slot + st %f7,[%o0+4] + + ba,pt %icc,.end +! delay slot + nop + + + .align 16 +.end: +! Loop exit: store the still-pending low words of the second and third sin +! results, then, if biguns is nonzero, reload the saved arguments from the +! frame and call __vlibm_vsincos_big with them. + st %f17,[%o1+4] + st %f27,[%o2+4] + ld [%fp+biguns],%i5 + tst %i5 ! check for huge arguments remaining + be,pt %icc,.exit +! 
delay slot + nop +#ifdef __sparcv9 + stx %o5,[%sp+STACK_BIAS+0xb8] + ldx [%fp+xsave],%o1 + ldx [%fp+ssave],%o3 + ldx [%fp+csave],%o5 + ldx [%fp+STACK_BIAS+0xb0],%i5 + stx %i5,[%sp+STACK_BIAS+0xb0] +#else + st %o5,[%sp+0x60] + ld [%fp+xsave],%o1 + ld [%fp+ssave],%o3 + ld [%fp+csave],%o5 + ld [%fp+0x5c],%i5 + st %i5,[%sp+0x5c] +#endif + ld [%fp+nsave],%o0 + ld [%fp+sxsave],%o2 + ld [%fp+sssave],%o4 + sra %o2,0,%o2 ! sign-extend for V9 + call __vlibm_vsincos_big + sra %o4,0,%o4 ! delay slot + +.exit: + ret + restore + + + .align 16 +.last1: +! Fewer than three elements remain: each unused slot gets a zero argument and +! has its sin/cos output pointers redirected to the junk area of the frame, +! then control rejoins the main path at .cont. + faddd %f2,c3two44,%f4 + st %f17,[%o1+4] +.last1_from_range1: + mov 0,%l1 + fzeros %f10 + fzero %f12 + add %fp,junk,%o1 + add %fp,junk,%l5 +.last2: + faddd %f12,c3two44,%f14 + st %f27,[%o2+4] + st %f5,[%fp+nk0] + st %f15,[%fp+nk1] +.last2_from_range2: + mov 0,%l2 + fzeros %f20 + fzero %f22 + add %fp,junk,%o2 + ba,pt %icc,.cont +! delay slot + add %fp,junk,%l6 + + + .align 16 +.range0: +! x0 is outside the range handled by the main path. +! hx < 0x3e400000: store s = x and c = 1.0 (0x3ff00000,0). +! hx >= 0x7ff00000 (not finite): store abs(x) * 0 to both s and c. +! otherwise: set biguns and store nothing here. +! Then, unless the count is exhausted, the preloaded x1 becomes the new x0 +! and the loop restarts at .loop0. .range1 and .range2 follow the same +! pattern for the second and third slots. + cmp %l0,%o4 + bl,pt %icc,1f ! hx < 0x3e400000 +! delay slot, harmless if branch taken + sethi %hi(0x7ff00000),%o7 + cmp %l0,%o7 + bl,a,pt %icc,2f ! branch if finite +! delay slot, squashed if branch not taken + st %o4,[%fp+biguns] ! set biguns + fzero %f0 + fmuld %f2,%f0,%f2 + st %f2,[%o0] + st %f3,[%o0+4] + st %f2,[%l3] + ba,pt %icc,2f +! delay slot + st %f3,[%l3+4] +1: + fdtoi %f2,%f4 ! raise inexact if not zero + st %f0,[%o0] + st %f3,[%o0+4] + sethi %hi(0x3ff00000),%g5 + st %g5,[%l3] + st %g0,[%l3+4] +2: + addcc %i0,-1,%i0 + ble,pn %icc,.end +! delay slot, harmless if branch taken + add %i3,%i4,%i3 ! s += strides + add %l3,%l7,%l3 ! c += stridec + andn %l1,%i5,%l0 ! hx &= ~0x80000000 + fmovs %f10,%f0 + fmovs %f13,%f3 + ba,pt %icc,.loop0 +! delay slot + add %i1,%i2,%i1 ! x += stridex + + + .align 16 +.range1: + cmp %l1,%o4 + bl,pt %icc,1f ! hx < 0x3e400000 +! delay slot, harmless if branch taken + sethi %hi(0x7ff00000),%o7 + cmp %l1,%o7 + bl,a,pt %icc,2f ! branch if finite +! delay slot, squashed if branch not taken + st %o4,[%fp+biguns] ! 
set biguns + fzero %f10 + fmuld %f12,%f10,%f12 + st %f12,[%o1] + st %f13,[%o1+4] + st %f12,[%l3] + ba,pt %icc,2f +! delay slot + st %f13,[%l3+4] +1: + fdtoi %f12,%f14 ! raise inexact if not zero + st %f10,[%o1] + st %f13,[%o1+4] + sethi %hi(0x3ff00000),%g5 + st %g5,[%l3] + st %g0,[%l3+4] +2: + addcc %i0,-1,%i0 + ble,pn %icc,.last1_from_range1 +! delay slot, harmless if branch taken + add %i3,%i4,%i3 ! s += strides + add %l3,%l7,%l3 ! c += stridec + andn %l2,%i5,%l1 ! hx &= ~0x80000000 + fmovs %f20,%f10 + fmovs %f23,%f13 + ba,pt %icc,.loop1 +! delay slot + add %i1,%i2,%i1 ! x += stridex + + + .align 16 +.range2: + cmp %l2,%o4 + bl,pt %icc,1f ! hx < 0x3e400000 +! delay slot, harmless if branch taken + sethi %hi(0x7ff00000),%o7 + cmp %l2,%o7 + bl,a,pt %icc,2f ! branch if finite +! delay slot, squashed if branch not taken + st %o4,[%fp+biguns] ! set biguns + fzero %f20 + fmuld %f22,%f20,%f22 + st %f22,[%o2] + st %f23,[%o2+4] + st %f22,[%l3] + ba,pt %icc,2f +! delay slot + st %f23,[%l3+4] +1: + fdtoi %f22,%f24 ! raise inexact if not zero + st %f20,[%o2] + st %f23,[%o2+4] + sethi %hi(0x3ff00000),%g5 + st %g5,[%l3] + st %g0,[%l3+4] +2: + addcc %i0,-1,%i0 + ble,pn %icc,.last2_from_range2 +! delay slot, harmless if branch taken + add %i3,%i4,%i3 ! s += strides + add %l3,%l7,%l3 ! c += stridec + ld [%i1],%l2 + ld [%i1],%f20 + ld [%i1+4],%f23 + andn %l2,%i5,%l2 ! hx &= ~0x80000000 + ba,pt %icc,.loop2 +! delay slot + add %i1,%i2,%i1 ! x += stridex + + SET_SIZE(__vsincos) + diff --git a/usr/src/lib/libmvec/common/vis/__vsincosf.S b/usr/src/lib/libmvec/common/vis/__vsincosf.S new file mode 100644 index 0000000000..adc7c15df4 --- /dev/null +++ b/usr/src/lib/libmvec/common/vis/__vsincosf.S @@ -0,0 +1,906 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .file "__vsincosf.S" + +#include "libm.h" + + RO_DATA + .align 64 +constants: + .word 0xbfc55554,0x60000000 + .word 0x3f811077,0xe0000000 + .word 0xbf29956b,0x60000000 + .word 0x3ff00000,0x00000000 + .word 0xbfe00000,0x00000000 + .word 0x3fa55554,0xa0000000 + .word 0xbf56c0c1,0xe0000000 + .word 0x3ef99e24,0xe0000000 + .word 0x3fe45f30,0x6dc9c883 + .word 0x43380000,0x00000000 + .word 0x3ff921fb,0x54400000 + .word 0x3dd0b461,0x1a626331 + .word 0x3f490fdb,0 + .word 0x49c90fdb,0 + .word 0x7f800000,0 + .word 0x80000000,0 + +#define S0 0x0 +#define S1 0x08 +#define S2 0x10 +#define one 0x18 +#define mhalf 0x20 +#define C0 0x28 +#define C1 0x30 +#define C2 0x38 +#define invpio2 0x40 +#define round 0x48 +#define pio2_1 0x50 +#define pio2_t 0x58 +#define thresh1 0x60 +#define thresh2 0x68 +#define inf 0x70 +#define signbit 0x78 + +! local storage indices + +#define xsave STACK_BIAS-0x8 +#define ssave STACK_BIAS-0x10 +#define csave STACK_BIAS-0x18 +#define nsave STACK_BIAS-0x1c +#define sxsave STACK_BIAS-0x20 +#define sssave STACK_BIAS-0x24 +#define junk STACK_BIAS-0x28 +#define n3 STACK_BIAS-0x38 +#define n2 STACK_BIAS-0x40 +#define n1 STACK_BIAS-0x48 +#define n0 STACK_BIAS-0x50 +! 
sizeof temp storage - must be a multiple of 16 for V9 +#define tmps 0x50 + +! register use + +! i0 n +! i1 x +! i2 stridex +! i3 s +! i4 strides +! i5 biguns + +! l0 ps0 +! l1 ps1 +! l2 ps2 +! l3 ps3 +! l4 pc0 +! l5 pc1 +! l6 pc2 +! l7 pc3 + +! the following are 64-bit registers in both V8+ and V9 + +! g1 +! g5 + +! o0 n0 +! o1 n1 +! o2 n2 +! o3 n3 +! o4 c +! o5 stridec +! o7 + +! f0 x0 +! f2 x1 +! f4 x2 +! f6 x3 +! f8 thresh1 (pi/4) +! f10 s0 +! f12 s1 +! f14 s2 +! f16 s3 +! f18 thresh2 (2^19 pi) +! f20 c0 +! f22 c1 +! f24 c2 +! f26 c3 +! f28 signbit +! f30 +! f32 +! f34 +! f36 +! f38 inf +! f40 S0 +! f42 S1 +! f44 S2 +! f46 one +! f48 mhalf +! f50 C0 +! f52 C1 +! f54 C2 +! f56 invpio2 +! f58 round +! f60 pio2_1 +! f62 pio2_t + + ENTRY(__vsincosf) + save %sp,-SA(MINFRAME)-tmps,%sp + PIC_SETUP(l7) + PIC_SET(l7,constants,o0) + mov %o0,%g1 + +#ifdef __sparcv9 + stx %i1,[%fp+xsave] ! save arguments + stx %i3,[%fp+ssave] + stx %i5,[%fp+csave] + ldx [%fp+STACK_BIAS+0xb0],%o5 +#else + st %i1,[%fp+xsave] ! save arguments + st %i3,[%fp+ssave] + st %i5,[%fp+csave] + ld [%fp+0x5c],%o5 +#endif + st %i0,[%fp+nsave] + st %i2,[%fp+sxsave] + st %i4,[%fp+sssave] + mov %i5,%o4 + mov 0,%i5 ! biguns = 0 + ldd [%g1+S0],%f40 ! load constants + ldd [%g1+S1],%f42 + ldd [%g1+S2],%f44 + ldd [%g1+one],%f46 + ldd [%g1+mhalf],%f48 + ldd [%g1+C0],%f50 + ldd [%g1+C1],%f52 + ldd [%g1+C2],%f54 + ldd [%g1+invpio2],%f56 + ldd [%g1+round],%f58 + ldd [%g1+pio2_1],%f60 + ldd [%g1+pio2_t],%f62 + ldd [%g1+thresh1],%f8 + ldd [%g1+thresh2],%f18 + ldd [%g1+inf],%f38 + ldd [%g1+signbit],%f28 + sll %i2,2,%i2 ! scale strides + sll %i4,2,%i4 + sll %o5,2,%o5 + nop + fzero %f10 ! loop prologue + add %fp,junk,%l0 + fzero %f20 + add %fp,junk,%l4 + fzero %f12 + add %fp,junk,%l1 + fzero %f22 + add %fp,junk,%l5 + fzero %f14 + add %fp,junk,%l2 + fzero %f24 + add %fp,junk,%l6 + fzero %f16 + add %fp,junk,%l3 + fzero %f26 + ba .start + add %fp,junk,%l7 + +! 16-byte aligned + .align 16 +.start: + ld [%i1],%f0 ! 
*x + add %i1,%i2,%i1 ! x += stridex + addcc %i0,-1,%i0 + fdtos %f10,%f10 + + st %f10,[%l0] + mov %i3,%l0 ! ps0 = s + add %i3,%i4,%i3 ! s += strides + fdtos %f20,%f20 + + st %f20,[%l4] + mov %o4,%l4 ! pc0 = c + ble,pn %icc,.last1 +! delay slot + add %o4,%o5,%o4 ! c += stridec + + ld [%i1],%f2 ! *x + add %i1,%i2,%i1 ! x += stridex + addcc %i0,-1,%i0 + fdtos %f12,%f12 + + st %f12,[%l1] + mov %i3,%l1 ! ps1 = s + add %i3,%i4,%i3 ! s += strides + fdtos %f22,%f22 + + st %f22,[%l5] + mov %o4,%l5 ! pc1 = c + ble,pn %icc,.last2 +! delay slot + add %o4,%o5,%o4 ! c += stridec + + ld [%i1],%f4 ! *x + add %i1,%i2,%i1 ! x += stridex + addcc %i0,-1,%i0 + fdtos %f14,%f14 + + st %f14,[%l2] + mov %i3,%l2 ! ps2 = s + add %i3,%i4,%i3 ! s += strides + fdtos %f24,%f24 + + st %f24,[%l6] + mov %o4,%l6 ! pc2 = c + ble,pn %icc,.last3 +! delay slot + add %o4,%o5,%o4 ! c += stridec + + ld [%i1],%f6 ! *x + add %i1,%i2,%i1 ! x += stridex + nop + fdtos %f16,%f16 + + st %f16,[%l3] + mov %i3,%l3 ! ps3 = s + add %i3,%i4,%i3 ! s += strides + fdtos %f26,%f26 + + st %f26,[%l7] + mov %o4,%l7 ! pc3 = c + add %o4,%o5,%o4 ! c += stridec +.cont: + fabsd %f0,%f30 + + fabsd %f2,%f32 + + fabsd %f4,%f34 + + fabsd %f6,%f36 + fcmple32 %f30,%f18,%o0 + + fcmple32 %f32,%f18,%o1 + + fcmple32 %f34,%f18,%o2 + + fcmple32 %f36,%f18,%o3 + nop + +! 16-byte aligned + andcc %o0,2,%g0 + bz,pn %icc,.range0 ! branch if > 2^19 pi +! delay slot + fcmple32 %f30,%f8,%o0 + +.check1: + andcc %o1,2,%g0 + bz,pn %icc,.range1 ! branch if > 2^19 pi +! delay slot + fcmple32 %f32,%f8,%o1 + +.check2: + andcc %o2,2,%g0 + bz,pn %icc,.range2 ! branch if > 2^19 pi +! delay slot + fcmple32 %f34,%f8,%o2 + +.check3: + andcc %o3,2,%g0 + bz,pn %icc,.range3 ! branch if > 2^19 pi +! 
delay slot + fcmple32 %f36,%f8,%o3 + +.checkprimary: + fsmuld %f0,%f0,%f30 + fstod %f0,%f0 + + fsmuld %f2,%f2,%f32 + fstod %f2,%f2 + and %o0,%o1,%o7 + + fsmuld %f4,%f4,%f34 + fstod %f4,%f4 + and %o2,%o7,%o7 + + fsmuld %f6,%f6,%f36 + fstod %f6,%f6 + and %o3,%o7,%o7 + + fmuld %f30,%f54,%f20 + andcc %o7,2,%g0 + bz,pn %icc,.medium ! branch if any argument is > pi/4 +! delay slot + nop + + fmuld %f32,%f54,%f22 + + fmuld %f34,%f54,%f24 + + fmuld %f36,%f54,%f26 + + faddd %f20,%f52,%f20 + fmuld %f30,%f44,%f10 + + faddd %f22,%f52,%f22 + fmuld %f32,%f44,%f12 + + faddd %f24,%f52,%f24 + fmuld %f34,%f44,%f14 + + faddd %f26,%f52,%f26 + fmuld %f36,%f44,%f16 + + fmuld %f30,%f20,%f20 + faddd %f10,%f42,%f10 + + fmuld %f32,%f22,%f22 + faddd %f12,%f42,%f12 + + fmuld %f34,%f24,%f24 + faddd %f14,%f42,%f14 + + fmuld %f36,%f26,%f26 + faddd %f16,%f42,%f16 + + faddd %f20,%f50,%f20 + fmuld %f30,%f10,%f10 + + faddd %f22,%f50,%f22 + fmuld %f32,%f12,%f12 + + faddd %f24,%f50,%f24 + fmuld %f34,%f14,%f14 + + faddd %f26,%f50,%f26 + fmuld %f36,%f16,%f16 + + fmuld %f30,%f20,%f20 + faddd %f10,%f40,%f10 + + fmuld %f32,%f22,%f22 + faddd %f12,%f40,%f12 + + fmuld %f34,%f24,%f24 + faddd %f14,%f40,%f14 + + fmuld %f36,%f26,%f26 + faddd %f16,%f40,%f16 + + faddd %f20,%f48,%f20 + fmuld %f30,%f10,%f10 + + faddd %f22,%f48,%f22 + fmuld %f32,%f12,%f12 + + faddd %f24,%f48,%f24 + fmuld %f34,%f14,%f14 + + faddd %f26,%f48,%f26 + fmuld %f36,%f16,%f16 + + fmuld %f30,%f20,%f20 + faddd %f10,%f46,%f10 + + fmuld %f32,%f22,%f22 + faddd %f12,%f46,%f12 + + fmuld %f34,%f24,%f24 + faddd %f14,%f46,%f14 + + fmuld %f36,%f26,%f26 + faddd %f16,%f46,%f16 + + faddd %f20,%f46,%f20 + fmuld %f0,%f10,%f10 + + faddd %f22,%f46,%f22 + fmuld %f2,%f12,%f12 + + faddd %f24,%f46,%f24 + fmuld %f4,%f14,%f14 + addcc %i0,-1,%i0 + + faddd %f26,%f46,%f26 + bg,pt %icc,.start +! delay slot + fmuld %f6,%f16,%f16 + + ba,pt %icc,.end +! 
delay slot + nop + + + .align 16 +.medium: + fmuld %f0,%f56,%f10 + + fmuld %f2,%f56,%f12 + + fmuld %f4,%f56,%f14 + + fmuld %f6,%f56,%f16 + + faddd %f10,%f58,%f10 + st %f11,[%fp+n0] + + faddd %f12,%f58,%f12 + st %f13,[%fp+n1] + + faddd %f14,%f58,%f14 + st %f15,[%fp+n2] + + faddd %f16,%f58,%f16 + st %f17,[%fp+n3] + + fsubd %f10,%f58,%f10 + + fsubd %f12,%f58,%f12 + + fsubd %f14,%f58,%f14 + + fsubd %f16,%f58,%f16 + + fmuld %f10,%f60,%f20 + ld [%fp+n0],%o0 + + fmuld %f12,%f60,%f22 + ld [%fp+n1],%o1 + + fmuld %f14,%f60,%f24 + ld [%fp+n2],%o2 + + fmuld %f16,%f60,%f26 + ld [%fp+n3],%o3 + + fsubd %f0,%f20,%f0 + fmuld %f10,%f62,%f30 + and %o0,1,%o0 + mov %l0,%g1 + + fsubd %f2,%f22,%f2 + fmuld %f12,%f62,%f32 + and %o1,1,%o1 + movrnz %o0,%l4,%l0 ! if (n & 1) exchange ps and pc + + fsubd %f4,%f24,%f4 + fmuld %f14,%f62,%f34 + and %o2,1,%o2 + movrnz %o0,%g1,%l4 + + fsubd %f6,%f26,%f6 + fmuld %f16,%f62,%f36 + and %o3,1,%o3 + mov %l1,%g1 + + fsubd %f0,%f30,%f0 + movrnz %o1,%l5,%l1 + + fsubd %f2,%f32,%f2 + movrnz %o1,%g1,%l5 + + fsubd %f4,%f34,%f4 + mov %l2,%g1 + + fsubd %f6,%f36,%f6 + movrnz %o2,%l6,%l2 + + fmuld %f0,%f0,%f30 + fnegd %f0,%f10 + movrnz %o2,%g1,%l6 + + fmuld %f2,%f2,%f32 + fnegd %f2,%f12 + mov %l3,%g1 + + fmuld %f4,%f4,%f34 + fnegd %f4,%f14 + movrnz %o3,%l7,%l3 + + fmuld %f6,%f6,%f36 + fnegd %f6,%f16 + movrnz %o3,%g1,%l7 + + fmuld %f30,%f54,%f20 + fmovrdnz %o0,%f10,%f0 ! 
if (n & 1) x = -x + + fmuld %f32,%f54,%f22 + fmovrdnz %o1,%f12,%f2 + + fmuld %f34,%f54,%f24 + fmovrdnz %o2,%f14,%f4 + + fmuld %f36,%f54,%f26 + fmovrdnz %o3,%f16,%f6 + + faddd %f20,%f52,%f20 + fmuld %f30,%f44,%f10 + ld [%fp+n0],%o0 + + faddd %f22,%f52,%f22 + fmuld %f32,%f44,%f12 + and %o0,2,%o0 + + faddd %f24,%f52,%f24 + fmuld %f34,%f44,%f14 + sllx %o0,62,%g1 + stx %g1,[%fp+n0] + + faddd %f26,%f52,%f26 + fmuld %f36,%f44,%f16 + ld [%fp+n1],%o1 + + fmuld %f30,%f20,%f20 + faddd %f10,%f42,%f10 + and %o1,2,%o1 + + fmuld %f32,%f22,%f22 + faddd %f12,%f42,%f12 + sllx %o1,62,%g1 + stx %g1,[%fp+n1] + + fmuld %f34,%f24,%f24 + faddd %f14,%f42,%f14 + ld [%fp+n2],%o2 + + fmuld %f36,%f26,%f26 + faddd %f16,%f42,%f16 + and %o2,2,%o2 + + faddd %f20,%f50,%f20 + fmuld %f30,%f10,%f10 + sllx %o2,62,%g1 + stx %g1,[%fp+n2] + + faddd %f22,%f50,%f22 + fmuld %f32,%f12,%f12 + ld [%fp+n3],%o3 + + faddd %f24,%f50,%f24 + fmuld %f34,%f14,%f14 + and %o3,2,%o3 + + faddd %f26,%f50,%f26 + fmuld %f36,%f16,%f16 + sllx %o3,62,%g1 + stx %g1,[%fp+n3] + + fmuld %f30,%f20,%f20 + faddd %f10,%f40,%f10 + + fmuld %f32,%f22,%f22 + faddd %f12,%f40,%f12 + + fmuld %f34,%f24,%f24 + faddd %f14,%f40,%f14 + + fmuld %f36,%f26,%f26 + faddd %f16,%f40,%f16 + + faddd %f20,%f48,%f20 + fmuld %f30,%f10,%f10 + + faddd %f22,%f48,%f22 + fmuld %f32,%f12,%f12 + + faddd %f24,%f48,%f24 + fmuld %f34,%f14,%f14 + + faddd %f26,%f48,%f26 + fmuld %f36,%f16,%f16 + + fmuld %f30,%f20,%f20 + faddd %f10,%f46,%f10 + + fmuld %f32,%f22,%f22 + faddd %f12,%f46,%f12 + + fmuld %f34,%f24,%f24 + faddd %f14,%f46,%f14 + + fmuld %f36,%f26,%f26 + faddd %f16,%f46,%f16 + + faddd %f20,%f46,%f20 + fmuld %f0,%f10,%f10 + ldd [%fp+n0],%f30 + + faddd %f22,%f46,%f22 + fmuld %f2,%f12,%f12 + ldd [%fp+n1],%f32 + + faddd %f24,%f46,%f24 + fmuld %f4,%f14,%f14 + ldd [%fp+n2],%f34 + + faddd %f26,%f46,%f26 + fmuld %f6,%f16,%f16 + ldd [%fp+n3],%f36 + + fxor %f10,%f30,%f10 ! 
if (n & 2) negate s, c + + fxor %f12,%f32,%f12 + + fxor %f14,%f34,%f14 + + fxor %f16,%f36,%f16 + + fxor %f20,%f30,%f20 + + fxor %f22,%f32,%f22 + + fxor %f24,%f34,%f24 + + addcc %i0,-1,%i0 + bg,pt %icc,.start +! delay slot + fxor %f26,%f36,%f26 + + ba,pt %icc,.end +! delay slot + nop + + + .align 32 +.end: + fdtos %f10,%f10 + st %f10,[%l0] + fdtos %f20,%f20 + st %f20,[%l4] + fdtos %f12,%f12 + st %f12,[%l1] + fdtos %f22,%f22 + st %f22,[%l5] + fdtos %f14,%f14 + st %f14,[%l2] + fdtos %f24,%f24 + st %f24,[%l6] + fdtos %f16,%f16 + st %f16,[%l3] + fdtos %f26,%f26 + tst %i5 ! check for huge arguments remaining + be,pt %icc,.exit +! delay slot + st %f26,[%l7] +#ifdef __sparcv9 + ldx [%fp+xsave],%o1 + ldx [%fp+ssave],%o3 + ldx [%fp+csave],%o5 + ldx [%fp+STACK_BIAS+0xb0],%i5 + stx %i5,[%sp+STACK_BIAS+0xb0] +#else + ld [%fp+xsave],%o1 + ld [%fp+ssave],%o3 + ld [%fp+csave],%o5 + ld [%fp+0x5c],%i5 + st %i5,[%sp+0x5c] +#endif + ld [%fp+nsave],%o0 + ld [%fp+sxsave],%o2 + ld [%fp+sssave],%o4 + sra %o2,0,%o2 ! sign-extend for V9 + call __vlibm_vsincos_bigf + sra %o4,0,%o4 ! delay slot + +.exit: + ret + restore + + + .align 32 +.last1: + fdtos %f12,%f12 + st %f12,[%l1] + nop + fdtos %f22,%f22 + st %f22,[%l5] + fzeros %f2 + add %fp,junk,%l5 + add %fp,junk,%l1 +.last2: + fdtos %f14,%f14 + st %f14,[%l2] + nop + fdtos %f24,%f24 + st %f24,[%l6] + fzeros %f4 + add %fp,junk,%l2 + add %fp,junk,%l6 +.last3: + fdtos %f16,%f16 + st %f16,[%l3] + fdtos %f26,%f26 + st %f26,[%l7] + fzeros %f6 + add %fp,junk,%l3 + ba,pt %icc,.cont +! delay slot + add %fp,junk,%l7 + + + .align 16 +.range0: + fcmpgt32 %f38,%f30,%o0 + andcc %o0,2,%g0 + bnz,a,pt %icc,1f ! branch if finite +! delay slot, squashed if branch not taken + mov 1,%i5 ! set biguns + fzeros %f1 + fmuls %f0,%f1,%f0 + st %f0,[%l0] + st %f0,[%l4] +1: + addcc %i0,-1,%i0 + ble,pn %icc,1f +! 
delay slot + nop + ld [%i1],%f0 + add %i1,%i2,%i1 + mov %i3,%l0 + add %i3,%i4,%i3 + fabsd %f0,%f30 + mov %o4,%l4 + add %o4,%o5,%o4 + fcmple32 %f30,%f18,%o0 + andcc %o0,2,%g0 + bz,pn %icc,.range0 +! delay slot + nop + ba,pt %icc,.check1 +! delay slot + fcmple32 %f30,%f8,%o0 +1: + fzero %f0 ! set up dummy argument + add %fp,junk,%l0 + add %fp,junk,%l4 + mov 2,%o0 + ba,pt %icc,.check1 +! delay slot + fzero %f30 + + + .align 16 +.range1: + fcmpgt32 %f38,%f32,%o1 + andcc %o1,2,%g0 + bnz,a,pt %icc,1f ! branch if finite +! delay slot, squashed if branch not taken + mov 1,%i5 ! set biguns + fzeros %f3 + fmuls %f2,%f3,%f2 + st %f2,[%l1] + st %f2,[%l5] +1: + addcc %i0,-1,%i0 + ble,pn %icc,1f +! delay slot + nop + ld [%i1],%f2 + add %i1,%i2,%i1 + mov %i3,%l1 + add %i3,%i4,%i3 + fabsd %f2,%f32 + mov %o4,%l5 + add %o4,%o5,%o4 + fcmple32 %f32,%f18,%o1 + andcc %o1,2,%g0 + bz,pn %icc,.range1 +! delay slot + nop + ba,pt %icc,.check2 +! delay slot + fcmple32 %f32,%f8,%o1 +1: + fzero %f2 ! set up dummy argument + add %fp,junk,%l1 + add %fp,junk,%l5 + mov 2,%o1 + ba,pt %icc,.check2 +! delay slot + fzero %f32 + + + .align 16 +.range2: + fcmpgt32 %f38,%f34,%o2 + andcc %o2,2,%g0 + bnz,a,pt %icc,1f ! branch if finite +! delay slot, squashed if branch not taken + mov 1,%i5 ! set biguns + fzeros %f5 + fmuls %f4,%f5,%f4 + st %f4,[%l2] + st %f4,[%l6] +1: + addcc %i0,-1,%i0 + ble,pn %icc,1f +! delay slot + nop + ld [%i1],%f4 + add %i1,%i2,%i1 + mov %i3,%l2 + add %i3,%i4,%i3 + fabsd %f4,%f34 + mov %o4,%l6 + add %o4,%o5,%o4 + fcmple32 %f34,%f18,%o2 + andcc %o2,2,%g0 + bz,pn %icc,.range2 +! delay slot + nop + ba,pt %icc,.check3 +! delay slot + fcmple32 %f34,%f8,%o2 +1: + fzero %f4 ! set up dummy argument + add %fp,junk,%l2 + add %fp,junk,%l6 + mov 2,%o2 + ba,pt %icc,.check3 +! delay slot + fzero %f34 + + + .align 16 +.range3: + fcmpgt32 %f38,%f36,%o3 + andcc %o3,2,%g0 + bnz,a,pt %icc,1f ! branch if finite +! delay slot, squashed if branch not taken + mov 1,%i5 ! 
set biguns + fzeros %f7 + fmuls %f6,%f7,%f6 + st %f6,[%l3] + st %f6,[%l7] +1: + addcc %i0,-1,%i0 + ble,pn %icc,1f +! delay slot + nop + ld [%i1],%f6 + add %i1,%i2,%i1 + mov %i3,%l3 + add %i3,%i4,%i3 + fabsd %f6,%f36 + mov %o4,%l7 + add %o4,%o5,%o4 + fcmple32 %f36,%f18,%o3 + andcc %o3,2,%g0 + bz,pn %icc,.range3 +! delay slot + nop + ba,pt %icc,.checkprimary +! delay slot + fcmple32 %f36,%f8,%o3 +1: + fzero %f6 ! set up dummy argument + add %fp,junk,%l3 + add %fp,junk,%l7 + mov 2,%o3 + ba,pt %icc,.checkprimary +! delay slot + fzero %f36 + + SET_SIZE(__vsincosf) + diff --git a/usr/src/lib/libmvec/common/vis/__vsinf.S b/usr/src/lib/libmvec/common/vis/__vsinf.S new file mode 100644 index 0000000000..f8d4a44753 --- /dev/null +++ b/usr/src/lib/libmvec/common/vis/__vsinf.S @@ -0,0 +1,2094 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + + .file "__vsinf.S" + +#include "libm.h" + + RO_DATA + .align 64 +constants: + .word 0xbfc55554,0x60000000 + .word 0x3f811077,0xe0000000 + .word 0xbf29956b,0x60000000 + .word 0x3ff00000,0x00000000 + .word 0xbfe00000,0x00000000 + .word 0x3fa55554,0xa0000000 + .word 0xbf56c0c1,0xe0000000 + .word 0x3ef99e24,0xe0000000 + .word 0x3fe45f30,0x6dc9c883 + .word 0x43380000,0x00000000 + .word 0x3ff921fb,0x54400000 + .word 0x3dd0b461,0x1a626331 + .word 0x3f490fdb,0 + .word 0x49c90fdb,0 + .word 0x7f800000,0 + .word 0x80000000,0 + +#define S0 0x0 +#define S1 0x08 +#define S2 0x10 +#define one 0x18 +#define mhalf 0x20 +#define C0 0x28 +#define C1 0x30 +#define C2 0x38 +#define invpio2 0x40 +#define round 0x48 +#define pio2_1 0x50 +#define pio2_t 0x58 +#define thresh1 0x60 +#define thresh2 0x68 +#define inf 0x70 +#define signbit 0x78 + +! local storage indices + +#define xsave STACK_BIAS-0x8 +#define ysave STACK_BIAS-0x10 +#define nsave STACK_BIAS-0x14 +#define sxsave STACK_BIAS-0x18 +#define sysave STACK_BIAS-0x1c +#define junk STACK_BIAS-0x20 +#define n3 STACK_BIAS-0x24 +#define n2 STACK_BIAS-0x28 +#define n1 STACK_BIAS-0x2c +#define n0 STACK_BIAS-0x30 +! sizeof temp storage - must be a multiple of 16 for V9 +#define tmps 0x30 + +! register use + +! i0 n +! i1 x +! i2 stridex +! i3 y +! i4 stridey +! i5 biguns + +! l0 n0 +! l1 n1 +! l2 n2 +! l3 n3 +! l4 +! l5 +! l6 +! l7 + +! the following are 64-bit registers in both V8+ and V9 + +! g1 +! g5 + +! o0 py0 +! o1 py1 +! o2 py2 +! o3 py3 +! o4 +! o5 +! o7 + +! f0 x0 +! f2 x1 +! f4 x2 +! f6 x3 +! f8 thresh1 (pi/4) +! f10 y0 +! f12 y1 +! f14 y2 +! f16 y3 +! f18 thresh2 (2^19 pi) +! f20 +! f22 +! f24 +! f26 +! f28 signbit +! f30 +! f32 +! f34 +! f36 +! f38 inf +! f40 S0 +! f42 S1 +! f44 S2 +! f46 one +! f48 mhalf +! f50 C0 +! f52 C1 +! f54 C2 +! f56 invpio2 +! f58 round +! f60 pio2_1 +! f62 pio2_t + + ENTRY(__vsinf) + save %sp,-SA(MINFRAME)-tmps,%sp + PIC_SETUP(l7) + PIC_SET(l7,constants,l1) + mov %l1,%g1 + wr %g0,0x82,%asi ! 
set %asi for non-faulting loads +#ifdef __sparcv9 + stx %i1,[%fp+xsave] ! save arguments + stx %i3,[%fp+ysave] +#else + st %i1,[%fp+xsave] ! save arguments + st %i3,[%fp+ysave] +#endif + st %i0,[%fp+nsave] + st %i2,[%fp+sxsave] + st %i4,[%fp+sysave] + mov 0,%i5 ! biguns = 0 + ldd [%g1+S0],%f40 ! load constants + ldd [%g1+S1],%f42 + ldd [%g1+S2],%f44 + ldd [%g1+one],%f46 + ldd [%g1+mhalf],%f48 + ldd [%g1+C0],%f50 + ldd [%g1+C1],%f52 + ldd [%g1+C2],%f54 + ldd [%g1+invpio2],%f56 + ldd [%g1+round],%f58 + ldd [%g1+pio2_1],%f60 + ldd [%g1+pio2_t],%f62 + ldd [%g1+thresh1],%f8 + ldd [%g1+thresh2],%f18 + ldd [%g1+inf],%f38 + ldd [%g1+signbit],%f28 + sll %i2,2,%i2 ! scale strides + sll %i4,2,%i4 + fzero %f10 ! loop prologue + add %fp,junk,%o0 + fzero %f12 + add %fp,junk,%o1 + fzero %f14 + add %fp,junk,%o2 + fzero %f16 + ba .start + add %fp,junk,%o3 + +! 16-byte aligned + .align 16 +.start: + ld [%i1],%f0 ! *x + add %i1,%i2,%i1 ! x += stridex + addcc %i0,-1,%i0 + fdtos %f10,%f10 + + st %f10,[%o0] + mov %i3,%o0 ! py0 = y + ble,pn %icc,.last1 +! delay slot + add %i3,%i4,%i3 ! y += stridey + + ld [%i1],%f2 ! *x + add %i1,%i2,%i1 ! x += stridex + addcc %i0,-1,%i0 + fdtos %f12,%f12 + + st %f12,[%o1] + mov %i3,%o1 ! py1 = y + ble,pn %icc,.last2 +! delay slot + add %i3,%i4,%i3 ! y += stridey + + ld [%i1],%f4 ! *x + add %i1,%i2,%i1 ! x += stridex + addcc %i0,-1,%i0 + fdtos %f14,%f14 + + st %f14,[%o2] + mov %i3,%o2 ! py2 = y + ble,pn %icc,.last3 +! delay slot + add %i3,%i4,%i3 ! y += stridey + + ld [%i1],%f6 ! *x + add %i1,%i2,%i1 ! x += stridex + nop + fdtos %f16,%f16 + + st %f16,[%o3] + mov %i3,%o3 ! py3 = y + add %i3,%i4,%i3 ! y += stridey +.cont: + fabsd %f0,%f30 + + fabsd %f2,%f32 + + fabsd %f4,%f34 + + fabsd %f6,%f36 + fcmple32 %f30,%f18,%l0 + + fcmple32 %f32,%f18,%l1 + + fcmple32 %f34,%f18,%l2 + + fcmple32 %f36,%f18,%l3 + nop + +! 16-byte aligned + andcc %l0,2,%g0 + bz,pn %icc,.range0 ! branch if > 2^19 pi +! 
delay slot + fcmple32 %f30,%f8,%l0 + +.check1: + andcc %l1,2,%g0 + bz,pn %icc,.range1 ! branch if > 2^19 pi +! delay slot + fcmple32 %f32,%f8,%l1 + +.check2: + andcc %l2,2,%g0 + bz,pn %icc,.range2 ! branch if > 2^19 pi +! delay slot + fcmple32 %f34,%f8,%l2 + +.check3: + andcc %l3,2,%g0 + bz,pn %icc,.range3 ! branch if > 2^19 pi +! delay slot + fcmple32 %f36,%f8,%l3 + +.checkprimary: + fsmuld %f0,%f0,%f30 + fstod %f0,%f0 + + fsmuld %f2,%f2,%f32 + fstod %f2,%f2 + and %l0,%l1,%o4 + + fsmuld %f4,%f4,%f34 + fstod %f4,%f4 + + fsmuld %f6,%f6,%f36 + fstod %f6,%f6 + and %l2,%l3,%o5 + + fmuld %f30,%f44,%f10 + and %o4,%o5,%o5 + + fmuld %f32,%f44,%f12 + andcc %o5,2,%g0 + bz,pn %icc,.medium ! branch if any argument is > pi/4 +! delay slot + nop + + fmuld %f34,%f44,%f14 + + fmuld %f36,%f44,%f16 + + fmuld %f30,%f40,%f20 + faddd %f10,%f42,%f10 + + fmuld %f32,%f40,%f22 + faddd %f12,%f42,%f12 + + fmuld %f34,%f40,%f24 + faddd %f14,%f42,%f14 + + fmuld %f36,%f40,%f26 + faddd %f16,%f42,%f16 + + fmuld %f30,%f30,%f30 + faddd %f20,%f46,%f20 + + fmuld %f32,%f32,%f32 + faddd %f22,%f46,%f22 + + fmuld %f34,%f34,%f34 + faddd %f24,%f46,%f24 + + fmuld %f36,%f36,%f36 + faddd %f26,%f46,%f26 + + fmuld %f30,%f10,%f10 + + fmuld %f32,%f12,%f12 + + fmuld %f34,%f14,%f14 + + fmuld %f36,%f16,%f16 + + faddd %f10,%f20,%f10 + + faddd %f12,%f22,%f12 + + faddd %f14,%f24,%f14 + + faddd %f16,%f26,%f16 + + fmuld %f0,%f10,%f10 + + fmuld %f2,%f12,%f12 + + fmuld %f4,%f14,%f14 + + addcc %i0,-1,%i0 + bg,pt %icc,.start +! delay slot + fmuld %f6,%f16,%f16 + + ba,pt %icc,.end +! 
delay slot + nop + + + .align 16 +.medium: + fmuld %f0,%f56,%f10 + + fmuld %f2,%f56,%f12 + + fmuld %f4,%f56,%f14 + + fmuld %f6,%f56,%f16 + + faddd %f10,%f58,%f10 + st %f11,[%fp+n0] + + faddd %f12,%f58,%f12 + st %f13,[%fp+n1] + + faddd %f14,%f58,%f14 + st %f15,[%fp+n2] + + faddd %f16,%f58,%f16 + st %f17,[%fp+n3] + + fsubd %f10,%f58,%f10 + + fsubd %f12,%f58,%f12 + + fsubd %f14,%f58,%f14 + + fsubd %f16,%f58,%f16 + + fmuld %f10,%f60,%f20 + ld [%fp+n0],%l0 + + fmuld %f12,%f60,%f22 + ld [%fp+n1],%l1 + + fmuld %f14,%f60,%f24 + ld [%fp+n2],%l2 + + fmuld %f16,%f60,%f26 + ld [%fp+n3],%l3 + + fsubd %f0,%f20,%f0 + fmuld %f10,%f62,%f30 + + fsubd %f2,%f22,%f2 + fmuld %f12,%f62,%f32 + + fsubd %f4,%f24,%f4 + fmuld %f14,%f62,%f34 + + fsubd %f6,%f26,%f6 + fmuld %f16,%f62,%f36 + + fsubd %f0,%f30,%f0 + + fsubd %f2,%f32,%f2 + + fsubd %f4,%f34,%f4 + + fsubd %f6,%f36,%f6 + andcc %l0,1,%g0 + + fmuld %f0,%f0,%f30 + bz,pn %icc,.case8 +! delay slot + andcc %l1,1,%g0 + + fmuld %f2,%f2,%f32 + bz,pn %icc,.case4 +! delay slot + andcc %l2,1,%g0 + + fmuld %f4,%f4,%f34 + bz,pn %icc,.case2 +! delay slot + andcc %l3,1,%g0 + + fmuld %f6,%f6,%f36 + bz,pn %icc,.case1 +! delay slot + nop + +!.case0: + fmuld %f30,%f54,%f10 ! cos(x0) + fzero %f0 + + fmuld %f32,%f54,%f12 ! cos(x1) + fzero %f2 + + fmuld %f34,%f54,%f14 ! cos(x2) + fzero %f4 + + fmuld %f36,%f54,%f16 ! 
cos(x3) + fzero %f6 + + fmuld %f30,%f48,%f20 + faddd %f10,%f52,%f10 + + fmuld %f32,%f48,%f22 + faddd %f12,%f52,%f12 + + fmuld %f34,%f48,%f24 + faddd %f14,%f52,%f14 + + fmuld %f36,%f48,%f26 + faddd %f16,%f52,%f16 + + fmuld %f30,%f10,%f10 + faddd %f20,%f46,%f20 + + fmuld %f32,%f12,%f12 + faddd %f22,%f46,%f22 + + fmuld %f34,%f14,%f14 + faddd %f24,%f46,%f24 + + fmuld %f36,%f16,%f16 + faddd %f26,%f46,%f26 + + fmuld %f30,%f30,%f30 + faddd %f10,%f50,%f10 + and %l0,2,%g1 + + fmuld %f32,%f32,%f32 + faddd %f12,%f50,%f12 + and %l1,2,%g5 + + fmuld %f34,%f34,%f34 + faddd %f14,%f50,%f14 + and %l2,2,%o4 + + fmuld %f36,%f36,%f36 + faddd %f16,%f50,%f16 + and %l3,2,%o5 + + fmuld %f30,%f10,%f10 + fmovrdnz %g1,%f28,%f0 + + fmuld %f32,%f12,%f12 + fmovrdnz %g5,%f28,%f2 + + fmuld %f34,%f14,%f14 + fmovrdnz %o4,%f28,%f4 + + fmuld %f36,%f16,%f16 + fmovrdnz %o5,%f28,%f6 + + faddd %f10,%f20,%f10 + + faddd %f12,%f22,%f12 + + faddd %f14,%f24,%f14 + + faddd %f16,%f26,%f16 + + fxor %f10,%f0,%f10 + + fxor %f12,%f2,%f12 + + fxor %f14,%f4,%f14 + + addcc %i0,-1,%i0 + bg,pt %icc,.start +! delay slot + fxor %f16,%f6,%f16 + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case1: + fmuld %f30,%f54,%f10 ! cos(x0) + fzero %f0 + + fmuld %f32,%f54,%f12 ! cos(x1) + fzero %f2 + + fmuld %f34,%f54,%f14 ! cos(x2) + fzero %f4 + + fmuld %f36,%f44,%f16 ! 
sin(x3) + + fmuld %f30,%f48,%f20 + faddd %f10,%f52,%f10 + + fmuld %f32,%f48,%f22 + faddd %f12,%f52,%f12 + + fmuld %f34,%f48,%f24 + faddd %f14,%f52,%f14 + + fmuld %f36,%f40,%f26 + faddd %f16,%f42,%f16 + + fmuld %f30,%f10,%f10 + faddd %f20,%f46,%f20 + + fmuld %f32,%f12,%f12 + faddd %f22,%f46,%f22 + + fmuld %f34,%f14,%f14 + faddd %f24,%f46,%f24 + + fmuld %f36,%f36,%f36 + faddd %f26,%f46,%f26 + + fmuld %f30,%f30,%f30 + faddd %f10,%f50,%f10 + and %l0,2,%g1 + + fmuld %f32,%f32,%f32 + faddd %f12,%f50,%f12 + and %l1,2,%g5 + + fmuld %f34,%f34,%f34 + faddd %f14,%f50,%f14 + and %l2,2,%o4 + + fmuld %f36,%f16,%f16 + fzero %f36 + + fmuld %f30,%f10,%f10 + fmovrdnz %g1,%f28,%f0 + + fmuld %f32,%f12,%f12 + fmovrdnz %g5,%f28,%f2 + + fmuld %f34,%f14,%f14 + fmovrdnz %o4,%f28,%f4 + + faddd %f16,%f26,%f16 + and %l3,2,%o5 + + faddd %f10,%f20,%f10 + + faddd %f12,%f22,%f12 + + faddd %f14,%f24,%f14 + + fmuld %f6,%f16,%f16 + fmovrdnz %o5,%f28,%f36 + + fxor %f10,%f0,%f10 + + fxor %f12,%f2,%f12 + + fxor %f14,%f4,%f14 + + addcc %i0,-1,%i0 + bg,pt %icc,.start +! delay slot + fxor %f16,%f36,%f16 + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case2: + fmuld %f6,%f6,%f36 + bz,pn %icc,.case3 +! delay slot + nop + + fmuld %f30,%f54,%f10 ! cos(x0) + fzero %f0 + + fmuld %f32,%f54,%f12 ! cos(x1) + fzero %f2 + + fmuld %f34,%f44,%f14 ! sin(x2) + + fmuld %f36,%f54,%f16 ! 
cos(x3) + fzero %f6 + + fmuld %f30,%f48,%f20 + faddd %f10,%f52,%f10 + + fmuld %f32,%f48,%f22 + faddd %f12,%f52,%f12 + + fmuld %f34,%f40,%f24 + faddd %f14,%f42,%f14 + + fmuld %f36,%f48,%f26 + faddd %f16,%f52,%f16 + + fmuld %f30,%f10,%f10 + faddd %f20,%f46,%f20 + + fmuld %f32,%f12,%f12 + faddd %f22,%f46,%f22 + + fmuld %f34,%f34,%f34 + faddd %f24,%f46,%f24 + + fmuld %f36,%f16,%f16 + faddd %f26,%f46,%f26 + + fmuld %f30,%f30,%f30 + faddd %f10,%f50,%f10 + and %l0,2,%g1 + + fmuld %f32,%f32,%f32 + faddd %f12,%f50,%f12 + and %l1,2,%g5 + + fmuld %f34,%f14,%f14 + fzero %f34 + + fmuld %f36,%f36,%f36 + faddd %f16,%f50,%f16 + and %l3,2,%o5 + + fmuld %f30,%f10,%f10 + fmovrdnz %g1,%f28,%f0 + + fmuld %f32,%f12,%f12 + fmovrdnz %g5,%f28,%f2 + + faddd %f14,%f24,%f14 + and %l2,2,%o4 + + fmuld %f36,%f16,%f16 + fmovrdnz %o5,%f28,%f6 + + faddd %f10,%f20,%f10 + + faddd %f12,%f22,%f12 + + fmuld %f4,%f14,%f14 + fmovrdnz %o4,%f28,%f34 + + faddd %f16,%f26,%f16 + + fxor %f10,%f0,%f10 + + fxor %f12,%f2,%f12 + + fxor %f14,%f34,%f14 + + addcc %i0,-1,%i0 + bg,pt %icc,.start +! delay slot + fxor %f16,%f6,%f16 + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case3: + fmuld %f30,%f54,%f10 ! cos(x0) + fzero %f0 + + fmuld %f32,%f54,%f12 ! cos(x1) + fzero %f2 + + fmuld %f34,%f44,%f14 ! sin(x2) + + fmuld %f36,%f44,%f16 ! 
sin(x3) + + fmuld %f30,%f48,%f20 + faddd %f10,%f52,%f10 + + fmuld %f32,%f48,%f22 + faddd %f12,%f52,%f12 + + fmuld %f34,%f40,%f24 + faddd %f14,%f42,%f14 + + fmuld %f36,%f40,%f26 + faddd %f16,%f42,%f16 + + fmuld %f30,%f10,%f10 + faddd %f20,%f46,%f20 + + fmuld %f32,%f12,%f12 + faddd %f22,%f46,%f22 + + fmuld %f34,%f34,%f34 + faddd %f24,%f46,%f24 + + fmuld %f36,%f36,%f36 + faddd %f26,%f46,%f26 + + fmuld %f30,%f30,%f30 + faddd %f10,%f50,%f10 + and %l0,2,%g1 + + fmuld %f32,%f32,%f32 + faddd %f12,%f50,%f12 + and %l1,2,%g5 + + fmuld %f34,%f14,%f14 + fzero %f34 + + fmuld %f36,%f16,%f16 + fzero %f36 + + fmuld %f30,%f10,%f10 + fmovrdnz %g1,%f28,%f0 + + fmuld %f32,%f12,%f12 + fmovrdnz %g5,%f28,%f2 + + faddd %f14,%f24,%f14 + and %l2,2,%o4 + + faddd %f16,%f26,%f16 + and %l3,2,%o5 + + faddd %f10,%f20,%f10 + + faddd %f12,%f22,%f12 + + fmuld %f4,%f14,%f14 + fmovrdnz %o4,%f28,%f34 + + fmuld %f6,%f16,%f16 + fmovrdnz %o5,%f28,%f36 + + fxor %f10,%f0,%f10 + + fxor %f12,%f2,%f12 + + fxor %f14,%f34,%f14 + + addcc %i0,-1,%i0 + bg,pt %icc,.start +! delay slot + fxor %f16,%f36,%f16 + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case4: + fmuld %f4,%f4,%f34 + bz,pn %icc,.case6 +! delay slot + andcc %l3,1,%g0 + + fmuld %f6,%f6,%f36 + bz,pn %icc,.case5 +! delay slot + nop + + fmuld %f30,%f54,%f10 ! cos(x0) + fzero %f0 + + fmuld %f32,%f44,%f12 ! sin(x1) + + fmuld %f34,%f54,%f14 ! cos(x2) + fzero %f4 + + fmuld %f36,%f54,%f16 ! 
cos(x3) + fzero %f6 + + fmuld %f30,%f48,%f20 + faddd %f10,%f52,%f10 + + fmuld %f32,%f40,%f22 + faddd %f12,%f42,%f12 + + fmuld %f34,%f48,%f24 + faddd %f14,%f52,%f14 + + fmuld %f36,%f48,%f26 + faddd %f16,%f52,%f16 + + fmuld %f30,%f10,%f10 + faddd %f20,%f46,%f20 + + fmuld %f32,%f32,%f32 + faddd %f22,%f46,%f22 + + fmuld %f34,%f14,%f14 + faddd %f24,%f46,%f24 + + fmuld %f36,%f16,%f16 + faddd %f26,%f46,%f26 + + fmuld %f30,%f30,%f30 + faddd %f10,%f50,%f10 + and %l0,2,%g1 + + fmuld %f32,%f12,%f12 + fzero %f32 + + fmuld %f34,%f34,%f34 + faddd %f14,%f50,%f14 + and %l2,2,%o4 + + fmuld %f36,%f36,%f36 + faddd %f16,%f50,%f16 + and %l3,2,%o5 + + fmuld %f30,%f10,%f10 + fmovrdnz %g1,%f28,%f0 + + faddd %f12,%f22,%f12 + and %l1,2,%g5 + + fmuld %f34,%f14,%f14 + fmovrdnz %o4,%f28,%f4 + + fmuld %f36,%f16,%f16 + fmovrdnz %o5,%f28,%f6 + + faddd %f10,%f20,%f10 + + fmuld %f2,%f12,%f12 + fmovrdnz %g5,%f28,%f32 + + faddd %f14,%f24,%f14 + + faddd %f16,%f26,%f16 + + fxor %f10,%f0,%f10 + + fxor %f12,%f32,%f12 + + fxor %f14,%f4,%f14 + + addcc %i0,-1,%i0 + bg,pt %icc,.start +! delay slot + fxor %f16,%f6,%f16 + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case5: + fmuld %f30,%f54,%f10 ! cos(x0) + fzero %f0 + + fmuld %f32,%f44,%f12 ! sin(x1) + + fmuld %f34,%f54,%f14 ! cos(x2) + fzero %f4 + + fmuld %f36,%f44,%f16 ! 
sin(x3) + + fmuld %f30,%f48,%f20 + faddd %f10,%f52,%f10 + + fmuld %f32,%f40,%f22 + faddd %f12,%f42,%f12 + + fmuld %f34,%f48,%f24 + faddd %f14,%f52,%f14 + + fmuld %f36,%f40,%f26 + faddd %f16,%f42,%f16 + + fmuld %f30,%f10,%f10 + faddd %f20,%f46,%f20 + + fmuld %f32,%f32,%f32 + faddd %f22,%f46,%f22 + + fmuld %f34,%f14,%f14 + faddd %f24,%f46,%f24 + + fmuld %f36,%f36,%f36 + faddd %f26,%f46,%f26 + + fmuld %f30,%f30,%f30 + faddd %f10,%f50,%f10 + and %l0,2,%g1 + + fmuld %f32,%f12,%f12 + fzero %f32 + + fmuld %f34,%f34,%f34 + faddd %f14,%f50,%f14 + and %l2,2,%o4 + + fmuld %f36,%f16,%f16 + fzero %f36 + + fmuld %f30,%f10,%f10 + fmovrdnz %g1,%f28,%f0 + + faddd %f12,%f22,%f12 + and %l1,2,%g5 + + fmuld %f34,%f14,%f14 + fmovrdnz %o4,%f28,%f4 + + faddd %f16,%f26,%f16 + and %l3,2,%o5 + + faddd %f10,%f20,%f10 + + fmuld %f2,%f12,%f12 + fmovrdnz %g5,%f28,%f32 + + faddd %f14,%f24,%f14 + + fmuld %f6,%f16,%f16 + fmovrdnz %o5,%f28,%f36 + + fxor %f10,%f0,%f10 + + fxor %f12,%f32,%f12 + + fxor %f14,%f4,%f14 + + addcc %i0,-1,%i0 + bg,pt %icc,.start +! delay slot + fxor %f16,%f36,%f16 + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case6: + fmuld %f6,%f6,%f36 + bz,pn %icc,.case7 +! delay slot + nop + + fmuld %f30,%f54,%f10 ! cos(x0) + fzero %f0 + + fmuld %f32,%f44,%f12 ! sin(x1) + + fmuld %f34,%f44,%f14 ! sin(x2) + + fmuld %f36,%f54,%f16 ! 
cos(x3) + fzero %f6 + + fmuld %f30,%f48,%f20 + faddd %f10,%f52,%f10 + + fmuld %f32,%f40,%f22 + faddd %f12,%f42,%f12 + + fmuld %f34,%f40,%f24 + faddd %f14,%f42,%f14 + + fmuld %f36,%f48,%f26 + faddd %f16,%f52,%f16 + + fmuld %f30,%f10,%f10 + faddd %f20,%f46,%f20 + + fmuld %f32,%f32,%f32 + faddd %f22,%f46,%f22 + + fmuld %f34,%f34,%f34 + faddd %f24,%f46,%f24 + + fmuld %f36,%f16,%f16 + faddd %f26,%f46,%f26 + + fmuld %f30,%f30,%f30 + faddd %f10,%f50,%f10 + and %l0,2,%g1 + + fmuld %f32,%f12,%f12 + fzero %f32 + + fmuld %f34,%f14,%f14 + fzero %f34 + + fmuld %f36,%f36,%f36 + faddd %f16,%f50,%f16 + and %l3,2,%o5 + + fmuld %f30,%f10,%f10 + fmovrdnz %g1,%f28,%f0 + + faddd %f12,%f22,%f12 + and %l1,2,%g5 + + faddd %f14,%f24,%f14 + and %l2,2,%o4 + + fmuld %f36,%f16,%f16 + fmovrdnz %o5,%f28,%f6 + + faddd %f10,%f20,%f10 + + fmuld %f2,%f12,%f12 + fmovrdnz %g5,%f28,%f32 + + fmuld %f4,%f14,%f14 + fmovrdnz %o4,%f28,%f34 + + faddd %f16,%f26,%f16 + + fxor %f10,%f0,%f10 + + fxor %f12,%f32,%f12 + + fxor %f14,%f34,%f14 + + addcc %i0,-1,%i0 + bg,pt %icc,.start +! delay slot + fxor %f16,%f6,%f16 + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case7: + fmuld %f30,%f54,%f10 ! cos(x0) + fzero %f0 + + fmuld %f32,%f44,%f12 ! sin(x1) + + fmuld %f34,%f44,%f14 ! sin(x2) + + fmuld %f36,%f44,%f16 ! 
sin(x3) + + fmuld %f30,%f48,%f20 + faddd %f10,%f52,%f10 + + fmuld %f32,%f40,%f22 + faddd %f12,%f42,%f12 + + fmuld %f34,%f40,%f24 + faddd %f14,%f42,%f14 + + fmuld %f36,%f40,%f26 + faddd %f16,%f42,%f16 + + fmuld %f30,%f10,%f10 + faddd %f20,%f46,%f20 + + fmuld %f32,%f32,%f32 + faddd %f22,%f46,%f22 + + fmuld %f34,%f34,%f34 + faddd %f24,%f46,%f24 + + fmuld %f36,%f36,%f36 + faddd %f26,%f46,%f26 + + fmuld %f30,%f30,%f30 + faddd %f10,%f50,%f10 + and %l0,2,%g1 + + fmuld %f32,%f12,%f12 + fzero %f32 + + fmuld %f34,%f14,%f14 + fzero %f34 + + fmuld %f36,%f16,%f16 + fzero %f36 + + fmuld %f30,%f10,%f10 + fmovrdnz %g1,%f28,%f0 + + faddd %f12,%f22,%f12 + and %l1,2,%g5 + + faddd %f14,%f24,%f14 + and %l2,2,%o4 + + faddd %f16,%f26,%f16 + and %l3,2,%o5 + + faddd %f10,%f20,%f10 + + fmuld %f2,%f12,%f12 + fmovrdnz %g5,%f28,%f32 + + fmuld %f4,%f14,%f14 + fmovrdnz %o4,%f28,%f34 + + fmuld %f6,%f16,%f16 + fmovrdnz %o5,%f28,%f36 + + fxor %f10,%f0,%f10 + + fxor %f12,%f32,%f12 + + fxor %f14,%f34,%f14 + + addcc %i0,-1,%i0 + bg,pt %icc,.start +! delay slot + fxor %f16,%f36,%f16 + + ba,pt %icc,.end +! delay slot + nop + + + .align 16 +.case8: + fmuld %f2,%f2,%f32 + bz,pn %icc,.case12 +! delay slot + andcc %l2,1,%g0 + + fmuld %f4,%f4,%f34 + bz,pn %icc,.case10 +! delay slot + andcc %l3,1,%g0 + + fmuld %f6,%f6,%f36 + bz,pn %icc,.case9 +! delay slot + nop + + fmuld %f30,%f44,%f10 ! sin(x0) + + fmuld %f32,%f54,%f12 ! cos(x1) + fzero %f2 + + fmuld %f34,%f54,%f14 ! cos(x2) + fzero %f4 + + fmuld %f36,%f54,%f16 ! 
cos(x3) + fzero %f6 + + fmuld %f30,%f40,%f20 + faddd %f10,%f42,%f10 + + fmuld %f32,%f48,%f22 + faddd %f12,%f52,%f12 + + fmuld %f34,%f48,%f24 + faddd %f14,%f52,%f14 + + fmuld %f36,%f48,%f26 + faddd %f16,%f52,%f16 + + fmuld %f30,%f30,%f30 + faddd %f20,%f46,%f20 + + fmuld %f32,%f12,%f12 + faddd %f22,%f46,%f22 + + fmuld %f34,%f14,%f14 + faddd %f24,%f46,%f24 + + fmuld %f36,%f16,%f16 + faddd %f26,%f46,%f26 + + fmuld %f30,%f10,%f10 + fzero %f30 + + fmuld %f32,%f32,%f32 + faddd %f12,%f50,%f12 + and %l1,2,%g5 + + fmuld %f34,%f34,%f34 + faddd %f14,%f50,%f14 + and %l2,2,%o4 + + fmuld %f36,%f36,%f36 + faddd %f16,%f50,%f16 + and %l3,2,%o5 + + faddd %f10,%f20,%f10 + and %l0,2,%g1 + + fmuld %f32,%f12,%f12 + fmovrdnz %g5,%f28,%f2 + + fmuld %f34,%f14,%f14 + fmovrdnz %o4,%f28,%f4 + + fmuld %f36,%f16,%f16 + fmovrdnz %o5,%f28,%f6 + + fmuld %f0,%f10,%f10 + fmovrdnz %g1,%f28,%f30 + + faddd %f12,%f22,%f12 + + faddd %f14,%f24,%f14 + + faddd %f16,%f26,%f16 + + fxor %f10,%f30,%f10 + + fxor %f12,%f2,%f12 + + fxor %f14,%f4,%f14 + + addcc %i0,-1,%i0 + bg,pt %icc,.start +! delay slot + fxor %f16,%f6,%f16 + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case9: + fmuld %f30,%f44,%f10 ! sin(x0) + + fmuld %f32,%f54,%f12 ! cos(x1) + fzero %f2 + + fmuld %f34,%f54,%f14 ! cos(x2) + fzero %f4 + + fmuld %f36,%f44,%f16 ! 
sin(x3) + + fmuld %f30,%f40,%f20 + faddd %f10,%f42,%f10 + + fmuld %f32,%f48,%f22 + faddd %f12,%f52,%f12 + + fmuld %f34,%f48,%f24 + faddd %f14,%f52,%f14 + + fmuld %f36,%f40,%f26 + faddd %f16,%f42,%f16 + + fmuld %f30,%f30,%f30 + faddd %f20,%f46,%f20 + + fmuld %f32,%f12,%f12 + faddd %f22,%f46,%f22 + + fmuld %f34,%f14,%f14 + faddd %f24,%f46,%f24 + + fmuld %f36,%f36,%f36 + faddd %f26,%f46,%f26 + + fmuld %f30,%f10,%f10 + fzero %f30 + + fmuld %f32,%f32,%f32 + faddd %f12,%f50,%f12 + and %l1,2,%g5 + + fmuld %f34,%f34,%f34 + faddd %f14,%f50,%f14 + and %l2,2,%o4 + + fmuld %f36,%f16,%f16 + fzero %f36 + + faddd %f10,%f20,%f10 + and %l0,2,%g1 + + fmuld %f32,%f12,%f12 + fmovrdnz %g5,%f28,%f2 + + fmuld %f34,%f14,%f14 + fmovrdnz %o4,%f28,%f4 + + faddd %f16,%f26,%f16 + and %l3,2,%o5 + + fmuld %f0,%f10,%f10 + fmovrdnz %g1,%f28,%f30 + + faddd %f12,%f22,%f12 + + faddd %f14,%f24,%f14 + + fmuld %f6,%f16,%f16 + fmovrdnz %o5,%f28,%f36 + + fxor %f10,%f30,%f10 + + fxor %f12,%f2,%f12 + + fxor %f14,%f4,%f14 + + addcc %i0,-1,%i0 + bg,pt %icc,.start +! delay slot + fxor %f16,%f36,%f16 + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case10: + fmuld %f6,%f6,%f36 + bz,pn %icc,.case11 +! delay slot + nop + + fmuld %f30,%f44,%f10 ! sin(x0) + + fmuld %f32,%f54,%f12 ! cos(x1) + fzero %f2 + + fmuld %f34,%f44,%f14 ! sin(x2) + + fmuld %f36,%f54,%f16 ! 
cos(x3) + fzero %f6 + + fmuld %f30,%f40,%f20 + faddd %f10,%f42,%f10 + + fmuld %f32,%f48,%f22 + faddd %f12,%f52,%f12 + + fmuld %f34,%f40,%f24 + faddd %f14,%f42,%f14 + + fmuld %f36,%f48,%f26 + faddd %f16,%f52,%f16 + + fmuld %f30,%f30,%f30 + faddd %f20,%f46,%f20 + + fmuld %f32,%f12,%f12 + faddd %f22,%f46,%f22 + + fmuld %f34,%f34,%f34 + faddd %f24,%f46,%f24 + + fmuld %f36,%f16,%f16 + faddd %f26,%f46,%f26 + + fmuld %f30,%f10,%f10 + fzero %f30 + + fmuld %f32,%f32,%f32 + faddd %f12,%f50,%f12 + and %l1,2,%g5 + + fmuld %f34,%f14,%f14 + fzero %f34 + + fmuld %f36,%f36,%f36 + faddd %f16,%f50,%f16 + and %l3,2,%o5 + + faddd %f10,%f20,%f10 + and %l0,2,%g1 + + fmuld %f32,%f12,%f12 + fmovrdnz %g5,%f28,%f2 + + faddd %f14,%f24,%f14 + and %l2,2,%o4 + + fmuld %f36,%f16,%f16 + fmovrdnz %o5,%f28,%f6 + + fmuld %f0,%f10,%f10 + fmovrdnz %g1,%f28,%f30 + + faddd %f12,%f22,%f12 + + fmuld %f4,%f14,%f14 + fmovrdnz %o4,%f28,%f34 + + faddd %f16,%f26,%f16 + + fxor %f10,%f30,%f10 + + fxor %f12,%f2,%f12 + + fxor %f14,%f34,%f14 + + addcc %i0,-1,%i0 + bg,pt %icc,.start +! delay slot + fxor %f16,%f6,%f16 + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case11: + fmuld %f30,%f44,%f10 ! sin(x0) + + fmuld %f32,%f54,%f12 ! cos(x1) + fzero %f2 + + fmuld %f34,%f44,%f14 ! sin(x2) + + fmuld %f36,%f44,%f16 ! 
sin(x3) + + fmuld %f30,%f40,%f20 + faddd %f10,%f42,%f10 + + fmuld %f32,%f48,%f22 + faddd %f12,%f52,%f12 + + fmuld %f34,%f40,%f24 + faddd %f14,%f42,%f14 + + fmuld %f36,%f40,%f26 + faddd %f16,%f42,%f16 + + fmuld %f30,%f30,%f30 + faddd %f20,%f46,%f20 + + fmuld %f32,%f12,%f12 + faddd %f22,%f46,%f22 + + fmuld %f34,%f34,%f34 + faddd %f24,%f46,%f24 + + fmuld %f36,%f36,%f36 + faddd %f26,%f46,%f26 + + fmuld %f30,%f10,%f10 + fzero %f30 + + fmuld %f32,%f32,%f32 + faddd %f12,%f50,%f12 + and %l1,2,%g5 + + fmuld %f34,%f14,%f14 + fzero %f34 + + fmuld %f36,%f16,%f16 + fzero %f36 + + faddd %f10,%f20,%f10 + and %l0,2,%g1 + + fmuld %f32,%f12,%f12 + fmovrdnz %g5,%f28,%f2 + + faddd %f14,%f24,%f14 + and %l2,2,%o4 + + faddd %f16,%f26,%f16 + and %l3,2,%o5 + + fmuld %f0,%f10,%f10 + fmovrdnz %g1,%f28,%f30 + + faddd %f12,%f22,%f12 + + fmuld %f4,%f14,%f14 + fmovrdnz %o4,%f28,%f34 + + fmuld %f6,%f16,%f16 + fmovrdnz %o5,%f28,%f36 + + fxor %f10,%f30,%f10 + + fxor %f12,%f2,%f12 + + fxor %f14,%f34,%f14 + + addcc %i0,-1,%i0 + bg,pt %icc,.start +! delay slot + fxor %f16,%f36,%f16 + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case12: + fmuld %f4,%f4,%f34 + bz,pn %icc,.case14 +! delay slot + andcc %l3,1,%g0 + + fmuld %f6,%f6,%f36 + bz,pn %icc,.case13 +! delay slot + nop + + fmuld %f30,%f44,%f10 ! sin(x0) + + fmuld %f32,%f44,%f12 ! sin(x1) + + fmuld %f34,%f54,%f14 ! cos(x2) + fzero %f4 + + fmuld %f36,%f54,%f16 ! 
cos(x3) + fzero %f6 + + fmuld %f30,%f40,%f20 + faddd %f10,%f42,%f10 + + fmuld %f32,%f40,%f22 + faddd %f12,%f42,%f12 + + fmuld %f34,%f48,%f24 + faddd %f14,%f52,%f14 + + fmuld %f36,%f48,%f26 + faddd %f16,%f52,%f16 + + fmuld %f30,%f30,%f30 + faddd %f20,%f46,%f20 + + fmuld %f32,%f32,%f32 + faddd %f22,%f46,%f22 + + fmuld %f34,%f14,%f14 + faddd %f24,%f46,%f24 + + fmuld %f36,%f16,%f16 + faddd %f26,%f46,%f26 + + fmuld %f30,%f10,%f10 + fzero %f30 + + fmuld %f32,%f12,%f12 + fzero %f32 + + fmuld %f34,%f34,%f34 + faddd %f14,%f50,%f14 + and %l2,2,%o4 + + fmuld %f36,%f36,%f36 + faddd %f16,%f50,%f16 + and %l3,2,%o5 + + faddd %f10,%f20,%f10 + and %l0,2,%g1 + + faddd %f12,%f22,%f12 + and %l1,2,%g5 + + fmuld %f34,%f14,%f14 + fmovrdnz %o4,%f28,%f4 + + fmuld %f36,%f16,%f16 + fmovrdnz %o5,%f28,%f6 + + fmuld %f0,%f10,%f10 + fmovrdnz %g1,%f28,%f30 + + fmuld %f2,%f12,%f12 + fmovrdnz %g5,%f28,%f32 + + faddd %f14,%f24,%f14 + + faddd %f16,%f26,%f16 + + fxor %f10,%f30,%f10 + + fxor %f12,%f32,%f12 + + fxor %f14,%f4,%f14 + + addcc %i0,-1,%i0 + bg,pt %icc,.start +! delay slot + fxor %f16,%f6,%f16 + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case13: + fmuld %f30,%f44,%f10 ! sin(x0) + + fmuld %f32,%f44,%f12 ! sin(x1) + + fmuld %f34,%f54,%f14 ! cos(x2) + fzero %f4 + + fmuld %f36,%f44,%f16 ! 
sin(x3) + + fmuld %f30,%f40,%f20 + faddd %f10,%f42,%f10 + + fmuld %f32,%f40,%f22 + faddd %f12,%f42,%f12 + + fmuld %f34,%f48,%f24 + faddd %f14,%f52,%f14 + + fmuld %f36,%f40,%f26 + faddd %f16,%f42,%f16 + + fmuld %f30,%f30,%f30 + faddd %f20,%f46,%f20 + + fmuld %f32,%f32,%f32 + faddd %f22,%f46,%f22 + + fmuld %f34,%f14,%f14 + faddd %f24,%f46,%f24 + + fmuld %f36,%f36,%f36 + faddd %f26,%f46,%f26 + + fmuld %f30,%f10,%f10 + fzero %f30 + + fmuld %f32,%f12,%f12 + fzero %f32 + + fmuld %f34,%f34,%f34 + faddd %f14,%f50,%f14 + and %l2,2,%o4 + + fmuld %f36,%f16,%f16 + fzero %f36 + + faddd %f10,%f20,%f10 + and %l0,2,%g1 + + faddd %f12,%f22,%f12 + and %l1,2,%g5 + + fmuld %f34,%f14,%f14 + fmovrdnz %o4,%f28,%f4 + + faddd %f16,%f26,%f16 + and %l3,2,%o5 + + fmuld %f0,%f10,%f10 + fmovrdnz %g1,%f28,%f30 + + fmuld %f2,%f12,%f12 + fmovrdnz %g5,%f28,%f32 + + faddd %f14,%f24,%f14 + + fmuld %f6,%f16,%f16 + fmovrdnz %o5,%f28,%f36 + + fxor %f10,%f30,%f10 + + fxor %f12,%f32,%f12 + + fxor %f14,%f4,%f14 + + addcc %i0,-1,%i0 + bg,pt %icc,.start +! delay slot + fxor %f16,%f36,%f16 + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case14: + fmuld %f6,%f6,%f36 + bz,pn %icc,.case15 +! delay slot + nop + + fmuld %f30,%f44,%f10 ! sin(x0) + + fmuld %f32,%f44,%f12 ! sin(x1) + + fmuld %f34,%f44,%f14 ! sin(x2) + + fmuld %f36,%f54,%f16 ! 
cos(x3) + fzero %f6 + + fmuld %f30,%f40,%f20 + faddd %f10,%f42,%f10 + + fmuld %f32,%f40,%f22 + faddd %f12,%f42,%f12 + + fmuld %f34,%f40,%f24 + faddd %f14,%f42,%f14 + + fmuld %f36,%f48,%f26 + faddd %f16,%f52,%f16 + + fmuld %f30,%f30,%f30 + faddd %f20,%f46,%f20 + + fmuld %f32,%f32,%f32 + faddd %f22,%f46,%f22 + + fmuld %f34,%f34,%f34 + faddd %f24,%f46,%f24 + + fmuld %f36,%f16,%f16 + faddd %f26,%f46,%f26 + + fmuld %f30,%f10,%f10 + fzero %f30 + + fmuld %f32,%f12,%f12 + fzero %f32 + + fmuld %f34,%f14,%f14 + fzero %f34 + + fmuld %f36,%f36,%f36 + faddd %f16,%f50,%f16 + and %l3,2,%o5 + + faddd %f10,%f20,%f10 + and %l0,2,%g1 + + faddd %f12,%f22,%f12 + and %l1,2,%g5 + + faddd %f14,%f24,%f14 + and %l2,2,%o4 + + fmuld %f36,%f16,%f16 + fmovrdnz %o5,%f28,%f6 + + fmuld %f0,%f10,%f10 + fmovrdnz %g1,%f28,%f30 + + fmuld %f2,%f12,%f12 + fmovrdnz %g5,%f28,%f32 + + fmuld %f4,%f14,%f14 + fmovrdnz %o4,%f28,%f34 + + faddd %f16,%f26,%f16 + + fxor %f10,%f30,%f10 + + fxor %f12,%f32,%f12 + + fxor %f14,%f34,%f14 + + addcc %i0,-1,%i0 + bg,pt %icc,.start +! delay slot + fxor %f16,%f6,%f16 + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case15: + fmuld %f30,%f44,%f10 ! sin(x0) + + fmuld %f32,%f44,%f12 ! sin(x1) + + fmuld %f34,%f44,%f14 ! sin(x2) + + fmuld %f36,%f44,%f16 ! 
sin(x3) + + fmuld %f30,%f40,%f20 + faddd %f10,%f42,%f10 + + fmuld %f32,%f40,%f22 + faddd %f12,%f42,%f12 + + fmuld %f34,%f40,%f24 + faddd %f14,%f42,%f14 + + fmuld %f36,%f40,%f26 + faddd %f16,%f42,%f16 + + fmuld %f30,%f30,%f30 + faddd %f20,%f46,%f20 + + fmuld %f32,%f32,%f32 + faddd %f22,%f46,%f22 + + fmuld %f34,%f34,%f34 + faddd %f24,%f46,%f24 + + fmuld %f36,%f36,%f36 + faddd %f26,%f46,%f26 + + fmuld %f30,%f10,%f10 + fzero %f30 + + fmuld %f32,%f12,%f12 + fzero %f32 + + fmuld %f34,%f14,%f14 + fzero %f34 + + fmuld %f36,%f16,%f16 + fzero %f36 + + faddd %f10,%f20,%f10 + and %l0,2,%g1 + + faddd %f12,%f22,%f12 + and %l1,2,%g5 + + faddd %f14,%f24,%f14 + and %l2,2,%o4 + + faddd %f16,%f26,%f16 + and %l3,2,%o5 + + fmuld %f0,%f10,%f10 + fmovrdnz %g1,%f28,%f30 + + fmuld %f2,%f12,%f12 + fmovrdnz %g5,%f28,%f32 + + fmuld %f4,%f14,%f14 + fmovrdnz %o4,%f28,%f34 + + fmuld %f6,%f16,%f16 + fmovrdnz %o5,%f28,%f36 + + fxor %f10,%f30,%f10 + + fxor %f12,%f32,%f12 + + fxor %f14,%f34,%f14 + + addcc %i0,-1,%i0 + bg,pt %icc,.start +! delay slot + fxor %f16,%f36,%f16 + + ba,pt %icc,.end +! delay slot + nop + + + .align 32 +.end: + fdtos %f10,%f10 + st %f10,[%o0] + fdtos %f12,%f12 + st %f12,[%o1] + fdtos %f14,%f14 + st %f14,[%o2] + fdtos %f16,%f16 + tst %i5 ! check for huge arguments remaining + be,pt %icc,.exit +! delay slot + st %f16,[%o3] +#ifdef __sparcv9 + ldx [%fp+xsave],%o1 + ldx [%fp+ysave],%o3 +#else + ld [%fp+xsave],%o1 + ld [%fp+ysave],%o3 +#endif + ld [%fp+nsave],%o0 + ld [%fp+sxsave],%o2 + ld [%fp+sysave],%o4 + sra %o2,0,%o2 ! sign-extend for V9 + call __vlibm_vsin_bigf + sra %o4,0,%o4 ! delay slot + +.exit: + ret + restore + + + .align 32 +.last1: + fdtos %f12,%f12 + st %f12,[%o1] + fzeros %f2 + add %fp,junk,%o1 +.last2: + fdtos %f14,%f14 + st %f14,[%o2] + fzeros %f4 + add %fp,junk,%o2 +.last3: + fdtos %f16,%f16 + st %f16,[%o3] + fzeros %f6 + ba,pt %icc,.cont +! delay slot + add %fp,junk,%o3 + + + .align 16 +.range0: + fcmpgt32 %f38,%f30,%l0 + andcc %l0,2,%g0 + bnz,a,pt %icc,1f ! 
branch if finite +! delay slot, squashed if branch not taken + mov 1,%i5 ! set biguns + fzeros %f1 + fmuls %f0,%f1,%f0 + st %f0,[%o0] +1: + addcc %i0,-1,%i0 + ble,pn %icc,1f +! delay slot + nop + ld [%i1],%f0 + add %i1,%i2,%i1 + mov %i3,%o0 + add %i3,%i4,%i3 + fabsd %f0,%f30 + fcmple32 %f30,%f18,%l0 + andcc %l0,2,%g0 + bz,pn %icc,.range0 +! delay slot + nop + ba,pt %icc,.check1 +! delay slot + fcmple32 %f30,%f8,%l0 +1: + fzero %f0 ! set up dummy argument + add %fp,junk,%o0 + mov 2,%l0 + ba,pt %icc,.check1 +! delay slot + fzero %f30 + + + .align 16 +.range1: + fcmpgt32 %f38,%f32,%l1 + andcc %l1,2,%g0 + bnz,a,pt %icc,1f ! branch if finite +! delay slot, squashed if branch not taken + mov 1,%i5 ! set biguns + fzeros %f3 + fmuls %f2,%f3,%f2 + st %f2,[%o1] +1: + addcc %i0,-1,%i0 + ble,pn %icc,1f +! delay slot + nop + ld [%i1],%f2 + add %i1,%i2,%i1 + mov %i3,%o1 + add %i3,%i4,%i3 + fabsd %f2,%f32 + fcmple32 %f32,%f18,%l1 + andcc %l1,2,%g0 + bz,pn %icc,.range1 +! delay slot + nop + ba,pt %icc,.check2 +! delay slot + fcmple32 %f32,%f8,%l1 +1: + fzero %f2 ! set up dummy argument + add %fp,junk,%o1 + mov 2,%l1 + ba,pt %icc,.check2 +! delay slot + fzero %f32 + + + .align 16 +.range2: + fcmpgt32 %f38,%f34,%l2 + andcc %l2,2,%g0 + bnz,a,pt %icc,1f ! branch if finite +! delay slot, squashed if branch not taken + mov 1,%i5 ! set biguns + fzeros %f5 + fmuls %f4,%f5,%f4 + st %f4,[%o2] +1: + addcc %i0,-1,%i0 + ble,pn %icc,1f +! delay slot + nop + ld [%i1],%f4 + add %i1,%i2,%i1 + mov %i3,%o2 + add %i3,%i4,%i3 + fabsd %f4,%f34 + fcmple32 %f34,%f18,%l2 + andcc %l2,2,%g0 + bz,pn %icc,.range2 +! delay slot + nop + ba,pt %icc,.check3 +! delay slot + fcmple32 %f34,%f8,%l2 +1: + fzero %f4 ! set up dummy argument + add %fp,junk,%o2 + mov 2,%l2 + ba,pt %icc,.check3 +! delay slot + fzero %f34 + + + .align 16 +.range3: + fcmpgt32 %f38,%f36,%l3 + andcc %l3,2,%g0 + bnz,a,pt %icc,1f ! branch if finite +! delay slot, squashed if branch not taken + mov 1,%i5 ! 
set biguns + fzeros %f7 + fmuls %f6,%f7,%f6 + st %f6,[%o3] +1: + addcc %i0,-1,%i0 + ble,pn %icc,1f +! delay slot + nop + ld [%i1],%f6 + add %i1,%i2,%i1 + mov %i3,%o3 + add %i3,%i4,%i3 + fabsd %f6,%f36 + fcmple32 %f36,%f18,%l3 + andcc %l3,2,%g0 + bz,pn %icc,.range3 +! delay slot + nop + ba,pt %icc,.checkprimary +! delay slot + fcmple32 %f36,%f8,%l3 +1: + fzero %f6 ! set up dummy argument + add %fp,junk,%o3 + mov 2,%l3 + ba,pt %icc,.checkprimary +! delay slot + fzero %f36 + + SET_SIZE(__vsinf) + diff --git a/usr/src/lib/libmvec/common/vis/__vsqrt.S b/usr/src/lib/libmvec/common/vis/__vsqrt.S new file mode 100644 index 0000000000..58e19e2e46 --- /dev/null +++ b/usr/src/lib/libmvec/common/vis/__vsqrt.S @@ -0,0 +1,1844 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .file "__vsqrt.S" + +#include "libm.h" + + RO_DATA + .align 64 + +.CONST_TBL: + .word 0x3fe00000, 0x00000000 ! A1 = 5.00000000000000001789e-01 + .word 0xbfbfffff, 0xfffd0bfd ! A2 = -1.24999999997314110667e-01 + .word 0x3fafffff, 0xfffb5bfb ! 
A3 = 6.24999999978896565817e-02 + .word 0xbfa4000f, 0xc00b4fc8 ! A4 = -3.90629693917215481458e-02 + .word 0x3f9c0018, 0xc012da4e ! A5 = 2.73441188080261677282e-02 + .word 0x000fffff, 0xffffffff ! DC0 = 0x000fffffffffffff + .word 0x00001000, 0x00000000 ! DC2 = 0x0000100000000000 + .word 0x7fffe000, 0x00000000 ! DC3 = 0x7fffe00000000000 + +! i = [0,128] +! TBL[8*i+0] = 1.0 / (*(double*)&(0x3fe0000000000000LL + (i << 45))); +! TBL[8*i+1] = (double)(2.0 * sqrtl(*(double*)&(0x3fe0000000000000LL + (i << 45)))); +! TBL[8*i+2] = (double)(2.0 * sqrtl(*(double*)&(0x3fe0000000000000LL + (i << 45))) - TBL[8*i+1]); +! TBL[8*i+3] = 0 +! TBL[8*i+4] = 1.0 / (*(double*)&(0x3fe0000000000000LL + (i << 45))); +! TBL[8*i+5] = (double)(2.0 * sqrtl(2.0) * sqrtl(*(double*)&(0x3fe0000000000000LL + (i << 45)))); +! TBL[8*i+6] = (double)(2.0 * sqrtl(2.0) * sqrtl(*(double*)&(0x3fe0000000000000LL + (i << 45))) - TBL[8*i+5]); +! TBL[8*i+7] = 0 + + .word 0x40000000, 0x00000000, 0x3ff6a09e, 0x667f3bcd + .word 0xbc9bdd34, 0x13b26456, 0x00000000, 0x00000000 + .word 0x40000000, 0x00000000, 0x40000000, 0x00000000 + .word 0xb8f00000, 0x00000000, 0x00000000, 0x00000000 + .word 0x3fffc07f, 0x01fc07f0, 0x3ff6b733, 0xbfd8c648 + .word 0x3c53b629, 0x05629048, 0x00000000, 0x00000000 + .word 0x3fffc07f, 0x01fc07f0, 0x40000ff8, 0x07f60deb + .word 0x3c90655c, 0x648a53f1, 0x00000000, 0x00000000 + .word 0x3fff81f8, 0x1f81f820, 0x3ff6cdb2, 0xbbb212eb + .word 0x3c960332, 0xcdbaba2d, 0x00000000, 0x00000000 + .word 0x3fff81f8, 0x1f81f820, 0x40001fe0, 0x3f61bad0 + .word 0x3ca2c41a, 0x15cbfaf2, 0x00000000, 0x00000000 + .word 0x3fff4465, 0x9e4a4271, 0x3ff6e41b, 0x9bfb3b75 + .word 0xbc925d8c, 0xfd6d5c87, 0x00000000, 0x00000000 + .word 0x3fff4465, 0x9e4a4271, 0x40002fb8, 0xd4e30f48 + .word 0xbca64203, 0xab1ba910, 0x00000000, 0x00000000 + .word 0x3fff07c1, 0xf07c1f08, 0x3ff6fa6e, 0xa162d0f0 + .word 0x3c691a24, 0x3d6297e9, 0x00000000, 0x00000000 + .word 0x3fff07c1, 0xf07c1f08, 0x40003f81, 0xf636b80c + .word 0xbca0efc8, 
0xba812a8c, 0x00000000, 0x00000000 + .word 0x3ffecc07, 0xb301ecc0, 0x3ff710ac, 0x0b5e5e32 + .word 0xbc991218, 0xb8d2850d, 0x00000000, 0x00000000 + .word 0x3ffecc07, 0xb301ecc0, 0x40004f3b, 0xd03c0a64 + .word 0x3c9ee2cf, 0x2d8ae22b, 0x00000000, 0x00000000 + .word 0x3ffe9131, 0xabf0b767, 0x3ff726d4, 0x1832a0be + .word 0xbc2d9b1a, 0xa8ecb058, 0x00000000, 0x00000000 + .word 0x3ffe9131, 0xabf0b767, 0x40005ee6, 0x8efad48b + .word 0xbc9c35f4, 0x8f4b89f7, 0x00000000, 0x00000000 + .word 0x3ffe573a, 0xc901e574, 0x3ff73ce7, 0x04fb7b23 + .word 0x3c91470b, 0x816b17a6, 0x00000000, 0x00000000 + .word 0x3ffe573a, 0xc901e574, 0x40006e82, 0x5da8fc2b + .word 0x3c9a315a, 0x8bd8a03b, 0x00000000, 0x00000000 + .word 0x3ffe1e1e, 0x1e1e1e1e, 0x3ff752e5, 0x0db3a3a2 + .word 0xbc939331, 0x3eea4381, 0x00000000, 0x00000000 + .word 0x3ffe1e1e, 0x1e1e1e1e, 0x40007e0f, 0x66afed07 + .word 0xbc74a6e1, 0xdcd59eaf, 0x00000000, 0x00000000 + .word 0x3ffde5d6, 0xe3f8868a, 0x3ff768ce, 0x6d3c11e0 + .word 0xbc9478b8, 0xab33074d, 0x00000000, 0x00000000 + .word 0x3ffde5d6, 0xe3f8868a, 0x40008d8d, 0xd3b1d9aa + .word 0x3c81d533, 0x85fe2b96, 0x00000000, 0x00000000 + .word 0x3ffdae60, 0x76b981db, 0x3ff77ea3, 0x5d632e43 + .word 0x3c92f714, 0x9a22fa4f, 0x00000000, 0x00000000 + .word 0x3ffdae60, 0x76b981db, 0x40009cfd, 0xcd8ed009 + .word 0xbc4862a9, 0xbcf7f372, 0x00000000, 0x00000000 + .word 0x3ffd77b6, 0x54b82c34, 0x3ff79464, 0x16ebc56c + .word 0x3c9a7cd5, 0x224c7375, 0x00000000, 0x00000000 + .word 0x3ffd77b6, 0x54b82c34, 0x4000ac5f, 0x7c69a3c8 + .word 0x3ca94dff, 0x7bfa2757, 0x00000000, 0x00000000 + .word 0x3ffd41d4, 0x1d41d41d, 0x3ff7aa10, 0xd193c22d + .word 0xbc790ed9, 0x403afe85, 0x00000000, 0x00000000 + .word 0x3ffd41d4, 0x1d41d41d, 0x4000bbb3, 0x07acafdb + .word 0xbc852a97, 0x686f9d2e, 0x00000000, 0x00000000 + .word 0x3ffd0cb5, 0x8f6ec074, 0x3ff7bfa9, 0xc41ab040 + .word 0x3c8d6bc3, 0x02ae758f, 0x00000000, 0x00000000 + .word 0x3ffd0cb5, 0x8f6ec074, 0x4000caf8, 0x960e710d + .word 0x3c9caa6b, 0xe2366171, 
0x00000000, 0x00000000 + .word 0x3ffcd856, 0x89039b0b, 0x3ff7d52f, 0x244809e9 + .word 0x3c9081f6, 0xf3b99d5f, 0x00000000, 0x00000000 + .word 0x3ffcd856, 0x89039b0b, 0x4000da30, 0x4d95fb06 + .word 0xbc9e1269, 0x76855586, 0x00000000, 0x00000000 + .word 0x3ffca4b3, 0x055ee191, 0x3ff7eaa1, 0x26f15284 + .word 0xbc846ce4, 0x68c1882b, 0x00000000, 0x00000000 + .word 0x3ffca4b3, 0x055ee191, 0x4000e95a, 0x539f492c + .word 0xbc80c73f, 0xc38a2184, 0x00000000, 0x00000000 + .word 0x3ffc71c7, 0x1c71c71c, 0x3ff80000, 0x00000000 + .word 0x00000000, 0x00000000, 0x00000000, 0x00000000 + .word 0x3ffc71c7, 0x1c71c71c, 0x4000f876, 0xccdf6cd9 + .word 0x3cab1a18, 0xf13a34c0, 0x00000000, 0x00000000 + .word 0x3ffc3f8f, 0x01c3f8f0, 0x3ff8154b, 0xe2773526 + .word 0xbc857147, 0xe067d0ee, 0x00000000, 0x00000000 + .word 0x3ffc3f8f, 0x01c3f8f0, 0x40010785, 0xdd689a29 + .word 0xbcaaabbe, 0x9e4d810a, 0x00000000, 0x00000000 + .word 0x3ffc0e07, 0x0381c0e0, 0x3ff82a85, 0x00794e6c + .word 0xbc82edaa, 0x75e6ac5f, 0x00000000, 0x00000000 + .word 0x3ffc0e07, 0x0381c0e0, 0x40011687, 0xa8ae14a3 + .word 0x3cac9b43, 0xbcf06106, 0x00000000, 0x00000000 + .word 0x3ffbdd2b, 0x899406f7, 0x3ff83fab, 0x8b4d4315 + .word 0x3c829e06, 0x2d3e134d, 0x00000000, 0x00000000 + .word 0x3ffbdd2b, 0x899406f7, 0x4001257c, 0x5187fd09 + .word 0xbca4a750, 0xa83950a4, 0x00000000, 0x00000000 + .word 0x3ffbacf9, 0x14c1bad0, 0x3ff854bf, 0xb363dc39 + .word 0x3c99399f, 0xca38787e, 0x00000000, 0x00000000 + .word 0x3ffbacf9, 0x14c1bad0, 0x40013463, 0xfa37014e + .word 0x3c7b295b, 0xaa698cd3, 0x00000000, 0x00000000 + .word 0x3ffb7d6c, 0x3dda338b, 0x3ff869c1, 0xa85cc346 + .word 0x3c9fcc99, 0xde11b1d1, 0x00000000, 0x00000000 + .word 0x3ffb7d6c, 0x3dda338b, 0x4001433e, 0xc467effb + .word 0x3c92c031, 0x3b7278c8, 0x00000000, 0x00000000 + .word 0x3ffb4e81, 0xb4e81b4f, 0x3ff87eb1, 0x990b697a + .word 0x3c7c43e9, 0xf593ea0f, 0x00000000, 0x00000000 + .word 0x3ffb4e81, 0xb4e81b4f, 0x4001520c, 0xd1372feb + .word 0xbcadec22, 0x5d8e66d2, 0x00000000, 
0x00000000 + .word 0x3ffb2036, 0x406c80d9, 0x3ff8938f, 0xb37bc9c1 + .word 0xbc7c115f, 0x9f5c8d6f, 0x00000000, 0x00000000 + .word 0x3ffb2036, 0x406c80d9, 0x400160ce, 0x41341d74 + .word 0x3c967036, 0x863a1bb2, 0x00000000, 0x00000000 + .word 0x3ffaf286, 0xbca1af28, 0x3ff8a85c, 0x24f70659 + .word 0x3c9f6e07, 0x6b588a50, 0x00000000, 0x00000000 + .word 0x3ffaf286, 0xbca1af28, 0x40016f83, 0x34644df9 + .word 0xbcae8679, 0x80a1c48e, 0x00000000, 0x00000000 + .word 0x3ffac570, 0x1ac5701b, 0x3ff8bd17, 0x1a07e38a + .word 0x3c9c20b5, 0xa697f23f, 0x00000000, 0x00000000 + .word 0x3ffac570, 0x1ac5701b, 0x40017e2b, 0xca46bab9 + .word 0x3ca1519b, 0x10d04d5f, 0x00000000, 0x00000000 + .word 0x3ffa98ef, 0x606a63be, 0x3ff8d1c0, 0xbe7f20ac + .word 0xbc8bdb8a, 0x6df021f3, 0x00000000, 0x00000000 + .word 0x3ffa98ef, 0x606a63be, 0x40018cc8, 0x21d6d3e3 + .word 0xbca30af1, 0xd725cc5b, 0x00000000, 0x00000000 + .word 0x3ffa6d01, 0xa6d01a6d, 0x3ff8e659, 0x3d77b0b8 + .word 0xbc7d99d7, 0x64769954, 0x00000000, 0x00000000 + .word 0x3ffa6d01, 0xa6d01a6d, 0x40019b58, 0x598f7c9f + .word 0xbc72e0d8, 0x51c0e011, 0x00000000, 0x00000000 + .word 0x3ffa41a4, 0x1a41a41a, 0x3ff8fae0, 0xc15ad38a + .word 0xbc7db7ad, 0xb6817f6d, 0x00000000, 0x00000000 + .word 0x3ffa41a4, 0x1a41a41a, 0x4001a9dc, 0x8f6df104 + .word 0xbcafc519, 0xc18dc1d5, 0x00000000, 0x00000000 + .word 0x3ffa16d3, 0xf97a4b02, 0x3ff90f57, 0x73e410e4 + .word 0x3c6fb605, 0xcee75482, 0x00000000, 0x00000000 + .word 0x3ffa16d3, 0xf97a4b02, 0x4001b854, 0xe0f496a0 + .word 0x3ca27006, 0x899b7c3a, 0x00000000, 0x00000000 + .word 0x3ff9ec8e, 0x951033d9, 0x3ff923bd, 0x7e25164d + .word 0xbc9278d1, 0x901d3b40, 0x00000000, 0x00000000 + .word 0x3ff9ec8e, 0x951033d9, 0x4001c6c1, 0x6b2db870 + .word 0x3c887e1d, 0x8335fb28, 0x00000000, 0x00000000 + .word 0x3ff9c2d1, 0x4ee4a102, 0x3ff93813, 0x088978c5 + .word 0xbc54312c, 0x627e5c52, 0x00000000, 0x00000000 + .word 0x3ff9c2d1, 0x4ee4a102, 0x4001d522, 0x4aae2ee1 + .word 0x3ca91222, 0xf6aebdc9, 0x00000000, 0x00000000 + .word 
0x3ff99999, 0x9999999a, 0x3ff94c58, 0x3ada5b53 + .word 0xbc9b7ed7, 0x50df3cca, 0x00000000, 0x00000000 + .word 0x3ff99999, 0x9999999a, 0x4001e377, 0x9b97f4a8 + .word 0xbc9f5063, 0x19fcfd19, 0x00000000, 0x00000000 + .word 0x3ff970e4, 0xf80cb872, 0x3ff9608d, 0x3c41fb4b + .word 0x3c73df32, 0xeaa86b83, 0x00000000, 0x00000000 + .word 0x3ff970e4, 0xf80cb872, 0x4001f1c1, 0x799ca8ff + .word 0xbca28b52, 0xeb725e0a, 0x00000000, 0x00000000 + .word 0x3ff948b0, 0xfcd6e9e0, 0x3ff974b2, 0x334f2346 + .word 0x3c814e4a, 0xd3ae9e3f, 0x00000000, 0x00000000 + .word 0x3ff948b0, 0xfcd6e9e0, 0x40020000, 0x00000000 + .word 0xb9000000, 0x00000000, 0x00000000, 0x00000000 + .word 0x3ff920fb, 0x49d0e229, 0x3ff988c7, 0x45f88592 + .word 0x3c95af70, 0x1a56047b, 0x00000000, 0x00000000 + .word 0x3ff920fb, 0x49d0e229, 0x40020e33, 0x499a21a9 + .word 0xbc924ba2, 0x74fea9a1, 0x00000000, 0x00000000 + .word 0x3ff8f9c1, 0x8f9c18fa, 0x3ff99ccc, 0x999fff00 + .word 0x3c866234, 0x063b88ee, 0x00000000, 0x00000000 + .word 0x3ff8f9c1, 0x8f9c18fa, 0x40021c5b, 0x70d9f824 + .word 0xbca844f9, 0x9eee6fc3, 0x00000000, 0x00000000 + .word 0x3ff8d301, 0x8d3018d3, 0x3ff9b0c2, 0x5315c2ce + .word 0xbc87f64a, 0x65cc6887, 0x00000000, 0x00000000 + .word 0x3ff8d301, 0x8d3018d3, 0x40022a78, 0x8fc76de5 + .word 0x3c931e32, 0xd4e07a48, 0x00000000, 0x00000000 + .word 0x3ff8acb9, 0x0f6bf3aa, 0x3ff9c4a8, 0x969b7077 + .word 0xbc96ca9e, 0x5cd4517a, 0x00000000, 0x00000000 + .word 0x3ff8acb9, 0x0f6bf3aa, 0x4002388a, 0xc0059c28 + .word 0xbc96072f, 0xbe0e5da3, 0x00000000, 0x00000000 + .word 0x3ff886e5, 0xf0abb04a, 0x3ff9d87f, 0x87e71422 + .word 0xbc85fdd8, 0xb11b7b1d, 0x00000000, 0x00000000 + .word 0x3ff886e5, 0xf0abb04a, 0x40024692, 0x1ad4ea49 + .word 0xbcaa6d9b, 0x268ef62d, 0x00000000, 0x00000000 + .word 0x3ff86186, 0x18618618, 0x3ff9ec47, 0x4a261264 + .word 0xbc8540c4, 0x89ba5074, 0x00000000, 0x00000000 + .word 0x3ff86186, 0x18618618, 0x4002548e, 0xb9151e85 + .word 0x3c999820, 0x0a774879, 0x00000000, 0x00000000 + .word 0x3ff83c97, 
0x7ab2bedd, 0x3ffa0000, 0x00000000 + .word 0x00000000, 0x00000000, 0x00000000, 0x00000000 + .word 0x3ff83c97, 0x7ab2bedd, 0x40026280, 0xb3476096 + .word 0x3c9ab88b, 0x5ffe1cf5, 0x00000000, 0x00000000 + .word 0x3ff81818, 0x18181818, 0x3ffa13a9, 0xcb996651 + .word 0xbc9f9ab9, 0x0e4e85c3, 0x00000000, 0x00000000 + .word 0x3ff81818, 0x18181818, 0x40027068, 0x21902e9a + .word 0x3c90ff4c, 0x20f541f6, 0x00000000, 0x00000000 + .word 0x3ff7f405, 0xfd017f40, 0x3ffa2744, 0xce9674f5 + .word 0xbc8b936c, 0x81e54daa, 0x00000000, 0x00000000 + .word 0x3ff7f405, 0xfd017f40, 0x40027e45, 0x1bb944c3 + .word 0x3c8e4a16, 0x42099ef0, 0x00000000, 0x00000000 + .word 0x3ff7d05f, 0x417d05f4, 0x3ffa3ad1, 0x2a1da160 + .word 0x3c951168, 0xf4be5984, 0x00000000, 0x00000000 + .word 0x3ff7d05f, 0x417d05f4, 0x40028c17, 0xb9337834 + .word 0xbc8af150, 0xa0e88972, 0x00000000, 0x00000000 + .word 0x3ff7ad22, 0x08e0ecc3, 0x3ffa4e4e, 0xfeda34de + .word 0x3c6afbb4, 0xdbdadd0d, 0x00000000, 0x00000000 + .word 0x3ff7ad22, 0x08e0ecc3, 0x400299e0, 0x11188575 + .word 0xbc9a6169, 0x3fb250e5, 0x00000000, 0x00000000 + .word 0x3ff78a4c, 0x8178a4c8, 0x3ffa61be, 0x6cfec997 + .word 0xbc8c37ea, 0xb2bb5ca0, 0x00000000, 0x00000000 + .word 0x3ff78a4c, 0x8178a4c8, 0x4002a79e, 0x3a2cd2e6 + .word 0xbca5ddd4, 0x9cc9ad59, 0x00000000, 0x00000000 + .word 0x3ff767dc, 0xe434a9b1, 0x3ffa751f, 0x9447b724 + .word 0x3c82b909, 0x477e9ed1, 0x00000000, 0x00000000 + .word 0x3ff767dc, 0xe434a9b1, 0x4002b552, 0x4ae1278e + .word 0xbca2f2a9, 0x8841b934, 0x00000000, 0x00000000 + .word 0x3ff745d1, 0x745d1746, 0x3ffa8872, 0x93fd6f34 + .word 0x3c768ef2, 0x4f198721, 0x00000000, 0x00000000 + .word 0x3ff745d1, 0x745d1746, 0x4002c2fc, 0x595456a7 + .word 0xbc996f60, 0xb0fc7e96, 0x00000000, 0x00000000 + .word 0x3ff72428, 0x7f46debc, 0x3ffa9bb7, 0x8af6cabc + .word 0x3c8ba60d, 0xc999aba7, 0x00000000, 0x00000000 + .word 0x3ff72428, 0x7f46debc, 0x4002d09c, 0x7b54e03e + .word 0x3c98c747, 0xfdeda6de, 0x00000000, 0x00000000 + .word 0x3ff702e0, 0x5c0b8170, 
0x3ffaaeee, 0x979b4838 + .word 0xbc91f08a, 0xef9ef6c0, 0x00000000, 0x00000000 + .word 0x3ff702e0, 0x5c0b8170, 0x4002de32, 0xc6628741 + .word 0x3ca78746, 0xc499a4f7, 0x00000000, 0x00000000 + .word 0x3ff6e1f7, 0x6b4337c7, 0x3ffac217, 0xd7e53b66 + .word 0xbc64282a, 0xaa967e4f, 0x00000000, 0x00000000 + .word 0x3ff6e1f7, 0x6b4337c7, 0x4002ebbf, 0x4fafdd4b + .word 0xbca78a73, 0xb72d5c41, 0x00000000, 0x00000000 + .word 0x3ff6c16c, 0x16c16c17, 0x3ffad533, 0x6963eefc + .word 0xbc977c4a, 0x537dbdd2, 0x00000000, 0x00000000 + .word 0x3ff6c16c, 0x16c16c17, 0x4002f942, 0x2c23c47e + .word 0xbc827c85, 0xf29db65d, 0x00000000, 0x00000000 + .word 0x3ff6a13c, 0xd1537290, 0x3ffae841, 0x693db8b4 + .word 0x3c90f773, 0xcd7a0713, 0x00000000, 0x00000000 + .word 0x3ff6a13c, 0xd1537290, 0x400306bb, 0x705ae7c3 + .word 0x3caf4933, 0x907af47a, 0x00000000, 0x00000000 + .word 0x3ff68168, 0x16816817, 0x3ffafb41, 0xf432002e + .word 0xbc7ac94a, 0xfdfe8c5b, 0x00000000, 0x00000000 + .word 0x3ff68168, 0x16816817, 0x4003142b, 0x30a929ab + .word 0x3c98dc01, 0x081a6c5c, 0x00000000, 0x00000000 + .word 0x3ff661ec, 0x6a5122f9, 0x3ffb0e35, 0x269b38f5 + .word 0xbc4f69a8, 0x05c3271a, 0x00000000, 0x00000000 + .word 0x3ff661ec, 0x6a5122f9, 0x40032191, 0x811b0a41 + .word 0xbc9ce3f0, 0xb38c0bf7, 0x00000000, 0x00000000 + .word 0x3ff642c8, 0x590b2164, 0x3ffb211b, 0x1c70d023 + .word 0x3c2e4c5e, 0x66eae2f0, 0x00000000, 0x00000000 + .word 0x3ff642c8, 0x590b2164, 0x40032eee, 0x75770416 + .word 0x3caed8e7, 0x730eaff2, 0x00000000, 0x00000000 + .word 0x3ff623fa, 0x77016240, 0x3ffb33f3, 0xf1490def + .word 0xbc95894b, 0xcb02373b, 0x00000000, 0x00000000 + .word 0x3ff623fa, 0x77016240, 0x40033c42, 0x213ee0c9 + .word 0x3ca84c24, 0x4ba98124, 0x00000000, 0x00000000 + .word 0x3ff60581, 0x60581606, 0x3ffb46bf, 0xc05aeb89 + .word 0x3c9b1c7c, 0xc39adc9f, 0x00000000, 0x00000000 + .word 0x3ff60581, 0x60581606, 0x4003498c, 0x97b10540 + .word 0x3c734193, 0xbc8543b4, 0x00000000, 0x00000000 + .word 0x3ff5e75b, 0xb8d015e7, 0x3ffb597e, 
0xa47fdda3 + .word 0xbc923cc8, 0x9d1e4635, 0x00000000, 0x00000000 + .word 0x3ff5e75b, 0xb8d015e7, 0x400356cd, 0xebc9b5e2 + .word 0x3c96dee1, 0x46bb1571, 0x00000000, 0x00000000 + .word 0x3ff5c988, 0x2b931057, 0x3ffb6c30, 0xb83593e6 + .word 0x3c8f4e3f, 0xd28d84bc, 0x00000000, 0x00000000 + .word 0x3ff5c988, 0x2b931057, 0x40036406, 0x30445306 + .word 0xbca78d86, 0x2327430a, 0x00000000, 0x00000000 + .word 0x3ff5ac05, 0x6b015ac0, 0x3ffb7ed6, 0x159fadc8 + .word 0xbc899bcf, 0xf04d134b, 0x00000000, 0x00000000 + .word 0x3ff5ac05, 0x6b015ac0, 0x40037135, 0x779c8dcb + .word 0xbc8fe126, 0xce9778ae, 0x00000000, 0x00000000 + .word 0x3ff58ed2, 0x308158ed, 0x3ffb916e, 0xd68964ec + .word 0x3c826a5d, 0x5dbaae29, 0x00000000, 0x00000000 + .word 0x3ff58ed2, 0x308158ed, 0x40037e5b, 0xd40f95a1 + .word 0x3cac6ff5, 0xeca5d122, 0x00000000, 0x00000000 + .word 0x3ff571ed, 0x3c506b3a, 0x3ffba3fb, 0x14672d7c + .word 0xbc8117d3, 0x97dcefc9, 0x00000000, 0x00000000 + .word 0x3ff571ed, 0x3c506b3a, 0x40038b79, 0x579d3eab + .word 0xbcac254f, 0xc0db598e, 0x00000000, 0x00000000 + .word 0x3ff55555, 0x55555555, 0x3ffbb67a, 0xe8584caa + .word 0x3c9cec95, 0xd0b5c1e3, 0x00000000, 0x00000000 + .word 0x3ff55555, 0x55555555, 0x4003988e, 0x1409212e + .word 0x3caf40c8, 0x6450c869, 0x00000000, 0x00000000 + .word 0x3ff53909, 0x48f40feb, 0x3ffbc8ee, 0x6b2865b9 + .word 0x3c9394eb, 0x90f645c8, 0x00000000, 0x00000000 + .word 0x3ff53909, 0x48f40feb, 0x4003a59a, 0x1adbb257 + .word 0x3ca6adce, 0x020a308d, 0x00000000, 0x00000000 + .word 0x3ff51d07, 0xeae2f815, 0x3ffbdb55, 0xb550fdbc + .word 0x3c7365e9, 0x6aa5fae3, 0x00000000, 0x00000000 + .word 0x3ff51d07, 0xeae2f815, 0x4003b29d, 0x7d635662 + .word 0x3cac99b0, 0x5e282129, 0x00000000, 0x00000000 + .word 0x3ff50150, 0x15015015, 0x3ffbedb0, 0xdefaf661 + .word 0x3c91a627, 0xb279170d, 0x00000000, 0x00000000 + .word 0x3ff50150, 0x15015015, 0x4003bf98, 0x4cb56c77 + .word 0x3ca8f653, 0xbcc0c4a1, 0x00000000, 0x00000000 + .word 0x3ff4e5e0, 0xa72f0539, 0x3ffc0000, 0x00000000 + .word 
0x00000000, 0x00000000, 0x00000000, 0x00000000 + .word 0x3ff4e5e0, 0xa72f0539, 0x4003cc8a, 0x99af5453 + .word 0xbc486364, 0x4f05f2be, 0x00000000, 0x00000000 + .word 0x3ff4cab8, 0x8725af6e, 0x3ffc1243, 0x2fec0329 + .word 0x3c96e0d7, 0x8dd23a7d, 0x00000000, 0x00000000 + .word 0x3ff4cab8, 0x8725af6e, 0x4003d974, 0x74f76df2 + .word 0x3c82e3c9, 0xfdbbbdc2, 0x00000000, 0x00000000 + .word 0x3ff4afd6, 0xa052bf5b, 0x3ffc247a, 0x85fe81fa + .word 0x3c89d8ee, 0xf6854220, 0x00000000, 0x00000000 + .word 0x3ff4afd6, 0xa052bf5b, 0x4003e655, 0xeefe1367 + .word 0x3c80eb35, 0xbb532559, 0x00000000, 0x00000000 + .word 0x3ff49539, 0xe3b2d067, 0x3ffc36a6, 0x192bf168 + .word 0xbc9083d8, 0x1a423b11, 0x00000000, 0x00000000 + .word 0x3ff49539, 0xe3b2d067, 0x4003f32f, 0x17fe8d04 + .word 0xbc905d6c, 0x1c437de0, 0x00000000, 0x00000000 + .word 0x3ff47ae1, 0x47ae147b, 0x3ffc48c6, 0x001f0ac0 + .word 0xbc92d481, 0x189efd6b, 0x00000000, 0x00000000 + .word 0x3ff47ae1, 0x47ae147b, 0x40040000, 0x00000000 + .word 0x00000000, 0x00000000, 0x00000000, 0x00000000 + .word 0x3ff460cb, 0xc7f5cf9a, 0x3ffc5ada, 0x513a1593 + .word 0xbc7aaedd, 0x014f5f03, 0x00000000, 0x00000000 + .word 0x3ff460cb, 0xc7f5cf9a, 0x40040cc8, 0xb6d657c2 + .word 0xbc9c05ab, 0xf480ce19, 0x00000000, 0x00000000 + .word 0x3ff446f8, 0x6562d9fb, 0x3ffc6ce3, 0x22982a3f + .word 0x3c891b2d, 0xf3e15f29, 0x00000000, 0x00000000 + .word 0x3ff446f8, 0x6562d9fb, 0x40041989, 0x4c2329f0 + .word 0x3c976037, 0x46da0ea6, 0x00000000, 0x00000000 + .word 0x3ff42d66, 0x25d51f87, 0x3ffc7ee0, 0x8a0e6d4c + .word 0x3c991c54, 0xc53e75c8, 0x00000000, 0x00000000 + .word 0x3ff42d66, 0x25d51f87, 0x40042641, 0xcf569572 + .word 0xbcadf80b, 0x1442c029, 0x00000000, 0x00000000 + .word 0x3ff41414, 0x14141414, 0x3ffc90d2, 0x9d2d43ce + .word 0xbc9edadb, 0x07f1137a, 0x00000000, 0x00000000 + .word 0x3ff41414, 0x14141414, 0x400432f2, 0x4fb01c7a + .word 0x3ca38bfe, 0x0e012c1c, 0x00000000, 0x00000000 + .word 0x3ff3fb01, 0x3fb013fb, 0x3ffca2b9, 0x714180f7 + .word 0xbc81a63d, 
0x6750c57c, 0x00000000, 0x00000000 + .word 0x3ff3fb01, 0x3fb013fb, 0x40043f9a, 0xdc3f79ce + .word 0x3c66d2b1, 0x767ae30a, 0x00000000, 0x00000000 + .word 0x3ff3e22c, 0xbce4a902, 0x3ffcb495, 0x1b558d17 + .word 0x3c8fcbcb, 0x357f2308, 0x00000000, 0x00000000 + .word 0x3ff3e22c, 0xbce4a902, 0x40044c3b, 0x83e57153 + .word 0x3c98c853, 0xc6be5ee1, 0x00000000, 0x00000000 + .word 0x3ff3c995, 0xa47babe7, 0x3ffcc665, 0xb0328622 + .word 0xbc91baa4, 0xd369f814, 0x00000000, 0x00000000 + .word 0x3ff3c995, 0xa47babe7, 0x400458d4, 0x55549c1a + .word 0x3ca02d72, 0x8d9a6054, 0x00000000, 0x00000000 + .word 0x3ff3b13b, 0x13b13b14, 0x3ffcd82b, 0x446159f3 + .word 0x3c983fb7, 0xb33cdfe8, 0x00000000, 0x00000000 + .word 0x3ff3b13b, 0x13b13b14, 0x40046565, 0x5f122ff6 + .word 0x3ca862c5, 0xd2f0ca4c, 0x00000000, 0x00000000 + .word 0x3ff3991c, 0x2c187f63, 0x3ffce9e5, 0xec2bda80 + .word 0xbc94ccf3, 0xd8e249ab, 0x00000000, 0x00000000 + .word 0x3ff3991c, 0x2c187f63, 0x400471ee, 0xaf76c2c6 + .word 0x3c975c62, 0xeff26e8e, 0x00000000, 0x00000000 + .word 0x3ff38138, 0x13813814, 0x3ffcfb95, 0xbb9dcc0c + .word 0x3c92cea2, 0x0857ae03, 0x00000000, 0x00000000 + .word 0x3ff38138, 0x13813814, 0x40047e70, 0x54af0989 + .word 0x3c9d8c33, 0xc0054830, 0x00000000, 0x00000000 + .word 0x3ff3698d, 0xf3de0748, 0x3ffd0d3a, 0xc685eda4 + .word 0x3c94115a, 0x0ff4cf9e, 0x00000000, 0x00000000 + .word 0x3ff3698d, 0xf3de0748, 0x40048aea, 0x5cbc935f + .word 0xbca8cb00, 0x12d14ff5, 0x00000000, 0x00000000 + .word 0x3ff3521c, 0xfb2b78c1, 0x3ffd1ed5, 0x2076fbe9 + .word 0x3c8f48a8, 0x6b72875f, 0x00000000, 0x00000000 + .word 0x3ff3521c, 0xfb2b78c1, 0x4004975c, 0xd5768088 + .word 0xbca1731e, 0xbc02f748, 0x00000000, 0x00000000 + .word 0x3ff33ae4, 0x5b57bcb2, 0x3ffd3064, 0xdcc8ae67 + .word 0x3c93480e, 0x805158ba, 0x00000000, 0x00000000 + .word 0x3ff33ae4, 0x5b57bcb2, 0x4004a3c7, 0xcc8a358a + .word 0xbc9d8f7f, 0xd2726ffa, 0x00000000, 0x00000000 + .word 0x3ff323e3, 0x4a2b10bf, 0x3ffd41ea, 0x0e98af91 + .word 0x3c824640, 0x0309962f, 
0x00000000, 0x00000000 + .word 0x3ff323e3, 0x4a2b10bf, 0x4004b02b, 0x4f7c0a88 + .word 0xbcaf71e1, 0xf6cafde2, 0x00000000, 0x00000000 + .word 0x3ff30d19, 0x0130d190, 0x3ffd5364, 0xc8cb8f86 + .word 0x3c8ad003, 0xc00630e1, 0x00000000, 0x00000000 + .word 0x3ff30d19, 0x0130d190, 0x4004bc87, 0x6ba7f6ec + .word 0x3c9c1edb, 0x2be943b8, 0x00000000, 0x00000000 + .word 0x3ff2f684, 0xbda12f68, 0x3ffd64d5, 0x1e0db1c6 + .word 0xbc911ed3, 0x6986d362, 0x00000000, 0x00000000 + .word 0x3ff2f684, 0xbda12f68, 0x4004c8dc, 0x2e423980 + .word 0xbc949d1f, 0x46ef5d2c, 0x00000000, 0x00000000 + .word 0x3ff2e025, 0xc04b8097, 0x3ffd763b, 0x20d435ef + .word 0x3c9d6780, 0xf76cb258, 0x00000000, 0x00000000 + .word 0x3ff2e025, 0xc04b8097, 0x4004d529, 0xa457fcfc + .word 0xbca1404a, 0x46484e3d, 0x00000000, 0x00000000 + .word 0x3ff2c9fb, 0x4d812ca0, 0x3ffd8796, 0xe35ddbb2 + .word 0x3c83fdd9, 0x1aeb637a, 0x00000000, 0x00000000 + .word 0x3ff2c9fb, 0x4d812ca0, 0x4004e16f, 0xdacff937 + .word 0xbca1deb9, 0xd3815ad2, 0x00000000, 0x00000000 + .word 0x3ff2b404, 0xad012b40, 0x3ffd98e8, 0x77b3e207 + .word 0xbc48c301, 0xee02dee8, 0x00000000, 0x00000000 + .word 0x3ff2b404, 0xad012b40, 0x4004edae, 0xde6b10fe + .word 0x3ca99709, 0x4a91a780, 0x00000000, 0x00000000 + .word 0x3ff29e41, 0x29e4129e, 0x3ffdaa2f, 0xefaae1d8 + .word 0xbc63fe0e, 0x03f44594, 0x00000000, 0x00000000 + .word 0x3ff29e41, 0x29e4129e, 0x4004f9e6, 0xbbc4ecb3 + .word 0x3c6ce5a6, 0x018493f1, 0x00000000, 0x00000000 + .word 0x3ff288b0, 0x1288b013, 0x3ffdbb6d, 0x5ce3a42f + .word 0xbc922c27, 0xf71c8337, 0x00000000, 0x00000000 + .word 0x3ff288b0, 0x1288b013, 0x40050617, 0x7f5491bb + .word 0xbc9e591e, 0x7b2a6d1a, 0x00000000, 0x00000000 + .word 0x3ff27350, 0xb8812735, 0x3ffdcca0, 0xd0cbf408 + .word 0x3c7a6d16, 0x2310db57, 0x00000000, 0x00000000 + .word 0x3ff27350, 0xb8812735, 0x40051241, 0x356cf6e0 + .word 0x3ca37dc2, 0x60e8bc2d, 0x00000000, 0x00000000 + .word 0x3ff25e22, 0x708092f1, 0x3ffdddca, 0x5c9f6be8 + .word 0x3c818520, 0xf0a3f809, 0x00000000, 
0x00000000 + .word 0x3ff25e22, 0x708092f1, 0x40051e63, 0xea3d95b0 + .word 0x3caecf78, 0x2e88d5ce, 0x00000000, 0x00000000 + .word 0x3ff24924, 0x92492492, 0x3ffdeeea, 0x11683f49 + .word 0x3c802aae, 0x4bfa7c27, 0x00000000, 0x00000000 + .word 0x3ff24924, 0x92492492, 0x40052a7f, 0xa9d2f8ea + .word 0xbca21c62, 0xb033c079, 0x00000000, 0x00000000 + .word 0x3ff23456, 0x789abcdf, 0x3ffe0000, 0x00000000 + .word 0x00000000, 0x00000000, 0x00000000, 0x00000000 + .word 0x3ff23456, 0x789abcdf, 0x40053694, 0x80174810 + .word 0xbc9c3ec1, 0xa4ee7c21, 0x00000000, 0x00000000 + .word 0x3ff21fb7, 0x8121fb78, 0x3ffe110c, 0x39105faf + .word 0x3c776161, 0x4c513964, 0x00000000, 0x00000000 + .word 0x3ff21fb7, 0x8121fb78, 0x400542a2, 0x78d2d036 + .word 0xbca495c2, 0x45254df4, 0x00000000, 0x00000000 + .word 0x3ff20b47, 0x0c67c0d9, 0x3ffe220e, 0xcd13ed60 + .word 0xbc729f01, 0xf18c9dc9, 0x00000000, 0x00000000 + .word 0x3ff20b47, 0x0c67c0d9, 0x40054ea9, 0x9fac8a0f + .word 0x3c80cfbb, 0x19353b3d, 0x00000000, 0x00000000 + .word 0x3ff1f704, 0x7dc11f70, 0x3ffe3307, 0xcc56cf5c + .word 0xbc81f04e, 0xc3189131, 0x00000000, 0x00000000 + .word 0x3ff1f704, 0x7dc11f70, 0x40055aaa, 0x002a9d5a + .word 0xbc4bf504, 0x76241f94, 0x00000000, 0x00000000 + .word 0x3ff1e2ef, 0x3b3fb874, 0x3ffe43f7, 0x46f7795b + .word 0xbc931e7f, 0x8af68f8c, 0x00000000, 0x00000000 + .word 0x3ff1e2ef, 0x3b3fb874, 0x400566a3, 0xa5b2e1b1 + .word 0x3caa1fd2, 0x8cc92e33, 0x00000000, 0x00000000 + .word 0x3ff1cf06, 0xada2811d, 0x3ffe54dd, 0x4ce75f1e + .word 0xbc811b19, 0x5dfc62e5, 0x00000000, 0x00000000 + .word 0x3ff1cf06, 0xada2811d, 0x40057296, 0x9b8b5cd8 + .word 0x3ca30cbf, 0x1c53312e, 0x00000000, 0x00000000 + .word 0x3ff1bb4a, 0x4046ed29, 0x3ffe65b9, 0xedeba38e + .word 0xbc7bb732, 0x51e8c364, 0x00000000, 0x00000000 + .word 0x3ff1bb4a, 0x4046ed29, 0x40057e82, 0xecdabe8d + .word 0xbc7c2aed, 0xf3c4c4bd, 0x00000000, 0x00000000 + .word 0x3ff1a7b9, 0x611a7b96, 0x3ffe768d, 0x399dc470 + .word 0xbc9a8c81, 0x3405c01c, 0x00000000, 0x00000000 + .word 
0x3ff1a7b9, 0x611a7b96, 0x40058a68, 0xa4a8d9f3 + .word 0x3ca50798, 0xe67012d9, 0x00000000, 0x00000000 + .word 0x3ff19453, 0x808ca29c, 0x3ffe8757, 0x3f6c42c5 + .word 0x3c9dbf9c, 0xf7bbcda3, 0x00000000, 0x00000000 + .word 0x3ff19453, 0x808ca29c, 0x40059647, 0xcddf1ca5 + .word 0x3ca14a95, 0xf35dea0b, 0x00000000, 0x00000000 + .word 0x3ff18118, 0x11811812, 0x3ffe9818, 0x0e9b47f2 + .word 0xbc9b6bd7, 0x4396d08e, 0x00000000, 0x00000000 + .word 0x3ff18118, 0x11811812, 0x4005a220, 0x73490377 + .word 0xbcadd036, 0x39925812, 0x00000000, 0x00000000 + .word 0x3ff16e06, 0x89427379, 0x3ffea8cf, 0xb64547ab + .word 0x3c8721b2, 0x6374e19f, 0x00000000, 0x00000000 + .word 0x3ff16e06, 0x89427379, 0x4005adf2, 0x9f948cfb + .word 0xbca42520, 0xf7716fa6, 0x00000000, 0x00000000 + .word 0x3ff15b1e, 0x5f75270d, 0x3ffeb97e, 0x455b9edb + .word 0x3c999b45, 0x40857883, 0x00000000, 0x00000000 + .word 0x3ff15b1e, 0x5f75270d, 0x4005b9be, 0x5d52a9da + .word 0x3c9098cd, 0x1b3af777, 0x00000000, 0x00000000 + .word 0x3ff1485f, 0x0e0acd3b, 0x3ffeca23, 0xcaa72f73 + .word 0x3c7e3ed5, 0x29679959, 0x00000000, 0x00000000 + .word 0x3ff1485f, 0x0e0acd3b, 0x4005c583, 0xb6f7ab03 + .word 0x3ca963bc, 0x9d795b51, 0x00000000, 0x00000000 + .word 0x3ff135c8, 0x1135c811, 0x3ffedac0, 0x54c8f94c + .word 0x3c90b5c1, 0x15a56207, 0x00000000, 0x00000000 + .word 0x3ff135c8, 0x1135c811, 0x4005d142, 0xb6dbadc5 + .word 0x3ca6f1f5, 0x5323d116, 0x00000000, 0x00000000 + .word 0x3ff12358, 0xe75d3033, 0x3ffeeb53, 0xf23ab028 + .word 0xbc8617e4, 0xb5384f5d, 0x00000000, 0x00000000 + .word 0x3ff12358, 0xe75d3033, 0x4005dcfb, 0x673b05df + .word 0xbca099df, 0xc321634f, 0x00000000, 0x00000000 + .word 0x3ff11111, 0x11111111, 0x3ffefbde, 0xb14f4eda + .word 0xbc93a145, 0xfe1be078, 0x00000000, 0x00000000 + .word 0x3ff11111, 0x11111111, 0x4005e8ad, 0xd236a58f + .word 0xbc7ef8c7, 0xc0d1fec6, 0x00000000, 0x00000000 + .word 0x3ff0fef0, 0x10fef011, 0x3fff0c60, 0xa033a7b3 + .word 0xbc91b0fc, 0x15cd89c6, 0x00000000, 0x00000000 + .word 0x3ff0fef0, 
0x10fef011, 0x4005f45a, 0x01d483b4 + .word 0xbc94a237, 0xdc0fa105, 0x00000000, 0x00000000 + .word 0x3ff0ecf5, 0x6be69c90, 0x3fff1cd9, 0xcceef239 + .word 0x3c91afd8, 0x64eab60a, 0x00000000, 0x00000000 + .word 0x3ff0ecf5, 0x6be69c90, 0x40060000, 0x00000000 + .word 0x00000000, 0x00000000, 0x00000000, 0x00000000 + .word 0x3ff0db20, 0xa88f4696, 0x3fff2d4a, 0x45635640 + .word 0xbc8eebae, 0xea670bc2, 0x00000000, 0x00000000 + .word 0x3ff0db20, 0xa88f4696, 0x40060b9f, 0xd68a4554 + .word 0x3ca328e1, 0x70dae176, 0x00000000, 0x00000000 + .word 0x3ff0c971, 0x4fbcda3b, 0x3fff3db2, 0x174e7468 + .word 0x3c9e1513, 0x2d6ac52a, 0x00000000, 0x00000000 + .word 0x3ff0c971, 0x4fbcda3b, 0x40061739, 0x8f2aaa48 + .word 0xbc9b672b, 0xba260735, 0x00000000, 0x00000000 + .word 0x3ff0b7e6, 0xec259dc8, 0x3fff4e11, 0x5049ec26 + .word 0xbc9b6656, 0xb6bd5d76, 0x00000000, 0x00000000 + .word 0x3ff0b7e6, 0xec259dc8, 0x400622cd, 0x337f0fe8 + .word 0x3c9fe207, 0x3279559f, 0x00000000, 0x00000000 + .word 0x3ff0a681, 0x0a6810a7, 0x3fff5e67, 0xfdcbdf44 + .word 0xbc98af06, 0x1849d6fc, 0x00000000, 0x00000000 + .word 0x3ff0a681, 0x0a6810a7, 0x40062e5a, 0xcd0c3ebe + .word 0xbca2c50e, 0x2092203a, 0x00000000, 0x00000000 + .word 0x3ff0953f, 0x39010954, 0x3fff6eb6, 0x2d27730d + .word 0xbc9401d9, 0x5ca1ce34, 0x00000000, 0x00000000 + .word 0x3ff0953f, 0x39010954, 0x400639e2, 0x653e421b + .word 0xbc9f75e0, 0x5835e4b9, 0x00000000, 0x00000000 + .word 0x3ff08421, 0x08421084, 0x3fff7efb, 0xeb8d4f12 + .word 0xbc7e84e8, 0xa6ff3256, 0x00000000, 0x00000000 + .word 0x3ff08421, 0x08421084, 0x40064564, 0x0568c1c3 + .word 0x3cad1778, 0x7e4c8970, 0x00000000, 0x00000000 + .word 0x3ff07326, 0x0a47f7c6, 0x3fff8f39, 0x460c19a8 + .word 0x3c989b4e, 0x16ee9aaf, 0x00000000, 0x00000000 + .word 0x3ff07326, 0x0a47f7c6, 0x400650df, 0xb6c759f4 + .word 0x3c99063c, 0x91db4c77, 0x00000000, 0x00000000 + .word 0x3ff0624d, 0xd2f1a9fc, 0x3fff9f6e, 0x4990f227 + .word 0x3c8b42e5, 0xb5d1e808, 0x00000000, 0x00000000 + .word 0x3ff0624d, 0xd2f1a9fc, 
0x40065c55, 0x827df1d2 + .word 0xbca3923d, 0xf03e1e2f, 0x00000000, 0x00000000 + .word 0x3ff05197, 0xf7d73404, 0x3fffaf9b, 0x02e7e8f2 + .word 0x3c897a76, 0x8f34e1c2, 0x00000000, 0x00000000 + .word 0x3ff05197, 0xf7d73404, 0x400667c5, 0x7199104b + .word 0x3c875b89, 0x6f332e70, 0x00000000, 0x00000000 + .word 0x3ff04104, 0x10410410, 0x3fffbfbf, 0x7ebc755f + .word 0xbc9b2a94, 0x084da0b6, 0x00000000, 0x00000000 + .word 0x3ff04104, 0x10410410, 0x4006732f, 0x8d0e2f77 + .word 0xbc93dffd, 0x470422e3, 0x00000000, 0x00000000 + .word 0x3ff03091, 0xb51f5e1a, 0x3fffcfdb, 0xc999e97d + .word 0x3c82be17, 0xecdd3bbc, 0x00000000, 0x00000000 + .word 0x3ff03091, 0xb51f5e1a, 0x40067e93, 0xddbc0e73 + .word 0xbc86eb9f, 0x32ac1a5c, 0x00000000, 0x00000000 + .word 0x3ff02040, 0x81020408, 0x3fffdfef, 0xefebe3d6 + .word 0xbc909afc, 0xfc7c1f3b, 0x00000000, 0x00000000 + .word 0x3ff02040, 0x81020408, 0x400689f2, 0x6c6b01d0 + .word 0x3cae816f, 0x9d2a1032, 0x00000000, 0x00000000 + .word 0x3ff01010, 0x10101010, 0x3fffeffb, 0xfdfebf1f + .word 0x3c95dee5, 0x1994f18b, 0x00000000, 0x00000000 + .word 0x3ff01010, 0x10101010, 0x4006954b, 0x41cd4293 + .word 0x3ca3d5bc, 0xcc443076, 0x00000000, 0x00000000 + .word 0x3ff00000, 0x00000000, 0x40000000, 0x00000000 + .word 0x00000000, 0x00000000, 0x00000000, 0x00000000 + .word 0x3ff00000, 0x00000000, 0x4006a09e, 0x667f3bcd + .word 0xbcabdd34, 0x13b26456, 0x00000000, 0x00000000 + +#define A5 %f32 +#define A4 %f30 +#define A3 %f28 +#define A2 %f26 +#define A1 %f56 + +#define DC0 %f8 +#define DC2 %f6 +#define DC3 %f4 + +#define counter %l3 +#define TBL %l5 +#define stridex %l6 +#define stridey %l7 + +#define _0x00001ff8 %i0 +#define _0x7ff00000 %o0 +#define _0x00100000 %o2 + +#define tmp_counter STACK_BIAS-0x40 +#define tmp_px STACK_BIAS-0x38 +#define tmp0 STACK_BIAS-0x30 +#define tmp1 STACK_BIAS-0x28 +#define tmp2 STACK_BIAS-0x20 +#define tmp3 STACK_BIAS-0x18 +#define tmp4 STACK_BIAS-0x10 +#define tmp5 STACK_BIAS-0x08 + +! 
sizeof temp storage - must be a multiple of 16 for V9 +#define tmps 0x40 + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +! !!!!! algorithm !!!!! +! ((float*)&res)[0] = ((float*)px)[0]; +! ((float*)&res)[1] = ((float*)px)[1]; +! hx = *(int*)px; +! px += stridex; +! +! if ( hx >= 0x7ff00000 ) /* Inf or NaN (sign bit clear) */ +! { +! res = sqrt(res); +! ((float*)py)[0] = ((float*)&res)[0]; +! ((float*)py)[1] = ((float*)&res)[1]; +! py += stridey; +! goto next; +! } +! if ( hx < 0x00100000 ) /* +0, subnormal, or sign bit set (hx is a signed int) */ +! { +! res = sqrt(res); +! ((float*)py)[0] = ((float*)&res)[0]; +! ((float*)py)[1] = ((float*)&res)[1]; +! py += stridey; +! goto next; +! } +! +! sqrt_exp = hx >> 21; +! sqrt_exp -= 512; +! sqrt_exp <<= 52; +! dsqrt_exp = *(double*)&sqrt_exp; +! bit = hx >> 15; +! bit &= 32; +! ind0 = hx >> 7; +! ind0 &= 0x1ff8; +! ind0 += 32; +! ind0 &= -64; +! ind1 = ind0; +! ind1 += bit; +! +! res = vis_fand(res,DC0); /* DC0 = vis_to_double(0x000fffff, 0xffffffff); */ +! res = vis_for(res,A1); /* A1 = vis_to_double(0x3fe00000, 0x00000000); */ +! res_c = vis_fpadd32(res,DC2); /* DC2 = vis_to_double(0x00001000, 0x00000000); */ +! res_c = vis_fand(res_c,DC3); /* DC3 = vis_to_double(0x7fffe000, 0x00000000); */ +! +! pind = (char*)TBL + ind1; +! dexp_hi = ((double*)pind)[1]; +! dexp_lo = ((double*)pind)[2]; +! +! dtmp0 = ((double*)pind)[0]; +! xx = (res - res_c); +! xx *= dtmp0; +! +! res = A5 * xx; +! res += A4; +! res *= xx; +! res += A3; +! res *= xx; +! res += A2; +! res *= xx; +! res += A1; +! res *= xx; +! +! res = dexp_hi * res; +! res += dexp_lo; +! res += dexp_hi; +! +! dtmp0 = vis_fpadd32(dsqrt_exp,res); +! ((float*)py)[0] = ((float*)&dtmp0)[0]; +! ((float*)py)[1] = ((float*)&dtmp0)[1]; +! py += stridey; +! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
+ + ENTRY(__vsqrt) + save %sp,-SA(MINFRAME)-tmps,%sp + PIC_SETUP(l7) + PIC_SET(l7,.CONST_TBL,l5) + wr %g0,0x82,%asi + + ldd [TBL],A1 + sll %i2,3,stridex + or %g0,%i3,%o4 + + ldd [TBL+8],A2 + sll %i4,3,stridey + or %g0,0x7ff,%o0 + + ldd [TBL+16],A3 + sll %o0,20,_0x7ff00000 + or %g0,0x001,%o2 + + ldd [TBL+24],A4 + sll %o2,20,_0x00100000 + + ldd [TBL+32],A5 + ldd [TBL+40],DC0 + ldd [TBL+48],DC2 + ldd [TBL+56],DC3 + + add TBL,64,TBL + add %g0,1023,%o5 + st %i0,[%fp+tmp_counter] + + sll %o5,3,_0x00001ff8 + stx %i1,[%fp+tmp_px] + +.begin: + ld [%fp+tmp_counter],counter + ldx [%fp+tmp_px],%l2 + st %g0,[%fp+tmp_counter] +.begin1: + cmp counter,0 + ble,pn %icc,.exit + lda [%l2]%asi,%o5 ! (5_1) hx = *(int*)px; + + lda [%l2]%asi,%f10 ! (5_0) ((float*)&res)[0] = ((float*)px)[0]; + + lda [%l2+4]%asi,%f11 ! (5_0) ((float*)&res)[1] = ((float*)px)[1]; + + cmp %o5,_0x7ff00000 ! (5_1) hx ? 0x7ff00000 + bge,pn %icc,.spec ! (5_1) if ( hx >= 0x7ff00000 ) + nop + + cmp %o5,_0x00100000 ! (5_1) hx ? 0x00100000 + bl,pn %icc,.spec ! (5_1) if ( hx < 0x00100000 ) + nop + + add %l2,stridex,%l2 ! px += stridex + fand %f10,DC0,%f50 ! (5_1) res = vis_fand(res,DC0); + + for %f50,A1,%f40 ! (5_1) res = vis_for(res,A1); + sra %o5,21,%l1 ! (5_1) sqrt_exp = hx >> 21; + sra %o5,15,%i1 ! (5_1) bit = hx >> 15; + + sra %o5,7,%o1 ! (5_1) ind0 = hx >> 7; + sub %l1,512,%o3 ! (5_1) sqrt_exp -= 512; + + and %o1,_0x00001ff8,%o1 ! (5_1) ind0 &= 0x1ff8; + lda [%l2]%asi,%f10 ! (0_0) ((float*)&res)[0] = ((float*)px)[0]; + + add %o1,32,%o1 ! (5_1) ind0 += 32; + lda [%l2+4]%asi,%f11 ! (0_0) ((float*)&res)[1] = ((float*)px)[1]; + + and %i1,32,%i4 ! (5_1) bit &= 32; + and %o1,-64,%o1 ! (5_1) ind0 &= -8; + + sll %o1,0,%o7 ! (5_1) ind1 = ind0; + + sllx %o3,52,%o3 ! (5_1) sqrt_exp <<= 52; + add %o7,%i4,%l0 ! (5_1) ind1 += bit; + lda [%l2]%asi,%o5 ! (0_0) hx = *(int*)px; + + stx %o3,[%fp+tmp0] ! (5_1) dsqrt_exp = *(double*)&sqrt_exp; + fand %f10,DC0,%f50 ! (0_0) res = vis_fand(res,DC0); + + add %l2,stridex,%l2 ! 
px += stridex + fpadd32 %f40,DC2,%f54 ! (5_1) res_c = vis_fpadd32(res,DC2); + + add %l0,TBL,%o1 ! (5_1) pind = (char*)TBL + ind1 + + cmp %o5,_0x7ff00000 ! (0_0) hx ? 0x7ff00000 + bge,pn %icc,.update0 ! (0_0) if ( hx >= 0x7ff00000 ) + for %f50,A1,%f42 ! (0_0) res = vis_for(res,A1); +.cont0: + sra %o5,21,%l1 ! (0_0) sqrt_exp = hx >> 21; + sra %o5,15,%i2 ! (0_0) bit = hx >> 15; + ldd [%o1],%f50 ! (5_1) dtmp0 = ((double*)pind)[0]; + + sra %o5,7,%o1 ! (0_0) ind0 = hx >> 7; + sub %l1,512,%o3 ! (0_0) sqrt_exp -= 512; + fand %f54,DC3,%f54 ! (5_1) res_c = vis_fand(res_c,DC3); + + and %o1,_0x00001ff8,%o1 ! (0_0) ind0 &= 0x1ff8; + lda [%l2]%asi,%f10 ! (1_0) ((float*)&res)[0] = ((float*)px)[0]; + + add %o1,32,%o1 ! (0_0) ind0 += 32; + lda [%l2+4]%asi,%f11 ! (1_0) ((float*)&res)[1] = ((float*)px)[1]; + + and %i2,32,%i4 ! (0_0) bit &= 32; + and %o1,-64,%o1 ! (0_0) ind0 &= -8; + fsubd %f40,%f54,%f40 ! (5_1) xx = (res - res_c); + + sll %o1,0,%o7 ! (0_0) ind1 = ind0; + + cmp %o5,_0x00100000 ! (0_0) hx ? 0x00100000 + bl,pn %icc,.update1 ! (0_0) if ( hx < 0x00100000 ) + lda [%l2]%asi,%o5 ! (1_0) hx = *(int*)px; +.cont1: + sllx %o3,52,%o3 ! (0_0) sqrt_exp <<= 52; + add %o7,%i4,%i1 ! (0_0) ind1 += bit; + + fmuld %f40,%f50,%f40 ! (5_1) xx *= dtmp0; + stx %o3,[%fp+tmp1] ! (0_0) dsqrt_exp = *(double*)&sqrt_exp; + fand %f10,DC0,%f50 ! (1_0) res = vis_fand(res,DC0); + + add %l2,stridex,%l2 ! px += stridex + fpadd32 %f42,DC2,%f54 ! (0_0) res_c = vis_fpadd32(res,DC2); + + add %i1,TBL,%o1 ! (0_0) pind = (char*)TBL + ind1 + + cmp %o5,_0x7ff00000 ! (1_0) hx ? 0x7ff00000 + bge,pn %icc,.update2 ! (1_0) if ( hx >= 0x7ff00000 ) + for %f50,A1,%f14 ! (1_0) res = vis_for(res,A1); +.cont2: + sra %o5,21,%l1 ! (1_0) sqrt_exp = hx >> 21; + sra %o5,15,%g5 ! (1_0) bit = hx >> 15; + ldd [%o1],%f50 ! (0_0) dtmp0 = ((double*)pind)[0]; + + fmuld A5,%f40,%f52 ! (5_1) res = A5 * xx; + sra %o5,7,%o1 ! (1_0) ind0 = hx >> 7; + sub %l1,512,%o3 ! (1_0) sqrt_exp -= 512; + fand %f54,DC3,%f54 ! 
(0_0) res_c = vis_fand(res_c,DC3); + + and %o1,_0x00001ff8,%o1 ! (1_0) ind0 &= 0x1ff8; + lda [%l2]%asi,%f10 ! (2_0) ((float*)&res)[0] = ((float*)px)[0]; + + add %o1,32,%o1 ! (1_0) ind0 += 32; + lda [%l2+4]%asi,%f11 ! (2_0) ((float*)&res)[1] = ((float*)px)[1]; + + and %g5,32,%i4 ! (1_0) bit &= 32; + and %o1,-64,%o1 ! (1_0) ind0 &= -8; + fsubd %f42,%f54,%f42 ! (0_0) xx = (res - res_c); + + sll %o1,0,%o7 ! (1_0) ind1 = ind0; + faddd %f52,A4,%f54 ! (5_1) res += A4; + + cmp %o5,_0x00100000 ! (1_0) hx ? 0x00100000 + bl,pn %icc,.update3 ! (1_0) if ( hx < 0x00100000 ) + lda [%l2]%asi,%o5 ! (2_0) hx = *(int*)px; +.cont3: + sllx %o3,52,%o3 ! (1_0) sqrt_exp <<= 52; + add %o7,%i4,%i2 ! (1_0) ind1 += bit; + + fmuld %f42,%f50,%f42 ! (0_0) xx *= dtmp0; + stx %o3,[%fp+tmp2] ! (1_0) dsqrt_exp = *(double*)&sqrt_exp; + fand %f10,DC0,%f50 ! (2_0) res = vis_fand(res,DC0); + + fmuld %f54,%f40,%f34 ! (5_1) res *= xx; + fpadd32 %f14,DC2,%f54 ! (1_0) res_c = vis_fpadd32(res,DC2); + add %l2,stridex,%l2 ! px += stridex + + add %i2,TBL,%o1 ! (1_0) pind = (char*)TBL + ind1 + + cmp %o5,_0x7ff00000 ! (2_0) hx ? 0x7ff00000 + bge,pn %icc,.update4 ! (2_0) if ( hx >= 0x7ff00000 ) + for %f50,A1,%f18 ! (2_0) res = vis_for(res,A1); +.cont4: + sra %o5,21,%l1 ! (2_0) sqrt_exp = hx >> 21; + sra %o5,15,%g1 ! (2_0) bit = hx >> 15; + ldd [%o1],%f50 ! (1_0) dtmp0 = ((double*)pind)[0]; + + fmuld A5,%f42,%f52 ! (0_0) res = A5 * xx; + sra %o5,7,%o1 ! (2_0) ind0 = hx >> 7; + sub %l1,512,%o3 ! (2_0) sqrt_exp -= 512; + fand %f54,DC3,%f54 ! (1_0) res_c = vis_fand(res_c,DC3); + + and %o1,_0x00001ff8,%o1 ! (2_0) ind0 &= 0x1ff8; + lda [%l2]%asi,%f10 ! (3_0) ((float*)&res)[0] = ((float*)px)[0]; + faddd %f34,A3,%f62 ! (5_1) res += A3; + + add %o1,32,%o1 ! (2_0) ind0 += 32; + lda [%l2+4]%asi,%f11 ! (3_0) ((float*)&res)[1] = ((float*)px)[1]; + + and %g1,32,%i4 ! (2_0) bit &= 32; + and %o1,-64,%o1 ! (2_0) ind0 &= -8; + fsubd %f14,%f54,%f14 ! (1_0) xx = (res - res_c); + + sll %o1,0,%o7 ! 
(2_0) ind1 = ind0; + faddd %f52,A4,%f54 ! (0_0) res += A4; + + fmuld %f62,%f40,%f52 ! (5_1) res *= xx; + cmp %o5,_0x00100000 ! (2_0) hx ? 0x00100000 + bl,pn %icc,.update5 ! (2_0) if ( hx < 0x00100000 ) + lda [%l2]%asi,%o5 ! (3_0) hx = *(int*)px; +.cont5: + sllx %o3,52,%o3 ! (2_0) sqrt_exp <<= 52; + add %o7,%i4,%g5 ! (2_0) ind1 += bit; + + fmuld %f14,%f50,%f14 ! (1_0) xx *= dtmp0; + stx %o3,[%fp+tmp3] ! (2_0) dsqrt_exp = *(double*)&sqrt_exp; + fand %f10,DC0,%f50 ! (3_0) res = vis_fand(res,DC0); + + fmuld %f54,%f42,%f34 ! (0_0) res *= xx; + fpadd32 %f18,DC2,%f54 ! (2_0) res_c = vis_fpadd32(res,DC2); + add %l2,stridex,%l2 ! px += stridex + + add %g5,TBL,%o1 ! (2_0) pind = (char*)TBL + ind1 + faddd %f52,A2,%f20 ! (5_1) res += A2; + + cmp %o5,_0x7ff00000 ! (3_0) hx ? 0x7ff00000 + bge,pn %icc,.update6 ! (3_0) if ( hx >= 0x7ff00000 ) + for %f50,A1,%f44 ! (3_0) res = vis_for(res,A1); +.cont6: + sra %o5,21,%l1 ! (3_0) sqrt_exp = hx >> 21; + sra %o5,15,%i3 ! (3_0) bit = hx >> 15; + ldd [%o1],%f50 ! (2_0) dtmp0 = ((double*)pind)[0]; + + fmuld A5,%f14,%f52 ! (1_0) res = A5 * xx; + sra %o5,7,%o1 ! (3_0) ind0 = hx >> 7; + sub %l1,512,%o3 ! (3_0) sqrt_exp -= 512; + fand %f54,DC3,%f54 ! (2_0) res_c = vis_fand(res_c,DC3); + + fmuld %f20,%f40,%f20 ! (5_1) res *= xx; + and %o1,_0x00001ff8,%o1 ! (3_0) ind0 &= 0x1ff8; + lda [%l2]%asi,%f10 ! (4_0) ((float*)&res)[0] = ((float*)px)[0]; + faddd %f34,A3,%f62 ! (0_0) res += A3; + + add %o1,32,%o1 ! (3_0) ind0 += 32; + lda [%l2+4]%asi,%f11 ! (4_0) ((float*)&res)[1] = ((float*)px)[1]; + + and %i3,32,%i4 ! (3_0) bit &= 32; + and %o1,-64,%o1 ! (3_0) ind0 &= -8; + fsubd %f18,%f54,%f18 ! (2_0) xx = (res - res_c); + + sll %o1,0,%o7 ! (3_0) ind1 = ind0; + faddd %f52,A4,%f54 ! (1_0) res += A4; + + fmuld %f62,%f42,%f52 ! (0_0) res *= xx; + cmp %o5,_0x00100000 ! (3_0) hx ? 0x00100000 + bl,pn %icc,.update7 ! (3_0) if ( hx < 0x00100000 ) + faddd %f20,A1,%f12 ! (5_1) res += A1; +.cont7: + lda [%l2]%asi,%o5 ! (4_0) hx = *(int*)px; + sllx %o3,52,%o3 ! 
(3_0) sqrt_exp <<= 52; + add %o7,%i4,%g1 ! (3_0) ind1 += bit; + + fmuld %f18,%f50,%f18 ! (2_0) xx *= dtmp0; + add %l0,TBL,%l0 ! (5_1) pind = (char*)TBL + ind1; + stx %o3,[%fp+tmp4] ! (3_0) dsqrt_exp = *(double*)&sqrt_exp; + fand %f10,DC0,%f50 ! (4_0) res = vis_fand(res,DC0); + + fmuld %f54,%f14,%f34 ! (1_0) res *= xx; + add %l2,stridex,%l2 ! px += stridex + ldd [%l0+16],%f36 ! (5_1) dexp_lo = ((double*)pind)[2]; + fpadd32 %f44,DC2,%f54 ! (3_0) res_c = vis_fpadd32(res,DC2); + + fmuld %f12,%f40,%f12 ! (5_1) res *= xx; + add %g1,TBL,%o1 ! (3_0) (char*)div_arr+ind0 + ldd [%l0+8],%f40 ! (5_1) dexp_hi = ((double*)pind)[1]; + faddd %f52,A2,%f20 ! (0_0) res += A2; + + cmp %o5,_0x7ff00000 ! (4_0) hx ? 0x7ff00000 + bge,pn %icc,.update8 ! (4_0) if ( hx >= 0x7ff00000 ) + for %f50,A1,%f24 ! (4_0) res = vis_for(res,A1); +.cont8: + sra %o5,21,%l1 ! (4_0) sqrt_exp = hx >> 21; + sra %o5,15,%l0 ! (4_0) bit = hx >> 15; + ldd [%o1],%f22 ! (3_0) dtmp0 = ((double*)pind)[0]; + + fmuld A5,%f18,%f52 ! (2_0) res = A5 * xx; + sra %o5,7,%o1 ! (4_0) ind0 = hx >> 7; + sub %l1,512,%o3 ! (4_0) sqrt_exp -= 512; + fand %f54,DC3,%f54 ! (3_0) res_c = vis_fand(res_c,DC3); + + fmuld %f20,%f42,%f20 ! (0_0) res *= xx; + and %o1,_0x00001ff8,%o1 ! (4_0) ind0 &= 0x1ff8; + lda [%l2]%asi,%f10 ! (5_0) ((float*)&res)[0] = ((float*)px)[0]; + faddd %f34,A3,%f62 ! (1_0) res += A3; + + fmuld %f40,%f12,%f34 ! (5_1) res = dexp_hi * res; + add %o1,32,%o1 ! (4_0) ind0 += 32; + lda [%l2+4]%asi,%f11 ! (5_0) ((float*)&res)[1] = ((float*)px)[1]; + + and %l0,32,%i4 ! (4_0) bit &= 32; + cmp %o5,_0x00100000 ! (4_0) hx ? 0x00100000 + bl,pn %icc,.update9 ! (4_0) if ( hx < 0x00100000 ) + fsubd %f44,%f54,%f44 ! (3_0) xx = (res - res_c); +.cont9: + and %o1,-64,%o1 ! (4_0) ind0 &= -8; + faddd %f52,A4,%f54 ! (2_0) res += A4; + + cmp counter,6 + bl,pn %icc,.tail + or %g0,%o4,%l0 + + ba .main_loop + nop + + .align 16 +.main_loop: + fmuld %f62,%f14,%f52 ! (1_1) res *= xx; + sll %o1,0,%i3 ! (4_1) ind1 = ind0; + add %i1,TBL,%i1 ! 
(0_1) pind = (char*)TBL + ind1; + faddd %f20,A1,%f12 ! (0_1) res += A1; + + lda [%l2]%asi,%o5 ! (5_1) hx = *(int*)px; + sllx %o3,52,%o3 ! (4_1) sqrt_exp <<= 52; + add %i3,%i4,%i3 ! (4_1) ind1 += bit; + faddd %f34,%f36,%f60 ! (5_2) res += dexp_lo; + + fmuld %f44,%f22,%f44 ! (3_1) xx *= dtmp0; + add %l2,stridex,%l2 ! px += stridex + stx %o3,[%fp+tmp5] ! (4_1) dsqrt_exp = *(double*)&sqrt_exp; + fand %f10,DC0,%f50 ! (5_1) res = vis_fand(res,DC0); + + fmuld %f54,%f18,%f34 ! (2_1) res *= xx; + nop + ldd [%i1+16],%f36 ! (0_1) dexp_lo = ((double*)pind)[2]; + fpadd32 %f24,DC2,%f54 ! (4_1) res_c = vis_fpadd32(res,DC2); + + fmuld %f12,%f42,%f16 ! (0_1) res *= xx; + sra %o5,21,%l1 ! (5_1) sqrt_exp = hx >> 21; + ldd [%i1+8],%f42 ! (0_1) dexp_hi = ((double*)pind)[1]; + faddd %f52,A2,%f20 ! (1_1) res += A2; + + ldd [%fp+tmp0],%f48 ! (5_2) dsqrt_exp = *(double*)&sqrt_exp; + cmp %o5,_0x7ff00000 ! (5_1) hx ? 0x7ff00000 + bge,pn %icc,.update10 ! (5_1) if ( hx >= 0x7ff00000 ) + faddd %f60,%f40,%f60 ! (5_2) res += dexp_hi; +.cont10: + lda [%l2]%asi,%f10 ! (0_0) ((float*)&res)[0] = ((float*)px)[0]; + sra %o5,15,%i1 ! (5_1) bit = hx >> 15; + add %i3,TBL,%o7 ! (4_1) pind = (char*)TBL + ind1 + for %f50,A1,%f40 ! (5_1) res = vis_for(res,A1); + + fmuld A5,%f44,%f52 ! (3_1) res = A5 * xx; + sra %o5,7,%o1 ! (5_1) ind0 = hx >> 7; + ldd [%o7],%f22 ! (4_1) dtmp0 = ((double*)pind)[0]; + fand %f54,DC3,%f54 ! (4_1) res_c = vis_fand(res_c,DC3); + + fmuld %f20,%f14,%f20 ! (1_1) res *= xx; + and %o1,_0x00001ff8,%o1 ! (5_1) ind0 &= 0x1ff8; + sub %l1,512,%o3 ! (5_1) sqrt_exp -= 512; + faddd %f34,A3,%f62 ! (2_1) res += A3; + + fpadd32 %f48,%f60,%f12 ! (5_2) dtmp0 = vis_fpadd32(dsqrt_exp,res); + add %o1,32,%o1 ! (5_1) ind0 += 32; + st %f12,[%l0] ! (5_2) ((float*)py)[0] = ((float*)&dtmp0)[0]; + fmuld %f42,%f16,%f34 ! (0_1) res = dexp_hi * res; + + lda [%l2+4]%asi,%f11 ! (0_0) ((float*)&res)[1] = ((float*)px)[1]; + and %i1,32,%i4 ! (5_1) bit &= 32; + and %o1,-64,%o1 ! 
(5_1) ind0 &= -8; + fsubd %f24,%f54,%f24 ! (4_1) xx = (res - res_c); + + sll %o1,0,%o7 ! (5_1) ind1 = ind0; + add %l0,stridey,%i1 ! py += stridey + st %f13,[%l0+4] ! (5_2) ((float*)py)[1] = ((float*)&dtmp0)[1]; + faddd %f52,A4,%f54 ! (3_1) res += A4; + + fmuld %f62,%f18,%f52 ! (2_1) res *= xx; + cmp %o5,_0x00100000 ! (5_1) hx ? 0x00100000 + bl,pn %icc,.update11 ! (5_1) if ( hx < 0x00100000 ) + faddd %f20,A1,%f12 ! (1_1) res += A1; +.cont11: + sllx %o3,52,%o3 ! (5_1) sqrt_exp <<= 52; + add %o7,%i4,%l0 ! (5_1) ind1 += bit; + lda [%l2]%asi,%o5 ! (0_0) hx = *(int*)px; + faddd %f34,%f36,%f60 ! (0_1) res += dexp_lo; + + fmuld %f24,%f22,%f24 ! (4_1) xx *= dtmp0; + add %i2,TBL,%i2 ! (1_1) pind = (char*)TBL + ind1; + stx %o3,[%fp+tmp0] ! (5_1) dsqrt_exp = *(double*)&sqrt_exp; + fand %f10,DC0,%f50 ! (0_0) res = vis_fand(res,DC0); + + fmuld %f54,%f44,%f34 ! (3_1) res *= xx; + add %l2,stridex,%l2 ! px += stridex + ldd [%i2+16],%f36 ! (1_1) dexp_lo = ((double*)pind)[2]; + fpadd32 %f40,DC2,%f54 ! (5_1) res_c = vis_fpadd32(res,DC2); + + fmuld %f12,%f14,%f16 ! (1_1) res *= xx; + sra %o5,21,%l1 ! (0_0) sqrt_exp = hx >> 21; + ldd [%i2+8],%f14 ! (1_1) dexp_hi = ((double*)pind)[1]; + faddd %f52,A2,%f20 ! (2_1) res += A2; + + ldd [%fp+tmp1],%f48 ! (0_1) dsqrt_exp = *(double*)&sqrt_exp; + cmp %o5,_0x7ff00000 ! (0_0) hx ? 0x7ff00000 + bge,pn %icc,.update12 ! (0_0) if ( hx >= 0x7ff00000 ) + faddd %f60,%f42,%f60 ! (0_1) res += dexp_hi; +.cont12: + lda [%l2]%asi,%f10 ! (1_0) ((float*)&res)[0] = ((float*)px)[0]; + sra %o5,15,%i2 ! (0_0) bit = hx >> 15; + add %l0,TBL,%o7 ! (5_1) pind = (char*)TBL + ind1 + for %f50,A1,%f42 ! (0_0) res = vis_for(res,A1); + + fmuld A5,%f24,%f52 ! (4_1) res = A5 * xx; + sra %o5,7,%o1 ! (0_0) ind0 = hx >> 7; + ldd [%o7],%f22 ! (5_1) dtmp0 = ((double*)pind)[0]; + fand %f54,DC3,%f54 ! (5_1) res_c = vis_fand(res_c,DC3); + + fmuld %f20,%f18,%f20 ! (2_1) res *= xx; + and %o1,_0x00001ff8,%o1 ! (0_0) ind0 &= 0x1ff8; + sub %l1,512,%o3 ! 
(0_0) sqrt_exp -= 512; + faddd %f34,A3,%f62 ! (3_1) res += A3; + + fpadd32 %f48,%f60,%f12 ! (0_1) dtmp0 = vis_fpadd32(dsqrt_exp,res); + add %o1,32,%o1 ! (0_0) ind0 += 32; + st %f12,[%i1] ! (0_1) ((float*)py)[0] = ((float*)&dtmp0)[0]; + fmuld %f14,%f16,%f34 ! (1_1) res = dexp_hi * res; + + lda [%l2+4]%asi,%f11 ! (1_0) ((float*)&res)[1] = ((float*)px)[1]; + and %i2,32,%i4 ! (0_0) bit &= 32; + and %o1,-64,%o1 ! (0_0) ind0 &= -8; + fsubd %f40,%f54,%f40 ! (5_1) xx = (res - res_c); + + sll %o1,0,%o7 ! (0_0) ind1 = ind0; + add %i1,stridey,%i2 ! py += stridey + st %f13,[%i1+4] ! (0_1) ((float*)py)[1] = ((float*)&dtmp0)[1]; + faddd %f52,A4,%f54 ! (4_1) res += A4; + + fmuld %f62,%f44,%f52 ! (3_1) res *= xx; + cmp %o5,_0x00100000 ! (0_0) hx ? 0x00100000 + bl,pn %icc,.update13 ! (0_0) if ( hx < 0x00100000 ) + faddd %f20,A1,%f12 ! (2_1) res += A1; +.cont13: + lda [%l2]%asi,%o5 ! (1_0) hx = *(int*)px; + sllx %o3,52,%o3 ! (0_0) sqrt_exp <<= 52; + add %o7,%i4,%i1 ! (0_0) ind1 += bit; + faddd %f34,%f36,%f60 ! (1_1) res += dexp_lo; + + fmuld %f40,%f22,%f40 ! (5_1) xx *= dtmp0; + add %g5,TBL,%g5 ! (2_1) pind = (char*)TBL + ind1; + stx %o3,[%fp+tmp1] ! (0_0) dsqrt_exp = *(double*)&sqrt_exp; + fand %f10,DC0,%f50 ! (1_0) res = vis_fand(res,DC0); + + fmuld %f54,%f24,%f34 ! (4_1) res *= xx; + add %l2,stridex,%l2 ! px += stridex + ldd [%g5+16],%f36 ! (2_1) dexp_lo = ((double*)pind)[2]; + fpadd32 %f42,DC2,%f54 ! (0_0) res_c = vis_fpadd32(res,DC2); + + fmuld %f12,%f18,%f16 ! (2_1) res *= xx; + sra %o5,21,%l1 ! (1_0) sqrt_exp = hx >> 21; + ldd [%g5+8],%f18 ! (2_1) dexp_hi = ((double*)pind)[1]; + faddd %f52,A2,%f20 ! (3_1) res += A2; + + ldd [%fp+tmp2],%f48 ! (1_1) dsqrt_exp = *(double*)&sqrt_exp; + cmp %o5,_0x7ff00000 ! (1_0) hx ? 0x7ff00000 + bge,pn %icc,.update14 ! (1_0) if ( hx >= 0x7ff00000 ) + faddd %f60,%f14,%f60 ! (1_1) res += dexp_hi; +.cont14: + lda [%l2]%asi,%f10 ! (2_0) ((float*)&res)[0] = ((float*)px)[0]; + sra %o5,15,%g5 ! (1_0) bit = hx >> 15; + add %i1,TBL,%o7 ! 
(0_0) pind = (char*)TBL + ind1 + for %f50,A1,%f14 ! (1_0) res = vis_for(res,A1); + + fmuld A5,%f40,%f52 ! (5_1) res = A5 * xx; + sra %o5,7,%o1 ! (1_0) ind0 = hx >> 7; + ldd [%o7],%f22 ! (0_0) dtmp0 = ((double*)pind)[0]; + fand %f54,DC3,%f54 ! (0_0) res_c = vis_fand(res_c,DC3); + + fmuld %f20,%f44,%f20 ! (3_1) res *= xx; + and %o1,_0x00001ff8,%o1 ! (1_0) ind0 &= 0x1ff8; + sub %l1,512,%o3 ! (1_0) sqrt_exp -= 512; + faddd %f34,A3,%f62 ! (4_1) res += A3; + + fpadd32 %f48,%f60,%f12 ! (1_1) dtmp0 = vis_fpadd32(dsqrt_exp,res); + add %o1,32,%o1 ! (1_0) ind0 += 32; + st %f12,[%i2] ! (1_1) ((float*)py)[0] = ((float*)&dtmp0)[0]; + fmuld %f18,%f16,%f34 ! (2_1) res = dexp_hi * res; + + lda [%l2+4]%asi,%f11 ! (2_0) ((float*)&res)[1] = ((float*)px)[1]; + and %g5,32,%i4 ! (1_0) bit &= 32; + and %o1,-64,%o1 ! (1_0) ind0 &= -8; + fsubd %f42,%f54,%f42 ! (0_0) xx = (res - res_c); + + sll %o1,0,%o7 ! (1_0) ind1 = ind0; + add %i2,stridey,%g5 ! py += stridey + st %f13,[%i2+4] ! (1_1) ((float*)py)[1] = ((float*)&dtmp0)[1]; + faddd %f52,A4,%f54 ! (5_1) res += A4; + + fmuld %f62,%f24,%f52 ! (4_1) res *= xx; + cmp %o5,_0x00100000 ! (1_0) hx ? 0x00100000 + bl,pn %icc,.update15 ! (1_0) if ( hx < 0x00100000 ) + faddd %f20,A1,%f12 ! (3_1) res += A1; +.cont15: + lda [%l2]%asi,%o5 ! (2_0) hx = *(int*)px; + sllx %o3,52,%o3 ! (1_0) sqrt_exp <<= 52; + add %o7,%i4,%i2 ! (1_0) ind1 += bit; + faddd %f34,%f36,%f60 ! (2_1) res += dexp_lo; + + fmuld %f42,%f22,%f42 ! (0_0) xx *= dtmp0; + add %g1,TBL,%g1 ! (3_1) pind = (char*)TBL + ind1; + stx %o3,[%fp+tmp2] ! (1_0) dsqrt_exp = *(double*)&sqrt_exp; + fand %f10,DC0,%f50 ! (2_0) res = vis_fand(res,DC0); + + fmuld %f54,%f40,%f34 ! (5_1) res *= xx; + fpadd32 %f14,DC2,%f54 ! (1_0) res_c = vis_fpadd32(res,DC2); + add %l2,stridex,%l2 ! px += stridex + ldd [%g1+16],%f36 ! (3_1) dexp_lo = ((double*)pind)[2]; + + fmuld %f12,%f44,%f16 ! (3_1) res *= xx; + sra %o5,21,%l1 ! (2_0) sqrt_exp = hx >> 21; + ldd [%g1+8],%f44 ! 
(3_1) dexp_hi = ((double*)pind)[1]; + faddd %f52,A2,%f20 ! (4_1) res += A2; + + ldd [%fp+tmp3],%f48 ! (2_1) dsqrt_exp = *(double*)&sqrt_exp; + cmp %o5,_0x7ff00000 ! (2_0) hx ? 0x7ff00000 + bge,pn %icc,.update16 ! (2_0) if ( hx >= 0x7ff00000 ) + faddd %f60,%f18,%f60 ! (2_1) res += dexp_hi; +.cont16: + lda [%l2]%asi,%f10 ! (3_0) ((float*)&res)[0] = ((float*)px)[0]; + sra %o5,15,%g1 ! (2_0) bit = hx >> 15; + add %i2,TBL,%o7 ! (1_0) pind = (char*)TBL + ind1 + for %f50,A1,%f18 ! (2_0) res = vis_for(res,A1); + + fmuld A5,%f42,%f52 ! (0_0) res = A5 * xx; + sra %o5,7,%o1 ! (2_0) ind0 = hx >> 7; + ldd [%o7],%f22 ! (1_0) dtmp0 = ((double*)pind)[0]; + fand %f54,DC3,%f54 ! (1_0) res_c = vis_fand(res_c,DC3); + + fmuld %f20,%f24,%f20 ! (4_1) res *= xx; + and %o1,_0x00001ff8,%o1 ! (2_0) ind0 &= 0x1ff8; + sub %l1,512,%o3 ! (2_0) sqrt_exp -= 512; + faddd %f34,A3,%f62 ! (5_1) res += A3; + + fpadd32 %f48,%f60,%f12 ! (2_1) dtmp0 = vis_fpadd32(dsqrt_exp,res); + add %o1,32,%o1 ! (2_0) ind0 += 32; + st %f12,[%g5] ! (2_1) ((float*)py)[0] = ((float*)&dtmp0)[0]; + fmuld %f44,%f16,%f34 ! (3_1) res = dexp_hi * res; + + lda [%l2+4]%asi,%f11 ! (3_0) ((float*)&res)[1] = ((float*)px)[1]; + and %g1,32,%i4 ! (2_0) bit &= 32; + and %o1,-64,%o1 ! (2_0) ind0 &= -8; + fsubd %f14,%f54,%f14 ! (1_0) xx = (res - res_c); + + sll %o1,0,%o7 ! (2_0) ind1 = ind0; + add %g5,stridey,%g1 ! py += stridey + st %f13,[%g5+4] ! (2_1) ((float*)py)[1] = ((float*)&dtmp0)[1]; + faddd %f52,A4,%f54 ! (0_0) res += A4; + + fmuld %f62,%f40,%f52 ! (5_1) res *= xx; + cmp %o5,_0x00100000 ! (2_0) hx ? 0x00100000 + bl,pn %icc,.update17 ! (2_0) if ( hx < 0x00100000 ) + faddd %f20,A1,%f12 ! (4_1) res += A1; +.cont17: + lda [%l2]%asi,%o5 ! (3_0) hx = *(int*)px; + sllx %o3,52,%o3 ! (2_0) sqrt_exp <<= 52; + add %o7,%i4,%g5 ! (2_0) ind1 += bit; + faddd %f34,%f36,%f60 ! (3_1) res += dexp_lo; + + fmuld %f14,%f22,%f14 ! (1_0) xx *= dtmp0; + add %i3,TBL,%i3 ! (4_1) pind = (char*)TBL + ind1; + stx %o3,[%fp+tmp3] ! 
(2_0) dsqrt_exp = *(double*)&sqrt_exp; + fand %f10,DC0,%f50 ! (3_0) res = vis_fand(res,DC0); + + fmuld %f54,%f42,%f34 ! (0_0) res *= xx; + fpadd32 %f18,DC2,%f54 ! (2_0) res_c = vis_fpadd32(res,DC2); + add %l2,stridex,%l2 ! px += stridex + ldd [%i3+16],%f36 ! (4_1) dexp_lo = ((double*)pind)[2]; + + fmuld %f12,%f24,%f16 ! (4_1) res *= xx; + sra %o5,21,%l1 ! (3_0) sqrt_exp = hx >> 21; + ldd [%i3+8],%f24 ! (4_1) dexp_hi = ((double*)pind)[1]; + faddd %f52,A2,%f20 ! (5_1) res += A2; + + ldd [%fp+tmp4],%f48 ! (3_1) dsqrt_exp = *(double*)&sqrt_exp; + cmp %o5,_0x7ff00000 ! (3_0) hx ? 0x7ff00000 + bge,pn %icc,.update18 ! (3_0) if ( hx >= 0x7ff00000 ) + faddd %f60,%f44,%f60 ! (3_1) res += dexp_hi; +.cont18: + lda [%l2]%asi,%f10 ! (4_0) ((float*)&res)[0] = ((float*)px)[0]; + sra %o5,15,%i3 ! (3_0) bit = hx >> 15; + add %g5,TBL,%o7 ! (2_0) pind = (char*)TBL + ind1 + for %f50,A1,%f44 ! (3_0) res = vis_for(res,A1); + + fmuld A5,%f14,%f52 ! (1_0) res = A5 * xx; + sra %o5,7,%o1 ! (3_0) ind0 = hx >> 7; + ldd [%o7],%f22 ! (2_0) dtmp0 = ((double*)pind)[0]; + fand %f54,DC3,%f54 ! (2_0) res_c = vis_fand(res_c,DC3); + + fmuld %f20,%f40,%f20 ! (5_1) res *= xx; + and %o1,_0x00001ff8,%o1 ! (3_0) ind0 &= 0x1ff8; + sub %l1,512,%o3 ! (3_0) sqrt_exp -= 512; + faddd %f34,A3,%f62 ! (0_0) res += A3; + + fpadd32 %f48,%f60,%f12 ! (3_1) dtmp0 = vis_fpadd32(dsqrt_exp,res); + add %o1,32,%o1 ! (3_0) ind0 += 32; + st %f12,[%g1] ! (3_1) ((float*)py)[0] = ((float*)&dtmp0)[0]; + fmuld %f24,%f16,%f34 ! (4_1) res = dexp_hi * res; + + lda [%l2+4]%asi,%f11 ! (4_0) ((float*)&res)[1] = ((float*)px)[1]; + and %i3,32,%i4 ! (3_0) bit &= 32; + and %o1,-64,%o1 ! (3_0) ind0 &= -8; + fsubd %f18,%f54,%f18 ! (2_0) xx = (res - res_c); + + or %g0,%o1,%o7 ! (3_0) ind1 = ind0; + add %g1,stridey,%i3 ! py += stridey + st %f13,[%g1+4] ! (3_1) ((float*)py)[1] = ((float*)&dtmp0)[1]; + faddd %f52,A4,%f54 ! (1_0) res += A4; + + fmuld %f62,%f42,%f52 ! (0_0) res *= xx; + cmp %o5,_0x00100000 ! (3_0) hx ? 
0x00100000 + bl,pn %icc,.update19 ! (3_0) if ( hx < 0x00100000 ) + faddd %f20,A1,%f12 ! (5_1) res += A1; +.cont19: + lda [%l2]%asi,%o5 ! (4_0) hx = *(int*)px; + sllx %o3,52,%o3 ! (3_0) sqrt_exp <<= 52; + add %o7,%i4,%g1 ! (3_0) ind1 += bit; + faddd %f34,%f36,%f60 ! (4_1) res += dexp_lo; + + fmuld %f18,%f22,%f18 ! (2_0) xx *= dtmp0; + add %l0,TBL,%l0 ! (5_1) pind = (char*)TBL + ind1; + stx %o3,[%fp+tmp4] ! (3_0) dsqrt_exp = *(double*)&sqrt_exp; + fand %f10,DC0,%f50 ! (4_0) res = vis_fand(res,DC0); + + fmuld %f54,%f14,%f34 ! (1_0) res *= xx; + add %l2,stridex,%l2 ! px += stridex + ldd [%l0+16],%f36 ! (5_1) dexp_lo = ((double*)pind)[2]; + fpadd32 %f44,DC2,%f54 ! (3_0) res_c = vis_fpadd32(res,DC2); + + fmuld %f12,%f40,%f16 ! (5_1) res *= xx; + sra %o5,21,%l1 ! (4_0) sqrt_exp = hx >> 21; + ldd [%l0+8],%f40 ! (5_1) dexp_hi = ((double*)pind)[1]; + faddd %f52,A2,%f20 ! (0_0) res += A2; + + ldd [%fp+tmp5],%f48 ! (4_1) dsqrt_exp = *(double*)&sqrt_exp; + cmp %o5,_0x7ff00000 ! (4_0) hx ? 0x7ff00000 + bge,pn %icc,.update20 ! (4_0) if ( hx >= 0x7ff00000 ) + faddd %f60,%f24,%f60 ! (4_1) res += dexp_hi; +.cont20: + lda [%l2]%asi,%f10 ! (5_0) ((float*)&res)[0] = ((float*)px)[0]; + sra %o5,15,%l0 ! (4_0) bit = hx >> 15; + add %g1,TBL,%o7 ! (3_0) (char*)div_arr+ind0 + for %f50,A1,%f24 ! (4_0) res = vis_for(res,A1); + + fmuld A5,%f18,%f52 ! (2_0) res = A5 * xx; + sra %o5,7,%o1 ! (4_0) ind0 = hx >> 7; + ldd [%o7],%f22 ! (3_0) dtmp0 = ((double*)pind)[0]; + fand %f54,DC3,%f54 ! (3_0) res_c = vis_fand(res_c,DC3); + + fmuld %f20,%f42,%f20 ! (0_0) res *= xx; + and %o1,_0x00001ff8,%o1 ! (4_0) ind0 &= 0x1ff8; + sub %l1,512,%o3 ! (4_0) sqrt_exp -= 512; + faddd %f34,A3,%f62 ! (1_0) res += A3; + + lda [%l2+4]%asi,%f11 ! (5_0) ((float*)&res)[1] = ((float*)px)[1]; + add %o1,32,%o1 ! (4_0) ind0 += 32; + fpadd32 %f48,%f60,%f12 ! (4_1) dtmp0 = vis_fpadd32(dsqrt_exp,res); + fmuld %f40,%f16,%f34 ! (5_1) res = dexp_hi * res; + + and %l0,32,%i4 ! (4_0) bit &= 32; + cmp %o5,_0x00100000 ! (4_0) hx ? 
0x00100000 + bl,pn %icc,.update21 ! (4_0) if ( hx < 0x00100000 ) + fsubd %f44,%f54,%f44 ! (3_0) xx = (res - res_c); +.cont21: + and %o1,-64,%o1 ! (4_0) ind0 &= -8; + sub counter,6,counter ! counter + st %f12,[%i3] ! (4_1) ((float*)py)[0] = ((float*)&dtmp0)[0]; + faddd %f52,A4,%f54 ! (2_0) res += A4; + + st %f13,[%i3+4] ! (4_1) ((float*)py)[1] = ((float*)&dtmp0)[1]; + cmp counter,6 + bge,pt %icc,.main_loop + add %i3,stridey,%l0 ! py += stridey + +.tail: + subcc counter,1,counter + bneg .begin + or %g0,%l0,%o4 + + fmuld %f62,%f14,%f52 ! (1_1) res *= xx; + add %i1,TBL,%i1 ! (0_1) pind = (char*)TBL + ind1; + faddd %f20,A1,%f12 ! (0_1) res += A1; + + faddd %f34,%f36,%f60 ! (5_2) res += dexp_lo; + + fmuld %f44,%f22,%f44 ! (3_1) xx *= dtmp0; + add %l2,stridex,%l2 ! px += stridex + + fmuld %f54,%f18,%f34 ! (2_1) res *= xx; + ldd [%i1+16],%f36 ! (0_1) dexp_lo = ((double*)pind)[2]; + + fmuld %f12,%f42,%f12 ! (0_1) res *= xx; + ldd [%i1+8],%f42 ! (0_1) dexp_hi = ((double*)pind)[1]; + faddd %f52,A2,%f20 ! (1_1) res += A2; + + ldd [%fp+tmp0],%f48 ! (5_2) dsqrt_exp = *(double*)&sqrt_exp; + faddd %f60,%f40,%f60 ! (5_2) res += dexp_hi; + + fmuld A5,%f44,%f52 ! (3_1) res = A5 * xx; + + fmuld %f20,%f14,%f20 ! (1_1) res *= xx; + faddd %f34,A3,%f62 ! (2_1) res += A3; + + fmuld %f42,%f12,%f34 ! (0_1) res = dexp_hi * res; + fpadd32 %f48,%f60,%f12 ! (5_2) dtmp0 = vis_fpadd32(dsqrt_exp,res); + + st %f12,[%l0] ! (5_2) ((float*)py)[0] = ((float*)&dtmp0)[0]; + + add %l0,stridey,%i1 ! py += stridey + st %f13,[%l0+4] ! (5_2) ((float*)py)[1] = ((float*)&dtmp0)[1]; + faddd %f52,A4,%f54 ! (3_1) res += A4; + + subcc counter,1,counter + bneg .begin + or %g0,%i1,%o4 + + fmuld %f62,%f18,%f52 ! (2_1) res *= xx; + faddd %f20,A1,%f12 ! (1_1) res += A1; + + faddd %f34,%f36,%f60 ! (0_1) res += dexp_lo; + + add %i2,TBL,%i2 ! (1_1) pind = (char*)TBL + ind1; + + fmuld %f54,%f44,%f34 ! (3_1) res *= xx; + add %l2,stridex,%l2 ! px += stridex + ldd [%i2+16],%f36 ! 
(1_1) dexp_lo = ((double*)pind)[2]; + + fmuld %f12,%f14,%f12 ! (1_1) res *= xx; + ldd [%i2+8],%f14 ! (1_1) dexp_hi = ((double*)pind)[1]; + faddd %f52,A2,%f20 ! (2_1) res += A2; + + ldd [%fp+tmp1],%f48 ! (0_1) dsqrt_exp = *(double*)&sqrt_exp; + faddd %f60,%f42,%f60 ! (0_1) res += dexp_hi; + + fmuld %f20,%f18,%f20 ! (2_1) res *= xx; + faddd %f34,A3,%f62 ! (3_1) res += A3; + + fmuld %f14,%f12,%f34 ! (1_1) res = dexp_hi * res; + fpadd32 %f48,%f60,%f12 ! (0_1) dtmp0 = vis_fpadd32(dsqrt_exp,res); + + st %f12,[%i1] ! (0_1) ((float*)py)[0] = ((float*)&dtmp0)[0]; + + add %i1,stridey,%i2 ! py += stridey + st %f13,[%i1+4] ! (0_1) ((float*)py)[1] = ((float*)&dtmp0)[1]; + + subcc counter,1,counter + bneg .begin + or %g0,%i2,%o4 + + fmuld %f62,%f44,%f52 ! (3_1) res *= xx; + faddd %f20,A1,%f12 ! (2_1) res += A1; + + faddd %f34,%f36,%f60 ! (1_1) res += dexp_lo; + + add %g5,TBL,%g5 ! (2_1) pind = (char*)TBL + ind1; + + add %l2,stridex,%l2 ! px += stridex + ldd [%g5+16],%f36 ! (2_1) dexp_lo = ((double*)pind)[2]; + + fmuld %f12,%f18,%f12 ! (2_1) res *= xx; + ldd [%g5+8],%f18 ! (2_1) dexp_hi = ((double*)pind)[1]; + faddd %f52,A2,%f20 ! (3_1) res += A2; + + ldd [%fp+tmp2],%f48 ! (1_1) dsqrt_exp = *(double*)&sqrt_exp; + faddd %f60,%f14,%f60 ! (1_1) res += dexp_hi; + + fmuld %f20,%f44,%f20 ! (3_1) res *= xx; + + fmuld %f18,%f12,%f34 ! (2_1) res = dexp_hi * res; + fpadd32 %f48,%f60,%f12 ! (1_1) dtmp0 = vis_fpadd32(dsqrt_exp,res); + + st %f12,[%i2] ! (1_1) ((float*)py)[0] = ((float*)&dtmp0)[0]; + + add %i2,stridey,%g5 ! py += stridey + st %f13,[%i2+4] ! (1_1) ((float*)py)[1] = ((float*)&dtmp0)[1]; + + subcc counter,1,counter + bneg .begin + or %g0,%g5,%o4 + + faddd %f20,A1,%f12 ! (3_1) res += A1; + + faddd %f34,%f36,%f60 ! (2_1) res += dexp_lo; + + add %g1,TBL,%g1 ! (3_1) pind = (char*)TBL + ind1; + + add %l2,stridex,%l2 ! px += stridex + ldd [%g1+16],%f36 ! (3_1) dexp_lo = ((double*)pind)[2]; + + fmuld %f12,%f44,%f12 ! (3_1) res *= xx; + ldd [%g1+8],%f44 ! 
(3_1) dexp_hi = ((double*)pind)[1]; + + ldd [%fp+tmp3],%f48 ! (2_1) dsqrt_exp = *(double*)&sqrt_exp; + faddd %f60,%f18,%f60 ! (2_1) res += dexp_hi; + + fmuld %f44,%f12,%f34 ! (3_1) res = dexp_hi * res; + fpadd32 %f48,%f60,%f12 ! (2_1) dtmp0 = vis_fpadd32(dsqrt_exp,res); + + st %f12,[%g5] ! (2_1) ((float*)py)[0] = ((float*)&dtmp0)[0]; + + add %g5,stridey,%g1 ! py += stridey + st %f13,[%g5+4] ! (2_1) ((float*)py)[1] = ((float*)&dtmp0)[1]; + + subcc counter,1,counter + bneg .begin + or %g0,%g1,%o4 + + faddd %f34,%f36,%f60 ! (3_1) res += dexp_lo; + + add %l2,stridex,%l2 ! px += stridex + + ldd [%fp+tmp4],%f48 ! (3_1) dsqrt_exp = *(double*)&sqrt_exp; + faddd %f60,%f44,%f60 ! (3_1) res += dexp_hi; + + fpadd32 %f48,%f60,%f12 ! (3_1) dtmp0 = vis_fpadd32(dsqrt_exp,res); + + st %f12,[%g1] ! (3_1) ((float*)py)[0] = ((float*)&dtmp0)[0]; + + add %g1,stridey,%i3 ! py += stridey + st %f13,[%g1+4] ! (3_1) ((float*)py)[1] = ((float*)&dtmp0)[1]; + + ba .begin + or %g0,%i3,%o4 + + .align 16 +.spec: + fsqrtd %f10,%f10 + add %l2,stridex,%l2 + + st %f10,[%o4] + st %f11,[%o4+4] + + add %o4,stridey,%o4 + ba .begin1 + sub counter,1,counter + + .align 16 +.update0: + cmp counter,1 + ble .cont0 + nop + + sub %l2,stridex,%i5 + stx %i5,[%fp+tmp_px] + + sub counter,1,counter + st counter,[%fp+tmp_counter] + + ba .cont0 + or %g0,1,counter + + .align 16 +.update1: + cmp counter,1 + ble .cont1 + nop + + sub %l2,stridex,%i5 + stx %i5,[%fp+tmp_px] + + sub counter,1,counter + st counter,[%fp+tmp_counter] + + ba .cont1 + or %g0,1,counter + + .align 16 +.update2: + cmp counter,2 + ble .cont2 + nop + + sub %l2,stridex,%i5 + stx %i5,[%fp+tmp_px] + + sub counter,2,counter + st counter,[%fp+tmp_counter] + + ba .cont2 + or %g0,2,counter + + .align 16 +.update3: + cmp counter,2 + ble .cont3 + nop + + sub %l2,stridex,%i5 + stx %i5,[%fp+tmp_px] + + sub counter,2,counter + st counter,[%fp+tmp_counter] + + ba .cont3 + or %g0,2,counter + + .align 16 +.update4: + cmp counter,3 + ble .cont4 + nop + + sub 
%l2,stridex,%i5 + stx %i5,[%fp+tmp_px] + + sub counter,3,counter + st counter,[%fp+tmp_counter] + + ba .cont4 + or %g0,3,counter + + .align 16 +.update5: + cmp counter,3 + ble .cont5 + nop + + sub %l2,stridex,%i5 + stx %i5,[%fp+tmp_px] + + sub counter,3,counter + st counter,[%fp+tmp_counter] + + ba .cont5 + or %g0,3,counter + + .align 16 +.update6: + cmp counter,4 + ble .cont6 + nop + + sub %l2,stridex,%i5 + stx %i5,[%fp+tmp_px] + + sub counter,4,counter + st counter,[%fp+tmp_counter] + + ba .cont6 + or %g0,4,counter + + .align 16 +.update7: + cmp counter,4 + ble .cont7 + nop + + sub %l2,stridex,%i5 + stx %i5,[%fp+tmp_px] + + sub counter,4,counter + st counter,[%fp+tmp_counter] + + ba .cont7 + or %g0,4,counter + + .align 16 +.update8: + cmp counter,5 + ble .cont8 + nop + + sub %l2,stridex,%i5 + stx %i5,[%fp+tmp_px] + + sub counter,5,counter + st counter,[%fp+tmp_counter] + + ba .cont8 + or %g0,5,counter + + .align 16 +.update9: + cmp counter,5 + ble .cont9 + nop + + sub %l2,stridex,%i5 + stx %i5,[%fp+tmp_px] + + sub counter,5,counter + st counter,[%fp+tmp_counter] + + ba .cont9 + or %g0,5,counter + + .align 16 +.update10: + cmp counter,6 + ble .cont10 + nop + + sub %l2,stridex,%i5 + stx %i5,[%fp+tmp_px] + + sub counter,6,counter + st counter,[%fp+tmp_counter] + + ba .cont10 + or %g0,6,counter + + .align 16 +.update11: + cmp counter,6 + ble .cont11 + nop + + sub %l2,stridex,%i5 + stx %i5,[%fp+tmp_px] + + sub counter,6,counter + st counter,[%fp+tmp_counter] + + ba .cont11 + or %g0,6,counter + + .align 16 +.update12: + cmp counter,7 + ble .cont12 + nop + + sub %l2,stridex,%i5 + stx %i5,[%fp+tmp_px] + + sub counter,7,counter + st counter,[%fp+tmp_counter] + + ba .cont12 + or %g0,7,counter + + .align 16 +.update13: + cmp counter,7 + ble .cont13 + nop + + sub %l2,stridex,%i5 + stx %i5,[%fp+tmp_px] + + sub counter,7,counter + st counter,[%fp+tmp_counter] + + ba .cont13 + or %g0,7,counter + + .align 16 +.update14: + cmp counter,8 + ble .cont14 + nop + + sub %l2,stridex,%i5 
+ stx %i5,[%fp+tmp_px] + + sub counter,8,counter + st counter,[%fp+tmp_counter] + + ba .cont14 + or %g0,8,counter + + .align 16 +.update15: + cmp counter,8 + ble .cont15 + nop + + sub %l2,stridex,%i5 + stx %i5,[%fp+tmp_px] + + sub counter,8,counter + st counter,[%fp+tmp_counter] + + ba .cont15 + or %g0,8,counter + + .align 16 +.update16: + cmp counter,9 + ble .cont16 + nop + + sub %l2,stridex,%i5 + stx %i5,[%fp+tmp_px] + + sub counter,9,counter + st counter,[%fp+tmp_counter] + + ba .cont16 + or %g0,9,counter + + .align 16 +.update17: + cmp counter,9 + ble .cont17 + nop + + sub %l2,stridex,%i5 + stx %i5,[%fp+tmp_px] + + sub counter,9,counter + st counter,[%fp+tmp_counter] + + ba .cont17 + or %g0,9,counter + + .align 16 +.update18: + cmp counter,10 + ble .cont18 + nop + + sub %l2,stridex,%i5 + stx %i5,[%fp+tmp_px] + + sub counter,10,counter + st counter,[%fp+tmp_counter] + + ba .cont18 + or %g0,10,counter + + .align 16 +.update19: + cmp counter,10 + ble .cont19 + nop + + sub %l2,stridex,%i5 + stx %i5,[%fp+tmp_px] + + sub counter,10,counter + st counter,[%fp+tmp_counter] + + ba .cont19 + or %g0,10,counter + + .align 16 +.update20: + cmp counter,11 + ble .cont20 + nop + + sub %l2,stridex,%i5 + stx %i5,[%fp+tmp_px] + + sub counter,11,counter + st counter,[%fp+tmp_counter] + + ba .cont20 + or %g0,11,counter + + .align 16 +.update21: + cmp counter,11 + ble .cont21 + nop + + sub %l2,stridex,%i5 + stx %i5,[%fp+tmp_px] + + sub counter,11,counter + st counter,[%fp+tmp_counter] + + ba .cont21 + or %g0,11,counter + +.exit: + ret + restore + + SET_SIZE(__vsqrt) + diff --git a/usr/src/lib/libmvec/common/vis/__vsqrtf.S b/usr/src/lib/libmvec/common/vis/__vsqrtf.S new file mode 100644 index 0000000000..45b20af2bc --- /dev/null +++ b/usr/src/lib/libmvec/common/vis/__vsqrtf.S @@ -0,0 +1,59 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). 
+ * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .file "__vsqrtf.S" + +#include "libm.h" + + .section ".text" + .file "__vsqrtf.S" + + ENTRY(__vsqrtf) + + lda [%o1]0x82,%f0 + subcc %o0,1,%o0 + bneg,pn %icc,.exit + sll %o2,2,%o2 + ba .loop + sll %o4,2,%o4 + + .align 16 +.loop: + fsqrts %f0,%f2 + lda [%o1+%o2]0x82,%f0 + add %o1,%o2,%o1 + subcc %o0,1,%o0 + st %f2,[%o3] + bpos,pt %icc,.loop + add %o3,%o4,%o3 +.exit: + retl + nop + + SET_SIZE(__vsqrtf) + diff --git a/usr/src/lib/libmvec/common/vis/__vsqrtf_ultra3.S b/usr/src/lib/libmvec/common/vis/__vsqrtf_ultra3.S new file mode 100644 index 0000000000..054a418ae9 --- /dev/null +++ b/usr/src/lib/libmvec/common/vis/__vsqrtf_ultra3.S @@ -0,0 +1,994 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .file "__vsqrtf_ultra3.S" + +#include "libm.h" +#if defined(LIBMVEC_SO_BUILD) + .weak __vsqrtf + .type __vsqrtf,#function + __vsqrtf = __vsqrtf_ultra3 +#endif + + RO_DATA + .align 64 + +.CONST_TBL: + .word 0x3fe00001, 0x80007e00 ! K1 = 5.00000715259318464227e-01 + .word 0xbfc00003, 0xc0017a01 ! K2 = -1.25000447037521686593e-01 + .word 0x000fffff, 0xffffffff ! DC0 = 0x000fffffffffffff + .word 0x3ff00000, 0x00000000 ! DC1 = 0x3ff0000000000000 + .word 0x7ffff000, 0x00000000 ! DC2 = 0x7ffff00000000000 + +#define DC0 %f6 +#define DC1 %f4 +#define DC2 %f2 +#define K2 %f38 +#define K1 %f36 +#define TBL %l2 +#define stridex %l3 +#define stridey %l4 +#define _0x1ff0 %l5 +#define counter %l6 +#define _0x00800000 %l7 +#define _0x7f800000 %o0 + +#define tmp_px STACK_BIAS-0x40 +#define tmp_counter STACK_BIAS-0x38 +#define tmp0 STACK_BIAS-0x30 +#define tmp1 STACK_BIAS-0x28 +#define tmp2 STACK_BIAS-0x20 +#define tmp3 STACK_BIAS-0x18 +#define tmp4 STACK_BIAS-0x10 + +! sizeof temp storage - must be a multiple of 16 for V9 +#define tmps 0x40 + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +! !!!!! algorithm !!!!! +! +! x0 = *px; +! ax = *(int*)px; +! px += stridex; +! +! if( ax >= 0x7f800000 ) +! { +! *py = sqrtf(x0); +! py += stridey; +! continue; +! } +! if( ax < 0x00800000 ) +! { +! *py = sqrtf(x0); +! py += stridey; +! continue; +! } +! +! db0 = (double)x0; +! iexp0 = ax >> 24; +! 
iexp0 += 0x3c0; +! lexp0 = (long long)iexp0 << 52; +! +! db0 = vis_fand(db0,DC0); +! db0 = vis_for(db0,DC1); +! hi0 = vis_fand(db0,DC2); +! +! ax >>= 11; +! si0 = ax & 0x1ff0; +! dtmp0 = ((double*)((char*)TBL + si0))[0]; +! xx0 = (db0 - hi0); +! xx0 *= dtmp0; +! dtmp0 = ((double*)((char*)TBL + si0))[1] +! res0 = K2 * xx0; +! res0 += K1; +! res0 *= xx0; +! res0 += DC1; +! res0 = dtmp0 * res0; +! dtmp1 = *((double*)&lexp0); +! res0 *= dtmp1; +! fres0 = (float)res0; +! *py = fres0; +! py += stridey; +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + ENTRY(__vsqrtf_ultra3) + save %sp,-SA(MINFRAME)-tmps,%sp + PIC_SETUP(l7) + PIC_SET(l7,.CONST_TBL,o2) + PIC_SET(l7,__vlibm_TBL_sqrtf,l2) + + st %i0,[%fp+tmp_counter] + sll %i2,2,stridex + or %g0,0xff8,%l5 + + stx %i1,[%fp+tmp_px] + sll %l5,1,_0x1ff0 + + ldd [%o2],K1 + sll %i4,2,stridey + + ldd [%o2+8],K2 + or %g0,%i3,%g5 + + ldd [%o2+16],DC0 + sethi %hi(0x7f800000),%o0 + + ldd [%o2+24],DC1 + sethi %hi(0x00800000),%l7 + + ldd [%o2+32],DC2 + +.begin: + ld [%fp+tmp_counter],counter + ldx [%fp+tmp_px],%i1 + st %g0,[%fp+tmp_counter] +.begin1: + cmp counter,0 + ble,pn %icc,.exit + + lda [%i1]0x82,%o2 ! (2_0) ax = *(int*)px; + + or %g0,%i1,%o7 + lda [%i1]0x82,%f25 ! (2_0) x0 = *px; + + cmp %o2,_0x7f800000 ! (2_0) ax ? 0x7f800000 + bge,pn %icc,.spec ! (2_0) if( ax >= 0x7f800000 ) + nop + + cmp %o2,_0x00800000 ! (2_0) ax ? 0x00800000 + bl,pn %icc,.spec ! (2_0) if( ax < 0x00800000 ) + nop + + fstod %f25,%f56 ! (2_0) db0 = (double)x0; + + lda [stridex+%o7]0x82,%o1 ! (3_0) ax = *(int*)px; + + sra %o2,24,%l1 ! (2_0) iexp0 = ax >> 24; + + add %o7,stridex,%i1 ! px += stridex + add %l1,960,%l0 ! (2_0) iexp0 += 0x3c0; + lda [stridex+%o7]0x82,%f0 ! (3_0) x0 = *px; + fand %f56,DC0,%f60 ! (2_0) db0 = vis_fand(db0,DC0); + + cmp %o1,_0x7f800000 ! (3_0) ax ? 0x7f800000 + bge,pn %icc,.update0 ! (3_0) if( ax >= 0x7f800000 ) + nop +.cont0: + sllx %l0,52,%o3 ! (2_0) lexp0 = (long long)iexp0 << 52; + + sra %o2,11,%i2 ! 
(2_0) ax >>= 11; + stx %o3,[%fp+tmp0] ! (2_0) dtmp1 = *((double*)&lexp0); + for %f60,DC1,%f40 ! (2_0) db0 = vis_for(db0,DC1); + + cmp %o1,_0x00800000 ! (3_0) ax ? 0x00800000 + bl,pn %icc,.update1 ! (3_0) if( ax < 0x00800000 ) + nop +.cont1: + fstod %f0,%f48 ! (3_0) db0 = (double)x0; + + and %i2,_0x1ff0,%o3 ! (2_0) si0 = ax & 0x1ff0; + lda [%i1+stridex]0x82,%o2 ! (4_0) ax = *(int*)px; + + add %i1,stridex,%i1 ! px += stridex + add %o3,TBL,%i2 ! (2_0) (char*)TBL + si0 + fand %f40,DC2,%f46 ! (2_0) hi0 = vis_fand(db0,DC2); + + sra %o1,24,%o4 ! (3_0) iexp0 = ax >> 24; + + lda [%i1]0x82,%f13 ! (4_0) x0 = *px; + fand %f48,DC0,%f58 ! (3_0) db0 = vis_fand(db0,DC0); + + add %o4,960,%i0 ! (3_0) iexp0 += 0x3c0; + + cmp %o2,_0x7f800000 ! (4_1) ax ? 0x7f800000 + bge,pn %icc,.update2 ! (4_1) if( ax >= 0x7f800000 ) + nop +.cont2: + fsubd %f40,%f46,%f44 ! (2_1) xx0 = (db0 - hi0); + sllx %i0,52,%g1 ! (3_1) lexp0 = (long long)iexp0 << 52; + ldd [%i2],%f40 ! (2_1) dtmp0 = ((double*)((char*)TBL + si0))[0]; + + sra %o1,11,%l0 ! (3_1) ax >>= 11; + stx %g1,[%fp+tmp1] ! (3_1) dtmp1 = *((double*)&lexp0); + for %f58,DC1,%f48 ! (3_1) db0 = vis_for(db0,DC1); + + cmp %o2,_0x00800000 ! (4_1) ax ? 0x00800000 + bl,pn %icc,.update3 ! (4_1) if( ax < 0x00800000 ) + nop +.cont3: + fstod %f13,%f50 ! (4_1) db0 = (double)x0; + + fmuld %f44,%f40,%f46 ! (2_1) xx0 *= dtmp0; + and %l0,_0x1ff0,%i0 ! (3_1) si0 = ax & 0x1ff0; + lda [%i1+stridex]0x82,%l1 ! (0_0) ax = *(int*)px; + + add %i0,TBL,%l0 ! (3_1) (char*)TBL + si0 + fand %f48,DC2,%f62 ! (3_1) hi0 = vis_fand(db0,DC2); + + sra %o2,24,%o7 ! (4_1) iexp0 = ax >> 24; + + add %i1,stridex,%o4 ! px += stridex + add %o7,960,%o7 ! (4_1) iexp0 += 0x3c0; + lda [%i1+stridex]0x82,%f17 ! (0_0) x0 = *px; + fand %f50,DC0,%f54 ! (4_1) db0 = vis_fand(db0,DC0); + + fmuld K2,%f46,%f52 ! (2_1) res0 = K2 * xx0; + cmp %l1,_0x7f800000 ! (0_0) ax ? 0x7f800000 + bge,pn %icc,.update4 ! (0_0) if( ax >= 0x7f800000 ) + fsubd %f48,%f62,%f42 ! 
(3_1) xx0 = (db0 - hi0); +.cont4: + sllx %o7,52,%o1 ! (4_1) lexp0 = (long long)iexp0 << 52; + ldd [%i0+TBL],%f40 ! (3_1) dtmp0 = ((double*)((char*)TBL + si0))[0]; + + sra %o2,11,%i5 ! (4_1) ax >>= 11; + stx %o1,[%fp+tmp2] ! (4_1) dtmp1 = *((double*)&lexp0); + for %f54,DC1,%f34 ! (4_1) db0 = vis_for(db0,DC1); + + cmp %l1,_0x00800000 ! (0_0) ax ? 0x00800000 + bl,pn %icc,.update5 ! (0_0) if( ax < 0x00800000 ) + nop +.cont5: + fstod %f17,%f56 ! (0_0) db0 = (double)x0; + + fmuld %f42,%f40,%f42 ! (3_1) xx0 *= dtmp0; + lda [stridex+%o4]0x82,%i0 ! (1_0) ax = *(int*)px; + faddd %f52,K1,%f52 ! (2_1) res0 += K1; + + sra %l1,24,%g1 ! (0_0) iexp0 = ax >> 24; + and %i5,_0x1ff0,%i5 ! (4_1) si0 = ax & 0x1ff0; + fand %f34,DC2,%f62 ! (4_1) hi0 = vis_fand(db0,DC2); + + add %o4,stridex,%i1 ! px += stridex + + add %g1,960,%o5 ! (0_0) iexp0 += 0x3c0; + add %i5,TBL,%i3 ! (4_1) (char*)TBL + si0 + lda [stridex+%o4]0x82,%f21 ! (1_0) x0 = *px; + fand %f56,DC0,%f32 ! (0_0) db0 = vis_fand(db0,DC0); + + fmuld K2,%f42,%f50 ! (3_1) res0 = K2 * xx0; + cmp %i0,_0x7f800000 ! (1_0) ax ? 0x7f800000 + bge,pn %icc,.update6 ! (1_0) if( ax >= 0x7f800000 ) + fsubd %f34,%f62,%f54 ! (4_1) xx0 = (db0 - hi0); +.cont6: + fmuld %f52,%f46,%f52 ! (2_1) res0 *= xx0; + sllx %o5,52,%o7 ! (0_0) lexp0 = (long long)iexp0 << 52; + ldd [TBL+%i5],%f62 ! (4_1) dtmp0 = ((double*)((char*)TBL + si0))[0]; + + sra %l1,11,%i4 ! (0_0) ax >>= 11; + stx %o7,[%fp+tmp3] ! (0_0) dtmp1 = *((double*)&lexp0); + for %f32,DC1,%f48 ! (0_0) db0 = vis_for(db0,DC1); + + cmp %i0,_0x00800000 ! (1_0) ax ? 0x00800000 + bl,pn %icc,.update7 ! (1_0) if( ax < 0x00800000 ) + nop +.cont7: + fstod %f21,%f56 ! (1_0) db0 = (double)x0; + + fmuld %f54,%f62,%f46 ! (4_1) xx0 *= dtmp0; + and %i4,_0x1ff0,%g1 ! (0_0) si0 = ax & 0x1ff0; + lda [%i1+stridex]0x82,%o2 ! (2_0) ax = *(int*)px; + faddd %f50,K1,%f62 ! (3_1) res0 += K1; + + add %g1,TBL,%i5 ! (0_0) (double*)((char*)TBL + si0 + fand %f48,DC2,%f32 ! (0_0) hi0 = vis_fand(db0,DC2); + + sra %i0,24,%o4 ! 
(1_0) iexp0 = ax >> 24; + ldd [%i2+8],%f60 ! (2_1) dtmp0 = ((double*)((char*)TBL + si0))[1] + faddd %f52,DC1,%f58 ! (2_1) res0 += DC1; + + add %i1,stridex,%o7 ! px += stridex + add %o4,960,%i2 ! (1_0) iexp0 += 0x3c0; + lda [%i1+stridex]0x82,%f25 ! (2_0) x0 = *px; + fand %f56,DC0,%f34 ! (1_0) db0 = vis_fand(db0,DC0); + + fmuld K2,%f46,%f50 ! (4_1) res0 = K2 * xx0; + cmp %o2,_0x7f800000 ! (2_0) ax ? 0x7f800000 + bge,pn %icc,.update8 ! (2_0) if( ax >= 0x7f800000 ) + fsubd %f48,%f32,%f52 ! (0_0) xx0 = (db0 - hi0); +.cont8: + fmuld %f62,%f42,%f54 ! (3_1) res0 *= xx0; + sllx %i2,52,%o4 ! (1_0) lexp0 = (long long)iexp0 << 52; + ldd [TBL+%g1],%f32 ! (0_0) dtmp0 = ((double*)((char*)TBL + si0))[0]; + + fmuld %f60,%f58,%f60 ! (2_1) res0 = dtmp0 * res0; + sra %i0,11,%g1 ! (1_0) ax >>= 11; + stx %o4,[%fp+tmp4] ! (1_0) dtmp1 = *((double*)&lexp0); + for %f34,DC1,%f48 ! (1_0) db0 = vis_for(db0,DC1); + + cmp %o2,_0x00800000 ! (2_0) ax ? 0x00800000 + bl,pn %icc,.update9 ! (2_0) if( ax < 0x00800000 ) + ldd [%fp+tmp0],%f40 ! (2_1) dtmp1 = *((double*)&lexp0); + fstod %f25,%f56 ! (2_0) db0 = (double)x0; +.cont9: + fmuld %f52,%f32,%f42 ! (0_0) xx0 *= dtmp0; + and %g1,_0x1ff0,%o5 ! (1_0) si0 = ax & 0x1ff0; + lda [stridex+%o7]0x82,%o1 ! (3_0) ax = *(int*)px; + faddd %f50,K1,%f34 ! (4_1) res0 += K1; + + add %o5,TBL,%i4 ! (1_0) (char*)TBL + si0 + fand %f48,DC2,%f62 ! (1_0) hi0 = vis_fand(db0,DC2); + + fmuld %f60,%f40,%f32 ! (2_1) res0 *= dtmp1; + sra %o2,24,%l1 ! (2_0) iexp0 = ax >> 24; + ldd [%l0+8],%f40 ! (3_1) dtmp0 = ((double*)((char*)TBL + si0))[1] + faddd %f54,DC1,%f58 ! (3_1) res0 += DC1; + + add %o7,stridex,%i1 ! px += stridex + add %l1,960,%l0 ! (2_0) iexp0 += 0x3c0; + lda [stridex+%o7]0x82,%f0 ! (3_0) x0 = *px; + fand %f56,DC0,%f60 ! (2_0) db0 = vis_fand(db0,DC0); + + fmuld K2,%f42,%f50 ! (0_0) res0 = K2 * xx0; + cmp %o1,_0x7f800000 ! (3_0) ax ? 0x7f800000 + bge,pn %icc,.update10 ! (3_0) if( ax >= 0x7f800000 ) + fsubd %f48,%f62,%f54 ! 
(1_0) xx0 = (db0 - hi0); +.cont10: + fmuld %f34,%f46,%f52 ! (4_1) res0 *= xx0; + sllx %l0,52,%o3 ! (2_0) lexp0 = (long long)iexp0 << 52; + ldd [TBL+%o5],%f56 ! (1_0) dtmp0 = ((double*)((char*)TBL + si0))[0]; + + fmuld %f40,%f58,%f34 ! (3_1) res0 = dtmp0 * res0; + sra %o2,11,%i2 ! (2_0) ax >>= 11; + stx %o3,[%fp+tmp0] ! (2_0) dtmp1 = *((double*)&lexp0); + for %f60,DC1,%f40 ! (2_0) db0 = vis_for(db0,DC1); + + cmp %o1,_0x00800000 ! (3_0) ax ? 0x00800000 + bl,pn %icc,.update11 ! (3_0) if( ax < 0x00800000 ) + ldd [%fp+tmp1],%f62 ! (3_1) dtmp1 = *((double*)&lexp0); + fstod %f0,%f48 ! (3_0) db0 = (double)x0; +.cont11: + fmuld %f54,%f56,%f30 ! (1_0) xx0 *= dtmp0; + and %i2,_0x1ff0,%o3 ! (2_0) si0 = ax & 0x1ff0; + lda [%i1+stridex]0x82,%o2 ! (4_0) ax = *(int*)px; + faddd %f50,K1,%f56 ! (0_0) res0 += K1; + + add %i1,stridex,%i1 ! px += stridex + add %o3,TBL,%i2 ! (2_0) (char*)TBL + si0 + fand %f40,DC2,%f46 ! (2_0) hi0 = vis_fand(db0,DC2); + + fmuld %f34,%f62,%f28 ! (3_1) res0 *= dtmp1; + sra %o1,24,%o4 ! (3_0) iexp0 = ax >> 24; + ldd [%i3+8],%f50 ! (4_1) dtmp0 = ((double*)((char*)TBL + si0))[1] + faddd %f52,DC1,%f54 ! (4_1) res0 += DC1; + + lda [%i1]0x82,%f13 ! (4_0) x0 = *px; + fand %f48,DC0,%f58 ! (3_0) db0 = vis_fand(db0,DC0); + + or %g0,%g5,%i3 + cmp counter,5 + bl,pn %icc,.tail + add %o4,960,%g5 ! (3_0) iexp0 += 0x3c0; + + ba .main_loop + sub counter,5,counter ! counter + + .align 16 +.main_loop: + fmuld K2,%f30,%f60 ! (1_1) res0 = K2 * xx0; + cmp %o2,_0x7f800000 ! (4_1) ax ? 0x7f800000 + bge,pn %icc,.update12 ! (4_1) if( ax >= 0x7f800000 ) + fsubd %f40,%f46,%f44 ! (2_1) xx0 = (db0 - hi0); +.cont12: + fmuld %f56,%f42,%f52 ! (0_1) res0 *= xx0; + sllx %g5,52,%g5 ! (3_1) lexp0 = (long long)iexp0 << 52; + ldd [%i2],%f40 ! (2_1) dtmp0 = ((double*)((char*)TBL + si0))[0]; + fdtos %f32,%f15 ! (2_2) fres0 = (float)res0; + + fmuld %f50,%f54,%f42 ! (4_2) res0 = dtmp0 * res0; + sra %o1,11,%l0 ! (3_1) ax >>= 11; + stx %g5,[%fp+tmp1] ! 
(3_1) dtmp1 = *((double*)&lexp0); + for %f58,DC1,%f48 ! (3_1) db0 = vis_for(db0,DC1); + + cmp %o2,_0x00800000 ! (4_1) ax ? 0x00800000 + bl,pn %icc,.update13 ! (4_1) if( ax < 0x00800000 ) + ldd [%fp+tmp2],%f56 ! (4_2) dtmp1 = *((double*)&lexp0); + fstod %f13,%f50 ! (4_1) db0 = (double)x0; +.cont13: + fmuld %f44,%f40,%f46 ! (2_1) xx0 *= dtmp0; + and %l0,_0x1ff0,%i0 ! (3_1) si0 = ax & 0x1ff0; + lda [%i1+stridex]0x82,%l1 ! (0_0) ax = *(int*)px; + faddd %f60,K1,%f32 ! (1_1) res0 += K1; + + add %i0,TBL,%l0 ! (3_1) (char*)TBL + si0 + add %i3,stridey,%o3 ! py += stridey + st %f15,[%i3] ! (2_2) *py = fres0; + fand %f48,DC2,%f62 ! (3_1) hi0 = vis_fand(db0,DC2); + + fmuld %f42,%f56,%f44 ! (4_2) res0 *= dtmp1; + sra %o2,24,%o7 ! (4_1) iexp0 = ax >> 24; + ldd [%i5+8],%f58 ! (0_1) dtmp0 = ((double*)((char*)TBL + si0))[1] + faddd %f52,DC1,%f34 ! (0_1) res0 += DC1; + + add %i1,stridex,%o4 ! px += stridex + add %o7,960,%o7 ! (4_1) iexp0 += 0x3c0; + lda [%i1+stridex]0x82,%f17 ! (0_0) x0 = *px; + fand %f50,DC0,%f54 ! (4_1) db0 = vis_fand(db0,DC0); + + fmuld K2,%f46,%f52 ! (2_1) res0 = K2 * xx0; + cmp %l1,_0x7f800000 ! (0_0) ax ? 0x7f800000 + bge,pn %icc,.update14 ! (0_0) if( ax >= 0x7f800000 ) + fsubd %f48,%f62,%f42 ! (3_1) xx0 = (db0 - hi0); +.cont14: + fmuld %f32,%f30,%f48 ! (1_1) res0 *= xx0; + sllx %o7,52,%o1 ! (4_1) lexp0 = (long long)iexp0 << 52; + ldd [%i0+TBL],%f40 ! (3_1) dtmp0 = ((double*)((char*)TBL + si0))[0]; + fdtos %f28,%f19 ! (3_2) fres0 = (float)res0; + + fmuld %f58,%f34,%f32 ! (0_1) res0 = dtmp0 * res0; + sra %o2,11,%i5 ! (4_1) ax >>= 11; + stx %o1,[%fp+tmp2] ! (4_1) dtmp1 = *((double*)&lexp0); + for %f54,DC1,%f34 ! (4_1) db0 = vis_for(db0,DC1); + + cmp %l1,_0x00800000 ! (0_0) ax ? 0x00800000 + bl,pn %icc,.update15 ! (0_0) if( ax < 0x00800000 ) + ldd [%fp+tmp3],%f60 ! (0_1) dtmp1 = *((double*)&lexp0); + fstod %f17,%f56 ! (0_0) db0 = (double)x0; +.cont15: + fmuld %f42,%f40,%f42 ! (3_1) xx0 *= dtmp0; + add %o3,stridey,%g5 ! py += stridey + lda [stridex+%o4]0x82,%i0 ! 
(1_0) ax = *(int*)px; + faddd %f52,K1,%f52 ! (2_1) res0 += K1; + + sra %l1,24,%g1 ! (0_0) iexp0 = ax >> 24; + and %i5,_0x1ff0,%i5 ! (4_1) si0 = ax & 0x1ff0; + st %f19,[%o3] ! (3_2) *py = fres0; + fand %f34,DC2,%f62 ! (4_1) hi0 = vis_fand(db0,DC2); + + fmuld %f32,%f60,%f40 ! (0_1) res0 *= dtmp1; + add %o4,stridex,%i1 ! px += stridex + ldd [%i4+8],%f60 ! (1_1) dtmp0 = ((double*)((char*)TBL + si0))[1] + faddd %f48,DC1,%f58 ! (1_1) res0 += DC1; + + add %g1,960,%o5 ! (0_0) iexp0 += 0x3c0; + add %i5,TBL,%i3 ! (4_1) (char*)TBL + si0 + lda [stridex+%o4]0x82,%f21 ! (1_0) x0 = *px; + fand %f56,DC0,%f32 ! (0_0) db0 = vis_fand(db0,DC0); + + fmuld K2,%f42,%f50 ! (3_1) res0 = K2 * xx0; + cmp %i0,_0x7f800000 ! (1_0) ax ? 0x7f800000 + bge,pn %icc,.update16 ! (1_0) if( ax >= 0x7f800000 ) + fsubd %f34,%f62,%f54 ! (4_1) xx0 = (db0 - hi0); +.cont16: + fmuld %f52,%f46,%f52 ! (2_1) res0 *= xx0; + sllx %o5,52,%o7 ! (0_0) lexp0 = (long long)iexp0 << 52; + ldd [TBL+%i5],%f62 ! (4_1) dtmp0 = ((double*)((char*)TBL + si0))[0]; + fdtos %f44,%f23 ! (4_2) fres0 = (float)res0; + + fmuld %f60,%f58,%f44 ! (1_1) res0 = dtmp0 * res0; + sra %l1,11,%i4 ! (0_0) ax >>= 11; + stx %o7,[%fp+tmp3] ! (0_0) dtmp1 = *((double*)&lexp0); + for %f32,DC1,%f48 ! (0_0) db0 = vis_for(db0,DC1); + + cmp %i0,_0x00800000 ! (1_0) ax ? 0x00800000 + bl,pn %icc,.update17 ! (1_0) if( ax < 0x00800000 ) + ldd [%fp+tmp4],%f34 ! (1_1) dtmp1 = *((double*)&lexp0); + fstod %f21,%f56 ! (1_0) db0 = (double)x0; +.cont17: + fmuld %f54,%f62,%f46 ! (4_1) xx0 *= dtmp0; + and %i4,_0x1ff0,%g1 ! (0_0) si0 = ax & 0x1ff0; + lda [%i1+stridex]0x82,%o2 ! (2_0) ax = *(int*)px; + faddd %f50,K1,%f62 ! (3_1) res0 += K1; + + add %g1,TBL,%i5 ! (0_0) (double*)((char*)TBL + si0 + add %g5,stridey,%g5 ! py += stridey + st %f23,[stridey+%o3] ! (4_2) *py = fres0; + fand %f48,DC2,%f32 ! (0_0) hi0 = vis_fand(db0,DC2); + + fmuld %f44,%f34,%f44 ! (1_1) res0 *= dtmp1; + sra %i0,24,%o4 ! (1_0) iexp0 = ax >> 24; + ldd [%i2+8],%f60 ! 
(2_1) dtmp0 = ((double*)((char*)TBL + si0))[1] + faddd %f52,DC1,%f58 ! (2_1) res0 += DC1; + + add %i1,stridex,%o7 ! px += stridex + add %o4,960,%i2 ! (1_0) iexp0 += 0x3c0; + lda [%i1+stridex]0x82,%f25 ! (2_0) x0 = *px; + fand %f56,DC0,%f34 ! (1_0) db0 = vis_fand(db0,DC0); + + fmuld K2,%f46,%f50 ! (4_1) res0 = K2 * xx0; + cmp %o2,_0x7f800000 ! (2_0) ax ? 0x7f800000 + bge,pn %icc,.update18 ! (2_0) if( ax >= 0x7f800000 ) + fsubd %f48,%f32,%f52 ! (0_0) xx0 = (db0 - hi0); +.cont18: + fmuld %f62,%f42,%f54 ! (3_1) res0 *= xx0; + sllx %i2,52,%o4 ! (1_0) lexp0 = (long long)iexp0 << 52; + ldd [TBL+%g1],%f32 ! (0_0) dtmp0 = ((double*)((char*)TBL + si0))[0]; + fdtos %f40,%f27 ! (0_1) fres0 = (float)res0; + + fmuld %f60,%f58,%f60 ! (2_1) res0 = dtmp0 * res0; + sra %i0,11,%g1 ! (1_0) ax >>= 11; + stx %o4,[%fp+tmp4] ! (1_0) dtmp1 = *((double*)&lexp0); + for %f34,DC1,%f48 ! (1_0) db0 = vis_for(db0,DC1); + + cmp %o2,_0x00800000 ! (2_0) ax ? 0x00800000 + bl,pn %icc,.update19 ! (2_0) if( ax < 0x00800000 ) + ldd [%fp+tmp0],%f40 ! (2_1) dtmp1 = *((double*)&lexp0); + fstod %f25,%f56 ! (2_0) db0 = (double)x0; +.cont19: + fmuld %f52,%f32,%f42 ! (0_0) xx0 *= dtmp0; + and %g1,_0x1ff0,%o5 ! (1_0) si0 = ax & 0x1ff0; + lda [stridex+%o7]0x82,%o1 ! (3_0) ax = *(int*)px; + faddd %f50,K1,%f34 ! (4_1) res0 += K1; + + add %o5,TBL,%i4 ! (1_0) (char*)TBL + si0 + add %g5,stridey,%g1 ! py += stridey + st %f27,[%g5] ! (0_1) *py = fres0; + fand %f48,DC2,%f62 ! (1_0) hi0 = vis_fand(db0,DC2); + + fmuld %f60,%f40,%f32 ! (2_1) res0 *= dtmp1; + sra %o2,24,%l1 ! (2_0) iexp0 = ax >> 24; + ldd [%l0+8],%f40 ! (3_1) dtmp0 = ((double*)((char*)TBL + si0))[1] + faddd %f54,DC1,%f58 ! (3_1) res0 += DC1; + + add %o7,stridex,%i1 ! px += stridex + add %l1,960,%l0 ! (2_0) iexp0 += 0x3c0; + lda [stridex+%o7]0x82,%f0 ! (3_0) x0 = *px; + fand %f56,DC0,%f60 ! (2_0) db0 = vis_fand(db0,DC0); + + fmuld K2,%f42,%f50 ! (0_0) res0 = K2 * xx0; + cmp %o1,_0x7f800000 ! (3_0) ax ? 0x7f800000 + bge,pn %icc,.update20 ! 
(3_0) if( ax >= 0x7f800000 ) + fsubd %f48,%f62,%f54 ! (1_0) xx0 = (db0 - hi0); +.cont20: + fmuld %f34,%f46,%f52 ! (4_1) res0 *= xx0; + sllx %l0,52,%o3 ! (2_0) lexp0 = (long long)iexp0 << 52; + ldd [TBL+%o5],%f56 ! (1_0) dtmp0 = ((double*)((char*)TBL + si0))[0]; + fdtos %f44,%f8 ! (1_1) fres0 = (float)res0; + + fmuld %f40,%f58,%f34 ! (3_1) res0 = dtmp0 * res0; + sra %o2,11,%i2 ! (2_0) ax >>= 11; + stx %o3,[%fp+tmp0] ! (2_0) dtmp1 = *((double*)&lexp0); + for %f60,DC1,%f40 ! (2_0) db0 = vis_for(db0,DC1); + + cmp %o1,_0x00800000 ! (3_0) ax ? 0x00800000 + bl,pn %icc,.update21 ! (3_0) if( ax < 0x00800000 ) + ldd [%fp+tmp1],%f62 ! (3_1) dtmp1 = *((double*)&lexp0); + fstod %f0,%f48 ! (3_0) db0 = (double)x0; +.cont21: + fmuld %f54,%f56,%f30 ! (1_0) xx0 *= dtmp0; + and %i2,_0x1ff0,%o3 ! (2_0) si0 = ax & 0x1ff0; + lda [%i1+stridex]0x82,%o2 ! (4_0) ax = *(int*)px; + faddd %f50,K1,%f56 ! (0_0) res0 += K1; + + add %i1,stridex,%i1 ! px += stridex + add %o3,TBL,%i2 ! (2_0) (char*)TBL + si0 + st %f8,[stridey+%g5] ! (1_1) *py = fres0; + fand %f40,DC2,%f46 ! (2_0) hi0 = vis_fand(db0,DC2); + + fmuld %f34,%f62,%f28 ! (3_1) res0 *= dtmp1; + sra %o1,24,%o4 ! (3_0) iexp0 = ax >> 24; + ldd [%i3+8],%f50 ! (4_1) dtmp0 = ((double*)((char*)TBL + si0))[1] + faddd %f52,DC1,%f54 ! (4_1) res0 += DC1; + + add %g1,stridey,%i3 ! py += stridey + subcc counter,5,counter ! counter + lda [%i1]0x82,%f13 ! (4_0) x0 = *px; + fand %f48,DC0,%f58 ! (3_0) db0 = vis_fand(db0,DC0); + + bpos,pt %icc,.main_loop + add %o4,960,%g5 ! (3_0) iexp0 += 0x3c0; + + add counter,5,counter +.tail: + subcc counter,1,counter + bneg,a .begin + or %g0,%i3,%g5 + + fmuld %f56,%f42,%f52 ! (0_1) res0 *= xx0; + fdtos %f32,%f15 ! (2_2) fres0 = (float)res0; + + fmuld %f50,%f54,%f42 ! (4_2) res0 = dtmp0 * res0; + + ldd [%fp+tmp2],%f56 ! (4_2) dtmp1 = *((double*)&lexp0); + + add %i3,stridey,%o3 ! py += stridey + st %f15,[%i3] ! (2_2) *py = fres0; + + subcc counter,1,counter + bneg,a .begin + or %g0,%o3,%g5 + + fmuld %f42,%f56,%f44 ! 
(4_2) res0 *= dtmp1; + ldd [%i5+8],%f58 ! (0_1) dtmp0 = ((double*)((char*)TBL + si0))[1] + faddd %f52,DC1,%f34 ! (0_1) res0 += DC1; + + fdtos %f28,%f19 ! (3_2) fres0 = (float)res0; + + fmuld %f58,%f34,%f32 ! (0_1) res0 = dtmp0 * res0; + + ldd [%fp+tmp3],%f60 ! (0_1) dtmp1 = *((double*)&lexp0); + + add %o3,stridey,%g5 ! py += stridey + + st %f19,[%o3] ! (3_2) *py = fres0; + + subcc counter,1,counter + bneg,a .begin + nop + + fmuld %f32,%f60,%f40 ! (0_1) res0 *= dtmp1; + + fdtos %f44,%f23 ! (4_2) fres0 = (float)res0; + + add %g5,stridey,%g5 ! py += stridey + st %f23,[stridey+%o3] ! (4_2) *py = fres0; + + subcc counter,1,counter + bneg,a .begin + nop + + fdtos %f40,%f27 ! (0_1) fres0 = (float)res0; + + st %f27,[%g5] ! (0_1) *py = fres0; + + ba .begin + add %g5,stridey,%g5 + + .align 16 +.spec: + fsqrts %f25,%f25 + sub counter,1,counter + add %i1,stridex,%i1 + st %f25,[%g5] + ba .begin1 + add %g5,stridey,%g5 + + .align 16 +.update0: + cmp counter,1 + ble .cont0 + fzeros %f0 + + stx %i1,[%fp+tmp_px] + sethi %hi(0x7f800000),%o1 + + sub counter,1,counter + st counter,[%fp+tmp_counter] + + ba .cont0 + or %g0,1,counter + + .align 16 +.update1: + cmp counter,1 + ble .cont1 + fzeros %f0 + + stx %i1,[%fp+tmp_px] + clr %o1 + + sub counter,1,counter + st counter,[%fp+tmp_counter] + + ba .cont1 + or %g0,1,counter + + .align 16 +.update2: + cmp counter,2 + ble .cont2 + fzeros %f13 + + stx %i1,[%fp+tmp_px] + sethi %hi(0x7f800000),%o2 + + sub counter,2,counter + st counter,[%fp+tmp_counter] + + ba .cont2 + or %g0,2,counter + + .align 16 +.update3: + cmp counter,2 + ble .cont3 + fzeros %f13 + + stx %i1,[%fp+tmp_px] + clr %o2 + + sub counter,2,counter + st counter,[%fp+tmp_counter] + + ba .cont3 + or %g0,2,counter + + .align 16 +.update4: + cmp counter,3 + ble .cont4 + fzeros %f17 + + stx %o4,[%fp+tmp_px] + sethi %hi(0x7f800000),%l1 + + sub counter,3,counter + st counter,[%fp+tmp_counter] + + ba .cont4 + or %g0,3,counter + + .align 16 +.update5: + cmp counter,3 + ble .cont5 + fzeros 
%f17 + + stx %o4,[%fp+tmp_px] + clr %l1 + + sub counter,3,counter + st counter,[%fp+tmp_counter] + + ba .cont5 + or %g0,3,counter + + .align 16 +.update6: + cmp counter,4 + ble .cont6 + fzeros %f21 + + stx %i1,[%fp+tmp_px] + sethi %hi(0x7f800000),%i0 + + sub counter,4,counter + st counter,[%fp+tmp_counter] + + ba .cont6 + or %g0,4,counter + + .align 16 +.update7: + cmp counter,4 + ble .cont7 + fzeros %f21 + + stx %i1,[%fp+tmp_px] + clr %i0 + + sub counter,4,counter + st counter,[%fp+tmp_counter] + + ba .cont7 + or %g0,4,counter + + .align 16 +.update8: + cmp counter,5 + ble .cont8 + fzeros %f25 + + stx %o7,[%fp+tmp_px] + sethi %hi(0x7f800000),%o2 + + sub counter,5,counter + st counter,[%fp+tmp_counter] + + ba .cont8 + or %g0,5,counter + + .align 16 +.update9: + cmp counter,5 + ble .cont9 + fzeros %f25 + + stx %o7,[%fp+tmp_px] + clr %o2 + + sub counter,5,counter + st counter,[%fp+tmp_counter] + + ba .cont9 + or %g0,5,counter + + .align 16 +.update10: + cmp counter,6 + ble .cont10 + fzeros %f0 + + stx %i1,[%fp+tmp_px] + sethi %hi(0x7f800000),%o1 + + sub counter,6,counter + st counter,[%fp+tmp_counter] + + ba .cont10 + or %g0,6,counter + + .align 16 +.update11: + cmp counter,6 + ble .cont11 + fzeros %f0 + + stx %i1,[%fp+tmp_px] + clr %o1 + + sub counter,6,counter + st counter,[%fp+tmp_counter] + + ba .cont11 + or %g0,6,counter + + .align 16 +.update12: + cmp counter,2 + ble .cont12 + fzeros %f13 + + stx %i1,[%fp+tmp_px] + sethi %hi(0x7f800000),%o2 + + sub counter,2,counter + st counter,[%fp+tmp_counter] + + ba .cont12 + or %g0,2,counter + + .align 16 +.update13: + cmp counter,2 + ble .cont13 + fzeros %f13 + + stx %i1,[%fp+tmp_px] + clr %o2 + + sub counter,2,counter + st counter,[%fp+tmp_counter] + + ba .cont13 + or %g0,2,counter + + .align 16 +.update14: + cmp counter,3 + ble .cont14 + fzeros %f17 + + stx %o4,[%fp+tmp_px] + sethi %hi(0x7f800000),%l1 + + sub counter,3,counter + st counter,[%fp+tmp_counter] + + ba .cont14 + or %g0,3,counter + + .align 16 +.update15: + 
cmp counter,3 + ble .cont15 + fzeros %f17 + + stx %o4,[%fp+tmp_px] + clr %l1 + + sub counter,3,counter + st counter,[%fp+tmp_counter] + + ba .cont15 + or %g0,3,counter + + .align 16 +.update16: + cmp counter,4 + ble .cont16 + fzeros %f21 + + stx %i1,[%fp+tmp_px] + sethi %hi(0x7f800000),%i0 + + sub counter,4,counter + st counter,[%fp+tmp_counter] + + ba .cont16 + or %g0,4,counter + + .align 16 +.update17: + cmp counter,4 + ble .cont17 + fzeros %f21 + + stx %i1,[%fp+tmp_px] + clr %i0 + + sub counter,4,counter + st counter,[%fp+tmp_counter] + + ba .cont17 + or %g0,4,counter + + .align 16 +.update18: + cmp counter,5 + ble .cont18 + fzeros %f25 + + stx %o7,[%fp+tmp_px] + sethi %hi(0x7f800000),%o2 + + sub counter,5,counter + st counter,[%fp+tmp_counter] + + ba .cont18 + or %g0,5,counter + + .align 16 +.update19: + cmp counter,5 + ble .cont19 + fzeros %f25 + + stx %o7,[%fp+tmp_px] + clr %o2 + + sub counter,5,counter + st counter,[%fp+tmp_counter] + + ba .cont19 + or %g0,5,counter + + .align 16 +.update20: + cmp counter,6 + ble .cont20 + fzeros %f0 + + stx %i1,[%fp+tmp_px] + sethi %hi(0x7f800000),%o1 + + sub counter,6,counter + st counter,[%fp+tmp_counter] + + ba .cont20 + or %g0,6,counter + + .align 16 +.update21: + cmp counter,6 + ble .cont21 + fzeros %f0 + + stx %i1,[%fp+tmp_px] + clr %o1 + + sub counter,6,counter + st counter,[%fp+tmp_counter] + + ba .cont21 + or %g0,6,counter + +.exit: + ret + restore + SET_SIZE(__vsqrtf_ultra3) + diff --git a/usr/src/lib/libmvec/common/vlog_.c b/usr/src/lib/libmvec/common/vlog_.c new file mode 100644 index 0000000000..def5cfa3b5 --- /dev/null +++ b/usr/src/lib/libmvec/common/vlog_.c @@ -0,0 +1,74 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +extern void __vlog(int, double *, int, double *, int); + +#if !defined(LIBMVEC_SO_BUILD) +#if defined(ARCH_v8plusa) || defined(ARCH_v8plusb) || defined(ARCH_v9a) || defined(ARCH_v9b) +#define CHECK_ULTRA3 +#endif +#endif /* !defined(LIBMVEC_SO_BUILD) */ + +#ifdef CHECK_ULTRA3 +#include <strings.h> +#define sysinfo _sysinfo +#include <sys/systeminfo.h> + +#define BUFLEN 257 + +static int use_ultra3 = 0; + +extern void __vlog_ultra3(int, double *, int, double *, int); +#endif + +#pragma weak vlog_ = __vlog_ + +/* dereference the by-reference arguments and call __vlog, or __vlog_ultra3 when CHECK_ULTRA3 is defined and sysinfo(SI_ISALIST) starts with "sparcv9+vis2" (decision cached in use_ultra3) */ +void +__vlog_(int *n, double *x, int *stridex, double *y, int *stridey) +{ +#ifdef CHECK_ULTRA3 + int u; + char buf[BUFLEN]; + + u = use_ultra3; + if (!u) { + /* use __vlog_ultra3 on Cheetah (and ???) 
*/ + if (sysinfo(SI_ISALIST, buf, BUFLEN) > 0 && !strncmp(buf, "sparcv9+vis2", 12)) + u = 3; + else + u = 1; + use_ultra3 = u; + } + if (u & 2) + __vlog_ultra3(*n, x, *stridex, y, *stridey); + else +#endif + __vlog(*n, x, *stridex, y, *stridey); +} diff --git a/usr/src/lib/libmvec/common/vlogf_.c b/usr/src/lib/libmvec/common/vlogf_.c new file mode 100644 index 0000000000..1c84d729fc --- /dev/null +++ b/usr/src/lib/libmvec/common/vlogf_.c @@ -0,0 +1,39 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +extern void __vlogf(int, float *, int, float *, int); + +#pragma weak vlogf_ = __vlogf_ + +/* just invoke the serial function */ +void +__vlogf_(int *n, float *x, int *stridex, float *y, int *stridey) +{ + __vlogf(*n, x, *stridex, y, *stridey); +} diff --git a/usr/src/lib/libmvec/common/vpow_.c b/usr/src/lib/libmvec/common/vpow_.c new file mode 100644 index 0000000000..73c3dadbd3 --- /dev/null +++ b/usr/src/lib/libmvec/common/vpow_.c @@ -0,0 +1,40 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +extern void __vpow(int, double *, int, double *, int, double *, int); + +#pragma weak vpow_ = __vpow_ + +/* just invoke the serial function */ +void +__vpow_(int *n, double *x, int *stridex, double *y, int *stridey, + double *z, int *stridez) +{ + __vpow(*n, x, *stridex, y, *stridey, z, *stridez); +} diff --git a/usr/src/lib/libmvec/common/vpowf_.c b/usr/src/lib/libmvec/common/vpowf_.c new file mode 100644 index 0000000000..bbe233f386 --- /dev/null +++ b/usr/src/lib/libmvec/common/vpowf_.c @@ -0,0 +1,40 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +extern void __vpowf(int, float *, int, float *, int, float *, int); + +#pragma weak vpowf_ = __vpowf_ + +/* just invoke the serial function */ +void +__vpowf_(int *n, float *x, int *stridex, float *y, int *stridey, + float *z, int *stridez) +{ + __vpowf(*n, x, *stridex, y, *stridey, z, *stridez); +} diff --git a/usr/src/lib/libmvec/common/vrhypot_.c b/usr/src/lib/libmvec/common/vrhypot_.c new file mode 100644 index 0000000000..111059be0d --- /dev/null +++ b/usr/src/lib/libmvec/common/vrhypot_.c @@ -0,0 +1,42 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#include "libm_inlines.h" + +extern void __vrhypot(int, double *, int, double *, int, double *, int); + +#pragma weak vrhypot_ = __vrhypot_ + +/* just invoke the serial function */ +void +__vrhypot_(int *n, double *x, int *stridex, double *y, int *stridey, + double *z, int *stridez) +{ + __vrhypot(*n, x, *stridex, y, *stridey, z, *stridez); +} diff --git a/usr/src/lib/libmvec/common/vrhypotf_.c b/usr/src/lib/libmvec/common/vrhypotf_.c new file mode 100644 index 0000000000..99a25102f6 --- /dev/null +++ b/usr/src/lib/libmvec/common/vrhypotf_.c @@ -0,0 +1,42 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#include "libm_inlines.h" + +extern void __vrhypotf(int, float *, int, float *, int, float *, int); + +#pragma weak vrhypotf_ = __vrhypotf_ + +/* just invoke the serial function */ +void +__vrhypotf_(int *n, float *x, int *stridex, float *y, int *stridey, + float *z, int *stridez) +{ + __vrhypotf(*n, x, *stridex, y, *stridey, z, *stridez); +} diff --git a/usr/src/lib/libmvec/common/vrsqrt_.c b/usr/src/lib/libmvec/common/vrsqrt_.c new file mode 100644 index 0000000000..3f0d8c03fb --- /dev/null +++ b/usr/src/lib/libmvec/common/vrsqrt_.c @@ -0,0 +1,39 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +extern void __vrsqrt(int, double *, int, double *, int); + +#pragma weak vrsqrt_ = __vrsqrt_ + +/* just invoke the serial function */ +void +__vrsqrt_(int *n, double *x, int *stridex, double *y, int *stridey) +{ + __vrsqrt(*n, x, *stridex, y, *stridey); +} diff --git a/usr/src/lib/libmvec/common/vrsqrtf_.c b/usr/src/lib/libmvec/common/vrsqrtf_.c new file mode 100644 index 0000000000..b3cab90eb8 --- /dev/null +++ b/usr/src/lib/libmvec/common/vrsqrtf_.c @@ -0,0 +1,39 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +extern void __vrsqrtf(int, float *, int, float *, int); + +#pragma weak vrsqrtf_ = __vrsqrtf_ + +/* just invoke the serial function */ +void +__vrsqrtf_(int *n, float *x, int *stridex, float *y, int *stridey) +{ + __vrsqrtf(*n, x, *stridex, y, *stridey); +} diff --git a/usr/src/lib/libmvec/common/vsin_.c b/usr/src/lib/libmvec/common/vsin_.c new file mode 100644 index 0000000000..9060c4fed8 --- /dev/null +++ b/usr/src/lib/libmvec/common/vsin_.c @@ -0,0 +1,74 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +extern void __vsin(int, double *, int, double *, int); + +#if !defined(LIBMVEC_SO_BUILD) +#if defined(ARCH_v8plusa) || defined(ARCH_v8plusb) || defined(ARCH_v9a) || defined(ARCH_v9b) +#define CHECK_ULTRA3 +#endif +#endif /* !defined(LIBMVEC_SO_BUILD) */ + +#ifdef CHECK_ULTRA3 +#include <strings.h> +#define sysinfo _sysinfo +#include <sys/systeminfo.h> + +#define BUFLEN 257 + +static int use_ultra3 = 0; + +extern void __vsin_ultra3(int, double *, int, double *, int); +#endif + +#pragma weak vsin_ = __vsin_ + +/* dereference the by-reference arguments and call __vsin, or __vsin_ultra3 when CHECK_ULTRA3 is defined and sysinfo(SI_ISALIST) starts with "sparcv9+vis2" (decision cached in use_ultra3) */ +void +__vsin_(int *n, double *x, int *stridex, double *y, int *stridey) +{ +#ifdef CHECK_ULTRA3 + int u; + char buf[BUFLEN]; + + u = use_ultra3; + if (!u) { + /* use __vsin_ultra3 on Cheetah (and ???) */ + if (sysinfo(SI_ISALIST, buf, BUFLEN) > 0 && !strncmp(buf, "sparcv9+vis2", 12)) + u = 3; + else + u = 1; + use_ultra3 = u; + } + if (u & 2) + __vsin_ultra3(*n, x, *stridex, y, *stridey); + else +#endif + __vsin(*n, x, *stridex, y, *stridey); +} diff --git a/usr/src/lib/libmvec/common/vsincos_.c b/usr/src/lib/libmvec/common/vsincos_.c new file mode 100644 index 0000000000..14795c7bdd --- /dev/null +++ b/usr/src/lib/libmvec/common/vsincos_.c @@ -0,0 +1,40 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +extern void __vsincos(int, double *, int, double *, int, double *, int); + +#pragma weak vsincos_ = __vsincos_ + +/* just invoke the serial function */ +void +__vsincos_(int *n, double *x, int *stridex, double *s, int *strides, + double *c, int *stridec) +{ + __vsincos(*n, x, *stridex, s, *strides, c, *stridec); +} diff --git a/usr/src/lib/libmvec/common/vsincosf_.c b/usr/src/lib/libmvec/common/vsincosf_.c new file mode 100644 index 0000000000..117efeed04 --- /dev/null +++ b/usr/src/lib/libmvec/common/vsincosf_.c @@ -0,0 +1,40 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +extern void __vsincosf(int, float *, int, float *, int, float *, int); + +#pragma weak vsincosf_ = __vsincosf_ + +/* just invoke the serial function */ +void +__vsincosf_(int *n, float *x, int *stridex, float *s, int *strides, + float *c, int *stridec) +{ + __vsincosf(*n, x, *stridex, s, *strides, c, *stridec); +} diff --git a/usr/src/lib/libmvec/common/vsinf_.c b/usr/src/lib/libmvec/common/vsinf_.c new file mode 100644 index 0000000000..67d1d13f28 --- /dev/null +++ b/usr/src/lib/libmvec/common/vsinf_.c @@ -0,0 +1,39 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +extern void __vsinf(int, float *, int, float *, int); + +#pragma weak vsinf_ = __vsinf_ + +/* just invoke the serial function */ +void +__vsinf_(int *n, float *x, int *stridex, float *y, int *stridey) +{ + __vsinf(*n, x, *stridex, y, *stridey); +} diff --git a/usr/src/lib/libmvec/common/vsqrt_.c b/usr/src/lib/libmvec/common/vsqrt_.c new file mode 100644 index 0000000000..60fdd6332e --- /dev/null +++ b/usr/src/lib/libmvec/common/vsqrt_.c @@ -0,0 +1,39 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +extern void __vsqrt(int, double *, int, double *, int); + +#pragma weak vsqrt_ = __vsqrt_ + +/* just invoke the serial function */ +void +__vsqrt_(int *n, double *x, int *stridex, double *y, int *stridey) +{ + __vsqrt(*n, x, *stridex, y, *stridey); +} diff --git a/usr/src/lib/libmvec/common/vsqrtf_.c b/usr/src/lib/libmvec/common/vsqrtf_.c new file mode 100644 index 0000000000..d173bcb948 --- /dev/null +++ b/usr/src/lib/libmvec/common/vsqrtf_.c @@ -0,0 +1,74 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +extern void __vsqrtf(int, float *, int, float *, int); + +#if !defined(LIBMVEC_SO_BUILD) +#if defined(ARCH_v8plusa) || defined(ARCH_v8plusb) || defined(ARCH_v9a) || defined(ARCH_v9b) +#define CHECK_ULTRA3 +#endif +#endif /* !defined(LIBMVEC_SO_BUILD) */ + +#ifdef CHECK_ULTRA3 +#include <strings.h> +#define sysinfo _sysinfo +#include <sys/systeminfo.h> + +#define BUFLEN 257 + +static int use_ultra3 = 0; + +extern void __vsqrtf_ultra3(int, float *, int, float *, int); +#endif + +#pragma weak vsqrtf_ = __vsqrtf_ + +/* dereference the by-reference arguments and call __vsqrtf, or __vsqrtf_ultra3 when CHECK_ULTRA3 is defined and sysinfo(SI_ISALIST) starts with "sparcv9+vis2" (decision cached in use_ultra3) */ +void +__vsqrtf_(int *n, float *x, int *stridex, float *y, int *stridey) +{ +#ifdef CHECK_ULTRA3 + int u; + char buf[BUFLEN]; + + u = use_ultra3; + if (!u) { + /* use __vsqrtf_ultra3 on Cheetah (and ???) */ + if (sysinfo(SI_ISALIST, buf, BUFLEN) > 0 && !strncmp(buf, "sparcv9+vis2", 12)) + u = 3; + else + u = 1; + use_ultra3 = u; + } + if (u & 2) + __vsqrtf_ultra3(*n, x, *stridex, y, *stridey); + else +#endif + __vsqrtf(*n, x, *stridex, y, *stridey); +} diff --git a/usr/src/lib/libmvec/common/vz_abs_.c b/usr/src/lib/libmvec/common/vz_abs_.c new file mode 100644 index 0000000000..e0096ae311 --- /dev/null +++ b/usr/src/lib/libmvec/common/vz_abs_.c @@ -0,0 +1,39 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +extern void __vz_abs(int, double *, int, double *, int); + +#pragma weak vz_abs_ = __vz_abs_ + +/* just invoke the serial function */ +void +__vz_abs_(int *n, double *x, int *stridex, double *y, int *stridey) +{ + __vz_abs(*n, x, *stridex, y, *stridey); +} diff --git a/usr/src/lib/libmvec/common/vz_exp_.c b/usr/src/lib/libmvec/common/vz_exp_.c new file mode 100644 index 0000000000..76655a8e9e --- /dev/null +++ b/usr/src/lib/libmvec/common/vz_exp_.c @@ -0,0 +1,40 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +extern void __vz_exp(int, double *, int, double *, int, double *); + +#pragma weak vz_exp_ = __vz_exp_ + +/* just invoke the serial function */ +void +__vz_exp_(int *n, double *x, int *stridex, double *y, int *stridey, + double *tmp) +{ + __vz_exp(*n, x, *stridex, y, *stridey, tmp); +} diff --git a/usr/src/lib/libmvec/common/vz_log_.c b/usr/src/lib/libmvec/common/vz_log_.c new file mode 100644 index 0000000000..010005ecd9 --- /dev/null +++ b/usr/src/lib/libmvec/common/vz_log_.c @@ -0,0 +1,39 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +extern void __vz_log(int, double *, int, double *, int); + +#pragma weak vz_log_ = __vz_log_ + +/* just invoke the serial function */ +void +__vz_log_(int *n, double *x, int *stridex, double *y, int *stridey) +{ + __vz_log(*n, x, *stridex, y, *stridey); +} diff --git a/usr/src/lib/libmvec/common/vz_pow_.c b/usr/src/lib/libmvec/common/vz_pow_.c new file mode 100644 index 0000000000..612db15d24 --- /dev/null +++ b/usr/src/lib/libmvec/common/vz_pow_.c @@ -0,0 +1,41 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +extern void __vz_pow(int, double *, int, double *, int, double *, int, + double *); + +#pragma weak vz_pow_ = __vz_pow_ + +/* just invoke the serial function */ +void +__vz_pow_(int *n, double *x, int *stridex, double *y, int *stridey, + double *z, int *stridez, double *tmp) +{ + __vz_pow(*n, x, *stridex, y, *stridey, z, *stridez, tmp); +} diff --git a/usr/src/lib/libmvec/i386/Makefile b/usr/src/lib/libmvec/i386/Makefile new file mode 100644 index 0000000000..f99f809c60 --- /dev/null +++ b/usr/src/lib/libmvec/i386/Makefile @@ -0,0 +1,29 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2011 Nexenta Systems, Inc. All rights reserved. +# + +TARGET_ARCH= i386 + +LIBRARY = libmvec.a +VERS = .1 + +OBJECTS = $(mvecOBJS) + +include ../Makefile.com + +SRCS = $(SRCS_mvec) + +install: all $(ROOTLIBS) $(ROOTLINKS) + +include $(SRC)/lib/libm/Makefile.targ diff --git a/usr/src/lib/libmvec/i386_hwcap1/Makefile b/usr/src/lib/libmvec/i386_hwcap1/Makefile new file mode 100644 index 0000000000..61c5f76989 --- /dev/null +++ b/usr/src/lib/libmvec/i386_hwcap1/Makefile @@ -0,0 +1,33 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2011 Nexenta Systems, Inc. All rights reserved. 
+# + +TARGET_ARCH= i386 + +LIBRARY = libmvec_hwcap1.a +VERS = .1 + +OBJECTS= $(mvecOBJS) + +include ../Makefile.com + +CPPFLAGS += -D_CMOV_INSN -D_SSE_INSN -D_SSE2_INSN +MAPFILES += mapfile +CFLAGS += -xtarget=pentium_pro -xarch=sse2 + +ROOTLIBDIR = $(ROOTFS_LIBDIR)/libmvec + +install: all $(ROOTLIBDIR) $(ROOTLIBS) + +include $(SRC)/lib/libm/Makefile.targ diff --git a/usr/src/lib/libmvec/i386_hwcap1/mapfile b/usr/src/lib/libmvec/i386_hwcap1/mapfile new file mode 100644 index 0000000000..42aa156399 --- /dev/null +++ b/usr/src/lib/libmvec/i386_hwcap1/mapfile @@ -0,0 +1,20 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2011 Nexenta Systems, Inc. All rights reserved. +# + +$mapfile_version 2 + +CAPABILITY { + hw += fpu cmov sse sse2; +}; diff --git a/usr/src/lib/libmvec/sparc/Makefile b/usr/src/lib/libmvec/sparc/Makefile new file mode 100644 index 0000000000..39df5d8353 --- /dev/null +++ b/usr/src/lib/libmvec/sparc/Makefile @@ -0,0 +1,30 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2011 Nexenta Systems, Inc. All rights reserved. 
+# + +TARGET_ARCH= sparc + +LIBRARY = libmvec.a +VERS = .1 + +OBJECTS = $(mvecOBJS) + +include ../Makefile.com + +CHIP = ultra +SRCS = $(SRCS_mvec) + +install: all $(ROOTLIBS) $(ROOTLINKS) + +include $(SRC)/lib/Makefile.targ diff --git a/usr/src/lib/libmvec/sparc_sparcv8plus+vis/Makefile b/usr/src/lib/libmvec/sparc_sparcv8plus+vis/Makefile new file mode 100644 index 0000000000..02ced700e2 --- /dev/null +++ b/usr/src/lib/libmvec/sparc_sparcv8plus+vis/Makefile @@ -0,0 +1,34 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2011 Nexenta Systems, Inc. All rights reserved. +# + +LIBRARY = libmvec_isa.a +VERS = .1 + +TARGET_ARCH= sparc +OBJECTS = $(mvecvisCOBJS) $(mvecvisSOBJS) + +include ../Makefile.com + +SRCS = $(mvecvisCOBJS:%.o=../common/%.c) + +CHIP = vis +XARCH = sparcvis + +MAPFILES = ../common/mapfilevis-vers +ROOTLIBDIR = $(ROOTFS_LIBDIR)/cpu/sparcv8plus+vis + +install: all $(ROOTLIBDIR) $(ROOTLIBS) + +include $(SRC)/lib/libm/Makefile.targ diff --git a/usr/src/lib/libmvec/sparc_sparcv9+vis2/Makefile b/usr/src/lib/libmvec/sparc_sparcv9+vis2/Makefile new file mode 100644 index 0000000000..fc3d316dfd --- /dev/null +++ b/usr/src/lib/libmvec/sparc_sparcv9+vis2/Makefile @@ -0,0 +1,34 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. 
+# + +# +# Copyright 2011 Nexenta Systems, Inc. All rights reserved. +# + +LIBRARY = libmvec_isa.a +VERS = .1 + +TARGET_ARCH= sparc +OBJECTS = $(mvecvis2COBJS) $(mvecvis2SOBJS) + +include ../Makefile.com + +CHIP = vis +XARCH = sparcvis2 + +SRCS = $(mvecvis2COBJS:%.o=../common/%.c) + +MAPFILES = ../common/mapfilevis2-vers +ROOTLIBDIR = $(ROOTFS_LIBDIR)/cpu/sparcv9+vis2 + +install: all $(ROOTLIBDIR) $(ROOTLIBS) + +include $(SRC)/lib/libm/Makefile.targ diff --git a/usr/src/lib/libmvec/sparcv9/Makefile b/usr/src/lib/libmvec/sparcv9/Makefile new file mode 100644 index 0000000000..e649b04b32 --- /dev/null +++ b/usr/src/lib/libmvec/sparcv9/Makefile @@ -0,0 +1,32 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2011 Nexenta Systems, Inc. All rights reserved. +# + +TARGET_ARCH = sparcv9 + +LIBRARY = libmvec.a +VERS = .1 + +OBJECTS = $(mvecOBJS) + +include ../Makefile.com +include $(SRC)/lib/Makefile.lib.64 + +CHIP = ultra + +SRCS = $(SRCS_mvec) + +install: all $(ROOTLIBS64) $(ROOTLINKS64) + +include $(SRC)/lib/libm/Makefile.targ diff --git a/usr/src/lib/libmvec/sparcv9_sparcv9+vis/Makefile b/usr/src/lib/libmvec/sparcv9_sparcv9+vis/Makefile new file mode 100644 index 0000000000..830c10f0ce --- /dev/null +++ b/usr/src/lib/libmvec/sparcv9_sparcv9+vis/Makefile @@ -0,0 +1,35 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. 
A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2011 Nexenta Systems, Inc. All rights reserved. +# + +LIBRARY = libmvec_isa.a +VERS = .1 + +TARGET_ARCH= sparcv9 +OBJECTS = $(mvecvisCOBJS) $(mvecvisSOBJS) + +include ../Makefile.com +include $(SRC)/lib/Makefile.lib.64 + +CHIP = vis +XARCH = sparcvis + +SRCS = $(mvecvisCOBJS:%.o=../common/%.c) + +MAPFILES = ../common/mapfilevis-vers +ROOTLIBDIR64 = $(ROOTFS_LIBDIR)/cpu/sparcv9+vis/$(MACH64) + +install: all $(ROOTLIBDIR64) $(ROOTLIBS64) + +include $(SRC)/lib/libm/Makefile.targ diff --git a/usr/src/lib/libmvec/sparcv9_sparcv9+vis2/Makefile b/usr/src/lib/libmvec/sparcv9_sparcv9+vis2/Makefile new file mode 100644 index 0000000000..1c3b9ba24e --- /dev/null +++ b/usr/src/lib/libmvec/sparcv9_sparcv9+vis2/Makefile @@ -0,0 +1,35 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+# + +LIBRARY = libmvec_isa.a +VERS = .1 + +TARGET_ARCH= sparcv9 +OBJECTS = $(mvecvis2COBJS) $(mvecvis2SOBJS) + +include ../Makefile.com +include $(SRC)/lib/Makefile.lib.64 + +CHIP = vis +XARCH = sparcvis2 + +SRCS = $(mvecvis2COBJS:%.o=../common/%.c) + +MAPFILES = ../common/mapfilevis2-vers +ROOTLIBDIR64 = $(ROOTFS_LIBDIR)/cpu/sparcv9+vis2/$(MACH64) + +install: all $(ROOTLIBDIR64) $(ROOTLIBS64) + +include $(SRC)/lib/libm/Makefile.targ diff --git a/usr/src/man/Makefile b/usr/src/man/Makefile index 03aa0d40d1..9c85fcf4cb 100644 --- a/usr/src/man/Makefile +++ b/usr/src/man/Makefile @@ -11,6 +11,7 @@ # # Copyright 2011, Richard Lowe +# Copyright (c) 2012, Igor Kozhukhov <ikozhukhov@gmail.com> # Copyright 2014 Nexenta Systems, Inc. All rights reserved. # @@ -48,10 +49,12 @@ SUBDIRS= man1 \ man3ldap \ man3lgrp \ man3lib \ + man3m \ man3mail \ man3malloc \ man3mp \ man3mpapi \ + man3mvec \ man3nsl \ man3nvpair \ man3pam \ diff --git a/usr/src/man/man3m/Makefile b/usr/src/man/man3m/Makefile new file mode 100644 index 0000000000..32343dddf7 --- /dev/null +++ b/usr/src/man/man3m/Makefile @@ -0,0 +1,130 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet +# at http://www.illumos.org/license/CDDL. 
+# + +# Copyright (c) 2012, Igor Kozhukhov <ikozhukhov@gmail.com> + +include ../../Makefile.master + +MANSECT = 3m + +MANFILES = acos.3m \ + acosh.3m \ + asin.3m \ + asinh.3m \ + atan.3m \ + atan2.3m \ + atanh.3m \ + cabs.3m \ + cacos.3m \ + cacosh.3m \ + carg.3m \ + casin.3m \ + casinh.3m \ + catan.3m \ + catanh.3m \ + cbrt.3m \ + ccos.3m \ + ccosh.3m \ + ceil.3m \ + cexp.3m \ + cimag.3m \ + clog.3m \ + conj.3m \ + copysign.3m \ + cos.3m \ + cosh.3m \ + cpow.3m \ + cproj.3m \ + creal.3m \ + csin.3m \ + csinh.3m \ + csqrt.3m \ + ctan.3m \ + ctanh.3m \ + erf.3m \ + erfc.3m \ + exp.3m \ + exp2.3m \ + expm1.3m \ + fabs.3m \ + fdim.3m \ + feclearexcept.3m \ + fegetenv.3m \ + fegetexceptflag.3m \ + fegetround.3m \ + feholdexcept.3m \ + feraiseexcept.3m \ + fesetprec.3m \ + fetestexcept.3m \ + feupdateenv.3m \ + fex_merge_flags.3m \ + fex_set_handling.3m \ + fex_set_log.3m \ + floor.3m \ + fma.3m \ + fmax.3m \ + fmin.3m \ + fmod.3m \ + fpclassify.3m \ + frexp.3m \ + hypot.3m \ + ilogb.3m \ + isfinite.3m \ + isgreater.3m \ + isgreaterequal.3m \ + isinf.3m \ + isless.3m \ + islessequal.3m \ + islessgreater.3m \ + isnan.3m \ + isnormal.3m \ + isunordered.3m \ + j0.3m \ + ldexp.3m \ + lgamma.3m \ + llrint.3m \ + llround.3m \ + log.3m \ + log10.3m \ + log1p.3m \ + log2.3m \ + logb.3m \ + lrint.3m \ + lround.3m \ + matherr.3m \ + modf.3m \ + nan.3m \ + nearbyint.3m \ + nextafter.3m \ + pow.3m \ + remainder.3m \ + remquo.3m \ + rint.3m \ + round.3m \ + scalb.3m \ + scalbln.3m \ + signbit.3m \ + significand.3m \ + sin.3m \ + sincos.3m \ + sinh.3m \ + sqrt.3m \ + tan.3m \ + tanh.3m \ + tgamma.3m \ + trunc.3m \ + y0.3m + +.KEEP_STATE: + +include ../Makefile.man + +install: $(ROOTMANFILES) diff --git a/usr/src/man/man3m/acos.3m b/usr/src/man/man3m/acos.3m new file mode 100644 index 0000000000..7af66e7a98 --- /dev/null +++ b/usr/src/man/man3m/acos.3m @@ -0,0 +1,124 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. 
All Rights Reserved. +.\" Copyright (c) 1992, X/Open Company Limited. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH acos 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +acos, acosf, acosl \- arc cosine functions +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... ] +#include <math.h> + +\fBdouble\fR \fBacos\fR(\fBdouble\fR \fIx\fR); +.fi + +.LP +.nf +\fBfloat\fR \fBacosf\fR(\fBfloat\fR \fIx\fR); +.fi + +.LP +.nf +\fBlong double\fR \fBacosl\fR(\fBlong double\fR \fIx\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions compute the principal value of the arc cosine of \fIx\fR. The +value of \fIx\fR should be in the range [\(mi1,1]. +.SH RETURN VALUES +.sp +.LP +Upon successful completion, these functions return the arc cosine of \fIx\fR in +the range [0, \c +.if n pi\c +.if t \(*p +\c +] radians. +.sp +.LP +For finite values of \fIx\fR not in the range [\(mi1,1], a domain error occurs +and NaN is returned. +.sp +.LP +If \fIx\fR is NaN, NaN is returned. +.sp +.LP +If \fIx\fR is +1, +0 is returned. +.sp +.LP +If \fIx\fR is \(+-Inf, a domain error occurs and NaN is returned. +.sp +.LP +For exceptional cases, \fBmatherr\fR(3M) tabulates the values to be returned by +\fBacos()\fR as specified by SVID3 and XPG3. +.SH ERRORS +.sp +.LP +These functions will fail if: +.sp +.ne 2 +.mk +.na +\fBDomain Error\fR +.ad +.RS 16n +.rt +The \fIx\fR argument is finite and not in the range [-1,1], or is \(+-Inf. +.sp +If the integer expression (\fBmath_errhandling\fR & \fBMATH_ERREXCEPT\fR) is +non-zero, the invalid floating-point exception is raised. +.sp +The \fBacos()\fR function sets \fBerrno\fR to \fBEDOM\fR if \fIx\fR is not +\(+-Inf or NaN and is not in the range [\(mi1,1]. 
+.RE + +.SH USAGE +.sp +.LP +An application wanting to check for exceptions should call +\fBfeclearexcept\fR(\fBFE_ALL_EXCEPT\fR) before calling these functions. On +return, if \fBfetestexcept\fR(\fBFE_INVALID\fR | \fBFE_DIVBYZERO\fR | +\fBFE_OVERFLOW\fR | \fBFE_UNDERFLOW\fR) is non-zero, an exception has been +raised. An application should either examine the return value or check the +floating point exception flags to detect exceptions. +.sp +.LP +An application can also set \fBerrno\fR to 0 before calling \fBacos()\fR. On +return, if \fBerrno\fR is non-zero, an error has occurred. The \fBacosf()\fR +and \fBacosl()\fR functions do not set \fBerrno\fR. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab(;) box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPE;ATTRIBUTE VALUE +_ +Interface Stability;Standard +_ +MT-Level;MT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBcos\fR(3M), \fBfeclearexcept\fR(3M), \fBfetestexcept\fR(3M), +\fBisnan\fR(3M), \fBmath.h\fR(3HEAD), \fBmatherr\fR(3M), \fBattributes\fR(5), +\fBstandards\fR(5) diff --git a/usr/src/man/man3m/acosh.3m b/usr/src/man/man3m/acosh.3m new file mode 100644 index 0000000000..dc3c482333 --- /dev/null +++ b/usr/src/man/man3m/acosh.3m @@ -0,0 +1,119 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Copyright (c) 1992, X/Open Company Limited. All Rights Reserved. +.\" Copyright (c) 1983 Regents of the University of California. All rights reserved. The Berkeley software License Agreement specifies the terms and conditions for redistribution. +.\" Portions Copyright (c) 2002, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation.
Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.TH acosh 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +acosh, acoshf, acoshl \- inverse hyperbolic cosine functions +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... ] +#include <math.h> + +\fBdouble\fR \fBacosh\fR(\fBdouble\fR \fIx\fR); +.fi + +.LP +.nf +\fBfloat\fR \fBacoshf\fR(\fBfloat\fR \fIx\fR); +.fi + +.LP +.nf +\fBlong double\fR \fBacoshl\fR(\fBlong double\fR \fIx\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions compute the inverse hyperbolic cosine of their argument +\fIx\fR. +.SH RETURN VALUES +.sp +.LP +Upon successful completion, these functions return the inverse hyperbolic +cosine of their argument. +.sp +.LP +For finite values of \fIx\fR < 1, a domain error occurs and NaN is returned. +.sp +.LP +If \fIx\fR is NaN, NaN is returned. +.sp +.LP +If \fIx\fR is +1, +0 is returned. +.sp +.LP +If \fIx\fR is +Inf, +Inf is returned. 
+.sp +.LP +If \fIx\fR is \(miInf, a domain error occurs and NaN is returned. +.sp +.LP +For exceptional cases, \fBmatherr\fR(3M) tabulates the values to be returned by +\fBacosh()\fR as specified by SVID3 and XPG3. +.SH ERRORS +.sp +.LP +These functions will fail if: +.sp +.ne 2 +.mk +.na +\fBDomain Error\fR +.ad +.RS 16n +.rt +The \fIx\fR argument is finite and less than 1.0, or is \(miInf. +.sp +If the integer expression (\fBmath_errhandling\fR & \fBMATH_ERREXCEPT\fR) is +non-zero, the invalid floating-point exception is raised. +.sp +The \fBacosh()\fR function sets \fBerrno\fR to \fBEDOM\fR if \fIx\fR is less +than 1.0. +.RE + +.SH USAGE +.sp +.LP +An application wanting to check for exceptions should call +\fBfeclearexcept\fR(\fBFE_ALL_EXCEPT\fR) before calling these functions. On +return, if \fBfetestexcept\fR(\fBFE_INVALID\fR | \fBFE_DIVBYZERO\fR | +\fBFE_OVERFLOW\fR | \fBFE_UNDERFLOW\fR) is non-zero, an exception has been +raised. An application should either examine the return value or check the +floating point exception flags to detect exceptions. +.sp +.LP +An application can also set \fBerrno\fR to 0 before calling \fBacosh()\fR. On +return, if \fBerrno\fR is non-zero, an error has occurred. The \fBacoshf()\fR +and \fBacoshl()\fR functions do not set \fBerrno\fR. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab(;) box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +.
+ATTRIBUTE TYPE;ATTRIBUTE VALUE +_ +Interface Stability;Standard +_ +MT-Level;MT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBcosh\fR(3M), \fBfeclearexcept\fR(3M), \fBfetestexcept\fR(3M), +\fBmath.h\fR(3HEAD), \fBmatherr\fR(3M), \fBattributes\fR(5), \fBstandards\fR(5) diff --git a/usr/src/man/man3m/asin.3m b/usr/src/man/man3m/asin.3m new file mode 100644 index 0000000000..2a71a3feaf --- /dev/null +++ b/usr/src/man/man3m/asin.3m @@ -0,0 +1,128 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Copyright (c) 1992, X/Open Company Limited. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material.
+.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH asin 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +asin, asinf, asinl \- arc sine function +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... ] +#include <math.h> + +\fBdouble\fR \fBasin\fR(\fBdouble\fR \fIx\fR); +.fi + +.LP +.nf +\fBfloat\fR \fBasinf\fR(\fBfloat\fR \fIx\fR); +.fi + +.LP +.nf +\fBlong double\fR \fBasinl\fR(\fBlong double\fR \fIx\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions compute the principal value of the arc sine of their argument +\fIx\fR. The value of \fIx\fR should be in the range [\(mi1,1]. +.SH RETURN VALUES +.sp +.LP +Upon successful completion, these functions return the arc sine of \fIx\fR in +the range [\(mi\c +.if n pi\c +.if t \(*p +\c +/2, \c +.if n pi\c +.if t \(*p +\c +/2] radians. +.sp +.LP +For finite values of \fIx\fR not in the range [\(mi1,1], a domain error occurs +and a NaN is returned. +.sp +.LP +If \fIx\fR is NaN, NaN is returned. +.sp +.LP +If \fIx\fR is \(+-0, \fIx\fR is returned. +.sp +.LP +If \fIx\fR is \(+-Inf, a domain error occurs and a NaN is returned. +.sp +.LP +For exceptional cases, \fBmatherr\fR(3M) tabulates the values to be returned by +\fBasin()\fR as specified by SVID3 and XPG3. 
+.SH ERRORS +.sp +.LP +These functions will fail if: +.sp +.ne 2 +.mk +.na +\fBDomain Error\fR +.ad +.RS 16n +.rt +The \fIx\fR argument is finite and not in the range [\(mi1,1], or is \(+-Inf. +.sp +If the integer expression (\fBmath_errhandling\fR & \fBMATH_ERREXCEPT\fR) is +non-zero, the invalid floating-point exception is raised. +.sp +The \fBasin()\fR function sets \fBerrno\fR to \fBEDOM\fR if \fIx\fR is not +\(+-Inf or NaN and is not in the range [\(mi1,1]. +.RE + +.SH USAGE +.sp +.LP +An application wanting to check for exceptions should call +\fBfeclearexcept\fR(\fBFE_ALL_EXCEPT\fR) before calling these functions. On +return, if \fBfetestexcept\fR(\fBFE_INVALID\fR | \fBFE_DIVBYZERO\fR | +\fBFE_OVERFLOW\fR | \fBFE_UNDERFLOW\fR) is non-zero, an exception has been +raised. An application should either examine the return value or check the +floating point exception flags to detect exceptions. +.sp +.LP +An application can also set \fBerrno\fR to 0 before calling \fBasin()\fR. On +return, if \fBerrno\fR is non-zero, an error has occurred. The \fBasinf()\fR +and \fBasinl()\fR functions do not set \fBerrno\fR. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab(;) box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPE;ATTRIBUTE VALUE +_ +Interface Stability;Standard +_ +MT-Level;MT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBisnan\fR(3M), \fBfeclearexcept\fR(3M), \fBfetestexcept\fR(3M), +\fBmath.h\fR(3HEAD), \fBmatherr\fR(3M), \fBsin\fR(3M), \fBattributes\fR(5), +\fBstandards\fR(5) diff --git a/usr/src/man/man3m/asinh.3m b/usr/src/man/man3m/asinh.3m new file mode 100644 index 0000000000..0e49abaad4 --- /dev/null +++ b/usr/src/man/man3m/asinh.3m @@ -0,0 +1,73 @@ +'\" te +.\" Copyright (c) 1992, X/Open Company Limited All Rights Reserved Portions Copyright (c) 2002, Sun Microsystems, Inc. All Rights Reserved +.\" Sun Microsystems, Inc.
gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH asinh 3M "1 Sep 2002" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +asinh, asinhf, asinhl \- inverse hyperbolic sine functions +.SH SYNOPSIS +.LP +.nf +cc [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... ] +#include <math.h> + +\fBdouble\fR \fBasinh\fR(\fBdouble\fR \fIx\fR); +.fi + +.LP +.nf +\fBfloat\fR \fBasinhf\fR(\fBfloat\fR \fIx\fR); +.fi + +.LP +.nf +\fBlong double\fR \fBasinhl\fR(\fBlong double\fR \fIx\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions compute the inverse hyperbolic sine of their argument \fIx\fR. +.SH RETURN VALUES +.sp +.LP +Upon successful completion, these functions return the inverse hyperbolic sine +of their argument. +.sp +.LP +If \fIx\fR is NaN, NaN is returned. +.sp +.LP +If \fIx\fR is \(+-0 or \(+-Inf, \fIx\fR is returned. +.SH ERRORS +.sp +.LP +No errors are defined. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab(;) box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPE;ATTRIBUTE VALUE +_ +Interface Stability;Standard +_ +MT-Level;MT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBmath.h\fR(3HEAD), \fBsinh\fR(3M), \fBattributes\fR(5), \fBstandards\fR(5) diff --git a/usr/src/man/man3m/atan.3m b/usr/src/man/man3m/atan.3m new file mode 100644 index 0000000000..5fd5a711a3 --- /dev/null +++ b/usr/src/man/man3m/atan.3m @@ -0,0 +1,91 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Copyright (c) 1992, X/Open Company Limited. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc.
gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH atan 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +atan, atanf, atanl \- arc tangent function +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... ] +#include <math.h> + +\fBdouble\fR \fBatan\fR(\fBdouble\fR \fIx\fR); +.fi + +.LP +.nf +\fBfloat\fR \fBatanf\fR(\fBfloat\fR \fIx\fR); +.fi + +.LP +.nf +\fBlong double\fR \fBatanl\fR(\fBlong double\fR \fIx\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions compute the principal value of the arc tangent of \fIx\fR. +.SH RETURN VALUES +.sp +.LP +Upon successful completion, these functions return the arc tangent of \fIx\fR +in the range [\(mi\c +.if n pi\c +.if t \(*p +\c +/2,\c +.if n pi\c +.if t \(*p +\c +/2] radians. +.sp +.LP +If \fIx\fR is NaN, NaN is returned. +.sp +.LP +If \fIx\fR is \(+-0, \fIx\fR is returned. +.sp +.LP +If \fIx\fR is \(+-Inf, \(+-\c +.if n pi\c +.if t \(*p +\c +/2 is returned. +.SH ERRORS +.sp +.LP +No errors are defined. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab(;) box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPE;ATTRIBUTE VALUE +_ +Interface Stability;Standard +_ +MT-Level;MT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBatan2\fR(3M), \fBisnan\fR(3M), \fBmath.h\fR(3HEAD), \fBtan\fR(3M), +\fBattributes\fR(5), \fBstandards\fR(5) diff --git a/usr/src/man/man3m/atan2.3m b/usr/src/man/man3m/atan2.3m new file mode 100644 index 0000000000..ece70e3f0c --- /dev/null +++ b/usr/src/man/man3m/atan2.3m @@ -0,0 +1,154 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Copyright (c) 1992, X/Open Company Limited. All Rights Reserved.
+.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH atan2 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +atan2, atan2f, atan2l \- arc tangent function +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... ] +#include <math.h> + +\fBdouble\fR \fBatan2\fR(\fBdouble\fR \fIy\fR, \fBdouble\fR \fIx\fR); +.fi + +.LP +.nf +\fBfloat\fR \fBatan2f\fR(\fBfloat\fR \fIy\fR, \fBfloat\fR \fIx\fR); +.fi + +.LP +.nf +\fBlong double\fR \fBatan2l\fR(\fBlong double\fR \fIy\fR, \fBlong double\fR \fIx\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions compute the principal value of the arc tangent of \fIy/x\fR, +using the signs of both arguments to determine the quadrant of the return +value. +.SH RETURN VALUES +.sp +.LP +Upon successful completion, these functions return the arc tangent of +\fIy\fR/\fIx\fR in the range [ \(mi\c +.if n pi\c +.if t \(*p +\c +,\c +.if n pi\c +.if t \(*p +\c + ] radians. +.sp +.LP +If \fIy\fR is \(+-0 and \fIx\fR is < 0, \(+-\c +.if n pi\c +.if t \(*p +\c + is returned. +.sp +.LP +If \fIy\fR is \(+-0 and \fIx\fR is > 0, \(+-0 is returned. +.sp +.LP +If \fIy\fR is < 0 and \fIx\fR is \(+-0, \(mi\c +.if n pi\c +.if t \(*p +\c +/2 is returned. +.sp +.LP +If \fIy\fR is > 0 and \fIx\fR is \(+-0, \c +.if n pi\c +.if t \(*p +\c +/2 is returned. +.sp +.LP +If \fIx\fR is 0, a pole error does not occur. +.sp +.LP +If either \fIx\fR or \fIy\fR is NaN, a NaN is returned. +.sp +.LP +If \fIy\fR is \(+-0 and \fIx\fR is -0, \(+-\c +.if n pi\c +.if t \(*p +\c + is returned. +.sp +.LP +If \fIy\fR is \(+-0 and \fIx\fR is +0, \(+-0 is returned. +.sp +.LP +For finite values of \(+-\fIy\fR > 0, if x is \(miInf, \(+-\c +.if n pi\c +.if t \(*p +\c + is returned. +.sp +.LP +For finite values of \(+-\fIy\fR > 0, if x is +Inf, \(+-0 is returned. 
+.sp +.LP +For finite values of \fIx\fR, if \fIy\fR is \(+-Inf, \(+-\c +.if n pi\c +.if t \(*p +\c +/2 is returned. +.sp +.LP +If \fIy\fR is \(+-Inf and \fIx\fR is \(miInf, \(+-3\c +.if n pi\c +.if t \(*p +\c +/4 is returned. +.sp +.LP +If \fIy\fR is \(+-Inf and \fIx\fR is +Inf, \(+-\c +.if n pi\c +.if t \(*p +\c +/4 is returned. +.sp +.LP +If both arguments are 0, a domain error does not occur. +.SH ERRORS +.sp +.LP +No errors are defined. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab(:) box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPE:ATTRIBUTE VALUE +_ +Interface Stability:Standard +_ +MT-Level:MT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBatan\fR(3M), \fBisnan\fR(3M), \fBmath.h\fR(3HEAD), \fBtan\fR(3M), +\fBattributes\fR(5), \fBstandards\fR(5) diff --git a/usr/src/man/man3m/atanh.3m b/usr/src/man/man3m/atanh.3m new file mode 100644 index 0000000000..3f5dd486e2 --- /dev/null +++ b/usr/src/man/man3m/atanh.3m @@ -0,0 +1,141 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Copyright (c) 1992, X/Open Company Limited. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation.
Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH atanh 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +atanh, atanhf, atanhl \- inverse hyperbolic tangent functions +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... 
] +#include <math.h> + +\fBdouble\fR \fBatanh\fR(\fBdouble\fR \fIx\fR); +.fi + +.LP +.nf +\fBfloat\fR \fBatanhf\fR(\fBfloat\fR \fIx\fR); +.fi + +.LP +.nf +\fBlong double\fR \fBatanhl\fR(\fBlong double\fR \fIx\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions compute the inverse hyperbolic tangent of their argument +\fIx\fR. +.SH RETURN VALUES +.sp +.LP +Upon successful completion, these functions return the inverse hyperbolic +tangent of their argument. +.sp +.LP +If \fIx\fR is \(+-1, a pole error occurs and \fBatanh()\fR, \fBatanhf()\fR, and +\fBatanhl()\fR return the value of the macro \fBHUGE_VAL\fR, \fBHUGE_VALF\fR, +and \fBHUGE_VALL\fR, respectively, with the same sign as the correct value of +the function. +.sp +.LP +For finite |\fIx\fR| > 1, a domain error occurs and a NaN is returned. +.sp +.LP +If \fIx\fR is NaN, NaN is returned. +.sp +.LP +If \fIx\fR is \(+-0, \fIx\fR is returned. +.sp +.LP +If \fIx\fR is \(+-Inf, a domain error occurs and a NaN is returned. +.sp +.LP +For exceptional cases, \fBmatherr\fR(3M) tabulates the values to be returned by +\fBatanh()\fR as specified by SVID3 and XPG3. +.SH ERRORS +.sp +.LP +These functions will fail if: +.sp +.ne 2 +.mk +.na +\fBDomain Error\fR +.ad +.RS 16n +.rt +The \fIx\fR argument is finite and not in the range [-1,1], or is \(+-Inf. +.sp +If the integer expression (\fBmath_errhandling\fR & \fBMATH_ERREXCEPT\fR) is +non-zero, the invalid floating-point exception is raised. +.sp +The \fBatanh()\fR function sets \fBerrno\fR to \fBEDOM\fR if the absolute value +of \fIx\fR is greater than 1.0. +.RE + +.sp +.ne 2 +.mk +.na +\fBPole Error\fR +.ad +.RS 16n +.rt +The \fIx\fR argument is \(+-1. +.sp +If the integer expression (\fBmath_errhandling\fR & \fBMATH_ERREXCEPT\fR) is non-zero, then +the divide-by-zero floating-point exception is raised. +.sp +The \fBatanh()\fR function sets \fBerrno\fR to \fBERANGE\fR if the absolute +value of \fIx\fR is equal to 1.0.
+.RE + +.SH USAGE +.sp +.LP +An application wanting to check for exceptions should call +\fBfeclearexcept\fR(\fBFE_ALL_EXCEPT\fR) before calling these functions. On +return, if \fBfetestexcept\fR(\fBFE_INVALID\fR | \fBFE_DIVBYZERO\fR | +\fBFE_OVERFLOW\fR | \fBFE_UNDERFLOW\fR) is non-zero, an exception has been +raised. An application should either examine the return value or check the +floating point exception flags to detect exceptions. +.sp +.LP +An application can also set \fBerrno\fR to 0 before calling \fBatanh()\fR. On +return, if \fBerrno\fR is non-zero, an error has occurred. The \fBatanhf()\fR +and \fBatanhl()\fR functions do not set \fBerrno\fR. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab(:) box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPE:ATTRIBUTE VALUE +_ +Interface Stability:Standard +_ +MT-Level:MT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBfeclearexcept\fR(3M), \fBfetestexcept\fR(3M), \fBmath.h\fR(3HEAD), +\fBmatherr\fR(3M), \fBtanh\fR(3M), \fBattributes\fR(5), \fBstandards\fR(5) diff --git a/usr/src/man/man3m/cabs.3m b/usr/src/man/man3m/cabs.3m new file mode 100644 index 0000000000..201f920b5b --- /dev/null +++ b/usr/src/man/man3m/cabs.3m @@ -0,0 +1,68 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation.
Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH cabs 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +cabs, cabsf, cabsl \- return a complex absolute value +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... 
] +#include <complex.h> + +\fBdouble\fR \fBcabs\fR(\fBdouble complex\fR \fIz\fR); +.fi + +.LP +.nf +\fBfloat\fR \fBcabsf\fR(\fBfloat complex\fR \fIz\fR); +.fi + +.LP +.nf +\fBlong double\fR \fBcabsl\fR(\fBlong double complex\fR \fIz\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions compute the complex absolute value (also called norm, modulus, +or magnitude) of \fIz\fR. +.SH RETURN VALUES +.sp +.LP +These functions return the complex absolute value. +.SH ERRORS +.sp +.LP +No errors are defined. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab(:) box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPE:ATTRIBUTE VALUE +_ +Interface Stability:Standard +_ +MT-Level:MT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBcomplex.h\fR(3HEAD), \fBattributes\fR(5), \fBstandards\fR(5) diff --git a/usr/src/man/man3m/cacos.3m b/usr/src/man/man3m/cacos.3m new file mode 100644 index 0000000000..a13816d553 --- /dev/null +++ b/usr/src/man/man3m/cacos.3m @@ -0,0 +1,73 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation.
Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH cacos 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +cacos, cacosf, cacosl \- complex arc cosine functions +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... 
] +#include <complex.h> + +\fBdouble complex\fR \fBcacos\fR(\fBdouble complex\fR \fIz\fR); +.fi + +.LP +.nf +\fBfloat complex\fR \fBcacosf\fR(\fBfloat complex\fR \fIz\fR); +.fi + +.LP +.nf +\fBlong double complex\fR \fBcacosl\fR(\fBlong double complex\fR \fIz\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions compute the complex arc cosine of \fIz\fR, with branch cuts +outside the interval [ \(mi1, +1 ] along the real axis. +.SH RETURN VALUES +.sp +.LP +These functions return the complex arc cosine value, in the range of a strip +mathematically unbounded along the imaginary axis and in the interval [ 0, \c +.if n pi\c +.if t \(*p +\c + ] along the real axis. +.SH ERRORS +.sp +.LP +No errors are defined. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab(:) box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPE:ATTRIBUTE VALUE +_ +Interface Stability:Standard +_ +MT-Level:MT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBccos\fR(3M), \fBcomplex.h\fR(3HEAD), \fBattributes\fR(5), \fBstandards\fR(5) diff --git a/usr/src/man/man3m/cacosh.3m b/usr/src/man/man3m/cacosh.3m new file mode 100644 index 0000000000..b578750e01 --- /dev/null +++ b/usr/src/man/man3m/cacosh.3m @@ -0,0 +1,79 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation.
Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH cacosh 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +cacosh, cacoshf, cacoshl \- complex arc hyperbolic cosine functions +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... 
] +#include <complex.h> + +\fBdouble complex\fR \fBcacosh\fR(\fBdouble complex\fR \fIz\fR); +.fi + +.LP +.nf +\fBfloat complex\fR \fBcacoshf\fR(\fBfloat complex\fR \fIz\fR); +.fi + +.LP +.nf +\fBlong double complex\fR \fBcacoshl\fR(\fBlong double complex\fR \fIz\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions compute the complex arc hyperbolic cosine of \fIz\fR, with a +branch cut at values less than 1 along the real axis. +.SH RETURN VALUES +.sp +.LP +These functions return the complex arc hyperbolic cosine value, in the range of +a half-strip of non-negative values along the real axis and in the interval [ +\(mi\fIi\fR\c +.if n pi\c +.if t \(*p +\c +, +\fIi\fR\c +.if n pi\c +.if t \(*p +\c + ] along the imaginary axis. +.SH ERRORS +.sp +.LP +No errors are defined. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab(:) box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPE:ATTRIBUTE VALUE +_ +Interface Stability:Standard +_ +MT-Level:MT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBccosh\fR(3M), \fBcomplex.h\fR(3HEAD), \fBattributes\fR(5), +\fBstandards\fR(5) diff --git a/usr/src/man/man3m/carg.3m b/usr/src/man/man3m/carg.3m new file mode 100644 index 0000000000..b3b3802167 --- /dev/null +++ b/usr/src/man/man3m/carg.3m @@ -0,0 +1,77 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation.
In the following statement, the phrase "this text" refers to portions of the system documentation. Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH carg 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +carg, cargf, cargl \- complex argument functions +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... 
] +#include <complex.h> + +\fBdouble\fR \fBcarg\fR(\fBdouble complex\fR \fIz\fR); +.fi + +.LP +.nf +\fBfloat\fR \fBcargf\fR(\fBfloat complex\fR \fIz\fR); +.fi + +.LP +.nf +\fBlong double\fR \fBcargl\fR(\fBlong double complex\fR \fIz\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions compute the argument (also called phase angle) of \fIz\fR, with +a branch cut along the negative real axis. +.SH RETURN VALUES +.sp +.LP +These functions return the value of the argument in the interval [ \(mi\c +.if n pi\c +.if t \(*p +\c +, +\c +.if n pi\c +.if t \(*p +\c + ]. +.SH ERRORS +.sp +.LP +No errors are defined. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab(:) box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPE:ATTRIBUTE VALUE +_ +Interface Stability:Standard +_ +MT-Level:MT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBcimag\fR(3M), \fBcomplex.h\fR(3HEAD), \fBconj\fR(3M), \fBcproj\fR(3M), +\fBattributes\fR(5), \fBstandards\fR(5) diff --git a/usr/src/man/man3m/casin.3m b/usr/src/man/man3m/casin.3m new file mode 100644 index 0000000000..b498c898e3 --- /dev/null +++ b/usr/src/man/man3m/casin.3m @@ -0,0 +1,77 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation.
Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH casin 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +casin, casinf, casinl \- complex arc sine functions +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... 
] +#include <complex.h> + +\fBdouble complex\fR \fBcasin\fR(\fBdouble complex\fR \fIz\fR); +.fi + +.LP +.nf +\fBfloat complex\fR \fBcasinf\fR(\fBfloat complex\fR \fIz\fR); +.fi + +.LP +.nf +\fBlong double complex\fR \fBcasinl\fR(\fBlong double complex\fR \fIz\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions compute the complex arc sine of \fIz\fR, with branch cuts +outside the interval [ \(mi1, +1 ] along the real axis. +.SH RETURN VALUES +.sp +.LP +These functions return the complex arc sine value, in the range of a strip +mathematically unbounded along the imaginary axis and in the interval [ \(mi\c +.if n pi\c +.if t \(*p +\c +/2, +\c +.if n pi\c +.if t \(*p +\c +/2 ] along the real axis. +.SH ERRORS +.sp +.LP +No errors are defined. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab(:) box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPE:ATTRIBUTE VALUE +_ +Interface Stability:Standard +_ +MT-Level:MT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBcomplex.h\fR(3HEAD), \fBcsin\fR(3M), \fBattributes\fR(5), \fBstandards\fR(5) diff --git a/usr/src/man/man3m/casinh.3m b/usr/src/man/man3m/casinh.3m new file mode 100644 index 0000000000..7c1f973dcb --- /dev/null +++ b/usr/src/man/man3m/casinh.3m @@ -0,0 +1,79 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation.
In the following statement, the phrase "this text" refers to portions of the system documentation. Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH casinh 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +casinh, casinhf, casinhl \- complex arc hyperbolic sine functions +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... 
] +#include <complex.h> + +\fBdouble complex\fR \fBcasinh\fR(\fBdouble complex\fR \fIz\fR); +.fi + +.LP +.nf +\fBfloat complex\fR \fBcasinhf\fR(\fBfloat complex\fR \fIz\fR); +.fi + +.LP +.nf +\fBlong double complex\fR \fBcasinhl\fR(\fBlong double complex\fR \fIz\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions compute the complex arc hyperbolic sine of \fIz\fR, with branch +cuts outside the interval [ \(mi\fIi\fR, +\fIi\fR ] along the imaginary axis. +.SH RETURN VALUES +.sp +.LP +These functions return the complex arc hyperbolic sine value, in the range of a +strip mathematically unbounded along the real axis and in the interval [ +\(mi\fIi\fR\c +.if n pi\c +.if t \(*p +\c +/2, +\fIi\fR\c +.if n pi\c +.if t \(*p +\c +/2 ] along the imaginary axis. +.SH ERRORS +.sp +.LP +No errors are defined. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab(:) box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPE:ATTRIBUTE VALUE +_ +Interface Stability:Standard +_ +MT-Level:MT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBcomplex.h\fR(3HEAD), \fBcsinh\fR(3M), \fBattributes\fR(5), +\fBstandards\fR(5) diff --git a/usr/src/man/man3m/catan.3m b/usr/src/man/man3m/catan.3m new file mode 100644 index 0000000000..bf8c5e241e --- /dev/null +++ b/usr/src/man/man3m/catan.3m @@ -0,0 +1,85 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation.
In the following statement, the phrase "this text" refers to portions of the system documentation. Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH catan 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +catan, catanf, catanl \- complex arc tangent functions +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... 
] +#include <complex.h> + +\fBdouble complex\fR \fBcatan\fR(\fBdouble complex\fR \fIz\fR); +.fi + +.LP +.nf +\fBfloat complex\fR \fBcatanf\fR(\fBfloat complex\fR \fIz\fR); +.fi + +.LP +.nf +\fBlong double complex\fR \fBcatanl\fR(\fBlong double complex\fR \fIz\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions compute the complex arc tangent of \fIz\fR, with branch cuts +outside the interval [ \(mi\fIi\fR, +\fIi\fR ] along the imaginary axis. +.SH RETURN VALUES +.sp +.LP +These functions return the complex arc tangent value, in the range of a strip +mathematically unbounded along the imaginary axis and in the interval [ \(mi\c +.if n pi\c +.if t \(*p +\c +/2, +\c +.if n pi\c +.if t \(*p +\c +/2 ] along the real axis. +.SH ERRORS +.sp +.LP +No errors are defined. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab(:) box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPE:ATTRIBUTE VALUE +_ +Interface Stability:Standard +_ +MT-Level:MT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBcomplex.h\fR(3HEAD), \fBctan\fR(3M), \fBattributes\fR(5), \fBstandards\fR(5) diff --git a/usr/src/man/man3m/catanh.3m b/usr/src/man/man3m/catanh.3m new file mode 100644 index 0000000000..465f9e4ed2 --- /dev/null +++ b/usr/src/man/man3m/catanh.3m @@ -0,0 +1,79 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/.
+.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH catanh 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +catanh, catanhf, catanhl \- complex arc hyperbolic tangent functions +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... 
] +#include <complex.h> + +\fBdouble complex\fR \fBcatanh\fR(\fBdouble complex\fR \fIz\fR); +.fi + +.LP +.nf +\fBfloat complex\fR \fBcatanhf\fR(\fBfloat complex\fR \fIz\fR); +.fi + +.LP +.nf +\fBlong double complex\fR \fBcatanhl\fR(\fBlong double complex\fR \fIz\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions compute the complex arc hyperbolic tangent of \fIz\fR, with +branch cuts outside the interval [ \(mi1, +1 ] along the real axis. +.SH RETURN VALUES +.sp +.LP +These functions return the complex arc hyperbolic tangent value, in the range +of a strip mathematically unbounded along the real axis and in the interval [ +\(mi\fIi\fR\c +.if n pi\c +.if t \(*p +\c +/2, +\fIi\fR\c +.if n pi\c +.if t \(*p +\c +/2 ] along the imaginary axis. +.SH ERRORS +.sp +.LP +No errors are defined. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBcomplex.h\fR(3HEAD), \fBctanh\fR(3M), \fBattributes\fR(5), +\fBstandards\fR(5) diff --git a/usr/src/man/man3m/cbrt.3m b/usr/src/man/man3m/cbrt.3m new file mode 100644 index 0000000000..2798da8e2d --- /dev/null +++ b/usr/src/man/man3m/cbrt.3m @@ -0,0 +1,74 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Copyright (c) 1992, X/Open Company Limited. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. 
+.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH cbrt 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +cbrt, cbrtf, cbrtl \- cube root functions +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... 
] +#include <math.h> + +\fBdouble\fR \fBcbrt\fR(\fBdouble\fR \fIx\fR); +.fi + +.LP +.nf +\fBfloat\fR \fBcbrtf\fR(\fBfloat\fR \fIx\fR); +.fi + +.LP +.nf +\fBlong double\fR \fBcbrtl\fR(\fBlong double\fR \fIx\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions compute the real cube root of their argument \fIx\fR. +.SH RETURN VALUES +.sp +.LP +On successful completion, these functions return the cube root of \fIx\fR. +.sp +.LP +If \fIx\fR is NaN, a NaN is returned. +.sp +.LP +If \fIx\fR is \(+-0 or \(+-Inf, \fIx\fR is returned. +.SH ERRORS +.sp +.LP +No errors are defined. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBmath.h\fR(3HEAD), \fBattributes\fR(5), \fBstandards\fR(5) diff --git a/usr/src/man/man3m/ccos.3m b/usr/src/man/man3m/ccos.3m new file mode 100644 index 0000000000..d142b57ada --- /dev/null +++ b/usr/src/man/man3m/ccos.3m @@ -0,0 +1,68 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. 
Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH ccos 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +ccos, ccosf, ccosl \- complex cosine functions +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... 
] +#include <complex.h> + +\fBdouble complex\fR \fBccos\fR(\fBdouble complex\fR \fIz\fR); +.fi + +.LP +.nf +\fBfloat complex\fR \fBccosf\fR(\fBfloat complex\fR \fIz\fR); +.fi + +.LP +.nf +\fBlong double complex\fR \fBccosl\fR(\fBlong double complex\fR \fIz\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions compute the complex cosine of \fIz\fR. +.SH RETURN VALUES +.sp +.LP +These functions return the complex cosine value. +.SH ERRORS +.sp +.LP +No errors are defined. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBcacos\fR(3M), \fBcomplex.h\fR(3HEAD), \fBattributes\fR(5), +\fBstandards\fR(5) diff --git a/usr/src/man/man3m/ccosh.3m b/usr/src/man/man3m/ccosh.3m new file mode 100644 index 0000000000..d470a6b07a --- /dev/null +++ b/usr/src/man/man3m/ccosh.3m @@ -0,0 +1,68 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. 
Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH ccosh 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +ccosh, ccoshf, ccoshl \- complex hyperbolic cosine functions +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... 
] +#include <complex.h> + +\fBdouble complex\fR \fBccosh\fR(\fBdouble complex\fR \fIz\fR); +.fi + +.LP +.nf +\fBfloat complex\fR \fBccoshf\fR(\fBfloat complex\fR \fIz\fR); +.fi + +.LP +.nf +\fBlong double complex\fR \fBccoshl\fR(\fBlong double complex\fR \fIz\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions compute the complex hyperbolic cosine of \fIz\fR. +.SH RETURN VALUES +.sp +.LP +These functions return the complex hyperbolic cosine value. +.SH ERRORS +.sp +.LP +No errors are defined. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBcacosh\fR(3M), \fBcomplex.h\fR(3HEAD), \fBattributes\fR(5), +\fBstandards\fR(5) diff --git a/usr/src/man/man3m/ceil.3m b/usr/src/man/man3m/ceil.3m new file mode 100644 index 0000000000..7e4d2586e1 --- /dev/null +++ b/usr/src/man/man3m/ceil.3m @@ -0,0 +1,79 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Copyright (c) 1992, X/Open Company Limited. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. 
Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH ceil 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +ceil, ceilf, ceill \- ceiling value function +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... ] +#include <math.h> + +\fBdouble\fR \fBceil\fR(\fBdouble\fR \fIx\fR); +.fi + +.LP +.nf +\fBfloat\fR \fBceilf\fR(\fBfloat\fR \fIx\fR); +.fi + +.LP +.nf +\fBlong double\fR \fBceill\fR(\fBlong double\fR \fIx\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions compute the smallest integral value not less than \fIx\fR. 
+.SH RETURN VALUES +.sp +.LP +Upon successful completion, the \fBceil()\fR, \fBceilf()\fR, and \fBceill()\fR +functions return the smallest integral value not less than \fIx\fR, expressed +as a type \fBdouble\fR, \fBfloat\fR, or \fBlong double\fR, respectively. +.sp +.LP +If \fIx\fR is NaN, a NaN is returned. +.sp +.LP +If \fIx\fR is \(+-0 or \(+-Inf, \fIx\fR is returned. +.SH USAGE +.sp +.LP +The integral value returned by these functions need not be expressible as an +\fBint\fR or \fBlong int\fR. The return value should be tested before assigning +it to an integer type to avoid the undefined results of an integer overflow. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBfeclearexcept\fR(3M), \fBfetestexcept\fR(3M), \fBfloor\fR(3M), +\fBisnan\fR(3M), \fBmath.h\fR(3HEAD), \fBattributes\fR(5), \fBstandards\fR(5) diff --git a/usr/src/man/man3m/cexp.3m b/usr/src/man/man3m/cexp.3m new file mode 100644 index 0000000000..140f8b3658 --- /dev/null +++ b/usr/src/man/man3m/cexp.3m @@ -0,0 +1,67 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. 
Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH cexp 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +cexp, cexpf, cexpl \- complex exponential functions +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... 
] +#include <complex.h> + +\fBdouble complex\fR \fBcexp\fR(\fBdouble complex\fR \fIz\fR); +.fi + +.LP +.nf +\fBfloat complex\fR \fBcexpf\fR(\fBfloat complex\fR \fIz\fR); +.fi + +.LP +.nf +\fBlong double complex\fR \fBcexpl\fR(\fBlong double complex\fR \fIz\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions compute the complex exponent of \fIz\fR, defined as e^\fIz\fR. +.SH RETURN VALUES +.sp +.LP +These functions return the complex exponential value of \fIz\fR. +.SH ERRORS +.sp +.LP +No errors are defined. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBclog\fR(3M), \fBcomplex.h\fR(3HEAD), \fBattributes\fR(5), \fBstandards\fR(5) diff --git a/usr/src/man/man3m/cimag.3m b/usr/src/man/man3m/cimag.3m new file mode 100644 index 0000000000..16e776c944 --- /dev/null +++ b/usr/src/man/man3m/cimag.3m @@ -0,0 +1,68 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. 
Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH cimag 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +cimag, cimagf, cimagl \- complex imaginary functions +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... 
] +#include <complex.h> + +\fBdouble\fR \fBcimag\fR(\fBdouble complex\fR \fIz\fR); +.fi + +.LP +.nf +\fBfloat\fR \fBcimagf\fR(\fBfloat complex\fR \fIz\fR); +.fi + +.LP +.nf +\fBlong double\fR \fBcimagl\fR(\fBlong double complex\fR \fIz\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions compute the imaginary part of \fIz\fR. +.SH RETURN VALUES +.sp +.LP +These functions return the imaginary part value (as a real). +.SH ERRORS +.sp +.LP +No errors are defined. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBcarg\fR(3M), \fBcomplex.h\fR(3HEAD), \fBconj\fR(3M), \fBcproj\fR(3M), +\fBcreal\fR(3M), \fBattributes\fR(5), \fBstandards\fR(5) diff --git a/usr/src/man/man3m/clog.3m b/usr/src/man/man3m/clog.3m new file mode 100644 index 0000000000..f11cc044a5 --- /dev/null +++ b/usr/src/man/man3m/clog.3m @@ -0,0 +1,70 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. 
Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH clog 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +clog, clogf, clogl \- complex natural logarithm functions +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... 
] +#include <complex.h> + +\fBdouble complex\fR \fBclog\fR(\fBdouble complex\fR \fIz\fR); +.fi + +.LP +.nf +\fBfloat complex\fR \fBclogf\fR(\fBfloat complex\fR \fIz\fR); +.fi + +.LP +.nf +\fBlong double complex\fR \fBclogl\fR(\fBlong double complex\fR \fIz\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions compute the complex natural (base \fIe\fR) logarithm of +\fIz\fR, with a branch cut along the negative real axis. +.SH RETURN VALUES +.sp +.LP +These functions return the complex natural logarithm value, in the range of a +strip mathematically unbounded along the real axis and in the interval [ +\(mi\fIi\fR\(*p, +\fIi\fR\(*p ] along the imaginary axis. +.SH ERRORS +.sp +.LP +No errors are defined. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBcexp\fR(3M), \fBcomplex.h\fR(3HEAD), \fBattributes\fR(5), \fBstandards\fR(5) diff --git a/usr/src/man/man3m/conj.3m b/usr/src/man/man3m/conj.3m new file mode 100644 index 0000000000..c02288bf63 --- /dev/null +++ b/usr/src/man/man3m/conj.3m @@ -0,0 +1,69 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. 
Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH conj 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +conj, conjf, conjl \- complex conjugate functions +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... 
] +#include <complex.h> + +\fBdouble complex\fR \fBconj\fR(\fBdouble complex\fR \fIz\fR); +.fi + +.LP +.nf +\fBfloat complex\fR \fBconjf\fR(\fBfloat complex\fR \fIz\fR); +.fi + +.LP +.nf +\fBlong double complex\fR \fBconjl\fR(\fBlong double complex\fR \fIz\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions compute the complex conjugate of \fIz\fR, by reversing the sign of +its imaginary part. +.SH RETURN VALUES +.sp +.LP +These functions return the complex conjugate value. +.SH ERRORS +.sp +.LP +No errors are defined. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBcarg\fR(3M), \fBcimag\fR(3M), \fBcomplex.h\fR(3HEAD), \fBcproj\fR(3M), +\fBcreal\fR(3M), \fBattributes\fR(5), \fBstandards\fR(5) diff --git a/usr/src/man/man3m/copysign.3m b/usr/src/man/man3m/copysign.3m new file mode 100644 index 0000000000..017b9f9088 --- /dev/null +++ b/usr/src/man/man3m/copysign.3m @@ -0,0 +1,70 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Copyright (c) 1992, X/Open Company Limited. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. 
Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH copysign 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +copysign, copysignf, copysignl \- number manipulation function +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... 
] +#include <math.h> + +\fBdouble\fR \fBcopysign\fR(\fBdouble\fR \fIx\fR, \fBdouble\fR \fIy\fR); +.fi + +.LP +.nf +\fBfloat\fR \fBcopysignf\fR(\fBfloat\fR \fIx\fR, \fBfloat\fR \fIy\fR); +.fi + +.LP +.nf +\fBlong double\fR \fBcopysignl\fR(\fBlong double\fR \fIx\fR, \fBlong double\fR \fIy\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions produce a value with the magnitude of \fIx\fR and the sign of +\fIy\fR. +.SH RETURN VALUES +.sp +.LP +Upon successful completion, these functions return a value with the magnitude +of \fIx\fR and the sign of \fIy\fR. +.SH ERRORS +.sp +.LP +No errors are defined. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBmath.h\fR(3HEAD), \fBsignbit\fR(3M), \fBattributes\fR(5), \fBstandards\fR(5) diff --git a/usr/src/man/man3m/cos.3m b/usr/src/man/man3m/cos.3m new file mode 100644 index 0000000000..023f211d14 --- /dev/null +++ b/usr/src/man/man3m/cos.3m @@ -0,0 +1,102 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Copyright (c) 1992, X/Open Company Limited. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. 
Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH cos 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +cos, cosf, cosl \- cosine function +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... ] +#include <math.h> + +\fBdouble\fR \fBcos\fR(\fBdouble\fR \fIx\fR); +.fi + +.LP +.nf +\fBfloat\fR \fBcosf\fR(\fBfloat\fR \fIx\fR); +.fi + +.LP +.nf +\fBlong double\fR \fBcosl\fR(\fBlong double\fR \fIx\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions compute the cosine of \fIx\fR, measured in radians. 
+.SH RETURN VALUES +.sp +.LP +Upon successful completion, these functions return the cosine of \fIx\fR. +.sp +.LP +If \fIx\fR is NaN, NaN is returned. +.sp +.LP +If \fIx\fR is \(+-0, 1.0 is returned. +.sp +.LP +If \fIx\fR is \(+-Inf, a domain error occurs and a NaN is returned. +.SH ERRORS +.sp +.LP +These functions will fail if: +.sp +.ne 2 +.mk +.na +\fBDomain Error\fR +.ad +.RS 16n +.rt +The \fIx\fR argument is \(+-Inf. +.sp +If the integer expression (\fBmath_errhandling\fR & \fBMATH_ERREXCEPT\fR) is +non-zero, the invalid floating-point exception is raised. +.RE + +.SH USAGE +.sp +.LP +An application wanting to check for exceptions should call +\fBfeclearexcept\fR(\fBFE_ALL_EXCEPT\fR) before calling these functions. On +return, if \fBfetestexcept\fR(\fBFE_INVALID\fR | \fBFE_DIVBYZERO\fR | +\fBFE_OVERFLOW\fR | \fBFE_UNDERFLOW\fR) is non-zero, an exception has been +raised. An application should either examine the return value or check the +floating point exception flags to detect exceptions. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBacos\fR(3M), \fBfeclearexcept\fR(3M), \fBfetestexcept\fR(3M), +\fBisnan\fR(3M), \fBmath.h\fR(3HEAD), \fBsin\fR(3M), \fBtan\fR(3M), +\fBattributes\fR(5), \fBstandards\fR(5) diff --git a/usr/src/man/man3m/cosh.3m b/usr/src/man/man3m/cosh.3m new file mode 100644 index 0000000000..5144e1b959 --- /dev/null +++ b/usr/src/man/man3m/cosh.3m @@ -0,0 +1,120 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Copyright (c) 1992, X/Open Company Limited. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. 
gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH cosh 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +cosh, coshf, coshl \- hyperbolic cosine function +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... ] +#include <math.h> + +\fBdouble\fR \fBcosh\fR(\fBdouble\fR \fIx\fR); +.fi + +.LP +.nf +\fBfloat\fR \fBcoshf\fR(\fBfloat\fR \fIx\fR); +.fi + +.LP +.nf +\fBlong double\fR \fBcoshl\fR(\fBlong double\fR \fIx\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions compute the hyperbolic cosine of their argument \fIx\fR. +.SH RETURN VALUES +.sp +.LP +Upon successful completion, these functions return the hyperbolic cosine of +\fIx\fR. +.sp +.LP +If the correct value would cause overflow, a range error occurs and +\fBcosh()\fR, \fBcoshf()\fR, and \fBcoshl()\fR return the value of the macro +\fBHUGE_VAL\fR, \fBHUGE_VALF\fR, and \fBHUGE_VALL\fR, respectively. +.sp +.LP +If \fIx\fR is NaN, a NaN is returned. +.sp +.LP +If \fIx\fR is \(+-0, 1.0 is returned. +.sp +.LP +If \fIx\fR is \(+-Inf, +Inf is returned. +.sp +.LP +For exceptional cases, \fBmatherr\fR(3M) tabulates the values to be returned by +\fBcosh()\fR as specified by SVID3 and XPG3. +.SH ERRORS +.sp +.LP +These functions will fail if: +.sp +.ne 2 +.mk +.na +\fBRange Error\fR +.ad +.RS 15n +.rt +The result would cause an overflow. +.sp +If the integer expression (\fBmath_errhandling\fR & \fBMATH_ERREXCEPT\fR) is +non-zero, the overflow floating-point exception is raised. +.sp +The \fBcosh()\fR function sets \fBerrno\fR to \fBERANGE\fR if the result would +cause an overflow. +.RE + +.SH USAGE +.sp +.LP +An application wanting to check for exceptions should call +\fBfeclearexcept\fR(\fBFE_ALL_EXCEPT\fR) before calling these functions. 
On +return, if \fBfetestexcept\fR(\fBFE_INVALID\fR | \fBFE_DIVBYZERO\fR | +\fBFE_OVERFLOW\fR | \fBFE_UNDERFLOW\fR) is non-zero, an exception has been +raised. An application should either examine the return value or check the +floating point exception flags to detect exceptions. +.sp +.LP +An application can also set \fBerrno\fR to 0 before calling \fBcosh()\fR. On +return, if \fBerrno\fR is non-zero, an error has occurred. The \fBcoshf()\fR +and \fBcoshl()\fR functions do not set \fBerrno\fR. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBacosh\fR(3M), \fBfeclearexcept\fR(3M), \fBfetestexcept\fR(3M), +\fBisnan\fR(3M), \fBmath.h\fR(3HEAD), \fBmatherr\fR(3M), \fBsinh\fR(3M), +\fBtanh\fR(3M), \fBattributes\fR(5), \fBstandards\fR(5) diff --git a/usr/src/man/man3m/cpow.3m b/usr/src/man/man3m/cpow.3m new file mode 100644 index 0000000000..cb6fca6081 --- /dev/null +++ b/usr/src/man/man3m/cpow.3m @@ -0,0 +1,70 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. 
Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH cpow 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +cpow, cpowf, cpowl \- complex power functions +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... 
] +#include <complex.h> + +\fBdouble complex\fR \fBcpow\fR(\fBdouble complex\fR \fIx\fR, \fBdouble complex\fR \fIy\fR); +.fi + +.LP +.nf +\fBfloat complex\fR \fBcpowf\fR(\fBfloat complex\fR \fIx\fR, \fBfloat complex\fR \fIy\fR); +.fi + +.LP +.nf +\fBlong double complex\fR \fBcpowl\fR(\fBlong double complex\fR \fIx\fR, + \fBlong double complex\fR \fIy\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions compute the complex power function \fIx\fR^\fIy\fR, with a +branch cut for the first parameter along the negative real axis. +.SH RETURN VALUES +.sp +.LP +These functions return the complex power function value. +.SH ERRORS +.sp +.LP +No errors are defined. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBcabs\fR(3M), \fBcomplex.h\fR(3HEAD), \fBcsqrt\fR(3M), \fBattributes\fR(5), +\fBstandards\fR(5) diff --git a/usr/src/man/man3m/cproj.3m b/usr/src/man/man3m/cproj.3m new file mode 100644 index 0000000000..c471752d3e --- /dev/null +++ b/usr/src/man/man3m/cproj.3m @@ -0,0 +1,79 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. 
Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH cproj 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +cproj, cprojf, cprojl \- complex projection functions +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... 
] +#include <complex.h> + +\fBdouble complex\fR \fBcproj\fR(\fBdouble complex\fR \fIz\fR); +.fi + +.LP +.nf +\fBfloat complex\fR \fBcprojf\fR(\fBfloat complex\fR \fIz\fR); +.fi + +.LP +.nf +\fBlong double complex\fR \fBcprojl\fR(\fBlong double complex\fR \fIz\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions compute a projection of \fIz\fR onto the Riemann sphere: +\fIz\fR projects to \fIz\fR, except that all complex infinities (even those +with one infinite part and one NaN part) project to positive infinity on the +real axis. If \fIz\fR has an infinite part, then \fBcproj\fR(\fIz\fR) is +equivalent to: +.sp +.in +2 +.nf +INFINITY + I * copysign(0.0, cimag(z)) +.fi +.in -2 + +.SH RETURN VALUES +.sp +.LP +These functions return the value of the projection onto the Riemann sphere. +.SH ERRORS +.sp +.LP +No errors are defined. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBcarg\fR(3M), \fBcimag\fR(3M), \fBcomplex.h\fR(3HEAD), \fBconj\fR(3M), +\fBcreal\fR(3M), \fBattributes\fR(5), \fBstandards\fR(5) diff --git a/usr/src/man/man3m/creal.3m b/usr/src/man/man3m/creal.3m new file mode 100644 index 0000000000..f3e6c90c99 --- /dev/null +++ b/usr/src/man/man3m/creal.3m @@ -0,0 +1,79 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. 
+.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH creal 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +creal, crealf, creall \- complex real functions +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... 
] +#include <complex.h> + +\fBdouble\fR \fBcreal\fR(\fBdouble complex\fR \fIz\fR); +.fi + +.LP +.nf +\fBfloat\fR \fBcrealf\fR(\fBfloat complex\fR \fIz\fR); +.fi + +.LP +.nf +\fBlong double\fR \fBcreall\fR(\fBlong double complex\fR \fIz\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions compute the real part of \fIz\fR. +.SH RETURN VALUES +.sp +.LP +These functions return the real part value. +.SH ERRORS +.sp +.LP +No errors are defined. +.SH USAGE +.sp +.LP +For a variable \fIz\fR of complex type: +.sp +.in +2 +.nf +z == creal(z) + cimag(z)*I +.fi +.in -2 + +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBcarg\fR(3M), \fBcimag\fR(3M), \fBcomplex.h\fR(3HEAD), \fBconj\fR(3M), +\fBcproj\fR(3M), \fBattributes\fR(5), \fBstandards\fR(5) diff --git a/usr/src/man/man3m/csin.3m b/usr/src/man/man3m/csin.3m new file mode 100644 index 0000000000..33f79c7a51 --- /dev/null +++ b/usr/src/man/man3m/csin.3m @@ -0,0 +1,68 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. 
Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH csin 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +csin, csinf, csinl \- complex sine functions +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... 
] +#include <complex.h> + +\fBdouble complex\fR \fBcsin\fR(\fBdouble complex\fR \fIz\fR); +.fi + +.LP +.nf +\fBfloat complex\fR \fBcsinf\fR(\fBfloat complex\fR \fIz\fR); +.fi + +.LP +.nf +\fBlong double complex\fR \fBcsinl\fR(\fBlong double complex\fR \fIz\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions compute the complex sine of \fIz\fR. +.SH RETURN VALUES +.sp +.LP +These functions return the complex sine value. +.SH ERRORS +.sp +.LP +No errors are defined. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBcasin\fR(3M), \fBcomplex.h\fR(3HEAD), \fBattributes\fR(5), +\fBstandards\fR(5) diff --git a/usr/src/man/man3m/csinh.3m b/usr/src/man/man3m/csinh.3m new file mode 100644 index 0000000000..16a5e7db7c --- /dev/null +++ b/usr/src/man/man3m/csinh.3m @@ -0,0 +1,68 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. 
Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH csinh 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +csinh, csinhf, csinhl \- complex hyperbolic sine functions +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... 
] +#include <complex.h> + +\fBdouble complex\fR \fBcsinh\fR(\fBdouble complex\fR \fIz\fR); +.fi + +.LP +.nf +\fBfloat complex\fR \fBcsinhf\fR(\fBfloat complex\fR \fIz\fR); +.fi + +.LP +.nf +\fBlong double complex\fR \fBcsinhl\fR(\fBlong double complex\fR \fIz\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions compute the complex hyperbolic sine of \fIz\fR. +.SH RETURN VALUES +.sp +.LP +These functions return the complex hyperbolic sine value. +.SH ERRORS +.sp +.LP +No errors are defined. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBcasinh\fR(3M), \fBcomplex.h\fR(3HEAD), \fBattributes\fR(5), +\fBstandards\fR(5) diff --git a/usr/src/man/man3m/csqrt.3m b/usr/src/man/man3m/csqrt.3m new file mode 100644 index 0000000000..1038b7c580 --- /dev/null +++ b/usr/src/man/man3m/csqrt.3m @@ -0,0 +1,70 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. 
Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH csqrt 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +csqrt, csqrtf, csqrtl \- complex square root functions +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... 
] +#include <complex.h> + +\fBdouble complex\fR \fBcsqrt\fR(\fBdouble complex\fR \fIz\fR); +.fi + +.LP +.nf +\fBfloat complex\fR \fBcsqrtf\fR(\fBfloat complex\fR \fIz\fR); +.fi + +.LP +.nf +\fBlong double complex\fR \fBcsqrtl\fR(\fBlong double complex\fR \fIz\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions compute the complex square root of \fIz\fR, with a branch cut +along the negative real axis. +.SH RETURN VALUES +.sp +.LP +These functions return the complex square root value, in the range of the right +half-plane (including the imaginary axis). +.SH ERRORS +.sp +.LP +No errors are defined. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBcabs\fR(3M), \fBcomplex.h\fR(3HEAD), \fBcpow\fR(3M), \fBattributes\fR(5), +\fBstandards\fR(5) diff --git a/usr/src/man/man3m/ctan.3m b/usr/src/man/man3m/ctan.3m new file mode 100644 index 0000000000..b3b2901bc1 --- /dev/null +++ b/usr/src/man/man3m/ctan.3m @@ -0,0 +1,68 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. 
Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH ctan 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +ctan, ctanf, ctanl \- complex tangent functions +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... 
] +#include <complex.h> + +\fBdouble complex\fR \fBctan\fR(\fBdouble complex\fR \fIz\fR); +.fi + +.LP +.nf +\fBfloat complex\fR \fBctanf\fR(\fBfloat complex\fR \fIz\fR); +.fi + +.LP +.nf +\fBlong double complex\fR \fBctanl\fR(\fBlong double complex\fR \fIz\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions compute the complex tangent of \fIz\fR. +.SH RETURN VALUES +.sp +.LP +These functions return the complex tangent value. +.SH ERRORS +.sp +.LP +No errors are defined. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBcatan\fR(3M), \fBcomplex.h\fR(3HEAD), \fBattributes\fR(5), +\fBstandards\fR(5) diff --git a/usr/src/man/man3m/ctanh.3m b/usr/src/man/man3m/ctanh.3m new file mode 100644 index 0000000000..0d3490ad3d --- /dev/null +++ b/usr/src/man/man3m/ctanh.3m @@ -0,0 +1,68 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. 
Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH ctanh 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +ctanh, ctanhf, ctanhl \- complex hyperbolic tangent functions +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... 
] +#include <complex.h> + +\fBdouble complex\fR \fBctanh\fR(\fBdouble complex\fR \fIz\fR); +.fi + +.LP +.nf +\fBfloat complex\fR \fBctanhf\fR(\fBfloat complex\fR \fIz\fR); +.fi + +.LP +.nf +\fBlong double complex\fR \fBctanhl\fR(\fBlong double complex\fR \fIz\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions compute the complex hyperbolic tangent of \fIz\fR. +.SH RETURN VALUES +.sp +.LP +These functions return the complex hyperbolic tangent value. +.SH ERRORS +.sp +.LP +No errors are defined. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBcatanh\fR(3M), \fBcomplex.h\fR(3HEAD), \fBattributes\fR(5), +\fBstandards\fR(5) diff --git a/usr/src/man/man3m/erf.3m b/usr/src/man/man3m/erf.3m new file mode 100644 index 0000000000..d30fdd89e3 --- /dev/null +++ b/usr/src/man/man3m/erf.3m @@ -0,0 +1,88 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical +.\" and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Copyright (c) 1992, X/Open Company Limited. +.\" All Rights Reserved. +.\" Copyright (c) 1983 Regents of the University +.\" of California. All rights reserved. The Berkeley software License Agreement +.\" specifies the terms and conditions for redistribution. +.\" Portions Copyright (c) 2006, Sun Microsystems, +.\" Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. 
In the following statement, the phrase "this text" refers to portions of the system documentation. Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.TH erf 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +erf, erff, erfl \- error function +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... ] +#include <math.h> + +\fBdouble\fR \fBerf\fR(\fBdouble\fR \fIx\fR); +.fi + +.LP +.nf +\fBfloat\fR \fBerff\fR(\fBfloat\fR \fIx\fR); +.fi + +.LP +.nf +\fBlong double\fR \fBerfl\fR(\fBlong double\fR \fIx\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions compute the error function of their argument \fIx\fR, defined +as: +.sp +2/sqrt(pi)*integral from 0 to x of exp(-t*t) dt +.SH RETURN VALUES +.sp +.LP +Upon successful completion, these functions return the value of the error +function. +.sp +.LP +If \fIx\fR is NaN, a NaN is returned. +.sp +.LP +If \fIx\fR is \(+-0, \(+-0 is returned. +.sp +.LP +If \fIx\fR is \(+-Inf, \(+-1 is returned. +.sp +.LP +If \fIx\fR is subnormal, 2/sqrt(\c +.if n pi\c +.if t \(*p +\c +) * \fIx\fR is returned. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. 
+ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBerfc\fR(3M), \fBfeclearexcept\fR(3M), \fBfetestexcept\fR(3M), +\fBisnan\fR(3M), \fBmath.h\fR(3HEAD), \fBattributes\fR(5), \fBstandards\fR(5) diff --git a/usr/src/man/man3m/erfc.3m b/usr/src/man/man3m/erfc.3m new file mode 100644 index 0000000000..4f451278d2 --- /dev/null +++ b/usr/src/man/man3m/erfc.3m @@ -0,0 +1,88 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). 
You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH erfc 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +erfc, erfcf, erfcl \- complementary error function +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... ] +#include <math.h> + +\fBdouble\fR \fBerfc\fR(\fBdouble\fR \fIx\fR); +.fi + +.LP +.nf +\fBfloat\fR \fBerfcf\fR(\fBfloat\fR \fIx\fR); +.fi + +.LP +.nf +\fBlong double\fR \fBerfcl\fR(\fBlong double\fR \fIx\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions compute the complementary error function 1.0 \(mi +\fBerf(\fR\fIx\fR\fB).\fR +.SH RETURN VALUES +.sp +.LP +Upon successful completion, these functions return the value of the +complementary error function. +.sp +.LP +If \fIx\fR is NaN, a NaN is returned. +.sp +.LP +If \fIx\fR is \(+-0, +1 is returned. +.sp +.LP +If \fIx\fR is \(miInf, +2 is returned. +.sp +.LP +If \fIx\fR is +Inf, 0 is returned. +.SH ERRORS +.sp +.LP +No errors are defined. +.SH USAGE +.sp +.LP +The \fBerfc()\fR function is provided because of the extreme loss of relative +accuracy if \fBerf(\fR\fIx\fR\fB)\fR is called for large \fIx\fR and the result +subtracted from 1.0. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. 
+ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBerf\fR(3M), \fBisnan\fR(3M), \fBmath.h\fR(3HEAD), \fBattributes\fR(5), +\fBstandards\fR(5) diff --git a/usr/src/man/man3m/exp.3m b/usr/src/man/man3m/exp.3m new file mode 100644 index 0000000000..d751dda290 --- /dev/null +++ b/usr/src/man/man3m/exp.3m @@ -0,0 +1,120 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Copyright (c) 1992, X/Open Company Limited. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). 
You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH exp 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +exp, expf, expl \- exponential function +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... ] +#include <math.h> + +\fBdouble\fR \fBexp\fR(\fBdouble\fR \fIx\fR); +.fi + +.LP +.nf +\fBfloat\fR \fBexpf\fR(\fBfloat\fR \fIx\fR); +.fi + +.LP +.nf +\fBlong double\fR \fBexpl\fR(\fBlong double\fR \fIx\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions compute the base-\fIe\fR exponential of \fIx\fR. +.SH RETURN VALUES +.sp +.LP +Upon successful completion, these functions return the exponential value of +\fIx\fR. +.sp +.LP +If the correct value would cause overflow, a range error occurs and +\fBexp()\fR, \fBexpf()\fR, and \fBexpl()\fR return \fBHUGE_VAL\fR, +\fBHUGE_VALF\fR, and \fBHUGE_VALL\fR, respectively. +.sp +.LP +If \fIx\fR is NaN, a NaN is returned. +.sp +.LP +If \fIx\fR is \(+-0, 1 is returned. +.sp +.LP +If \fIx\fR is +Inf, \fIx\fR is returned. +.sp +.LP +For exceptional cases, \fBmatherr\fR(3M) tabulates the values to be returned by +\fBexp()\fR as specified by SVID3 and XPG3. See \fBstandards\fR(5). +.SH ERRORS +.sp +.LP +These functions will fail if: +.sp +.ne 2 +.mk +.na +\fBRange Error\fR +.ad +.RS 15n +.rt +The result overflows. 
+.sp +If the integer expression (\fBmath_errhandling\fR & \fBMATH_ERREXCEPT\fR) is +non-zero, the overflow floating-point exception is raised. +.sp +The \fBexp()\fR function sets \fBerrno\fR to \fBERANGE\fR if the result +overflows. +.RE + +.SH USAGE +.sp +.LP +An application wanting to check for exceptions should call +\fBfeclearexcept\fR(\fBFE_ALL_EXCEPT\fR) before calling these functions. On +return, if \fBfetestexcept\fR(\fBFE_INVALID\fR | \fBFE_DIVBYZERO\fR | +\fBFE_OVERFLOW\fR | \fBFE_UNDERFLOW\fR) is non-zero, an exception has been +raised. An application should either examine the return value or check the +floating point exception flags to detect exceptions. +.sp +.LP +An application can also set \fBerrno\fR to 0 before calling \fBexp()\fR. On +return, if \fBerrno\fR is non-zero, an error has occurred. The \fBexpf()\fR and +\fBexpl()\fR functions do not set \fBerrno\fR. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBfeclearexcept\fR(3M), \fBfetestexcept\fR(3M), \fBisnan\fR(3M), +\fBlog\fR(3M), \fBmath.h\fR(3HEAD), \fBmatherr\fR(3M), \fBmp\fR(3MP), +\fBattributes\fR(5), \fBstandards\fR(5) diff --git a/usr/src/man/man3m/exp2.3m b/usr/src/man/man3m/exp2.3m new file mode 100644 index 0000000000..1eb9f59a4f --- /dev/null +++ b/usr/src/man/man3m/exp2.3m @@ -0,0 +1,109 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. 
+.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH exp2 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +exp2, exp2f, exp2l \- exponential base 2 functions +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... 
] +#include <math.h> + +\fBdouble\fR \fBexp2\fR(\fBdouble\fR \fIx\fR); +.fi + +.LP +.nf +\fBfloat\fR \fBexp2f\fR(\fBfloat\fR \fIx\fR); +.fi + +.LP +.nf +\fBlong double\fR \fBexp2l\fR(\fBlong double\fR \fIx\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions compute the base-2 exponential of \fIx\fR. +.SH RETURN VALUES +.sp +.LP +Upon successful completion, these functions return 2^\fIx\fR. +.sp +.LP +If the correct value would cause overflow, a range error occurs and +\fBexp2()\fR, \fBexp2f()\fR, and \fBexp2l()\fR return the value of the macro +\fBHUGE_VAL\fR, \fBHUGE_VALF\fR, and \fBHUGE_VALL\fR, respectively. +.sp +.LP +If \fIx\fR is NaN, a NaN is returned. +.sp +.LP +If \fIx\fR is \(+-0, 1 is returned. +.sp +.LP +If \fIx\fR is \(miInf, +0 is returned. +.sp +.LP +If \fIx\fR is +Inf, \fIx\fR is returned. +.SH ERRORS +.sp +.LP +These functions will fail if: +.sp +.ne 2 +.mk +.na +\fBRange Error\fR +.ad +.RS 15n +.rt +The result overflows. +.sp +If the integer expression (\fBmath_errhandling\fR & \fBMATH_ERREXCEPT\fR) is +non-zero, the overflow floating-point exception will be raised. +.RE + +.SH USAGE +.sp +.LP +An application wanting to check for exceptions should call +\fBfeclearexcept\fR(\fBFE_ALL_EXCEPT\fR) before calling these functions. On +return, if \fBfetestexcept\fR(\fBFE_INVALID\fR | \fBFE_DIVBYZERO\fR | +\fBFE_OVERFLOW\fR | \fBFE_UNDERFLOW\fR) is non-zero, an exception has been +raised. An application should either examine the return value or check the +floating point exception flags to detect exceptions. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. 
+ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBexp\fR(3M), \fBfeclearexcept\fR(3M), \fBfetestexcept\fR(3M), +\fBisnan\fR(3M), \fBlog\fR(3M), \fBmath.h\fR(3HEAD), \fBattributes\fR(5), +\fBstandards\fR(5) diff --git a/usr/src/man/man3m/expm1.3m b/usr/src/man/man3m/expm1.3m new file mode 100644 index 0000000000..c68701f110 --- /dev/null +++ b/usr/src/man/man3m/expm1.3m @@ -0,0 +1,125 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Copyright (c) 1992, X/Open Company Limited. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. 
+.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH expm1 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +expm1, expm1f, expm1l \- compute exponential function +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... ] +#include <math.h> + +\fBdouble\fR \fBexpm1\fR(\fBdouble\fR \fIx\fR); +.fi + +.LP +.nf +\fBfloat\fR \fBexpm1f\fR(\fBfloat\fR \fIx\fR); +.fi + +.LP +.nf +\fBlong double\fR \fBexpm1l\fR(\fBlong double\fR \fIx\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions compute \fIe\fR^\fIx\fR\(mi1.0. +.SH RETURN VALUES +.sp +.LP +Upon successful completion, these functions return \fIe\fR^\fIx\fR\(mi1.0. +.sp +.LP +If \fIx\fR is NaN, a NaN is returned. +.sp +.LP +If \fIx\fR is \(+-0, \(+-0 is returned. +.sp +.LP +If \fIx\fR is \(miInf, \(mi1 is returned. +.sp +.LP +If \fIx\fR is +Inf, \fIx\fR is returned. +.SH ERRORS +.sp +.LP +These functions will fail if: +.sp +.ne 2 +.mk +.na +\fBRange Error\fR +.ad +.RS 15n +.rt +The result overflows. +.sp +If the integer expression (\fBmath_errhandling\fR & \fBMATH_ERREXCEPT\fR) is +non-zero, the overflow floating-point exception is raised. 
+.RE + +.SH USAGE +.sp +.LP +The value of \fBexpm1(\fR\fIx\fR\fB)\fR can be more accurate than +\fBexp(\fIx\fR)\fR\(mi1.0 for small values of \fIx\fR. +.sp +.LP +The \fBexpm1()\fR and \fBlog1p\fR(3M) functions are useful for financial +calculations of ((1+\fIx\fR)^\fIn\fR\(mi1)/\fIx\fR, namely: +.sp +.in +2 +.nf +\fBexpm1(\fIn\fR * log1p(\fIx\fR))\|/\|\fR\fIx\fR +.fi +.in -2 + +.sp +.LP +when \fIx\fR is very small (for example, when performing calculations with a +small daily interest rate). These functions also simplify writing accurate +inverse hyperbolic functions. +.sp +.LP +An application wanting to check for exceptions should call +\fBfeclearexcept\fR(\fBFE_ALL_EXCEPT\fR) before calling these functions. On +return, if \fBfetestexcept\fR(\fBFE_INVALID\fR | \fBFE_DIVBYZERO\fR | +\fBFE_OVERFLOW\fR | \fBFE_UNDERFLOW\fR) is non-zero, an exception has been +raised. An application should either examine the return value or check the +floating point exception flags to detect exceptions. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBexp\fR(3M), \fBfeclearexcept\fR(3M), \fBfetestexcept\fR(3M), +\fBilogb\fR(3M), \fBlog1p\fR(3M), \fBmath.h\fR(3HEAD), \fBattributes\fR(5), +\fBstandards\fR(5) diff --git a/usr/src/man/man3m/fabs.3m b/usr/src/man/man3m/fabs.3m new file mode 100644 index 0000000000..79de9d06f8 --- /dev/null +++ b/usr/src/man/man3m/fabs.3m @@ -0,0 +1,78 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Copyright (c) 1992, X/Open Company Limited. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. 
gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH fabs 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +fabs, fabsf, fabsl \- absolute value function +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... ] +#include <math.h> + +\fBdouble\fR \fBfabs\fR(\fBdouble\fR \fIx\fR); +.fi + +.LP +.nf +\fBfloat\fR \fBfabsf\fR(\fBfloat\fR \fIx\fR); +.fi + +.LP +.nf +\fBlong double\fR \fBfabsl\fR(\fBlong double\fR \fIx\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions compute the absolute value of \fIx\fR, |\fIx\fR|. +.SH RETURN VALUES +.sp +.LP +Upon successful completion, these functions return the absolute value of +\fIx\fR. +.sp +.LP +If \fIx\fR is NaN, a NaN is returned. +.sp +.LP +If \fIx\fR is \(+-0, +0 is returned. +.sp +.LP +If \fIx\fR is \(+-Inf, +Inf is returned. +.SH ERRORS +.sp +.LP +No errors are defined. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBisnan\fR(3M), \fBmath.h\fR(3HEAD), \fBattributes\fR(5), \fBstandards\fR(5) diff --git a/usr/src/man/man3m/fdim.3m b/usr/src/man/man3m/fdim.3m new file mode 100644 index 0000000000..c2fd56b999 --- /dev/null +++ b/usr/src/man/man3m/fdim.3m @@ -0,0 +1,103 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. 
Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH fdim 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +fdim, fdimf, fdiml \- compute positive difference between two floating-point +numbers +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... ] +#include <math.h> + +\fBdouble\fR \fBfdim\fR(\fBdouble\fR \fIx\fR, \fBdouble\fR \fIy\fR); +.fi + +.LP +.nf +\fBfloat\fR \fBfdimf\fR(\fBfloat\fR \fIx\fR, \fBfloat\fR \fIy\fR); +.fi + +.LP +.nf +\fBlong double\fR \fBfdiml\fR(\fBlong double\fR \fIx\fR, \fBlong double\fR \fIy\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions determine the positive difference between their arguments. If +\fIx\fR is greater than \fIy\fR, \fIx\fR\(mi\fIy\fR is returned. If \fIx\fR is +less than or equal to \fIy\fR, +0 is returned. +.SH RETURN VALUES +.sp +.LP +Upon successful completion, these functions return the positive difference +value. +.sp +.LP +If \fIx\fR\(mi\fIy\fR is positive and overflows, a range error occurs and +\fBfdim()\fR, \fBfdimf()\fR, and \fBfdiml()\fR return the value of the macro +\fBHUGE_VAL\fR, \fBHUGE_VALF\fR, and \fBHUGE_VALL\fR, respectively. +.sp +.LP +If \fIx\fR or \fIy\fR is NaN, a NaN is returned. +.SH ERRORS +.sp +.LP +These functions will fail if: +.sp +.ne 2 +.mk +.na +\fBRange Error\fR +.ad +.RS 15n +.rt +The result overflows. +.sp +If the integer expression (\fBmath_errhandling\fR & \fBMATH_ERREXCEPT\fR) is +non-zero, the overflow floating-point exception will be raised. +.RE + +.SH USAGE +.sp +.LP +An application wanting to check for exceptions should call +\fBfeclearexcept\fR(\fBFE_ALL_EXCEPT\fR) before calling these functions. On +return, if \fBfetestexcept\fR(\fBFE_INVALID\fR | \fBFE_DIVBYZERO\fR | +\fBFE_OVERFLOW\fR | \fBFE_UNDERFLOW\fR) is non-zero, an exception has been +raised.
An application should either examine the return value or check the +floating point exception flags to detect exceptions. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBfeclearexcept\fR(3M), \fBfetestexcept\fR(3M), \fBfmax\fR(3M), +\fBfmin\fR(3M), \fBmath.h\fR(3HEAD), \fBattributes\fR(5), \fBstandards\fR(5) diff --git a/usr/src/man/man3m/feclearexcept.3m b/usr/src/man/man3m/feclearexcept.3m new file mode 100644 index 0000000000..542bc4213b --- /dev/null +++ b/usr/src/man/man3m/feclearexcept.3m @@ -0,0 +1,62 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. 
In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH feclearexcept 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +feclearexcept \- clear floating-point exception +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... ] +#include <fenv.h> + +\fBint\fR \fBfeclearexcept\fR(\fBint\fR \fIexcepts\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +The \fBfeclearexcept()\fR function attempts to clear the supported +floating-point exceptions represented by \fIexcepts\fR. +.SH RETURN VALUES +.sp +.LP +If \fIexcepts\fR is 0 or if all the specified exceptions were successfully +cleared, \fBfeclearexcept()\fR returns 0. Otherwise, it returns a non-zero +value. +.SH ERRORS +.sp +.LP +No errors are defined. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. 
+ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBfenv.h\fR(3HEAD), \fBfegetexceptflag\fR(3M), \fBferaiseexcept\fR(3M), +\fBfesetexceptflag\fR(3M), \fBfetestexcept\fR(3M), \fBattributes\fR(5), +\fBstandards\fR(5) diff --git a/usr/src/man/man3m/fegetenv.3m b/usr/src/man/man3m/fegetenv.3m new file mode 100644 index 0000000000..69739db5a1 --- /dev/null +++ b/usr/src/man/man3m/fegetenv.3m @@ -0,0 +1,97 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. 
+.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH fegetenv 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +fegetenv, fesetenv \- get and set current floating-point environment +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... ] +#include <fenv.h> + +\fBint\fR \fBfegetenv\fR(\fBfenv_t *\fR\fIenvp\fR); +.fi + +.LP +.nf +\fBint\fR \fBfesetenv\fR(\fBconst fenv_t *\fR\fIenvp\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +The \fBfegetenv()\fR function attempts to store the current floating-point +environment in the object pointed to by \fIenvp\fR. +.sp +.LP +The \fBfesetenv()\fR function attempts to establish the floating-point +environment represented by the object pointed to by \fIenvp\fR. The \fIenvp\fR +argument points to an object set by a call to \fBfegetenv()\fR or +\fBfeholdexcept\fR(3M), or equals a floating-point environment macro. The +\fBfesetenv()\fR function does not raise floating-point exceptions, but only +installs the state of the floating-point status flags represented through its +argument. +.SH RETURN VALUES +.sp +.LP +If the representation was successfully stored, fegetenv returns 0. Otherwise, +it returns a non-zero value. +.sp +.LP +If the environment was successfully established, fesetenv returns 0. 
Otherwise, +it returns a non-zero value. +.SH ERRORS +.sp +.LP +No errors are defined. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBfeholdexcept\fR(3M), \fBfenv.h\fR(3HEAD), \fBfeupdateenv\fR(3M), +\fBattributes\fR(5), \fBstandards\fR(5) +.SH NOTES +.sp +.LP +In a multithreaded program, the \fBfegetenv()\fR and \fBfesetenv()\fR functions +affect the floating point environment only for the calling thread. +.sp +.LP +These functions automatically install and deinstall \fBSIGFPE\fR handlers and +set and clear the trap enable mode bits in the floating point status register +as needed. If a program uses these functions and attempts to install a +\fBSIGFPE\fR handler or control the trap enable mode bits independently, the +resulting behavior is not defined. +.sp +.LP +As described in \fBfex_set_handling\fR(3M)\fB\fR, when a handling function +installed in \fBFEX_CUSTOM\fR mode is invoked, all exception traps are disabled +(and will not be reenabled while \fBSIGFPE\fR is blocked). Thus, attempting to +change the environment from within a handler by calling \fBfesetenv\fR or +\fBfeupdateenv\fR(3M) might not produce the expected results. diff --git a/usr/src/man/man3m/fegetexceptflag.3m b/usr/src/man/man3m/fegetexceptflag.3m new file mode 100644 index 0000000000..67539dd0c1 --- /dev/null +++ b/usr/src/man/man3m/fegetexceptflag.3m @@ -0,0 +1,80 @@ +'\" te +.\" Copyright (c) 2001, The IEEE and The Open Group. All Rights Reserved. Portions Copyright (c) 2002, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation.
Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH fegetexceptflag 3M "1 Sep 2002" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +fegetexceptflag, fesetexceptflag \- get and set floating-point status flags +.SH SYNOPSIS +.LP +.nf +cc [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... ] +#include <fenv.h> + +\fBint\fR \fBfegetexceptflag\fR(\fBfexcept_t *\fR\fIflagp\fR, \fBint\fR \fIexcepts\fR); +.fi + +.LP +.nf +\fBint\fR \fBfesetexceptflag\fR(\fBconst fexcept_t *\fR\fIflagp\fR, \fBint\fR \fIexcepts\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +The \fBfegetexceptflag()\fR function attempts to store an +implementation-defined representation of the states of the floating-point +status flags indicated by the \fIexcepts\fR argument in the object pointed to +by the \fIflagp\fR argument. +.sp +.LP +The \fBfesetexceptflag()\fR function attempts to set the floating-point status +flags indicated by the \fIexcepts\fR argument to the states stored in the +object pointed to by \fIflagp\fR. The value pointed to by \fIflagp\fR will have +been set by a previous call to \fBfegetexceptflag()\fR whose second argument +represented at least those floating-point exceptions represented by the +\fIexcepts\fR argument. This function does not raise floating-point exceptions +but only sets the state of the flags. +.SH RETURN VALUES +.sp +.LP +If the representation was successfully stored, \fBfegetexceptflag()\fR returns +0. Otherwise, it returns a non-zero value. +.sp +.LP +If the excepts argument is 0 or if all the specified exceptions were +successfully set, \fBfesetexceptflag()\fR returns 0. Otherwise, it returns a +non-zero value. +.SH ERRORS +.sp +.LP +No errors are defined. 
+.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBfenv.h\fR(3HEAD), \fBfeclearexcept\fR(3M), \fBferaiseexcept\fR(3M), +\fBfesetexceptflag\fR(3M), \fBattributes\fR(5), \fBstandards\fR(5) diff --git a/usr/src/man/man3m/fegetround.3m b/usr/src/man/man3m/fegetround.3m new file mode 100644 index 0000000000..cebfd292a5 --- /dev/null +++ b/usr/src/man/man3m/fegetround.3m @@ -0,0 +1,101 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. 
+.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH fegetround 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +fegetround, fesetround \- get and set current rounding direction +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... ] +#include <fenv.h> + +\fBint\fR \fBfegetround\fR(\fBvoid\fR); +.fi + +.LP +.nf +\fBint\fR \fBfesetround\fR(\fBint\fR \fIround\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +The fegetround function gets the current rounding direction. +.sp +.LP +The fesetround function establishes the rounding direction represented by its +argument round. If the argument is not equal to the value of a rounding +direction macro, the rounding direction is not changed. +.SH RETURN VALUES +.sp +.LP +The fegetround function returns the value of the rounding direction macro +representing the current rounding direction, or a negative value if there is no +such rounding direction macro or the current rounding direction is not +determinable. +.sp +.LP +The fesetround function returns a 0 value if and only if the requested rounding +direction was established. +.SH ERRORS +.sp +.LP +No errors are defined. 
+.SH EXAMPLES +.sp +.LP +The following example saves, sets, and restores the rounding direction, +reporting an error and aborting if setting the rounding direction fails: +.LP +\fBExample 1 \fRSave, set, and restore the rounding direction. +.sp +.in +2 +.nf +#include <fenv.h> +#include <assert.h> +void f(int round_dir) +{ + #pragma STDC FENV_ACCESS ON + int save_round; + int setround_ok; + save_round = fegetround(); + setround_ok = fesetround(round_dir); + assert(setround_ok == 0); + /* ... */ + fesetround(save_round); + /* ... */ +} +.fi +.in -2 + +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBfenv.h\fR(3HEAD), \fBattributes\fR(5), \fBstandards\fR(5) diff --git a/usr/src/man/man3m/feholdexcept.3m b/usr/src/man/man3m/feholdexcept.3m new file mode 100644 index 0000000000..6da946efc9 --- /dev/null +++ b/usr/src/man/man3m/feholdexcept.3m @@ -0,0 +1,74 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. 
Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH feholdexcept 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +feholdexcept \- save current floating-point environment +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... 
] +#include <fenv.h> + +\fBint\fR \fBfeholdexcept\fR(\fBfenv_t *\fR\fIenvp\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +The \fBfeholdexcept()\fR function saves the current floating-point environment +in the object pointed to by \fIenvp\fR, clears the floating-point status flags, +and then installs a non-stop (continue on floating-point exceptions) mode, if +available, for all floating-point exceptions. +.SH RETURN VALUES +.sp +.LP +The \fBfeholdexcept()\fR function returns 0 if and only if non-stop +floating-point exception handling was successfully installed. +.SH ERRORS +.sp +.LP +No errors are defined. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBfegetenv\fR(3M), \fBfenv.h\fR(3HEAD), \fBfeupdateenv\fR(3M), +\fBattributes\fR(5), \fBstandards\fR(5) +.SH NOTES +.sp +.LP +In a multithreaded program, the \fBfeholdexcept()\fR function affects the +floating point environment only for the calling thread. +.sp +.LP +The \fBfeholdexcept()\fR function automatically installs and deinstalls +\fBSIGFPE\fR handlers and sets and clears the trap enable mode bits in the +floating point status register as needed. If a program uses these functions and +attempts to install a \fBSIGFPE\fR handler or control the trap enable mode bits +independently, the resulting behavior is not defined. diff --git a/usr/src/man/man3m/feraiseexcept.3m b/usr/src/man/man3m/feraiseexcept.3m new file mode 100644 index 0000000000..faa4c22a24 --- /dev/null +++ b/usr/src/man/man3m/feraiseexcept.3m @@ -0,0 +1,68 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. 
gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH feraiseexcept 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +feraiseexcept \- raise floating-point exception +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... ] +#include <fenv.h> + +\fBint\fR \fBferaiseexcept\fR(\fBint\fR \fIexcepts\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +The \fBferaiseexcept()\fR function attempts to raise the supported +floating-point exceptions represented by the \fIexcepts\fR argument. The order +in which these floating-point exceptions are raised is unspecified. +.SH RETURN VALUES +.sp +.LP +If \fIexcepts\fR is 0 or if all the specified exceptions were successfully +raised, \fBferaiseexcept()\fR returns 0. Otherwise, it returns a non-zero +value. +.SH ERRORS +.sp +.LP +No errors are defined. +.SH USAGE +.sp +.LP +The effect is intended to be similar to that of floating-point exceptions +raised by arithmetic operations. Hence, enabled traps for floating-point +exceptions raised by this function are taken. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBfeclearexcept\fR(3M), \fBfegetexceptflag\fR(3M), \fBfenv.h\fR(3HEAD), +\fBfetestexcept\fR(3M), \fBattributes\fR(5), \fBstandards\fR(5) diff --git a/usr/src/man/man3m/fesetprec.3m b/usr/src/man/man3m/fesetprec.3m new file mode 100644 index 0000000000..41d61fb211 --- /dev/null +++ b/usr/src/man/man3m/fesetprec.3m @@ -0,0 +1,110 @@ +'\" te +.\" Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. 
+.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH fesetprec 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +fesetprec, fegetprec \- control floating point rounding precision modes +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... -lm [ \fIlibrary\fR... ] +#include <fenv.h> + +\fBint\fR \fBfesetprec\fR(\fBint\fR \fIprec\fR); +.fi + +.LP +.nf +\fBint\fR \fBfegetprec\fR(\fBvoid\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +The IEEE 754 standard defines rounding precision modes for systems that always +deliver intermediate results to destinations in extended double precision +format. These modes allow such systems to deliver correctly rounded single and +double precision results (in the absence of underflow and overflow) with only +one rounding. 
+.sp +.LP +The \fBfesetprec()\fR function sets the current rounding precision to the +precision specified by \fIprec\fR, which must be one of the following values +defined in <\fBfenv.h\fR>: +.sp +.ne 2 +.mk +.na +\fB\fBFE_FLTPREC\fR\fR +.ad +.RS 15n +.rt +round to single precision +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBFE_DBLPREC\fR\fR +.ad +.RS 15n +.rt +round to double precision +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBFE_LDBLPREC\fR\fR +.ad +.RS 15n +.rt +round to extended double precision +.RE + +.sp +.LP +The default rounding precision when a program starts is \fBFE_LDBLPREC\fR. +.sp +.LP +The \fBfegetprec()\fR function returns the current rounding precision. +.SH RETURN VALUES +.sp +.LP +The \fBfesetprec()\fR function returns a non-zero value if the requested +rounding precision is established and 0 otherwise. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +lw(2.75i) lw(2.75i) +lw(2.75i) lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +ArchitectureIntel (see below) +AvailabilitySUNWlibms +Interface StabilityStable +MT-LevelMT-Safe +.TE + +.sp +.LP +These functions are not available on SPARC systems because SPARC processors +deliver intermediate results to destinations in single or double format as +determined by each floating point instruction. +.SH SEE ALSO +.sp +.LP +\fBfegetenv\fR(3M), \fBfesetround\fR(3M), \fBattributes\fR(5) +.sp +.LP +\fINumerical\fR \fIComputation\fR \fIGuide\fR diff --git a/usr/src/man/man3m/fetestexcept.3m b/usr/src/man/man3m/fetestexcept.3m new file mode 100644 index 0000000000..ada49b7fb1 --- /dev/null +++ b/usr/src/man/man3m/fetestexcept.3m @@ -0,0 +1,88 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. 
gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH fetestexcept 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +fetestexcept \- test floating-point exception flags +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... ] +#include <fenv.h> + +\fBint\fR \fBfetestexcept\fR(\fBint\fR \fIexcepts\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +The \fBfetestexcept()\fR function determines which of a specified subset of the +floating-point exception flags are currently set. The \fIexcepts\fR argument +specifies the floating-point status flags to be queried. +.SH RETURN VALUES +.sp +.LP +The \fBfetestexcept()\fR function returns the value of the bitwise-inclusive OR +of the floating-point exception macros corresponding to the currently set +floating-point exceptions included in \fIexcepts\fR. +.SH ERRORS +.sp +.LP +No errors are defined. +.SH EXAMPLES +.LP +\fBExample 1 \fRExample using \fBfetestexcept()\fR +.sp +.LP +The following example calls function \fIf\fR( ) if an invalid exception is set, +and then function \fIg\fR( ) if an overflow exception is set: + +.sp +.in +2 +.nf +#include <fenv.h> +/* ... */ +{ +# pragma STDC FENV_ACCESS ON + int set_excepts; + feclearexcept(FE_INVALID | FE_OVERFLOW); + // maybe raise exceptions + set_excepts = fetestexcept(FE_INVALID | FE_OVERFLOW); + if (set_excepts & FE_INVALID) f(); + if (set_excepts & FE_OVERFLOW) g(); + /* ... */ +} +.fi +.in -2 + +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. 
+ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBfeclearexcept\fR(3M), \fBfegetexceptflag\fR(3M), \fBfenv.h\fR(3HEAD), +\fBattributes\fR(5), \fBstandards\fR(5) diff --git a/usr/src/man/man3m/feupdateenv.3m b/usr/src/man/man3m/feupdateenv.3m new file mode 100644 index 0000000000..4fec2126e8 --- /dev/null +++ b/usr/src/man/man3m/feupdateenv.3m @@ -0,0 +1,116 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). 
You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH feupdateenv 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +feupdateenv \- update floating-point environment +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... ] +#include <fenv.h> + +\fBint\fR \fBfeupdateenv\fR(\fBconst fenv_t *\fR\fIenvp\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +The \fBfeupdateenv()\fR function attempts to save the currently raised +floating-point exceptions in its automatic storage, attempts to install the +floating-point environment represented by the object pointed to by \fIenvp\fR, +and then attempts to raise the saved floating-point exceptions. The \fIenvp\fR +argument points to an object set by a call to \fBfegetenv\fR(3M) or +\fBfeholdexcept\fR(3M), or equals a floating-point environment macro. +.SH RETURN VALUES +.sp +.LP +The \fBfeupdateenv()\fR function returns 0 if and only if all the required +actions were successfully carried out. +.SH ERRORS +.sp +.LP +No errors are defined. +.SH EXAMPLES +.sp +.LP +The following example demonstrates sample code to hide spurious underflow +floating-point exceptions: +.LP +\fBExample 1 \fRHide spurious underflow floating-point exceptions. 
+.sp +.in +2 +.nf +#include <fenv.h> +double f(double x) +{ +# pragma STDC FENV_ACCESS ON + double result; + fenv_t save_env; + feholdexcept(&save_env); + // compute result + if (/* test spurious underflow */) + feclearexcept(FE_UNDERFLOW); + feupdateenv(&save_env); + return result; +} +.fi +.in -2 + +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBfegetenv\fR(3M), \fBfeholdexcept\fR(3M), \fBfenv.h\fR(3HEAD), +\fBattributes\fR(5), \fBstandards\fR(5) +.SH NOTES +.sp +.LP +In a multithreaded program, the \fBfeupdateenv()\fR function affects the +floating point environment only for the calling thread. +.sp +.LP +When the \fBFEX_CUSTOM\fR handling mode is in effect for an exception, raising +that exception using \fBfeupdateenv()\fR causes the handling function to be +invoked. The handling function can then modify the exception flags to be set as +described in \fBfex_set_handling\fR(3M). Any result value the handler supplies +will be ignored. +.sp +.LP +The \fBfeupdateenv()\fR function automatically installs and deinstalls +\fBSIGFPE\fR handlers and sets and clears the trap enable mode bits in the +floating point status register as needed. If a program uses these functions and +attempts to install a \fBSIGFPE\fR handler or control the trap enable mode bits +independently, the resulting behavior is not defined. +.sp +.LP +As described in \fBfex_set_handling\fR(3M), when a handling function installed +in \fBFEX_CUSTOM\fR mode is invoked, all exception traps are disabled (and will +not be reenabled while \fBSIGFPE\fR is blocked). Thus, attempting to change the +environment from within a handler by calling \fBfesetenv\fR(3M) or +\fBfeupdateenv\fR might not produce the expected results. 
diff --git a/usr/src/man/man3m/fex_merge_flags.3m b/usr/src/man/man3m/fex_merge_flags.3m new file mode 100644 index 0000000000..607b0cccdc --- /dev/null +++ b/usr/src/man/man3m/fex_merge_flags.3m @@ -0,0 +1,69 @@ +'\" te +.\" Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH fex_merge_flags 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +fex_merge_flags \- manage the floating point environment +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... -lm [ \fIlibrary\fR... ] +#include <fenv.h> + +\fBvoid\fR \fBfex_merge_flags\fR(\fBconst fenv_t *\fR\fIenvp\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +The \fBfex_merge_flags()\fR function copies into the current environment those +exception flags that are set in the environment represented by the object +pointed to by \fIenvp\fR. The argument \fIenvp\fR must point to an object set +by a call to \fBfeholdexcept\fR(3M) or \fBfegetenv\fR(3M) or equal to the macro +\fBFE_DFL_ENV\fR. The \fBfex_merge_flags()\fR function does not raise any +exceptions, but only sets its flags. +.SH RETURN VALUES +.sp +.LP +The \fBfex_merge_flags\fR function does not return a value. 
+.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab(:) box; +lw(2.75i) lw(2.75i) +lw(2.75i) lw(2.75i) +. +ATTRIBUTE TYPE:ATTRIBUTE VALUE +Availability:SUNWlibms, SUNWlmxs +Interface Stability:Stable +MT-Level:MT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBfeclearexcept\fR(3M), \fBfegetenv\fR(3M), \fBfesetround\fR(3M), +\fBfesetprec\fR(3M), \fBfex_set_handling\fR(3M), \fBfex_set_log\fR(3M), +\fBattributes\fR(5) +.sp +.LP +\fINumerical Computation Guide\fR +.SH NOTES +.sp +.LP +In a multithreaded program, the \fBfex_merge_flags()\fR function affects the +floating point environment only for the calling thread. +.sp +.LP +The \fBfex_merge_flags()\fR function automatically installs and deinstalls +\fBSIGFPE\fR handlers and sets and clears the trap enable mode bits in the +floating point status register as needed. If a program uses these functions +and attempts to install a \fBSIGFPE\fR handler or control the trap enable mode +bits independently, the resulting behavior is not defined. diff --git a/usr/src/man/man3m/fex_set_handling.3m b/usr/src/man/man3m/fex_set_handling.3m new file mode 100644 index 0000000000..72d6d06ceb --- /dev/null +++ b/usr/src/man/man3m/fex_set_handling.3m @@ -0,0 +1,473 @@ +'\" te +.\" Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH fex_set_handling 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +fex_set_handling, fex_get_handling, fex_getexcepthandler, fex_setexcepthandler +\- control floating point exception handling modes +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... -lm [ \fIlibrary\fR... ] +#include <fenv.h> + +\fBint\fR \fBfex_set_handling\fR(\fBint\fR \fIex\fR, \fBint\fR \fImode\fR, \fBvoid (*\fR\fIhandler\fR\fB)()\fR); +.fi + +.LP +.nf +\fBint\fR \fBfex_get_handling\fR(\fBint\fR \fIex\fR); +.fi + +.LP +.nf +\fBvoid\fR \fBfex_getexcepthandler\fR(\fBfex_handler_t *\fR\fIbuf\fR, \fBint\fR \fIex\fR); +.fi + +.LP +.nf +\fBvoid\fR \fBfex_setexcepthandler\fR(\fBconst fex_handler_t *\fR\fIbuf\fR, \fBint\fR \fIex\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions provide control of floating point exception handling modes. 
For +each function, the \fIex\fR argument specifies one or more exceptions indicated +by a bitwise-OR of any of the following values defined in <\fBfenv.h\fR>: +.sp +.ne 2 +.mk +.na +\fB\fBFEX_INEXACT\fR\fR +.ad +.RS 17n +.rt + +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBFEX_UNDERFLOW\fR\fR +.ad +.RS 17n +.rt + +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBFEX_OVERFLOW\fR\fR +.ad +.RS 17n +.rt + +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBFEX_DIVBYZERO\fR\fR +.ad +.RS 17n +.rt +division by zero +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBFEX_INV_ZDZ\fR\fR +.ad +.RS 17n +.rt +0/0 invalid operation +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBFEX_INV_IDI\fR\fR +.ad +.RS 17n +.rt +infinity/infinity invalid operation +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBFEX_INV_ISI\fR\fR +.ad +.RS 17n +.rt +infinity-infinity invalid operation +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBFEX_INV_ZMI\fR\fR +.ad +.RS 17n +.rt +0*infinity invalid operation +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBFEX_INV_SQRT\fR\fR +.ad +.RS 17n +.rt +square root of negative operand +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBFEX_INV_SNAN\fR\fR +.ad +.RS 17n +.rt +signaling NaN +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBFEX_INV_INT\fR\fR +.ad +.RS 17n +.rt +invalid integer conversion +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBFEX_INV_CMP\fR\fR +.ad +.RS 17n +.rt +invalid comparison +.RE + +.sp +.LP +For convenience, the following combinations of values are also defined: +.sp +.ne 2 +.mk +.na +\fB\fBFEX_NONE\fR\fR +.ad +.RS 15n +.rt +no exceptions +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBFEX_INVALID\fR\fR +.ad +.RS 15n +.rt +all invalid operation exceptions +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBFEX_COMMON\fR\fR +.ad +.RS 15n +.rt +overflow, division by zero, and invalid operation +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBFEX_ALL\fR\fR +.ad +.RS 15n +.rt +all exceptions +.RE + +.sp +.LP +The \fBfex_set_handling()\fR function establishes the specified \fImode\fR for +handling the floating point exceptions identified by \fIex\fR. 
The selected +\fImode\fR determines the action to be taken when one of the indicated +exceptions occurs. It must be one of the following values: +.sp +.ne 2 +.mk +.na +\fB\fBFEX_NOHANDLER\fR\fR +.ad +.RS 17n +.rt +Trap but do not otherwise handle the exception, evoking instead whatever +ambient behavior would normally be in effect. This is the default behavior +when the exception's trap is enabled. The \fIhandler\fR parameter is ignored. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBFEX_NONSTOP\fR\fR +.ad +.RS 17n +.rt +Provide the IEEE 754 default result for the operation that caused the +exception, set the exception's flag, and continue execution. This is the +default behavior when the exception's trap is disabled. The \fIhandler\fR +parameter is ignored. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBFEX_ABORT\fR\fR +.ad +.RS 17n +.rt +Call \fBabort\fR(3C). The \fIhandler\fR parameter is ignored. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBFEX_SIGNAL\fR\fR +.ad +.RS 17n +.rt +Invoke the function *\fIhandler\fR with the parameters normally supplied to a +signal handler installed with \fBsigfpe\fR(3C). +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBFEX_CUSTOM\fR\fR +.ad +.RS 17n +.rt +Invoke the function *\fIhandler\fR as described in the next paragraph. +.RE + +.sp +.LP +In \fBFEX_CUSTOM\fR mode, when a floating point exception occurs, the handler +function is invoked as though its prototype were: +.sp +.in +2 +.nf +#include <fenv.h> +void handler(int ex, fex_info_t *info); +.fi +.in -2 + +.sp +.LP +On entry, \fIex\fR is the value (of the first twelve listed above) +corresponding to the exception that occurred, \fBinfo->op\fR indicates the +operation that caused the exception, \fBinfo->op1\fR and \fBinfo->op2\fR +contain the values of the operands, \fBinfo->res\fR contains the default +untrapped result value, and \fBinfo->flags\fR reflects the exception flags that +the operation would have set had it not been trapped. 
If the handler returns, +the value contained in \fBinfo->res\fR on exit is substituted for the result of +the operation, the flags indicated by \fBinfo->flags\fR are set, and execution +resumes at the point where the exception occurred. The handler might modify +\fBinfo->res\fR and \fBinfo->flags\fR to supply any desired result value and +flags. Alternatively, if the exception is underflow or overflow, the handler +might set +.sp +.LP +info->res.type = fex_nodata; +.sp +.LP +which causes the exponent-adjusted result specified by IEEE 754 to be +substituted. If the handler does not modify \fBinfo->res\fR or +\fBinfo->flags\fR, the effect is the same as if the exception had not been +trapped. +.sp +.LP +Although the default untrapped result of an exceptional operation is always +available to a \fBFEX_CUSTOM\fR handler, in some cases, one or both operands +may not be. In these cases, the handler may be invoked with \fBinfo->op1.type +== fex_nodata\fR or \fBinfo->op2.type == fex_nodata\fR to indicate that the +respective data structures do not contain valid data. (For example, +\fBinfo->op2.type == fex_nodata\fR if the exceptional operation is a unary +operation.) Before accessing the operand values, a custom handler should +always examine the \fBtype\fR field of the operand data structures to ensure +that they contain valid data in the appropriate format. +.sp +.LP +The \fBfex_get_handling()\fR function returns the current handling mode for the +exception specified by \fIex\fR, which must be one of the first twelve +exceptions listed above. +.sp +.LP +The \fBfex_getexcepthandler()\fR function saves the current handling modes and +associated data for the exceptions specified by \fIex\fR in the data structure +pointed to by \fIbuf\fR. The type \fBfex_handler_t\fR is defined in +<\fBfenv.h\fR>. 
+.sp +.LP +The \fBfex_setexcepthandler()\fR function restores the handling modes and +associated data for the exceptions specified by \fIex\fR from the data +structure pointed to by \fIbuf\fR. This data structure must have been set by a +previous call to \fBfex_getexcepthandler()\fR. Otherwise the effect on the +indicated modes is undefined. +.SH RETURN VALUES +.sp +.LP +The \fBfex_set_handling()\fR function returns a non-zero value if the requested +exception handling mode is established. Otherwise, it returns 0. +.SH EXAMPLES +.sp +.LP +The following example demonstrates how to substitute a predetermined value for +the result of a 0/0 invalid operation. +.sp +.in +2 +.nf +#include <math.h> +#include <fenv.h> + +double k; + +void presub(int ex, fex_info_t *info) { + info->res.type = fex_double; + info->res.val.d = k; +} + +int main() { + double x, w; + int i; + fex_handler_t buf; +/* + * save current 0/0 handler + */ + (void) fex_getexcepthandler(&buf, FEX_INV_ZDZ); +/* + * set up presubstitution handler for 0/0 + */ + (void) fex_set_handling(FEX_INV_ZDZ, FEX_CUSTOM, presub); +/* + * compute (k*x)/sin(x) for k=2.0, x=0.5, 0.4, ..., 0.1, 0.0 + */ + k = 2.0; + (void) printf("Evaluating f(x) = (k*x)/sin(x)\en\en"); + for (i = 5; i >= 0; i--) { + x = (double) i * 0.1; + w = (k * x) / sin(x); + (void) printf("\etx=%3.3f\et f(x) = % 1.20e\en", x, w); + } +/* + * restore old 0/0 handler + */ + (void) fex_setexcepthandler(&buf, FEX_INV_ZDZ); + return 0; +} +.fi +.in -2 + +.sp +.LP +The output from the preceding program reads: +.sp +.in +2 +.nf +Evaluating f(x) = (k*x)/sin(x) + + x=0.500 f(x) = 2.08582964293348816000e+00 + x=0.400 f(x) = 2.05434596443822626000e+00 + x=0.300 f(x) = 2.03031801709447368000e+00 + x=0.200 f(x) = 2.01339581906893761000e+00 + x=0.100 f(x) = 2.00333722632695554000e+00 + x=0.000 f(x) = 2.00000000000000000000e+00 +.fi +.in -2 + +.sp +.LP +When \fIx\fR = 0, \fIf(x)\fR is computed as 0/0 and an invalid operation +exception occurs. 
In this example, the value 2.0 is substituted for the +result. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +lw(2.75i) lw(2.75i) +lw(2.75i) lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +AvailabilitySUNWlibms, SUNWlmxs +Interface StabilityStable +MT-LevelMT-Safe (see Notes) +.TE + +.SH SEE ALSO +.sp +.LP +\fBsigfpe\fR(3C), \fBfeclearexcept\fR(3M), \fBfegetenv\fR(3M), +\fBfex_set_log\fR(3M), \fBattributes\fR(5) +.sp +.LP +\fINumerical Computation Guide\fR +.SH NOTES +.sp +.LP +In a multithreaded application, the preceding functions affect exception +handling modes only for the calling thread. +.sp +.LP +The functions described on this page automatically install and deinstall +\fBSIGFPE\fR handlers and set and clear the trap enable mode bits in the +floating point status register as needed. If a program uses these functions +and attempts to install a \fBSIGFPE\fR handler or control the trap enable mode +bits independently, the resulting behavior is not defined. +.sp +.LP +All traps are disabled before a handler installed in \fBFEX_CUSTOM\fR mode is +invoked. When the \fBSIGFPE\fR signal is blocked, as it is when such a handler +is invoked, the floating point environment, exception flags, and retrospective +diagnostic functions described in \fBfeclearexcept\fR(3M), \fBfegetenv\fR(3M), +and \fBfex_set_log\fR(3M) do not re-enable traps. Thus, the handler itself +always runs in \fBFEX_NONSTOP\fR mode with logging of retrospective diagnostics +disabled. Attempting to change these modes within the handler may not produce +the expected results. diff --git a/usr/src/man/man3m/fex_set_log.3m b/usr/src/man/man3m/fex_set_log.3m new file mode 100644 index 0000000000..051b0e5021 --- /dev/null +++ b/usr/src/man/man3m/fex_set_log.3m @@ -0,0 +1,209 @@ +'\" te +.\" Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. 
+.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH fex_set_log 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +fex_set_log, fex_get_log, fex_set_log_depth, fex_get_log_depth, fex_log_entry +\- log retrospective diagnostics for floating point exceptions +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... -lm [ \fIlibrary\fR... ] +#include <fenv.h> + +\fBint\fR \fBfex_set_log\fR(\fBFILE *\fR\fIfp\fR); +.fi + +.LP +.nf +\fBFILE *\fR\fBfex_get_log\fR(\fBvoid\fR); +.fi + +.LP +.nf +\fBint\fR \fBfex_set_log_depth\fR(\fBint\fR \fIdepth\fR); +.fi + +.LP +.nf +\fBint\fR \fBfex_get_log_depth\fR(\fBvoid\fR); +.fi + +.LP +.nf +\fBvoid\fR \fBfex_log_entry\fR(\fBconst char *\fR\fImsg\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +The \fBfex_set_log()\fR function enables logging of retrospective diagnostic +messages regarding floating point exceptions to the file specified by \fIfp\fR. +If \fIfp\fR is \fINULL\fR, logging is disabled. When a program starts, logging +is initially disabled. +.sp +.LP +The occurrence of any of the twelve exceptions listed in +\fBfex_set_handling\fR(3M) constitutes an event that can be logged. To prevent +the log from becoming exorbitantly long, the logging mechanism eliminates +redundant entries by two methods. 
First, each exception is associated with a +\fIsite\fR in the program. The site is identified by the address of the +instruction that caused the exception together with a stack trace. Only the +first exception of a given type to occur at a given site will be logged. +Second, when \fBFEX_NONSTOP\fR handling mode is in effect for some exception, +only those occurrences of that exception that set its previously clear flag are +logged. Clearing a flag using \fBfeclearexcept()\fR allows the next occurrence +of the exception to be logged provided it does not occur at a site at which it +was previously logged. +.sp +.LP +Each of the different types of invalid operation exceptions can be logged at +the same site. Because all invalid operation exceptions share the same flag, +however, of those types for which \fBFEX_NONSTOP\fR mode is in effect, only the +first exception to set the flag will be logged. When the invalid operation +exception is raised by a call to \fBferaiseexcept\fR(3M) or +\fBfeupdateenv\fR(3M), which type of invalid operation is logged depends on the +implementation. +.sp +.LP +If an exception results in the creation of a log entry, the entry is created at +the time the exception occurs and before any exception handling actions +selected with \fBfex_set_handling()\fR are taken. In particular, the log entry +is available even if the program terminates as a result of the exception. The +log entry shows the type of exception, the address of the instruction that +caused it, how it will be handled, and the stack trace. If symbols are +available, the address of the excepting instruction and the addresses in the +stack trace are followed by the names of the corresponding symbols. +.sp +.LP +The \fBfex_get_log()\fR function returns the current log file. +.sp +.LP +The \fBfex_set_log_depth()\fR sets the maximum depth of the stack trace +recorded with each exception to \fIdepth\fR stack frames. The default depth is +100. 
+.sp +.LP +The \fBfex_get_log_depth()\fR function returns the current maximum stack trace +depth. +.sp +.LP +The \fBfex_log_entry()\fR function adds a user-supplied entry to the log. The +entry includes the string pointed to by \fImsg\fR and the stack trace. Like +entries for floating point exceptions, redundant user-supplied entries are +eliminated: only the first user-supplied entry with a given \fImsg\fR to be +requested from a given site will be logged. For the purpose of a user-supplied +entry, the site is defined only by the stack trace, which begins with the +function that called \fBfex_log_entry()\fR. +.SH RETURN VALUES +.sp +.LP +The \fBfex_set_log()\fR function returns a non-zero value if logging is enabled +or disabled accordingly and returns 0 otherwise. The \fBfex_set_log_depth()\fR +returns a non-zero value if the requested stack trace depth is established +(regardless of whether logging is enabled) and returns 0 otherwise. +.SH EXAMPLES +.sp +.LP +The following example demonstrates the output generated when a floating point +overflow occurs in \fBsscanf\fR(3C). +.sp +.in +2 +.nf +#include <fenv.h> + +int +main() { + double x; +/* + * enable logging of retrospective diagnostics + */ + (void) fex_set_log(stdout); +/* + * establish default handling for overflows + */ + (void) fex_set_handling(FEX_OVERFLOW, FEX_NONSTOP, NULL); +/* + * trigger an overflow in sscanf + */ + (void) sscanf("1.0e+400", "%lf", &x); + return 0; +} +.fi +.in -2 + +.sp +.LP +The output from the preceding program reads: +.sp +.in +2 +.nf +Floating point overflow at 0xef71cac4 __base_conversion_set_exceptio +n, nonstop mode + 0xef71cacc __base_conversion_set_exception + 0xef721820 _decimal_to_double + 0xef75aba8 number + 0xef75a94c __doscan_u + 0xef75ecf8 sscanf + 0x00010f20 main +.fi +.in -2 + +.sp +.LP +Recompiling the program or running it on another system can produce different +text addresses from those shown above. 
+.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab(:) box; +lw(2.75i) lw(2.75i) +lw(2.75i) lw(2.75i) +. +ATTRIBUTE TYPE:ATTRIBUTE VALUE +Availability:SUNWlibms, SUNWlmxs +Interface Stability:Stable +MT-Level:MT-Safe (see NOTES) +.TE + +.SH SEE ALSO +.sp +.LP +\fBfeclearexcept\fR(3M), \fBfegetenv\fR(3M), \fBferaiseexcept\fR(3M), +\fBfeupdateenv\fR(3M), \fBfex_set_handling\fR(3M), \fBattributes\fR(5) +.sp +.LP +\fINumerical Computation Guide\fR +.SH NOTES +.sp +.LP +All threads in a process share the same log file. Each call to +\fBfex_set_log()\fR preempts the previous one. +.sp +.LP +In addition to the log file itself, two additional file descriptors are used +during the creation of a log entry in order to obtain symbol names from the +executable and any shared objects it uses. These file descriptors are +relinquished once the log entry is written. If the file descriptors cannot be +allocated, symbol names are omitted from the stack trace. +.sp +.LP +The functions described on this page automatically install and deinstall +\fBSIGFPE\fR handlers and set and clear the trap enable mode bits in the +floating point status register as needed. If a program uses these functions +and attempts to install a \fBSIGFPE\fR handler or control the trap enable mode +bits independently, the resulting behavior is not defined. +.sp +.LP +As described in \fBfex_set_handling()\fR, when a handling function installed in +\fBFEX_CUSTOM\fR mode is invoked, all exception traps are disabled (and will +not be reenabled while \fBSIGFPE\fR is blocked). Thus, retrospective +diagnostic messages are not logged for exceptions that occur within such a +handler. diff --git a/usr/src/man/man3m/floor.3m b/usr/src/man/man3m/floor.3m new file mode 100644 index 0000000000..ddc993f041 --- /dev/null +++ b/usr/src/man/man3m/floor.3m @@ -0,0 +1,73 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. 
and The Open Group. All Rights Reserved. +.\" Copyright (c) 1992, X/Open Company Limited. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH floor 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +floor, floorf, floorl \- floor function +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... ] +#include <math.h> + +\fBdouble\fR \fBfloor\fR(\fBdouble\fR \fIx\fR); +.fi + +.LP +.nf +\fBfloat\fR \fBfloorf\fR(\fBfloat\fR \fIx\fR); +.fi + +.LP +.nf +\fBlong double\fR \fBfloorl\fR(\fBlong double\fR \fIx\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions compute the largest integral value not greater than \fIx\fR. +.SH RETURN VALUES +.sp +.LP +Upon successful completion, these functions return the largest integral value +not greater than \fIx\fR, expressed as a \fBdouble\fR, \fBfloat\fR, or \fBlong +double\fR, as appropriate for the return type of the function. +.sp +.LP +If \fIx\fR is NaN, a NaN is returned. +.sp +.LP +If \fIx\fR is \(+-Inf or \(+-0, \fIx\fR is returned. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBceil\fR(3M), \fBfeclearexcept\fR(3M), \fBfetestexcept\fR(3M), +\fBisnan\fR(3M), \fBmath.h\fR(3HEAD), \fBattributes\fR(5), \fBstandards\fR(5) diff --git a/usr/src/man/man3m/fma.3m b/usr/src/man/man3m/fma.3m new file mode 100644 index 0000000000..b79dae8df9 --- /dev/null +++ b/usr/src/man/man3m/fma.3m @@ -0,0 +1,125 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. 
gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH fma 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +fma, fmaf, fmal \- floating-point multiply-add +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... ] +#include <math.h> + +\fBdouble\fR \fBfma\fR(\fBdouble\fR \fIx\fR, \fBdouble\fR \fIy\fR, \fBdouble\fR \fIz\fR); +.fi + +.LP +.nf +\fBfloat\fR \fBfmaf\fR(\fBfloat\fR \fIx\fR, \fBfloat\fR \fIy\fR, \fBfloat\fR \fIz\fR); +.fi + +.LP +.nf +\fBlong double\fR \fBfmal\fR(\fBlong double\fR \fIx\fR, \fBlong double\fR \fIy\fR, \fBlong double\fR \fIz\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions compute (\fIx\fR * \fIy\fR) + \fIz\fR, rounded as one ternary +operation. They compute the value (as if) to infinite precision and round once +to the result format, according to the rounding mode characterized by the value +of \fBFLT_ROUNDS\fR. +.SH RETURN VALUES +.sp +.LP +Upon successful completion, these functions return (\fIx\fR * \fIy\fR) + +\fIz\fR, rounded as one ternary operation. +.sp +.LP +If \fIx\fR or \fIy\fR are NaN, a NaN is returned. +.sp +.LP +If \fIx\fR multiplied by \fIy\fR is an exact infinity and \fIz\fR is also an +infinity but with the opposite sign, a domain error occurs and a NaN is +returned. +.sp +.LP +If one of \fIx\fR and \fIy\fR is infinite, the other is 0, and \fIz\fR is not a +NaN, a domain error occurs and a NaN is returned. +.sp +.LP +If \fIx\fR*\fIy\fR is not 0*Inf nor Inf*0 and \fIz\fR is a NaN, a NaN is +returned. +.SH ERRORS +.sp +.LP +These functions will fail if: +.sp +.ne 2 +.mk +.na +\fBDomain Error\fR +.ad +.RS 16n +.rt +The value of \fIx\fR*\fIy\fR+\fIz\fR is invalid or the value \fIx\fR*\fIy\fR is +invalid. 
+.sp +If the integer expression (\fBmath_errhandling\fR & \fBMATH_ERREXCEPT\fR) is +non-zero, the invalid floating-point exception will be raised. +.RE + +.sp +.ne 2 +.mk +.na +\fBRange Error\fR +.ad +.RS 16n +.rt +The result overflows. +.sp +If the integer expression (\fBmath_errhandling\fR & \fBMATH_ERREXCEPT\fR) is +non-zero, the overflow floating-point exception will be raised. +.RE + +.SH USAGE +.sp +.LP +An application wanting to check for exceptions should call +\fBfeclearexcept\fR(\fBFE_ALL_EXCEPT\fR) before calling these functions. On +return, if \fBfetestexcept\fR(\fBFE_INVALID\fR | \fBFE_DIVBYZERO\fR | +\fBFE_OVERFLOW\fR | \fBFE_UNDERFLOW\fR) is non-zero, an exception has been +raised. An application should either examine the return value or check the +floating point exception flags to detect exceptions. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBfeclearexcept\fR(3M), \fBfetestexcept\fR(3M), \fBmath.h\fR(3HEAD), +\fBattributes\fR(5), \fBstandards\fR(5) diff --git a/usr/src/man/man3m/fmax.3m b/usr/src/man/man3m/fmax.3m new file mode 100644 index 0000000000..c2cee612c6 --- /dev/null +++ b/usr/src/man/man3m/fmax.3m @@ -0,0 +1,78 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. 
+.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH fmax 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +fmax, fmaxf, fmaxl \- determine maximum numeric value of two floating-point +numbers +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... 
] +#include <math.h> + +\fBdouble\fR \fBfmax\fR(\fBdouble\fR \fIx\fR, \fBdouble\fR \fIy\fR); +.fi + +.LP +.nf +\fBfloat\fR \fBfmaxf\fR(\fBfloat\fR \fIx\fR, \fBfloat\fR \fIy\fR); +.fi + +.LP +.nf +\fBlong double\fR \fBfmaxl\fR(\fBlong double\fR \fIx\fR, \fBlong double\fR \fIy\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions determine the maximum numeric value of their arguments. NaN +arguments are treated as missing data: if one argument is a NaN and the other +numeric, these functions choose the numeric value. +.SH RETURN VALUES +.sp +.LP +Upon successful completion, these functions return the maximum numeric value of +their arguments. +.sp +.LP +If just one argument is a NaN, the other argument is returned. +.sp +.LP +If \fIx\fR and \fIy\fR are NaN, a NaN is returned. +.SH ERRORS +.sp +.LP +No errors are defined. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBfdim\fR(3M), \fBfmin\fR(3M), \fBmath.h\fR(3HEAD), \fBattributes\fR(5), +\fBstandards\fR(5) diff --git a/usr/src/man/man3m/fmin.3m b/usr/src/man/man3m/fmin.3m new file mode 100644 index 0000000000..826834fd04 --- /dev/null +++ b/usr/src/man/man3m/fmin.3m @@ -0,0 +1,78 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. 
+.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH fmin 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +fmin, fminf, fminl \- determine minimum numeric value of two floating-point +numbers +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... 
] +#include <math.h> + +\fBdouble\fR \fBfmin\fR(\fBdouble\fR \fIx\fR, \fBdouble\fR \fIy\fR); +.fi + +.LP +.nf +\fBfloat\fR \fBfminf\fR(\fBfloat\fR \fIx\fR, \fBfloat\fR \fIy\fR); +.fi + +.LP +.nf +\fBlong double\fR \fBfminl\fR(\fBlong double\fR \fIx\fR, \fBlong double\fR \fIy\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions determine the minimum numeric value of their arguments. NaN +arguments are treated as missing data: if one argument is a NaN and the other +numeric, these functions choose the numeric value. +.SH RETURN VALUES +.sp +.LP +Upon successful completion, these functions return the minimum numeric value of +their arguments. +.sp +.LP +If just one argument is a NaN, the other argument is returned. +.sp +.LP +If \fIx\fR and \fIy\fR are NaN, a NaN is returned. +.SH ERRORS +.sp +.LP +No errors are defined. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBfdim\fR(3M), \fBfmax\fR(3M), \fBmath.h\fR(3HEAD), \fBattributes\fR(5), +\fBstandards\fR(5) diff --git a/usr/src/man/man3m/fmod.3m b/usr/src/man/man3m/fmod.3m new file mode 100644 index 0000000000..967b1ccbab --- /dev/null +++ b/usr/src/man/man3m/fmod.3m @@ -0,0 +1,110 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Copyright (c) 1992, X/Open Company Limited. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. 
+.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH fmod 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +fmod, fmodf, fmodl \- floating-point remainder value function +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... 
] +#include <math.h> + +\fBdouble\fR \fBfmod\fR(\fBdouble\fR \fIx\fR, \fBdouble\fR \fIy\fR); +.fi + +.LP +.nf +\fBfloat\fR \fBfmodf\fR(\fBfloat\fR \fIx\fR, \fBfloat\fR \fIy\fR); +.fi + +.LP +.nf +\fBlong double\fR \fBfmodl\fR(\fBlong double\fR \fIx\fR, \fBlong double\fR \fIy\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions return the floating-point remainder of the division of \fIx\fR +by \fIy\fR. +.SH RETURN VALUES +.sp +.LP +These functions return the value \fIx\fR \(mi \fIi\fR * \fIy\fR, for some +integer \fIi\fR such that, if \fIy\fR is non-zero, the result has the same sign +as \fIx\fR and magnitude less than the magnitude of \fIy\fR. +.sp +.LP +If \fIx\fR or \fIy\fR is NaN, a NaN is returned. +.sp +.LP +If \fIy\fR is 0, a domain error occurs and a NaN is returned. +.sp +.LP +If \fIx\fR is infinite, a domain error occurs and a NaN is returned. +.sp +.LP +If \fIx\fR is \(+-0 and \fIy\fR is not 0, \(+-0 is returned. +.sp +.LP +If \fIx\fR is not infinite and \fIy\fR is \(+-Inf, \fIx\fR is returned. +.SH ERRORS +.sp +.LP +These functions will fail if: +.sp +.ne 2 +.mk +.na +\fBDomain Error\fR +.ad +.RS 16n +.rt +The \fIx\fR argument is infinite or \fIy\fR is 0. +.sp +If the integer expression (\fBmath_errhandling\fR & \fBMATH_ERREXCEPT\fR) is +non-zero, the invalid floating-point exception is raised. +.RE + +.SH USAGE +.sp +.LP +An application wanting to check for exceptions should call +\fBfeclearexcept\fR(\fBFE_ALL_EXCEPT\fR) before calling these functions. On +return, if \fBfetestexcept\fR(\fBFE_INVALID\fR | \fBFE_DIVBYZERO\fR | +\fBFE_OVERFLOW\fR | \fBFE_UNDERFLOW\fR) is non-zero, an exception has been +raised. An application should either examine the return value or check the +floating point exception flags to detect exceptions. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. 
+ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBfeclearexcept\fR(3M), \fBfetestexcept\fR(3M), \fBisnan\fR(3M), +\fBmath.h\fR(3HEAD), \fBattributes\fR(5), \fBstandards\fR(5) diff --git a/usr/src/man/man3m/fpclassify.3m b/usr/src/man/man3m/fpclassify.3m new file mode 100644 index 0000000000..e02c50f28e --- /dev/null +++ b/usr/src/man/man3m/fpclassify.3m @@ -0,0 +1,62 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). 
You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH fpclassify 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +fpclassify \- classify real floating type +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... ] +#include <math.h> + +\fBint\fR \fBfpclassify\fR(\fBreal-floating\fR \fIx\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +The \fBfpclassify()\fR macro classifies its argument value as NaN, infinite, +normal, subnormal, or zero. First, an argument represented in a format wider +than its semantic type is converted to its semantic type. Then classification +is based on the type of the argument. +.SH RETURN VALUES +.sp +.LP +The \fBfpclassify()\fR macro returns the value of the number classification +macro appropriate to the value of its argument. +.SH ERRORS +.sp +.LP +No errors are defined. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. 
+ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBisfinite\fR(3M), \fBisinf\fR(3M), \fBisnan\fR(3M), \fBisnormal\fR(3M), +\fBmath.h\fR(3HEAD), \fBsignbit\fR(3M), \fBattributes\fR(5), \fBstandards\fR(5) diff --git a/usr/src/man/man3m/frexp.3m b/usr/src/man/man3m/frexp.3m new file mode 100644 index 0000000000..b419f55ac5 --- /dev/null +++ b/usr/src/man/man3m/frexp.3m @@ -0,0 +1,82 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Copyright (c) 1992, X/Open Company Limited. All Rights Reserved. +.\" Copyright 1989 AT&T +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. 
+.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH frexp 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +frexp, frexpf, frexpl \- extract mantissa and exponent from a floating-point +number +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... ] +#include <math.h> + +\fBdouble\fR \fBfrexp\fR(\fBdouble\fR \fInum\fR, \fBint *\fR\fIexp\fR); +.fi + +.LP +.nf +\fBfloat\fR \fBfrexpf\fR(\fBfloat\fR \fInum\fR, \fBint *\fR\fIexp\fR); +.fi + +.LP +.nf +\fBlong double\fR \fBfrexpl\fR(\fBlong double\fR \fInum\fR, \fBint *\fR\fIexp\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions break a floating-point number into a normalized fraction and an +integral power of 2. They store the integer exponent in the \fBint\fR object +pointed to by \fIexp\fR. +.SH RETURN VALUES +.sp +.LP +For finite arguments, these functions return the value \fIx\fR, such that +\fIx\fR is a \fBdouble\fR with magnitude in the interval [\(12, 1) or 0, and +\fInum\fR equals \fIx\fR times 2 raised to the power *\fIexp\fR. +.sp +.LP +If \fInum\fR is NaN, NaN is returned and the value of *\fIexp\fR is +unspecified. +.sp +.LP +If \fInum\fR is \(+- 0, \(+- 0 is returned and the value of *\fIexp\fR is 0. 
+.sp +.LP +If \fInum\fR is \(+-Inf, \fInum\fR is returned and the value of *\fIexp\fR is +unspecified. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBisnan\fR(3M), \fBldexp\fR(3M), \fBmodf\fR(3M), \fBattributes\fR(5), +\fBstandards\fR(5) diff --git a/usr/src/man/man3m/hypot.3m b/usr/src/man/man3m/hypot.3m new file mode 100644 index 0000000000..f09d9cd1b2 --- /dev/null +++ b/usr/src/man/man3m/hypot.3m @@ -0,0 +1,118 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Copyright (c) 1992, X/Open Company Limited. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. 
The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH hypot 3M "1 Sep 2002" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +hypot, hypotf, hypotl \- Euclidean distance function +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... ] +#include <math.h> + +\fBdouble\fR \fBhypot\fR(\fBdouble\fR \fIx\fR, \fBdouble\fR \fIy\fR); +.fi + +.LP +.nf +\fBfloat\fR \fBhypotf\fR(\fBfloat\fR \fIx\fR, \fBfloat\fR \fIy\fR); +.fi + +.LP +.nf +\fBlong double\fR \fBhypotl\fR(\fBlong double\fR \fIx\fR, \fBlong double\fR \fIy\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions compute the value of the square root of \fIx\fR^2 + \fIy\fR^2 +without undue overflow or underflow. +.SH RETURN VALUES +.sp +.LP +Upon successful completion, these functions return the length of the hypotenuse +of a right-angled triangle with sides of length \fIx\fR and \fIy\fR. +.sp +.LP +If the correct value would cause overflow, a range error occurs and +\fBhypot()\fR, \fBhypotf()\fR, and \fBhypotl()\fR return the value of the macro +\fBHUGE_VAL\fR, \fBHUGE_VALF\fR, and \fBHUGE_VALL\fR, respectively. 
+.sp +.LP +If \fIx\fR or \fIy\fR is \(+-Inf, +Inf is returned even if one of \fIx\fR or +\fIy\fR is NaN. +.sp +.LP +If \fIx\fR or \fIy\fR is NaN and the other is not \(+-Inf, a NaN is returned. +.SH ERRORS +.sp +.LP +These functions will fail if: +.sp +.ne 2 +.mk +.na +\fBRange Error\fR +.ad +.RS 15n +.rt +The result overflows. +.sp +If the integer expression (\fBmath_errhandling\fR & \fBMATH_ERREXCEPT\fR) is +non-zero, the overflow floating-point exception is raised. +.RE + +.SH USAGE +.sp +.LP +\fBhypot\fR(\fIx\fR,\fIy\fR), \fBhypot\fR(\fIy\fR,\fIx\fR), and +\fBhypot\fR(\fIx\fR, \(mi\fIy\fR) are equivalent. +.sp +.LP +\fBhypot\fR(\fIx\fR, \(+-0) is equivalent to \fBfabs\fR(\fIx\fR). +.sp +.LP +These functions take precautions against underflow and overflow during +intermediate steps of the computation. +.sp +.LP +An application wanting to check for exceptions should call +\fBfeclearexcept\fR(\fBFE_ALL_EXCEPT\fR) before calling these functions. On +return, if \fBfetestexcept\fR(\fBFE_INVALID\fR | \fBFE_DIVBYZERO\fR | +\fBFE_OVERFLOW\fR | \fBFE_UNDERFLOW\fR) is non-zero, an exception has been +raised. An application should either examine the return value or check the +floating point exception flags to detect exceptions. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBfabs\fR(3M), \fBfeclearexcept\fR(3M), \fBfetestexcept\fR(3M), +\fBisnan\fR(3M), \fBmath.h\fR(3HEAD), \fBsqrt\fR(3M), \fBattributes\fR(5), +\fBstandards\fR(5) diff --git a/usr/src/man/man3m/ilogb.3m b/usr/src/man/man3m/ilogb.3m new file mode 100644 index 0000000000..1bfeb88446 --- /dev/null +++ b/usr/src/man/man3m/ilogb.3m @@ -0,0 +1,123 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. 
All Rights Reserved. +.\" Copyright (c) 1992, X/Open Company Limited. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH ilogb 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +ilogb, ilogbf, ilogbl \- return an unbiased exponent +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... ] +#include <math.h> + +\fBint\fR \fBilogb\fR(\fBdouble\fR \fIx\fR); +.fi + +.LP +.nf +\fBint\fR \fBilogbf\fR(\fBfloat\fR \fIx\fR); +.fi + +.LP +.nf +\fBint\fR \fBilogbl\fR(\fBlong double\fR \fIx\fR); +.fi + +.LP +.nf +cc [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... ] +#include <math.h> + +\fBint\fR \fBilogb\fR(\fBdouble\fR \fIx\fR); +.fi + +.LP +.nf +\fBint\fR \fBilogbf\fR(\fBfloat\fR \fIx\fR); +.fi + +.LP +.nf +\fBint\fR \fBilogbl\fR(\fBlong double\fR \fIx\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions return the exponent part of their argument \fIx\fR. Formally, +the return value is the integral part of the base-\fIr\fR logarithm of |\fIx\fR| as a signed +integral value, for non-zero \fIx\fR, where \fIr\fR is the radix of the +machine's floating point arithmetic, which is the value of \fBFLT_RADIX\fR +defined in <\fBfloat.h\fR>. +.SH RETURN VALUES +.sp +.LP +Upon successful completion, these functions return the exponent part of \fIx\fR +as a signed integer value. They are equivalent to calling the corresponding +\fBlogb\fR(3M) function and casting the returned value to type \fBint\fR. +.sp +.LP +If \fIx\fR is 0, the value \fBFP_ILOGB0\fR is returned. For SUSv3-conforming +applications compiled with the \fBc99\fR compiler driver (see +\fBstandards\fR(5)), a domain error occurs. +.sp +.LP +If \fIx\fR is \(+-Inf, the value \fBINT_MAX\fR is returned. For +SUSv3-conforming applications compiled with the \fBc99\fR compiler driver, a +domain error occurs. +.sp +.LP +If \fIx\fR is NaN, the value \fBFP_ILOGBNAN\fR is returned. 
For +SUSv3-conforming applications compiled with the \fBc99\fR compiler driver, a +domain error occurs. +.SH ERRORS +.sp +.LP +These functions will fail if: +.sp +.ne 2 +.mk +.na +\fBDomain Error\fR +.ad +.RS 16n +.rt +The \fIx\fR argument is zero, NaN, or \(+-Inf. +.sp +If the integer expression (\fBmath_errhandling\fR & \fBMATH_ERREXCEPT\fR) is +non-zero, then the invalid floating-point exception is raised. +.RE + +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBfeclearexcept\fR(3M), \fBfetestexcept\fR(3M), \fBlimits.h\fR(3HEAD), +\fBlogb\fR(3M), \fBmath.h\fR(3HEAD), \fBscalb\fR(3M), \fBattributes\fR(5), +\fBstandards\fR(5) diff --git a/usr/src/man/man3m/isfinite.3m b/usr/src/man/man3m/isfinite.3m new file mode 100644 index 0000000000..23cfcc2347 --- /dev/null +++ b/usr/src/man/man3m/isfinite.3m @@ -0,0 +1,62 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. 
Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH isfinite 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +isfinite \- test for finite value +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... ] +#include <math.h> + +\fBint\fR \fBisfinite\fR(\fBreal-floating\fR \fIx\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +The \fBisfinite()\fR macro determines whether its argument has a finite value +(zero, subnormal, or normal, and not infinite or NaN). 
First, an argument +represented in a format wider than its semantic type is converted to its +semantic type. Then determination is based on the type of the argument. +.SH RETURN VALUES +.sp +.LP +The \fBisfinite()\fR macro returns a non-zero value if and only if its argument +has a finite value. +.SH ERRORS +.sp +.LP +No errors are defined. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBfpclassify\fR(3M), \fBisinf\fR(3M), \fBisnan\fR(3M), \fBisnormal\fR(3M), +\fBmath.h\fR(3HEAD), \fBsignbit\fR(3M), \fBattributes\fR(5), \fBstandards\fR(5) diff --git a/usr/src/man/man3m/isgreater.3m b/usr/src/man/man3m/isgreater.3m new file mode 100644 index 0000000000..ec5e4bda96 --- /dev/null +++ b/usr/src/man/man3m/isgreater.3m @@ -0,0 +1,80 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. 
Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH isgreater 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +isgreater \- test if x greater than y +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... ] +#include <math.h> + +\fBint\fR \fBisgreater\fR(\fBreal-floating\fR \fIx\fR, \fBreal-floating\fR \fIy\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +The \fBisgreater()\fR macro determines whether its first argument is greater +than its second argument. 
The value of \fBisgreater\fR(\fIx\fR, \fIy\fR) is +equal to (\fIx\fR) > (\fIy\fR); however, unlike (\fIx\fR) > (\fIy\fR), +\fBisgreater\fR(\fIx\fR, \fIy\fR) does not raise the invalid floating-point +exception when \fIx\fR and \fIy\fR are unordered. +.SH RETURN VALUES +.sp +.LP +Upon successful completion, the \fBisgreater()\fR macro returns the value of +(\fIx\fR) > (\fIy\fR). +.sp +.LP +If \fIx\fR or \fIy\fR is NaN, 0 is returned. +.SH ERRORS +.sp +.LP +No errors are defined. +.SH USAGE +.sp +.LP +The relational and equality operators support the usual mathematical +relationships between numeric values. For any ordered pair of numeric values, +exactly one of the relationships (less, greater, and equal) is true. Relational +operators can raise the invalid floating-point exception when argument values +are NaNs. For a NaN and a numeric value, or for two NaNs, just the unordered +relationship is true. This macro is a quiet (non-floating-point exception +raising) version of a relational operator. It facilitates writing efficient +code that accounts for quiet NaNs without suffering the invalid floating-point +exception. In the \fBSYNOPSIS\fR section, \fBreal-floating\fR indicates that +the argument is an expression of \fBreal-floating\fR type. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. 
+ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBisgreaterequal\fR(3M), \fBisless\fR(3M), \fBislessequal\fR(3M), +\fBislessgreater\fR(3M), \fBisunordered\fR(3M), \fBmath.h\fR(3HEAD), +\fBattributes\fR(5), \fBstandards\fR(5) diff --git a/usr/src/man/man3m/isgreaterequal.3m b/usr/src/man/man3m/isgreaterequal.3m new file mode 100644 index 0000000000..9b059f68e8 --- /dev/null +++ b/usr/src/man/man3m/isgreaterequal.3m @@ -0,0 +1,81 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. 
+.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH isgreaterequal 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +isgreaterequal \- test if x greater than or equal to y +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... ] +#include <math.h> + +\fBint\fR \fBisgreaterequal\fR(\fBreal-floating\fR \fIx\fR, \fBreal-floating\fR \fIy\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +The \fBisgreaterequal()\fR macro determines whether its first argument is +greater than or equal to its second argument. The value of +\fBisgreaterequal\fR(\fIx\fR, \fIy\fR) is equal to (\fIx\fR) \(>= (\fIy\fR); +however, unlike (\fIx\fR) \(>= (\fIy\fR), \fBisgreaterequal\fR(\fIx\fR, +\fIy\fR) does not raise the invalid floating-point exception when \fIx\fR and +\fIy\fR are unordered. +.SH RETURN VALUES +.sp +.LP +Upon successful completion, the \fBisgreaterequal()\fR macro returns the value +of (\fIx\fR) \(>= (\fIy\fR). +.sp +.LP +If \fIx\fR or \fIy\fR is NaN, 0 is returned. +.SH ERRORS +.sp +.LP +No errors are defined. +.SH USAGE +.sp +.LP +The relational and equality operators support the usual mathematical +relationships between numeric values. For any ordered pair of numeric values, +exactly one of the relationships (less, greater, and equal) is true. 
Relational +operators can raise the invalid floating-point exception when argument values +are NaNs. For a NaN and a numeric value, or for two NaNs, just the unordered +relationship is true. This macro is a quiet (non-floating-point exception +raising) version of a relational operator. It facilitates writing efficient +code that accounts for quiet NaNs without suffering the invalid floating-point +exception. In the \fBSYNOPSIS\fR section, \fBreal-floating\fR indicates that +the argument is an expression of \fBreal-floating\fR type. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBisgreater\fR(3M), \fBisless\fR(3M), \fBislessequal\fR(3M), +\fBislessgreater\fR(3M), \fBisunordered\fR(3M), \fBmath.h\fR(3HEAD), +\fBattributes\fR(5), \fBstandards\fR(5) diff --git a/usr/src/man/man3m/isinf.3m b/usr/src/man/man3m/isinf.3m new file mode 100644 index 0000000000..67496a184e --- /dev/null +++ b/usr/src/man/man3m/isinf.3m @@ -0,0 +1,63 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. 
Portions of this text +.\" are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical +.\" and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH isinf 3M "17 Nov 2008" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +isinf \- test for infinity +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... ] +#include <math.h> + +\fBint\fR \fBisinf\fR(\fBreal-floating\fR \fIx\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +The \fBisinf()\fR macro determines whether its argument value is an infinity +(positive or negative). First, an argument represented in a format wider than +its semantic type is converted to its semantic type. 
Then determination is +based on the type of the argument. +.SH RETURN VALUES +.sp +.LP +The \fBisinf()\fR macro returns a non-zero value if and only if its argument +has an infinite value. +.SH ERRORS +.sp +.LP +No errors are defined. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBfpclassify\fR(3M), \fBisfinite\fR(3M), \fBisnan\fR(3M), \fBisnormal\fR(3M), +\fBmath.h\fR(3HEAD), \fBsignbit\fR(3M), \fBattributes\fR(5), \fBstandards\fR(5) diff --git a/usr/src/man/man3m/isless.3m b/usr/src/man/man3m/isless.3m new file mode 100644 index 0000000000..1d1fb93035 --- /dev/null +++ b/usr/src/man/man3m/isless.3m @@ -0,0 +1,80 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. 
In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH isless 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +isless \- test if x is less than y +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... ] +#include <math.h> + +\fBint\fR \fBisless\fR(\fBreal-floating\fR \fIx\fR, \fBreal-floating\fR \fIy\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +The \fBisless()\fR macro determines whether its first argument is less than its +second argument. The value of \fBisless\fR(\fIx\fR, \fIy\fR) is equal to +(\fIx\fR) < (\fIy\fR); however, unlike (\fIx\fR) < (\fIy\fR), +\fBisless\fR(\fIx\fR, \fIy\fR) does not raise the invalid floating-point +exception when \fIx\fR and \fIy\fR are unordered. +.SH RETURN VALUES +.sp +.LP +Upon successful completion, the \fBisless()\fR macro returns the value of +(\fIx\fR) < (\fIy\fR). +.sp +.LP +If \fIx\fR or \fIy\fR is NaN, 0 is returned. 
+.SH ERRORS +.sp +.LP +No errors are defined. +.SH USAGE +.sp +.LP +The relational and equality operators support the usual mathematical +relationships between numeric values. For any ordered pair of numeric values, +exactly one of the relationships (less, greater, and equal) is true. Relational +operators can raise the invalid floating-point exception when argument values +are NaNs. For a NaN and a numeric value, or for two NaNs, just the unordered +relationship is true. This macro is a quiet (non-floating-point exception +raising) version of a relational operator. It facilitates writing efficient +code that accounts for quiet NaNs without suffering the invalid floating-point +exception. In the \fBSYNOPSIS\fR section, \fBreal-floating\fR indicates that +the argument is an expression of \fBreal-floating\fR type. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBisgreater\fR(3M), \fBisgreaterequal\fR(3M), \fBislessequal\fR(3M), +\fBislessgreater\fR(3M), \fBisunordered\fR(3M), \fBmath.h\fR(3HEAD), +\fBattributes\fR(5), \fBstandards\fR(5) diff --git a/usr/src/man/man3m/islessequal.3m b/usr/src/man/man3m/islessequal.3m new file mode 100644 index 0000000000..c47897190e --- /dev/null +++ b/usr/src/man/man3m/islessequal.3m @@ -0,0 +1,80 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. 
+.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH islessequal 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +islessequal \- test if x is less than or equal to y +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... 
] +#include <math.h> + +\fBint\fR \fBislessequal\fR(\fBreal-floating\fR \fIx\fR, \fBreal-floating\fR \fIy\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +The \fBislessequal()\fR macro determines whether its first argument is less +than or equal to its second argument. The value of \fBislessequal\fR(\fIx\fR, +\fIy\fR) is equal to (\fIx\fR) \(<= (\fIy\fR); however, unlike (\fIx\fR) \(<= +(\fIy\fR), \fBislessequal\fR(\fIx\fR, \fIy\fR) does not raise the invalid +floating-point exception when \fIx\fR and \fIy\fR are unordered. +.SH RETURN VALUES +.sp +.LP +Upon successful completion, the \fBislessequal()\fR macro returns the value of +(\fIx\fR) \(<= (\fIy\fR). +.sp +.LP +If \fIx\fR or \fIy\fR is NaN, 0 is returned. +.SH ERRORS +.sp +.LP +No errors are defined. +.SH USAGE +.sp +.LP +The relational and equality operators support the usual mathematical +relationships between numeric values. For any ordered pair of numeric values, +exactly one of the relationships (less, greater, and equal) is true. Relational +operators can raise the invalid floating-point exception when argument values +are NaNs. For a NaN and a numeric value, or for two NaNs, just the unordered +relationship is true. This macro is a quiet (non-floating-point exception +raising) version of a relational operator. It facilitates writing efficient +code that accounts for quiet NaNs without suffering the invalid floating-point +exception. In the \fBSYNOPSIS\fR section, \fBreal-floating\fR indicates that +the argument is an expression of \fBreal-floating\fR type. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. 
+ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBisgreater\fR(3M), \fBisgreaterequal\fR(3M), \fBisless\fR(3M), +\fBislessgreater\fR(3M), \fBisunordered\fR(3M), \fBmath.h\fR(3HEAD), +\fBattributes\fR(5), \fBstandards\fR(5) diff --git a/usr/src/man/man3m/islessgreater.3m b/usr/src/man/man3m/islessgreater.3m new file mode 100644 index 0000000000..d8b43e2416 --- /dev/null +++ b/usr/src/man/man3m/islessgreater.3m @@ -0,0 +1,81 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. 
+.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH islessgreater 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +islessgreater \- test if x is less than or greater than y +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... ] +#include <math.h> + +\fBint\fR \fBislessgreater\fR(\fBreal-floating\fR \fIx\fR, \fBreal-floating\fR \fIy\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +The \fBislessgreater()\fR macro determines whether its first argument is less +than or greater than its second argument. The \fBislessgreater\fR(\fIx\fR, +\fIy\fR) macro is similar to (\fIx\fR) < (\fIy\fR) || (\fIx\fR) > (\fIy\fR); +however, \fBislessgreater\fR(\fIx\fR, \fIy\fR) does not raise the invalid +floating-point exception when x and y are unordered (nor does it evaluate +\fIx\fR and \fIy\fR twice). +.SH RETURN VALUES +.sp +.LP +Upon successful completion, the \fBislessgreater()\fR macro returns the value +of (\fIx\fR) < (\fIy\fR) || (\fIx\fR) > (\fIy\fR). +.sp +.LP +If \fIx\fR or \fIy\fR is NaN, 0 is returned. +.SH ERRORS +.sp +.LP +No errors are defined. +.SH USAGE +.sp +.LP +The relational and equality operators support the usual mathematical +relationships between numeric values. 
For any ordered pair of numeric values, +exactly one of the relationships (less, greater, and equal) is true. Relational +operators can raise the invalid floating-point exception when argument values +are NaNs. For a NaN and a numeric value, or for two NaNs, just the unordered +relationship is true. This macro is a quiet (non-floating-point exception +raising) version of a relational operator. It facilitates writing efficient +code that accounts for quiet NaNs without suffering the invalid floating-point +exception. In the \fBSYNOPSIS\fR section, \fBreal-floating\fR indicates that +the argument is an expression of \fBreal-floating\fR type. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBisgreater\fR(3M), \fBisgreaterequal\fR(3M), \fBisless\fR(3M), +\fBislessequal\fR(3M), \fBisunordered\fR(3M), \fBmath.h\fR(3HEAD), +\fBattributes\fR(5), \fBstandards\fR(5) diff --git a/usr/src/man/man3m/isnan.3m b/usr/src/man/man3m/isnan.3m new file mode 100644 index 0000000000..b5747b30e5 --- /dev/null +++ b/usr/src/man/man3m/isnan.3m @@ -0,0 +1,89 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Copyright (c) 1992, X/Open Company Limited. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. 
In the following statement, the phrase "this text" refers to portions of the system documentation. Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH isnan 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +isnan \- test for NaN +.SH SYNOPSIS +.LP +.nf +cc [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... ] +#include <math.h> + +\fBint\fR \fBisnan\fR(\fBdouble\fR \fIx\fR); +.fi + +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... 
] +#include <math.h> + +\fBint\fR \fBisnan\fR(\fBreal-floating\fR \fIx\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +In C90 mode, the \fBisnan()\fR function tests whether \fIx\fR is NaN. +.sp +.LP +In C99 mode, the \fBisnan()\fR macro determines whether its argument value is +NaN. First, an argument represented in a format wider than its semantic type is +converted to its semantic type. The determination is then based on the type of +the argument. +.SH RETURN VALUES +.sp +.LP +Both the \fBisnan()\fR function and macro return non-zero if and only if +\fIx\fR is NaN. +.SH ERRORS +.sp +.LP +No errors are defined. +.SH WARNINGS +.sp +.LP +In C99 mode, the practice of explicitly supplying a prototype for \fBisnan()\fR +after the line +.sp +.in +2 +.nf +#include <math.h> +.fi +.in -2 + +.sp +.LP +is obsolete and will no longer work. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBfpclassify\fR(3M), \fBisfinite\fR(3M), \fBisinf\fR(3M), \fBisnormal\fR(3M), +\fBmath.h\fR(3HEAD), \fBsignbit\fR(3M), \fBattributes\fR(5), \fBstandards\fR(5) diff --git a/usr/src/man/man3m/isnormal.3m b/usr/src/man/man3m/isnormal.3m new file mode 100644 index 0000000000..f85c2f9280 --- /dev/null +++ b/usr/src/man/man3m/isnormal.3m @@ -0,0 +1,62 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/.
+.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH isnormal 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +isnormal \- test for a normal value +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... 
] +#include <math.h> + +\fBint\fR \fBisnormal\fR(\fBreal-floating\fR \fIx\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +The \fBisnormal()\fR macro determines whether its argument value is normal +(neither zero, subnormal, infinite, nor NaN). First, an argument represented in +a format wider than its semantic type is converted to its semantic type. Then +determination is based on the type of the argument. +.SH RETURN VALUES +.sp +.LP +The \fBisnormal()\fR macro returns a non-zero value if and only if its argument +has a normal value. +.SH ERRORS +.sp +.LP +No errors are defined. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBfpclassify\fR(3M), \fBisfinite\fR(3M), \fBisinf\fR(3M), \fBisnan\fR(3M), +\fBmath.h\fR(3HEAD), \fBsignbit\fR(3M), \fBattributes\fR(5), \fBstandards\fR(5) diff --git a/usr/src/man/man3m/isunordered.3m b/usr/src/man/man3m/isunordered.3m new file mode 100644 index 0000000000..ff6d909c7c --- /dev/null +++ b/usr/src/man/man3m/isunordered.3m @@ -0,0 +1,73 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. 
Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH isunordered 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +isunordered \- test if arguments are unordered +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... ] +#include <math.h> + +\fBint\fR \fBisunordered\fR(\fBreal-floating\fR \fIx\fR, \fBreal-floating\fR \fIy\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +The \fBisunordered()\fR macro determines whether its arguments are unordered. 
+.SH RETURN VALUES +.sp +.LP +Upon successful completion, the \fBisunordered()\fR macro returns 1 if its +arguments are unordered and 0 otherwise. +.SH ERRORS +.sp +.LP +No errors are defined. +.SH USAGE +.sp +.LP +The relational and equality operators support the usual mathematical +relationships between numeric values. For any ordered pair of numeric values, +exactly one of the relationships (less, greater, and equal) is true. Relational +operators can raise the invalid floating-point exception when argument values +are NaNs. For a NaN and a numeric value, or for two NaNs, just the unordered +relationship is true. This macro is a quiet (non-floating-point exception +raising) version of a relational operator. It facilitates writing efficient +code that accounts for quiet NaNs without suffering the invalid floating-point +exception. In the \fBSYNOPSIS\fR section, \fBreal-floating\fR indicates that +the argument shall be an expression of \fBreal-floating\fR type. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBisgreater\fR(3M), \fBisgreaterequal\fR(3M), \fBisless\fR(3M), +\fBislessequal\fR(3M), \fBislessgreater\fR(3M), \fBmath.h\fR(3HEAD), +\fBattributes\fR(5), \fBstandards\fR(5) diff --git a/usr/src/man/man3m/j0.3m b/usr/src/man/man3m/j0.3m new file mode 100644 index 0000000000..89ed44afd5 --- /dev/null +++ b/usr/src/man/man3m/j0.3m @@ -0,0 +1,109 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Copyright (c) 1992, X/Open Company Limited. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. 
gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH j0 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +j0, j0f, j0l, j1, j1f, j1l, jn, jnf, jnl \- Bessel functions of the first kind +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... ] +#include <math.h> + +\fBdouble\fR \fBj0\fR(\fBdouble\fR \fIx\fR); +.fi + +.LP +.nf +\fBfloat\fR \fBj0f\fR(\fBfloat\fR \fIx\fR); +.fi + +.LP +.nf +\fBlong double\fR \fBj0l\fR(\fBlong double\fR \fIx\fR); +.fi + +.LP +.nf +\fBdouble\fR \fBj1\fR(\fBdouble\fR \fIx\fR); +.fi + +.LP +.nf +\fBfloat\fR \fBj1f\fR(\fBfloat\fR \fIx\fR); +.fi + +.LP +.nf +\fBlong double\fR \fBj1l\fR(\fBlong double\fR \fIx\fR); +.fi + +.LP +.nf +\fBdouble\fR \fBjn\fR(\fBint\fR \fIn\fR, \fBdouble\fR \fIx\fR); +.fi + +.LP +.nf +\fBfloat\fR \fBjnf\fR(\fBint\fR \fIn\fR, \fBfloat\fR \fIx\fR); +.fi + +.LP +.nf +\fBlong double\fR \fBjnl\fR(\fBint\fR \fIn\fR, \fBlong double\fR \fIx\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions compute Bessel functions of \fIx\fR of the first kind of orders +0, 1 and \fIn\fR respectively. +.SH RETURN VALUES +.sp +.LP +Upon successful completion, these functions return the relevant Bessel value of +\fIx\fR of the first kind. +.sp +.LP +If \fIx\fR is NaN, a NaN is returned. +.SH ERRORS +.sp +.LP +No errors are defined. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilitySee below. +_ +MT-LevelMT-Safe +.TE + +.sp +.LP +The \fBj0()\fR, \fBj1()\fR, and \fBjn()\fR functions are Standard. The +\fBj0f()\fR, \fBj0l()\fR, \fBj1f()\fR, \fBj1l()\fR, \fBjnf()\fR, and +\fBjnl()\fR functions are Stable. 
+.SH SEE ALSO +.sp +.LP +\fBisnan\fR(3M), \fBy0\fR(3M), \fBmath.h\fR(3HEAD), \fBattributes\fR(5), +\fBstandards\fR(5) diff --git a/usr/src/man/man3m/ldexp.3m b/usr/src/man/man3m/ldexp.3m new file mode 100644 index 0000000000..1839f0fe15 --- /dev/null +++ b/usr/src/man/man3m/ldexp.3m @@ -0,0 +1,116 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Copyright (c) 1992, X/Open Company Limited. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. 
+.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH ldexp 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +ldexp, ldexpf, ldexpl \- load exponent of a floating point number +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... ] +#include <math.h> + +\fBdouble\fR \fBldexp\fR(\fBdouble\fR x, \fBint\fR exp); +.fi + +.LP +.nf +\fBfloat\fR \fBldexpf\fR(\fBfloat\fR x, \fBint\fR exp); +.fi + +.LP +.nf +\fBlong double\fR \fBldexpl\fR(\fBlong double\fR x, \fBint\fR exp); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions compute the quantity x\|* 2^exp. +.SH RETURN VALUES +.sp +.LP +Upon successful completion, these functions return \fIx\fR multiplied by 2 +raised to the power \fIexp\fR. +.sp +.LP +If these functions would cause overflow, a range error occurs and +\fBldexp()\fR, \fBldexpf()\fR, and \fBldexpl()\fR return \fB\(+-HUGE_VAL\fR, +\fB\(+-HUGE_VALF\fR, and \fB\(+-HUGE_VALL\fR (according to the sign of +\fIx\fR), respectively. +.sp +.LP +If \fIx\fR is NaN, a NaN is returned. +.sp +.LP +If \fIx\fR is \(+-0 or \(+-Inf, \fIx\fR is returned. +.sp +.LP +If \fIexp\fR is 0, \fIx\fR is returned. +.SH ERRORS +.sp +.LP +These functions will fail if: +.sp +.ne 2 +.mk +.na +\fBRange Error\fR +.ad +.RS 15n +.rt +The result overflows. +.sp +If the integer expression (\fBmath_errhandling\fR & \fBMATH_ERREXCEPT\fR) is +non-zero, the overflow floating-point exception is raised.
+.sp +The \fBldexp()\fR function sets \fBerrno\fR to \fBERANGE\fR if the result +overflows. +.RE + +.SH USAGE +.sp +.LP +An application wanting to check for exceptions should call +\fBfeclearexcept\fR(\fBFE_ALL_EXCEPT\fR) before calling these functions. On +return, if \fBfetestexcept\fR(\fBFE_INVALID\fR | \fBFE_DIVBYZERO\fR | +\fBFE_OVERFLOW\fR | \fBFE_UNDERFLOW\fR) is non-zero, an exception has been +raised. An application should either examine the return value or check the +floating point exception flags to detect exceptions. +.sp +.LP +An application can also set \fBerrno\fR to 0 before calling \fBldexp()\fR. On +return, if \fBerrno\fR is non-zero, an error has occurred. The \fBldexpf()\fR +and \fBldexpl()\fR functions do not set \fBerrno\fR. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBfrexp\fR(3M), \fBisnan\fR(3M), \fBmodf\fR(3M), \fBattributes\fR(5), +\fBstandards\fR(5) diff --git a/usr/src/man/man3m/lgamma.3m b/usr/src/man/man3m/lgamma.3m new file mode 100644 index 0000000000..745bb00313 --- /dev/null +++ b/usr/src/man/man3m/lgamma.3m @@ -0,0 +1,214 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical +.\" and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Copyright (c) 1992, X/Open Company Limited. +.\" All Rights Reserved. +.\" Copyright (c) 1983 Regents of the University +.\" of California. All rights reserved. The Berkeley software License Agreement +.\" specifies the terms and conditions for redistribution. +.\" Portions Copyright (c) 2006, Sun Microsystems, +.\" Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. 
Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.TH lgamma 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +lgamma, lgammaf, lgammal, lgamma_r, lgammaf_r, lgammal_r, gamma, gammaf, +gammal, gamma_r, gammaf_r, gammal_r \- log gamma function +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... 
] +#include <math.h> + +extern int signgam; + +\fBdouble\fR \fBlgamma\fR(\fBdouble\fR \fIx\fR); +.fi + +.LP +.nf +\fBfloat\fR \fBlgammaf\fR(\fBfloat\fR \fIx\fR); +.fi + +.LP +.nf +\fBlong double\fR \fBlgammal\fR(\fBlong double\fR \fIx\fR); +.fi + +.LP +.nf +\fBdouble\fR \fBgamma\fR(\fBdouble\fR \fIx\fR); +.fi + +.LP +.nf +\fBfloat\fR \fBgammaf\fR(\fBfloat\fR \fIx\fR); +.fi + +.LP +.nf +\fBlong double\fR \fBgammal\fR(\fBlong double\fR \fIx\fR); +.fi + +.LP +.nf +\fBdouble\fR \fBlgamma_r\fR(\fBdouble\fR \fIx\fR, \fBint *\fR\fIsigngamp\fR); +.fi + +.LP +.nf +\fBfloat\fR \fBlgammaf_r\fR(\fBfloat\fR \fIx\fR, \fBint *\fR\fIsigngamp\fR); +.fi + +.LP +.nf +\fBlong double\fR \fBlgammal_r\fR(\fBlong double\fR \fIx\fR, \fBint *\fR\fIsigngamp\fR); +.fi + +.LP +.nf +\fBdouble\fR \fBgamma_r\fR(\fBdouble\fR \fIx\fR, \fBint *\fR\fIsigngamp\fR); +.fi + +.LP +.nf +\fBfloat\fR \fBgammaf_r\fR(\fBfloat\fR \fIx\fR, \fBint *\fR\fIsigngamp\fR); +.fi + +.LP +.nf +\fBlong double\fR \fBgammal_r\fR(\fBlong double\fR \fIx\fR, \fBint *\fR\fIsigngamp\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions return +.sp +ln||~(x)| +.sp +.LP +where +.sp +|~(x) = integral from 0 to +Infinity of pow(t,x-1)*exp(-t) dt +.sp +.LP +for x > 0 and +.sp +|~(x) = pi/(|~(1-x)sin(pi*x)) +.sp +.LP +for x < 1. +.sp +.LP +These functions use the external integer \fBsigngam\fR to return the sign of +\fB|~(x)\fR while \fBlgamma_r()\fR and \fBgamma_r()\fR use the user-allocated +space addressed by \fBsigngamp\fR. +.SH RETURN VALUES +.sp +.LP +Upon successful completion, these functions return the logarithmic gamma of +\fIx\fR. +.sp +.LP +If \fIx\fR is a non-positive integer, a pole error occurs and these functions +return +\fBHUGE_VAL\fR, +\fBHUGE_VALF\fR, and +\fBHUGE_VALL\fR, respectively. +.sp +.LP +If \fIx\fR is NaN, a NaN is returned. +.sp +.LP +If \fIx\fR is 1 or 2, +0 shall be returned. +.sp +.LP +If \fIx\fR is \(+-Inf, +Inf is returned.
+.SH ERRORS +.sp +.LP +These functions will fail if: +.sp +.ne 2 +.mk +.na +\fBPole Error\fR +.ad +.RS 14n +.rt +The \fIx\fR argument is a negative integer or 0. +.sp +If the integer expression (\fBmath_errhandling\fR & \fBMATH_ERREXCEPT\fR) is +non-zero, then the divide-by-zero floating-point exception is raised. +.RE + +.SH USAGE +.sp +.LP +An application wanting to check for exceptions should call +\fBfeclearexcept\fR(\fBFE_ALL_EXCEPT\fR) before calling these functions. On +return, if \fBfetestexcept\fR(\fBFE_INVALID\fR | \fBFE_DIVBYZERO\fR | +\fBFE_OVERFLOW\fR | \fBFE_UNDERFLOW\fR) is non-zero, an exception has been +raised. An application should either examine the return value or check the +floating point exception flags to detect exceptions. +.sp +.LP +In the case of \fBlgamma()\fR, do not use the expression +\fBsigngam*exp(lgamma(x))\fR to compute +.sp +`g := |~(x)' +.sp +.LP +Instead compute \fBlgamma()\fR first: +.sp +.LP +\fBlg = lgamma(x); g = signgam*exp(lg);\fR +.sp +.LP +Only after \fBlgamma()\fR has returned can \fBsigngam\fR be correct. Note that +\fB|~(x)\fR must overflow when \fIx\fR is large enough, underflow when +\(mi\fIx\fR is large enough, and generate a division by 0 exception at the +singularities, where \fIx\fR is a nonpositive integer. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilitySee below. +_ +MT-LevelSee below. +.TE + +.sp +.LP +The \fBlgamma()\fR, \fBlgammaf()\fR, \fBlgammal()\fR, and \fBgamma()\fR +functions are Standard. The \fBlgamma_r()\fR, \fBlgammaf_r()\fR, +\fBlgammal_r()\fR, \fBgamma_r()\fR, \fBgammaf_r()\fR, and \fBgammal_r()\fR +functions are Stable. +.sp +.LP +The \fBlgamma()\fR, \fBlgammaf()\fR, \fBlgammal()\fR, \fBgamma()\fR, +\fBgammaf()\fR, and \fBgammal()\fR functions are Unsafe in multithreaded +applications.
The \fBlgamma_r()\fR, \fBlgammaf_r()\fR, \fBlgammal_r()\fR, +\fBgamma_r()\fR, \fBgammaf_r()\fR, and \fBgammal_r()\fR functions are MT-Safe +and should be used instead. +.SH SEE ALSO +.sp +.LP +\fBexp\fR(3M), \fBfeclearexcept\fR(3M), \fBfetestexcept\fR(3M), +\fBisnan\fR(3M), \fBmath.h\fR(3HEAD), \fBattributes\fR(5), \fBstandards\fR(5) +.SH NOTES +.sp +.LP +When compiling multithreaded applications, the \fB_REENTRANT\fR flag must be +defined on the compile line. This flag should only be used in multithreaded +applications. diff --git a/usr/src/man/man3m/llrint.3m b/usr/src/man/man3m/llrint.3m new file mode 100644 index 0000000000..4ef3e361ab --- /dev/null +++ b/usr/src/man/man3m/llrint.3m @@ -0,0 +1,119 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. 
The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH llrint 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +llrint, llrintf, llrintl \- round to nearest integer value using current +rounding direction +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... ] +#include <math.h> + +\fBlong long\fR \fBllrint\fR(\fBdouble\fR \fIx\fR); +.fi + +.LP +.nf +\fBlong long\fR \fBllrintf\fR(\fBfloat\fR \fIx\fR); +.fi + +.LP +.nf +\fBlong long\fR \fBllrintl\fR(\fBlong double\fR \fIx\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions round their argument to the nearest integer value, rounding +according to the current rounding direction. +.SH RETURN VALUES +.sp +.LP +Upon successful completion, these functions return the rounded integer value. +.sp +.LP +If \fIx\fR is NaN, a domain error occurs and an unspecified value is returned. +.sp +.LP +If \fIx\fR is +Inf, a domain error occurs and an unspecified value is returned. +.sp +.LP +If \fIx\fR is -Inf, a domain error occurs and an unspecified value is returned.
+.sp +.LP +If the correct value is positive and too large to represent as a \fBlong +long\fR, a domain error occurs and an unspecified value is returned. +.sp +.LP + If the correct value is negative and too large to represent as a \fBlong +long\fR, a domain error occurs and an unspecified value is returned. +.SH ERRORS +.sp +.LP +These functions will fail if: +.sp +.ne 2 +.mk +.na +\fBDomain Error\fR +.ad +.RS 16n +.rt +The \fIx\fR argument is NaN or \(+-Inf, or the correct value is not +representable as an integer. +.sp +If the integer expression (\fBmath_errhandling\fR & \fBMATH_ERREXCEPT\fR) is +non-zero, then the invalid floating-point exception will be raised. +.RE + +.SH USAGE +.sp +.LP +An application wanting to check for exceptions should call +\fBfeclearexcept\fR(\fBFE_ALL_EXCEPT\fR) before calling these functions. On +return, if \fBfetestexcept\fR(\fBFE_INVALID\fR | \fBFE_DIVBYZERO\fR | +\fBFE_OVERFLOW\fR | \fBFE_UNDERFLOW\fR) is non-zero, an exception has been +raised. An application should either examine the return value or check the +floating point exception flags to detect exceptions. +.sp +.LP +These functions provide floating-to-integer conversions. They round according +to the current rounding direction. If the rounded value is outside the range of +the return type, the numeric result is unspecified and the invalid +floating-point exception is raised. When they raise no other floating-point +exception and the result differs from the argument, they raise the inexact +floating-point exception. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. 
+ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBfeclearexcept\fR(3M), \fBfetestexcept\fR(3M), \fBlrint\fR(3M), +\fBmath.h\fR(3HEAD), \fBattributes\fR(5), \fBstandards\fR(5) diff --git a/usr/src/man/man3m/llround.3m b/usr/src/man/man3m/llround.3m new file mode 100644 index 0000000000..c2d2cd7ce7 --- /dev/null +++ b/usr/src/man/man3m/llround.3m @@ -0,0 +1,117 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). 
You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH llround 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +llround, llroundf, llroundl \- round to nearest integer value +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... ] +#include <math.h> + +\fBlong long\fR \fBllround\fR(\fBdouble\fR \fIx\fR); +.fi + +.LP +.nf +\fBlong long\fR \fBllroundf\fR(\fBfloat\fR \fIx\fR); +.fi + +.LP +.nf +\fBlong long\fR \fBllroundl\fR(\fBlong double\fR \fIx\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions round their argument to the nearest integer value, rounding +halfway cases away from 0 regardless of the current rounding direction. +.SH RETURN VALUES +.sp +.LP + Upon successful completion, these functions return the rounded integer value. +.sp +.LP +If \fIx\fR is NaN, a domain error occurs and an unspecified value is returned. +.sp +.LP +If \fIx\fR is +Inf, a domain error occurs and an unspecified value is returned. +.sp +.LP +If \fIx\fR is -Inf, a domain error occurs and an unspecified value is returned. +.sp +.LP +If the correct value is positive and too large to represent as a \fBlong +long\fR, a domain error occurs and an unspecified value is returned. +.sp +.LP + If the correct value is negative and too large to represent as a \fBlong +long\fR, a domain error occurs and an unspecified value is returned.
+.SH ERRORS +.sp +.LP +These functions will fail if: +.sp +.ne 2 +.mk +.na +\fBDomain Error\fR +.ad +.RS 16n +.rt +The \fIx\fR argument is NaN or \(+-Inf, or the correct value is not +representable as an integer. +.sp +If the integer expression (\fBmath_errhandling\fR & \fBMATH_ERREXCEPT\fR) is +non-zero, then the invalid floating-point exception will be raised. +.RE + +.SH USAGE +.sp +.LP +An application wanting to check for exceptions should call +\fBfeclearexcept\fR(\fBFE_ALL_EXCEPT\fR) before calling these functions. On +return, if \fBfetestexcept\fR(\fBFE_INVALID\fR | \fBFE_DIVBYZERO\fR | +\fBFE_OVERFLOW\fR | \fBFE_UNDERFLOW\fR) is non-zero, an exception has been +raised. An application should either examine the return value or check the +floating point exception flags to detect exceptions. +.sp +.LP +These functions differ from the \fBllrint\fR(3M) functions in that they round +halfway cases away from 0 regardless of the current rounding direction, and +need not raise the inexact floating-point exception for non-integer arguments +that round to within the range of the return type. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBfeclearexcept\fR(3M), \fBfetestexcept\fR(3M), \fBllrint\fR(3M), +\fBlrint\fR(3M), \fBlround\fR(3M), \fBmath.h\fR(3HEAD), \fBattributes\fR(5), +\fBstandards\fR(5) diff --git a/usr/src/man/man3m/log.3m b/usr/src/man/man3m/log.3m new file mode 100644 index 0000000000..b126a266aa --- /dev/null +++ b/usr/src/man/man3m/log.3m @@ -0,0 +1,139 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Copyright (c) 1992, X/Open Company Limited. All Rights Reserved.
+.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH log 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +log, logf, logl \- natural logarithm function +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... ] +#include <math.h> + +\fBdouble\fR \fBlog\fR(\fBdouble\fR \fIx\fR); +.fi + +.LP +.nf +\fBfloat\fR \fBlogf\fR(\fBfloat\fR \fIx\fR); +.fi + +.LP +.nf +\fBlong double\fR \fBlogl\fR(\fBlong double\fR \fIx\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions compute the natural logarithm of their argument \fIx\fR, +log(\fIe\fR)(\fIx\fR). +.SH RETURN VALUES +.sp +.LP +Upon successful completion, \fBlog()\fR returns the natural logarithm of +\fIx\fR. +.sp +.LP +If \fIx\fR is \(+-0, a pole error occurs and \fBlog()\fR, \fBlogf()\fR, and +\fBlogl()\fR return \(mi\fBHUGE_VAL\fR, \(mi\fBHUGE_VALF\fR, and +\(mi\fBHUGE_VALL\fR, respectively. +.sp +.LP +For finite values of \fIx\fR that are less than 0, or if \fIx\fR is \(miInf, a +domain error occurs and a NaN is returned. +.sp +.LP +If \fIx\fR is NaN, a NaN is returned. +.sp +.LP +If \fIx\fR is 1, +0 is returned. +.sp +.LP +If \fIx\fR is +Inf, \fIx\fR is returned. +.sp +.LP +For exceptional cases, \fBmatherr\fR(3M) tabulates the values to be returned by +\fBlog()\fR as specified by SVID3 and XPG3. +.SH ERRORS +.sp +.LP +These functions will fail if: +.sp +.ne 2 +.mk +.na +\fBDomain Error\fR +.ad +.RS 16n +.rt +The finite value of \fIx\fR is negative, or \fIx\fR is -Inf. +.sp +If the integer expression (\fBmath_errhandling\fR & \fBMATH_ERREXCEPT\fR) is +non-zero, the invalid floating-point exception is raised. +.sp +The \fBlog()\fR function sets \fBerrno\fR to \fBEDOM\fR if the value of \fIx\fR +is negative. +.RE + +.sp +.ne 2 +.mk +.na +\fBPole Error\fR +.ad +.RS 16n +.rt +The value of \fIx\fR is 0. 
+.sp +If the integer expression (\fBmath_errhandling\fR & \fBMATH_ERREXCEPT\fR) is +non-zero, the divide-by-zero floating-point exception is raised. +.RE + +.SH USAGE +.sp +.LP +An application wanting to check for exceptions should call +\fBfeclearexcept\fR(\fBFE_ALL_EXCEPT\fR) before calling these functions. On +return, if \fBfetestexcept\fR(\fBFE_INVALID\fR | \fBFE_DIVBYZERO\fR | +\fBFE_OVERFLOW\fR | \fBFE_UNDERFLOW\fR) is non-zero, an exception has been +raised. An application should either examine the return value or check the +floating point exception flags to detect exceptions. +.sp +.LP +An application can also set \fBerrno\fR to 0 before calling \fBlog()\fR. On +return, if \fBerrno\fR is non-zero, an error has occurred. The \fBlogf()\fR and +\fBlogl()\fR functions do not set \fBerrno\fR. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBexp\fR(3M), \fBfeclearexcept\fR(3M), \fBfetestexcept\fR(3M), +\fBisnan\fR(3M), \fBlog10\fR(3M), \fBlog1p\fR(3M), \fBmath.h\fR(3HEAD), +\fBmatherr\fR(3M), \fBattributes\fR(5), \fBstandards\fR(5) diff --git a/usr/src/man/man3m/log10.3m b/usr/src/man/man3m/log10.3m new file mode 100644 index 0000000000..d59a472d58 --- /dev/null +++ b/usr/src/man/man3m/log10.3m @@ -0,0 +1,138 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Copyright (c) 1992, X/Open Company Limited. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. 
Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH log10 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +log10, log10f, log10l \- base 10 logarithm function +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... 
] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... ] +#include <math.h> + +\fBdouble\fR \fBlog10\fR(\fBdouble\fR \fIx\fR); +.fi + +.LP +.nf +\fBfloat\fR \fBlog10f\fR(\fBfloat\fR \fIx\fR); +.fi + +.LP +.nf +\fBlong double\fR \fBlog10l\fR(\fBlong double\fR \fIx\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions compute the base 10 logarithm of \fIx\fR, log(10)(\fIx\fR). +.SH RETURN VALUES +.sp +.LP +Upon successful completion, \fBlog10()\fR returns the base 10 logarithm of +\fIx\fR. +.sp +.LP +If \fIx\fR is \(+-0, a pole error occurs and \fBlog10()\fR, \fBlog10f()\fR, and +\fBlog10l()\fR return \(mi\fBHUGE_VAL\fR, \(mi\fBHUGE_VALF\fR, and +\(mi\fBHUGE_VALL\fR, respectively. +.sp +.LP +For finite values of \fIx\fR that are less than 0, or if \fIx\fR is \(miInf, a +domain error occurs and a NaN is returned. +.sp +.LP +If \fIx\fR is NaN, a NaN is returned. +.sp +.LP +If \fIx\fR is 1, +0 is returned. +.sp +.LP +If \fIx\fR is +Inf, \fIx\fR is returned. +.sp +.LP +For exceptional cases, \fBmatherr\fR(3M) tabulates the values to be returned by +\fBlog10()\fR as specified by SVID3 and XPG3. +.SH ERRORS +.sp +.LP +These functions will fail if: +.sp +.ne 2 +.mk +.na +\fBDomain Error\fR +.ad +.RS 16n +.rt +The finite value of \fIx\fR is negative, or \fIx\fR is -Inf. +.sp +If the integer expression (\fBmath_errhandling\fR & \fBMATH_ERREXCEPT\fR) is +non-zero, the invalid floating-point exception is raised. +.sp +The \fBlog10()\fR function sets \fBerrno\fR to \fBEDOM\fR if the value of +\fIx\fR is negative. +.RE + +.sp +.ne 2 +.mk +.na +\fBPole Error\fR +.ad +.RS 16n +.rt +The value of \fIx\fR is 0. +.sp +If the integer expression (\fBmath_errhandling\fR & \fBMATH_ERREXCEPT\fR) is +non-zero, the divide-by-zero floating-point exception is raised. +.RE + +.SH USAGE +.sp +.LP +An application wanting to check for exceptions should call +\fBfeclearexcept\fR(\fBFE_ALL_EXCEPT\fR) before calling these functions. 
On +return, if \fBfetestexcept\fR(\fBFE_INVALID\fR | \fBFE_DIVBYZERO\fR | +\fBFE_OVERFLOW\fR | \fBFE_UNDERFLOW\fR) is non-zero, an exception has been +raised. An application should either examine the return value or check the +floating point exception flags to detect exceptions. +.sp +.LP +An application can also set \fBerrno\fR to 0 before calling \fBlog10()\fR. On +return, if \fBerrno\fR is non-zero, an error has occurred. The \fBlog10f()\fR +and \fBlog10l()\fR functions do not set \fBerrno\fR. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBfeclearexcept\fR(3M), \fBfetestexcept\fR(3M), \fBisnan\fR(3M), +\fBlog\fR(3M), \fBmath.h\fR(3HEAD), \fBmatherr\fR(3M), \fBpow\fR(3M), +\fBattributes\fR(5), \fBstandards\fR(5) diff --git a/usr/src/man/man3m/log1p.3m b/usr/src/man/man3m/log1p.3m new file mode 100644 index 0000000000..f7d7ba2746 --- /dev/null +++ b/usr/src/man/man3m/log1p.3m @@ -0,0 +1,134 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Copyright (c) 1992, X/Open Company Limited. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. 
Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH log1p 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +log1p, log1pf, log1pl \- compute natural logarithm +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... ] +#include <math.h> + +\fBdouble\fR \fBlog1p\fR(\fBdouble\fR \fIx\fR); +.fi + +.LP +.nf +\fBfloat\fR \fBlog1pf\fR(\fBfloat\fR \fIx\fR); +.fi + +.LP +.nf +\fBlong double\fR \fBlog1pl\fR(\fBlong double\fR \fIx\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions compute log(e)(1.0 + \fIx\fR). 
+.SH RETURN VALUES +.sp +.LP +Upon successful completion, these functions return the natural logarithm of 1.0 ++ \fIx\fR. +.sp +.LP +If \fIx\fR is \(mi1, a pole error occurs and \fBlog1p()\fR, \fBlog1pf()\fR, and +\fBlog1pl()\fR return \(mi\fBHUGE_VAL\fR, \(mi\fBHUGE_VALF\fR, and +\(mi\fBHUGE_VALL\fR, respectively. +.sp +.LP +For finite values of \fIx\fR that are less than \(mi1, or if \fIx\fR is +\(miInf, a domain error occurs and a NaN is returned. +.sp +.LP +If \fIx\fR is NaN, a NaN is returned. +.sp +.LP +If \fIx\fR is \(+-0 or +Inf, \fIx\fR is returned. +.sp +.LP +For exceptional cases, \fBmatherr\fR(3M) tabulates the values to be returned by +\fBlog1p()\fR as specified by SVID3 and XPG3. +.SH ERRORS +.sp +.LP +These functions will fail if: +.sp +.ne 2 +.mk +.na +\fBDomain Error\fR +.ad +.RS 16n +.rt +The finite value of \fIx\fR is less than \(mi1, or \fIx\fR is -Inf. +.sp +If the integer expression (\fBmath_errhandling\fR & \fBMATH_ERREXCEPT\fR) is +non-zero, the invalid floating-point exception is raised. +.sp +The \fBlog1p()\fR function sets \fBerrno\fR to \fBEDOM\fR if the value of +\fIx\fR is less than \(mi1. +.RE + +.sp +.ne 2 +.mk +.na +\fBPole Error\fR +.ad +.RS 16n +.rt +The value of \fIx\fR is \(mi1. +.sp +If the integer expression (\fBmath_errhandling\fR & \fBMATH_ERREXCEPT\fR) is +non-zero, the divide-by-zero floating-point exception is raised. +.RE + +.SH USAGE +.sp +.LP +An application wanting to check for exceptions should call +\fBfeclearexcept\fR(\fBFE_ALL_EXCEPT\fR) before calling these functions. On +return, if \fBfetestexcept\fR(\fBFE_INVALID\fR | \fBFE_DIVBYZERO\fR | +\fBFE_OVERFLOW\fR | \fBFE_UNDERFLOW\fR) is non-zero, an exception has been +raised. An application should either examine the return value or check the +floating point exception flags to detect exceptions. +.sp +.LP +An application can also set \fBerrno\fR to 0 before calling \fBlog1p()\fR. On +return, if \fBerrno\fR is non-zero, an error has occurred. 
The \fBlog1pf()\fR +and \fBlog1pl()\fR functions do not set \fBerrno\fR. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBfeclearexcept\fR(3M), \fBfetestexcept\fR(3M), \fBlog\fR(3M), +\fBmath.h\fR(3HEAD), \fBmatherr\fR(3M), \fBattributes\fR(5), \fBstandards\fR(5) diff --git a/usr/src/man/man3m/log2.3m b/usr/src/man/man3m/log2.3m new file mode 100644 index 0000000000..d7c4af3c95 --- /dev/null +++ b/usr/src/man/man3m/log2.3m @@ -0,0 +1,125 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. 
The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH log2 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +log2, log2f, log2l \- compute base 2 logarithm functions +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... ] +#include <math.h> + +\fBdouble\fR \fBlog2\fR(\fBdouble\fR \fIx\fR); +.fi + +.LP +.nf +\fBfloat\fR \fBlog2f\fR(\fBfloat\fR \fIx\fR); +.fi + +.LP +.nf +\fBlong double\fR \fBlog2l\fR(\fBlong double\fR \fIx\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions compute the base 2 logarithm of their argument \fIx\fR, +\fBlog2\fR(\fIx\fR). +.SH RETURN VALUES +.sp +.LP +Upon successful completion, these functions return the base 2 logarithm of +\fIx\fR. +.sp +.LP +If \fIx\fR is \(+-0, a pole error occurs and \fBlog2()\fR, \fBlog2f()\fR, and +\fBlog2l()\fR return \(mi\fBHUGE_VAL\fR, \(mi\fBHUGE_VALF\fR, and +\(mi\fBHUGE_VALL\fR, respectively. +.sp +.LP +For finite values of \fIx\fR that are less than 0, or if \fIx\fR is \(miInf, a +domain error occurs and a NaN is returned. +.sp +.LP +If \fIx\fR is NaN, a NaN is returned.
+.sp +.LP +If \fIx\fR is 1, +0 is returned. +.sp +.LP +If \fIx\fR is +Inf, \fIx\fR is returned. +.SH ERRORS +.sp +.LP +These functions will fail if: +.sp +.ne 2 +.mk +.na +\fBDomain Error\fR +.ad +.RS 16n +.rt +The finite value of \fIx\fR is less than 0, or \fIx\fR is \(miInf. +.sp +If the integer expression (\fBmath_errhandling\fR & \fBMATH_ERREXCEPT\fR) is +non-zero, then the invalid floating-point exception is raised. +.RE + +.sp +.ne 2 +.mk +.na +\fBPole Error\fR +.ad +.RS 16n +.rt +The value of \fIx\fR is 0. +.sp +If the integer expression (\fBmath_errhandling\fR & \fBMATH_ERREXCEPT\fR) is +non-zero, then the divide-by-zero floating-point exception is raised. +.RE + +.SH USAGE +.sp +.LP +An application wanting to check for exceptions should call +\fBfeclearexcept\fR(\fBFE_ALL_EXCEPT\fR) before calling these functions. On +return, if \fBfetestexcept\fR(\fBFE_INVALID\fR | \fBFE_DIVBYZERO\fR | +\fBFE_OVERFLOW\fR | \fBFE_UNDERFLOW\fR) is non-zero, an exception has been +raised. An application should either examine the return value or check the +floating point exception flags to detect exceptions. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBfeclearexcept\fR(3M), \fBfetestexcept\fR(3M), \fBlog\fR(3M), +\fBmath.h\fR(3HEAD), \fBattributes\fR(5), \fBstandards\fR(5) diff --git a/usr/src/man/man3m/logb.3m b/usr/src/man/man3m/logb.3m new file mode 100644 index 0000000000..f2e22cb152 --- /dev/null +++ b/usr/src/man/man3m/logb.3m @@ -0,0 +1,152 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Copyright (c) 1992, X/Open Company Limited. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. 
+.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH logb 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +logb, logbf, logbl \- radix-independent exponent +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... ] +#include <math.h> + +\fBdouble\fR \fBlogb\fR(\fBdouble\fR \fIx\fR); +.fi + +.LP +.nf +\fBfloat\fR \fBlogbf\fR(\fBfloat\fR \fIx\fR); +.fi + +.LP +.nf +\fBlong double\fR \fBlogbl\fR(\fBlong double\fR \fIx\fR); +.fi + +.LP +.nf +cc [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... ] +#include <math.h> + +\fBdouble\fR \fBlogb\fR(\fBdouble\fR \fIx\fR); +.fi + +.LP +.nf +\fBfloat\fR \fBlogbf\fR(\fBfloat\fR \fIx\fR); +.fi + +.LP +.nf +\fBlong double\fR \fBlogbl\fR(\fBlong double\fR \fIx\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions compute the exponent of \fIx\fR, which is the integral part of +log(\fIr\fR) |\fIx\fR|, as a signed floating point value, for non-zero \fIx\fR, +where \fIr\fR is the radix of the machine's floating-point arithmetic, which is +the value of \fBFLT_RADIX\fR defined in the <\fBfloat.h\fR> header. +.SH RETURN VALUES +.sp +.LP +Upon successful completion, these functions return the exponent of \fIx\fR. +.sp +.LP +If \fIx\fR is subnormal: +.RS +4 +.TP +.ie t \(bu +.el o +For SUSv3-conforming applications compiled with the \fBc99\fR compiler driver +(see \fBstandards\fR(5)), the exponent of \fIx\fR as if \fIx\fR were normalized +is returned. +.RE +.RS +4 +.TP +.ie t \(bu +.el o +Otherwise, if compiled with the \fBcc\fR compiler driver, \(mi1022, \(mi126, +and \(mi16382 are returned for \fBlogb()\fR, \fBlogbf()\fR, and \fBlogbl()\fR, +respectively. 
+.RE +.sp +.LP +If \fIx\fR is \(+-0, a pole error occurs and \fBlogb()\fR, \fBlogbf()\fR, and +\fBlogbl()\fR return \(mi\fBHUGE_VAL\fR, \(mi\fBHUGE_VALF\fR, and +\(mi\fBHUGE_VALL\fR, respectively. +.sp +.LP +If \fIx\fR is NaN, a NaN is returned. +.sp +.LP +If \fIx\fR is \(+-Inf, +Inf is returned. +.SH ERRORS +.sp +.LP +These functions will fail if: +.sp +.ne 2 +.mk +.na +\fBPole Error\fR +.ad +.RS 14n +.rt +The value of \fIx\fR is \(+-0. +.sp +If the integer expression (\fBmath_errhandling\fR & \fBMATH_ERREXCEPT\fR) is +non-zero, the divide-by-zero floating-point exception is raised. +.sp +The \fBlogb()\fR function sets \fBerrno\fR to \fBEDOM\fR if the value of +\fIx\fR is 0. +.RE + +.SH USAGE +.sp +.LP +An application wanting to check for exceptions should call +\fBfeclearexcept\fR(\fBFE_ALL_EXCEPT\fR) before calling these functions. On +return, if \fBfetestexcept\fR(\fBFE_INVALID\fR | \fBFE_DIVBYZERO\fR | +\fBFE_OVERFLOW\fR | \fBFE_UNDERFLOW\fR) is non-zero, an exception has been +raised. An application should either examine the return value or check the +floating point exception flags to detect exceptions. +.sp +.LP +An application can also set \fBerrno\fR to 0 before calling \fBlogb()\fR. On +return, if \fBerrno\fR is non-zero, an error has occurred. The \fBlogbf()\fR +and \fBlogbl()\fR functions do not set \fBerrno\fR. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. 
+ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBfeclearexcept\fR(3M), \fBfetestexcept\fR(3M), \fBilogb\fR(3M), +\fBmath.h\fR(3HEAD), \fBmatherr\fR(3M), \fBscalb\fR(3M), \fBattributes\fR(5), +\fBstandards\fR(5) diff --git a/usr/src/man/man3m/lrint.3m b/usr/src/man/man3m/lrint.3m new file mode 100644 index 0000000000..170a2c8f87 --- /dev/null +++ b/usr/src/man/man3m/lrint.3m @@ -0,0 +1,112 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. 
+.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH lrint 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +lrint, lrintf, lrintl \- round to nearest integer value using current rounding +direction +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... ] +#include <math.h> + +\fBlong\fR \fBlrint\fR(\fBdouble\fR \fIx\fR); +.fi + +.LP +.nf +\fBlong\fR \fBlrintf\fR(\fBfloat\fR \fIx\fR); +.fi + +.LP +.nf +\fBlong\fR \fBlrintl\fR(\fBlong double\fR \fIx\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions round their argument to the nearest integer value, rounding +according to the current rounding direction. +.SH RETURN VALUES +.sp +.LP +Upon successful completion, these functions return the rounded integer value. +.sp +.LP +If \fIx\fR is NaN, a domain error occurs and an unspecified value is returned. +.sp +.LP +If \fIx\fR is +Inf, a domain error occurs and an unspecified value is returned. +.sp +.LP +If \fIx\fR is \(miInf, a domain error occurs and an unspecified value is +returned. +.sp +.LP +If the correct value is positive and too large to represent as a \fBlong\fR, a +domain error occurs and an unspecified value is returned.
+.sp +.LP +If the correct value is negative and too large to represent as a \fBlong\fR, a +domain error occurs and an unspecified value is returned. +.SH ERRORS +.sp +.LP +These functions will fail if: +.sp +.ne 2 +.mk +.na +\fBDomain Error\fR +.ad +.RS 16n +.rt +The \fIx\fR argument is NaN or \(+-Inf, or the correct value is not +representable as an integer. +.sp +If the integer expression (\fBmath_errhandling\fR & \fBMATH_ERREXCEPT\fR) is +non-zero, then the invalid floating-point exception is raised. +.RE + +.SH USAGE +.sp +.LP +An application wanting to check for exceptions should call +\fBfeclearexcept\fR(\fBFE_ALL_EXCEPT\fR) before calling these functions. On +return, if \fBfetestexcept\fR(\fBFE_INVALID\fR | \fBFE_DIVBYZERO\fR | +\fBFE_OVERFLOW\fR | \fBFE_UNDERFLOW\fR) is non-zero, an exception has been +raised. An application should either examine the return value or check the +floating point exception flags to detect exceptions. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBfeclearexcept\fR(3M), \fBfetestexcept\fR(3M), \fBllrint\fR(3M), +\fBmath.h\fR(3HEAD), \fBattributes\fR(5), \fBstandards\fR(5) diff --git a/usr/src/man/man3m/lround.3m b/usr/src/man/man3m/lround.3m new file mode 100644 index 0000000000..4b85ce2362 --- /dev/null +++ b/usr/src/man/man3m/lround.3m @@ -0,0 +1,111 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. 
Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH lround 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +lround, lroundf, lroundl \- round to nearest integer value +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... 
] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... ] +#include <math.h> + +\fBlong\fR \fBlround\fR(\fBdouble\fR \fIx\fR); +.fi + +.LP +.nf +\fBlong\fR \fBlroundf\fR(\fBfloat\fR \fIx\fR); +.fi + +.LP +.nf +\fBlong\fR \fBlroundl\fR(\fBlong double\fR \fIx\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions round their argument to the nearest integer value, rounding +halfway cases away from zero, regardless of the current rounding direction. +.SH RETURN VALUES +.sp +.LP +Upon successful completion, these functions return the rounded integer value. +.sp +.LP +If \fIx\fR is NaN, a domain error occurs and an unspecified value is returned. +.sp +.LP +If \fIx\fR is +Inf, a domain error occurs and an unspecified value is returned. +.sp +.LP +If \fIx\fR is \(miInf, a domain error occurs and an unspecified value is +returned. +.sp +.LP +If the correct value is positive and too large to represent as a \fBlong\fR, a +domain error occurs and an unspecified value is returned. +.sp +.LP +If the correct value is negative and too large to represent as a \fBlong\fR, a +domain error occurs and an unspecified value is returned. +.SH ERRORS +.sp +.LP +These functions will fail if: +.sp +.ne 2 +.mk +.na +\fBDomain Error\fR +.ad +.RS 16n +.rt +The \fIx\fR argument is NaN or \(+-Inf, or the correct value is not +representable as an integer. +.sp +If the integer expression (\fBmath_errhandling\fR & \fBMATH_ERREXCEPT\fR) is +non-zero, then the invalid floating-point exception is raised. +.RE + +.SH USAGE +.sp +.LP +An application wanting to check for exceptions should call +\fBfeclearexcept\fR(\fBFE_ALL_EXCEPT\fR) before calling these functions. On +return, if \fBfetestexcept\fR(\fBFE_INVALID\fR | \fBFE_DIVBYZERO\fR | +\fBFE_OVERFLOW\fR | \fBFE_UNDERFLOW\fR) is non-zero, an exception has been +raised. An application should either examine the return value or check the +floating point exception flags to detect exceptions.
+.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBfeclearexcept\fR(3M), \fBfetestexcept\fR(3M), \fBllround\fR(3M), +\fBmath.h\fR(3HEAD), \fBattributes\fR(5), \fBstandards\fR(5) diff --git a/usr/src/man/man3m/matherr.3m b/usr/src/man/man3m/matherr.3m new file mode 100644 index 0000000000..4acdb98e7d --- /dev/null +++ b/usr/src/man/man3m/matherr.3m @@ -0,0 +1,557 @@ +'\" te +.\" Copyright (c) 2008, Sun Microsystems, Inc. All Rights Reserved +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH matherr 3M "23 Sep 1997" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +matherr \- math library exception-handling function +.SH SYNOPSIS +.LP +.nf +#include <math.h> + +\fBint\fR \fBmatherr\fR(\fBstruct exception *\fR\fIexc\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +The System V Interface Definition, Third Edition (SVID3) specifies that certain +\fBlibm\fR functions call \fBmatherr()\fR when exceptions are detected. Users +may define their own mechanisms for handling exceptions, by including a +function named \fBmatherr()\fR in their programs. 
The \fBmatherr()\fR function +is of the form described above. When an exception occurs, a pointer to the +exception structure \fIexc\fR will be passed to the user-supplied +\fBmatherr()\fR function. This structure, which is defined in the +\fB<math.h>\fR header file, is as follows: +.sp +.in +2 +.nf +struct exception { + int type; + char *name; + double arg1, arg2, retval; +}; +.fi +.in -2 + +.sp +.LP +The \fBtype\fR member is an integer describing the type of exception that has +occurred, from the following list of constants (defined in the header file): +.sp +.ne 2 +.mk +.na +\fB\fBDOMAIN\fR\fR +.ad +.RS 13n +.rt +argument domain exception +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBSING\fR\fR +.ad +.RS 13n +.rt +argument singularity +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBOVERFLOW\fR\fR +.ad +.RS 13n +.rt +overflow range exception +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBUNDERFLOW\fR\fR +.ad +.RS 13n +.rt +underflow range exception +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBTLOSS\fR\fR +.ad +.RS 13n +.rt +total loss of significance +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBPLOSS\fR\fR +.ad +.RS 13n +.rt +partial loss of significance +.RE + +.sp +.LP +Both \fBTLOSS\fR and \fBPLOSS\fR reflect limitations of particular algorithms +for trigonometric functions that suffer abrupt declines in accuracy at definite +boundaries. Since the implementation does not suffer such abrupt declines, +\fBPLOSS\fR is never signaled. \fBTLOSS\fR is signaled for Bessel functions +\fIonly\fR to satisfy SVID3 requirements. +.sp +.LP +The \fBname\fR member points to a string containing the name of the function +that incurred the exception. The \fBarg1\fR and \fBarg2\fR members are the +arguments with which the function was invoked. \fBretval\fR is set to the +default value that will be returned by the function unless the user's +\fBmatherr()\fR sets it to a different value. +.sp +.LP +If the user's \fBmatherr()\fR function returns non-zero, no exception message +will be printed and \fBerrno\fR is not set. 
+.SH SVID3 STANDARD CONFORMANCE +.sp +.LP +When an application is built as a SVID3 conforming application (see +\fBstandards\fR(5)), if \fBmatherr()\fR is not supplied by the user, the +default matherr exception-handling mechanisms, summarized in the table below, +are invoked upon exception: +.sp +.ne 2 +.mk +.na +\fB\fBDOMAIN\fR\fR +.ad +.RS 13n +.rt +0.0 is usually returned, \fBerrno\fR is set to \fBEDOM\fR and a message is +usually printed on standard error. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBSING\fR\fR +.ad +.RS 13n +.rt +The largest finite single-precision number, \fBHUGE\fR of appropriate sign, is +returned, \fBerrno\fR is set to \fBEDOM\fR, and a message is printed on +standard error. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBOVERFLOW\fR\fR +.ad +.RS 13n +.rt +The largest finite single-precision number, \fBHUGE\fR of appropriate sign, is +usually returned and \fBerrno\fR is set to \fBERANGE\fR. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBUNDERFLOW\fR\fR +.ad +.RS 13n +.rt +0.0 is returned and \fBerrno\fR is set to \fBERANGE\fR. +.RE + +.sp +.ne 2 +.mk +.na +\fB\fBTLOSS\fR\fR +.ad +.RS 13n +.rt +0.0 is returned, \fBerrno\fR is set to \fBERANGE\fR, and a message is printed +on standard error. +.RE + +.sp +.LP +In general, \fBerrno\fR is not a reliable error indicator because it can be +unexpectedly set by a function in a handler for an asynchronous signal. +.SS "SVID3 ERROR HANDLING PROCEDURES (compile with cc \fB-Xt\fR)" +.sp + +.sp +.TS +tab() box; +cw(1.29i) |cw(.81i) |cw(.79i) |cw(.87i) |cw(1.03i) |cw(.71i) +lw(1.29i) |lw(.81i) |lw(.79i) |lw(.87i) |lw(1.03i) |lw(.71i) +.
+<math.h> typeDOMAINSINGOVERFLOWUNDERFLOWTLOSS +_ +\fBerrno\fREDOMEDOMERANGEERANGEERANGE +_ +IEEE ExceptionInvalid OperationDivision by ZeroOverflowUnderflow\(mi +_ +fp_exception_typefp_invalidfp_divisionfp_overflowfp_underflow\(mi +_ +ACOS, ASIN\|(|x| > 1):Md, 0.0\(mi\(mi\(mi\(mi +_ +ACOSH\|(x < 1), ATANH\|(|x| > 1):NaN\(mi\(mi\(mi\(mi +_ +ATAN2\|(0,0):Md, 0.0\(mi\(mi\(mi\(mi +_ +COSH, SINH:\(mi\(mi\(+-HUGE\(mi\(mi +_ +EXP:\(mi\(mi+HUGE0.0\(mi +_ +FMOD\|(x,0):x\(mi\(mi\(mi\(mi +_ +HYPOT:\(mi\(mi+HUGE\(mi\(mi +_ +J0, J1, JN\|(|x| > X_TLOSS):\(mi\(mi\(mi\(miMt, 0.0 +_ +LGAMMA: + usual cases\(mi\(mi+HUGE\(mi\(mi + (x = 0 or \(miinteger) \(miMs, +HUGE\(mi\(mi\(mi +_ +LOG, LOG10: + (x < 0)Md, \(miHUGE\(mi \(mi\(mi\(mi + (x = 0)\(miMs, \(miHUGE\(mi\(mi\(mi +_ +POW: + usual cases\(mi\(mi\(+-HUGE\(+-0.0\(mi + (x < 0) ** (y not an integer)Md, 0.0\(mi\(mi\(mi\(mi + 0 ** 0Md, 0.0\(mi\(mi\(mi\(mi + 0 ** (y < 0)Md, 0.0\(mi\(mi\(mi +_ +REMAINDER\|(x,0):NaN\(mi\(mi\(mi\(mi +_ +SCALB:\(mi\(mi\(+-HUGE_VAL\(+-0.0\(mi +_ +SQRT\|(x < 0):Md, 0.0\(mi\(mi\(mi\(mi +_ +Y0, Y1, YN: + (x < 0)Md, \(miHUGE\(mi\(mi\(mi\(mi + (x = 0)\(miMd, \(miHUGE\(mi\(mi\(mi + (x > X_TLOSS)\(mi\(mi\(mi\(miMt, 0.0 +.TE + +.SS "Abbreviations" +.sp +.ne 2 +.mk +.na +\fBMd\fR +.ad +.RS 12n +.rt +Message is printed (DOMAIN error). +.RE + +.sp +.ne 2 +.mk +.na +\fBMs\fR +.ad +.RS 12n +.rt +Message is printed (SING error). +.RE + +.sp +.ne 2 +.mk +.na +\fBMt\fR +.ad +.RS 12n +.rt +Message is printed (TLOSS error). +.RE + +.sp +.ne 2 +.mk +.na +\fBNaN\fR +.ad +.RS 12n +.rt +IEEE NaN result and invalid operation exception. +.RE + +.sp +.ne 2 +.mk +.na +\fBHUGE\fR +.ad +.RS 12n +.rt +Maximum finite single-precision floating-point number. +.RE + +.sp +.ne 2 +.mk +.na +\fBHUGE_VAL\fR +.ad +.RS 12n +.rt +IEEE \(if result and division-by-zero exception. +.RE + +.sp +.ne 2 +.mk +.na +\fBX_TLOSS\fR +.ad +.RS 12n +.rt +The value X_TLOSS is defined in <values.h>. 
+.RE + +.sp +.LP +The interaction of IEEE arithmetic and \fBmatherr()\fR is not defined when +executing under IEEE rounding modes other than the default round to nearest: +\fBmatherr()\fR is not always called on overflow or underflow and can return +results that differ from those in this table. +.SH X/OPEN COMMON APPLICATION ENVIRONMENT (CAE) SPECIFICATIONS CONFORMANCE +.sp +.LP +The X/Open System Interfaces and Headers (XSH) Issue 3 and later revisions of +that specification no longer sanction the use of the \fBmatherr\fR interface. +The following table summarizes the values returned in the exceptional cases. +In general, XSH dictates that as long as one of the input argument(s) is a NaN, +NaN is returned. In particular, \fBpow(NaN,0)\fR = NaN. +.SS "CAE SPECIFICATION ERROR HANDLING PROCEDURES (compile with cc \fB-Xa\fR)" +.sp + +.sp +.TS +tab(@) box; +cw(.82i) |cw(1.03i) |cw(1i) |cw(.97i) |cw(.96i) |cw(.72i) +lw(.82i) |lw(1.03i) |lw(1i) |lw(.97i) |lw(.96i) |lw(.72i) +. +<math.h> type@DOMAIN@SING@OVERFLOW@UNDERFLOW@TLOSS +_ +\fBerrno\fR@EDOM@EDOM@ERANGE@ERANGE@ERANGE +_ +ACOS, ASIN\|(|x| > 1):@0.0@\(mi@\(mi@\(mi@\(mi +_ +ATAN2\|(0,0):@0.0@\(mi@\(mi@\(mi@\(mi +_ +COSH, SINH:@\(mi@\(mi@{\(+-HUGE_VAL}@\(mi@\(mi +_ +EXP:@\(mi@\(mi@{+HUGE_VAL}@{0.0}@\(mi +_ +FMOD\|(x,0):@{NaN}@\(mi@\(mi@\(mi@\(mi +_ +HYPOT:@\(mi@\(mi@{+HUGE_VAL}@\(mi@\(mi +_ +J0, J1, JN\|(|x| > X_TLOSS):@\(mi@\(mi@\(mi@\(mi@{0.0} +_ +LGAMMA: + usual cases@\(mi@\(mi@{+HUGE_VAL}@\(mi@\(mi + (x = 0 or \(miinteger) @\(mi@+HUGE_VAL@\(mi@\(mi@\(mi +_ +LOG, LOG10: + (x < 0)@\fB-HUGE_VAL\fR@\(mi@\(mi@\(mi@\(mi + (x = 0)@\(mi@\fB-HUGE_VAL\fR@\(mi@\(mi@\(mi +_ +POW: + usual cases@\(mi@\(mi@\(+-HUGE_VAL@\(+-0.0@\(mi + (x < 0) ** (y not an integer)@0.0@\(mi@\(mi@\(mi@\(mi + 0 ** 0@{1.0}@\(mi@\(mi@\(mi@\(mi + 0 ** (y < 0)@{\fB-HUGE_VAL\fR}@\(mi@\(mi@\(mi@\(mi +_ +SQRT\|(x < 0):@0.0@\(mi@\(mi@\(mi@\(mi +_ +Y0, Y1, YN: + (x < 0)@{\fB-HUGE_VAL\fR}@\(mi@\(mi@\(mi@\(mi + (x = 0)@\(mi@{\fB-HUGE_VAL\fR}@\(mi@\(mi@\(mi + (x > X_TLOSS)@\(mi@\(mi@\(mi@\(mi@0.0 +.TE + +.SS "Abbreviations" +.sp +.ne 2 +.mk +.na +\fB{...}\fR +.ad
+.RS 12n +.rt +\fBerrno\fR is not to be relied upon in all braced cases. +.RE + +.sp +.ne 2 +.mk +.na +\fBNaN\fR +.ad +.RS 12n +.rt +IEEE NaN result and invalid operation exception. +.RE + +.sp +.ne 2 +.mk +.na +\fBHUGE_VAL\fR +.ad +.RS 12n +.rt +IEEE \(if result and division-by-zero exception. +.RE + +.sp +.ne 2 +.mk +.na +\fBX_TLOSS\fR +.ad +.RS 12n +.rt +The value X_TLOSS is defined in <\fBvalues.h\fR>. +.RE + +.SH ANSI/ISO-C STANDARD CONFORMANCE +.sp +.LP +The ANSI/ISO-C standard covers a small subset of the CAE specification. +.sp +.LP +The following table summarizes the values returned in the exceptional cases. +.SS "ANSI/ISO-C ERROR HANDLING PROCEDURES (compile with cc \fB-Xc\fR)" +.sp + +.sp +.TS +tab() box; +cw(1.1i) |cw(1.1i) |cw(1.1i) |cw(1.1i) |cw(1.11i) +lw(1.1i) |lw(1.1i) |lw(1.1i) |lw(1.1i) |lw(1.11i) +. +<math.h> typeDOMAINSINGOVERFLOWUNDERFLOW +_ +\fBerrno\fREDOMEDOMERANGEERANGE +_ +ACOS, ASIN\|(|x| > 1):0.0\(mi\(mi\(mi +_ +ATAN2\|(0,0):0.0\(mi\(mi\(mi +_ +EXP:\(mi\(mi+HUGE_VAL0.0 +_ +FMOD\|(x,0):NaN\(mi\(mi\(mi +_ +LOG, LOG10: + (x < 0)\fB-HUGE_VAL\fR\(mi\(mi\(mi + (x = 0)\(mi\fB-HUGE_VAL\fR\(mi\(mi +_ +POW: + usual cases\(mi\(mi\(+-HUGE_VAL\(+-0.0 + (x < 0) ** (y not an integer)0.0\(mi\(mi\(mi + 0 ** (y < 0)\fB-HUGE_VAL\fR\(mi\(mi\(mi +_ +SQRT\|(x < 0):0.0\(mi\(mi\(mi +.TE + +.SS "ABBREVIATIONS" +.sp +.ne 2 +.mk +.na +\fBNaN\fR +.ad +.RS 12n +.rt +IEEE NaN result and invalid operation exception. +.RE + +.sp +.ne 2 +.mk +.na +\fBHUGE_VAL\fR +.ad +.RS 12n +.rt +IEEE \(if result and division-by-zero. 
+.RE + +.SH EXAMPLES +.LP +\fBExample 1 \fRExample of \fBmatherr()\fR function +.sp +.in +2 +.nf +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <math.h> + +int +matherr(struct exception *x) { + switch (x\->type) { + case DOMAIN: + /* change sqrt to return sqrt(\-arg1), not NaN */ + if (!strcmp(x\->name, "sqrt")) { + x\->retval = sqrt(\-x\->arg1); + return (0); /* print message and set errno */ + } /* FALLTHRU */ + case SING: + /* all other domain or sing exceptions, print message and */ + /* abort */ + fprintf(stderr, "domain exception in %s\en", x\->name); + abort(); + break; + } + return (0); /* all other exceptions, execute default procedure */ + } +.fi +.in -2 + +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab(:) box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPE:ATTRIBUTE VALUE +_ +MT-Level:MT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBattributes\fR(5), \fBstandards\fR(5) diff --git a/usr/src/man/man3m/modf.3m b/usr/src/man/man3m/modf.3m new file mode 100644 index 0000000000..7cb6c935a5 --- /dev/null +++ b/usr/src/man/man3m/modf.3m @@ -0,0 +1,92 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Copyright (c) 1992, X/Open Company Limited. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation.
Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH modf 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +modf, modff, modfl \- decompose floating-point number +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... 
] +#include <math.h> + +\fBdouble\fR \fBmodf\fR(\fBdouble\fR \fIx\fR, \fBdouble *\fR\fIiptr\fR); +.fi + +.LP +.nf +\fBfloat\fR \fBmodff\fR(\fBfloat\fR \fIx\fR, \fBfloat *\fR\fIiptr\fR); +.fi + +.LP +.nf +\fBlong double\fR \fBmodfl\fR(\fBlong double\fR \fIx\fR, \fBlong double *\fR\fIiptr\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions break the argument \fIx\fR into integral and fractional parts, +each of which has the same sign as the argument. Each function stores the +integral part as a \fBdouble\fR for the \fBmodf()\fR function, a \fBfloat\fR +for the \fBmodff()\fR function, or a \fBlong double\fR for the \fBmodfl()\fR +function in the object pointed to by \fIiptr\fR. +.SH RETURN VALUES +.sp +.LP +Upon successful completion, these functions return the signed fractional part +of \fIx\fR. +.sp +.LP +If \fIx\fR is NaN, a NaN is returned and *\fIiptr\fR is set to NaN. +.sp +.LP +If \fIx\fR is \(+-Inf, \(+-0 is returned and *\fIiptr\fR is set to \(+-Inf. +.SH ERRORS +.sp +.LP +No errors are defined. +.SH USAGE +.sp +.LP +These functions compute the function result and *\fIiptr\fR such that: +.sp +.in +2 +.nf +a = modf(x, iptr) ; +x == a+*iptr ; +.fi +.in -2 + +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab(:) box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPE:ATTRIBUTE VALUE +_ +Interface Stability:Standard +_ +MT-Level:MT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBfrexp\fR(3M), \fBisnan\fR(3M), \fBldexp\fR(3M), \fBattributes\fR(5), +\fBstandards\fR(5) diff --git a/usr/src/man/man3m/nan.3m b/usr/src/man/man3m/nan.3m new file mode 100644 index 0000000000..ee987b7256 --- /dev/null +++ b/usr/src/man/man3m/nan.3m @@ -0,0 +1,100 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc.
gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH nan 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +nan, nanf, nanl \- return quiet NaN +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... ] +#include <math.h> + +\fBdouble\fR \fBnan\fR(\fBconst char *\fR\fItagp\fR); +.fi + +.LP +.nf +\fBfloat\fR \fBnanf\fR(\fBconst char *\fR\fItagp\fR); +.fi + +.LP +.nf +\fBlong double\fR \fBnanl\fR(\fBconst char *\fR\fItagp\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +The function call \fBnan\fR("\fIn\fR-char-sequence") is equivalent to: +.sp +.in +2 +.nf +strtod("NAN(n-char-sequence)", (char **) NULL) +.fi +.in -2 + +.sp +.LP +The function call \fBnan\fR("") is equivalent to: +.sp +.in +2 +.nf +strtod("NAN()", (char **) NULL) +.fi +.in -2 + +.sp +.LP +If \fItagp\fR does not point to an \fIn\fR-char sequence or an empty string, +the function call is equivalent to: +.sp +.in +2 +.nf +strtod("NAN", (char **) NULL) +.fi +.in -2 + +.sp +.LP +Function calls to \fBnanf()\fR and \fBnanl()\fR are equivalent to the +corresponding function calls to \fBstrtof()\fR and \fBstrtold()\fR. See +\fBstrtod\fR(3C). +.SH RETURN VALUES +.sp +.LP +These functions return a quiet NaN. +.SH ERRORS +.sp +.LP +No errors are defined. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +.
+ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBmath.h\fR(3HEAD), \fBstrtod\fR(3C), \fBattributes\fR(5), \fBstandards\fR(5) diff --git a/usr/src/man/man3m/nearbyint.3m b/usr/src/man/man3m/nearbyint.3m new file mode 100644 index 0000000000..7b11612955 --- /dev/null +++ b/usr/src/man/man3m/nearbyint.3m @@ -0,0 +1,75 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. 
+.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH nearbyint 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +nearbyint, nearbyintf, nearbyintl \- floating-point rounding functions +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... ] +#include <math.h> + +\fBdouble\fR \fBnearbyint\fR(\fBdouble\fR \fIx\fR); +.fi + +.LP +.nf +\fBfloat\fR \fBnearbyintf\fR(\fBfloat\fR \fIx\fR); +.fi + +.LP +.nf +\fBlong double\fR \fBnearbyintl\fR(\fBlong double\fR \fIx\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions round their argument to an integer value in floating-point +format, using the current rounding direction and without raising the inexact +floating-point exception. +.SH RETURN VALUES +.sp +.LP +Upon successful completion, these functions return the rounded integer value. +.sp +.LP +If \fIx\fR is NaN, a NaN is returned. +.sp +.LP +If \fIx\fR is \(+-0, \(+-0 is returned. +.sp +.LP +If \fIx\fR is \(+-Inf, \fIx\fR is returned. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. 
+ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBfeclearexcept\fR(3M), \fBfetestexcept\fR(3M), \fBmath.h\fR(3HEAD), +\fBattributes\fR(5), \fBstandards\fR(5) diff --git a/usr/src/man/man3m/nextafter.3m b/usr/src/man/man3m/nextafter.3m new file mode 100644 index 0000000000..c054c4d3c3 --- /dev/null +++ b/usr/src/man/man3m/nextafter.3m @@ -0,0 +1,160 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Copyright (c) 1992, X/Open Company Limited. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. 
+.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH nextafter 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +nextafter, nextafterf, nextafterl, nexttoward, nexttowardf, nexttowardl \- next +representable double-precision floating-point number +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... ] +#include <math.h> + +\fBdouble\fR \fBnextafter\fR(\fBdouble\fR \fIx\fR, \fBdouble\fR \fIy\fR); +.fi + +.LP +.nf +\fBfloat\fR \fBnextafterf\fR(\fBfloat\fR \fIx\fR, \fBfloat\fR \fIy\fR); +.fi + +.LP +.nf +\fBlong double\fR \fBnextafterl\fR(\fBlong double\fR \fIx\fR, \fBlong double\fR \fIy\fR); +.fi + +.LP +.nf +\fBdouble\fR \fBnexttoward\fR(\fBdouble\fR \fIx\fR, \fBlong double\fR \fIy\fR); +.fi + +.LP +.nf +\fBfloat\fR \fBnexttowardf\fR(\fBfloat\fR \fIx\fR, \fBlong double\fR \fIy\fR); +.fi + +.LP +.nf +\fBlong double\fR \fBnexttowardl\fR(\fBlong double\fR \fIx\fR, \fBlong double\fR \fIy\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +The \fBnextafter()\fR, \fBnextafterf()\fR, and \fBnextafterl()\fR functions +compute the next representable floating-point value following \fIx\fR in the +direction of \fIy\fR. Thus, if \fIy\fR is less than \fIx\fR, \fBnextafter()\fR +returns the largest representable floating-point number less than \fIx\fR. 
The +\fBnextafter()\fR, \fBnextafterf()\fR, and \fBnextafterl()\fR functions return +\fIy\fR if \fIx\fR equals \fIy\fR. +.sp +.LP +The \fBnexttoward()\fR, \fBnexttowardf()\fR, and \fBnexttowardl()\fR functions +are equivalent to the corresponding \fBnextafter()\fR functions, except that +the second parameter has type \fBlong double\fR and the functions return +\fIy\fR converted to the type of the function if \fIx\fR equals \fIy\fR. +.SH RETURN VALUES +.sp +.LP +Upon successful completion, these functions return the next representable +floating-point value following \fIx\fR in the direction of \fIy\fR. +.sp +.LP +If \fIx\fR == \fIy\fR, \fIy\fR (of the type \fIx\fR) is returned. +.sp +.LP +If \fIx\fR is finite and the correct function value would overflow, a range +error occurs and \(+-\fBHUGE_VAL\fR, \(+-\fBHUGE_VALF\fR, and +\(+-\fBHUGE_VALL\fR (with the same sign as \fIx\fR) is returned as appropriate +for the return type of the function. +.sp +.LP +If \fIx\fR or \fIy\fR is NaN, a NaN is returned. +.sp +.LP +If \fIx\fR != \fIy\fR and the correct function value is subnormal, zero, or +underflows, a range error occurs and either the correct function value (if +representable) or 0.0 is returned. +.SH ERRORS +.sp +.LP +These functions will fail if: +.sp +.ne 2 +.mk +.na +\fBRange Error\fR +.ad +.RS 15n +.rt +The correct value overflows. +.sp +If the integer expression (\fBmath_errhandling\fR & \fBMATH_ERREXCEPT\fR) is +non-zero, the overflow floating-point exception is raised. +.sp +The \fBnextafter()\fR function sets \fBerrno\fR to \fBERANGE\fR if the correct +value would overflow. +.RE + +.sp +.ne 2 +.mk +.na +\fBRange Error\fR +.ad +.RS 15n +.rt +The correct value underflows. +.sp +If the integer expression (\fBmath_errhandling\fR & \fBMATH_ERREXCEPT\fR) is +non-zero, the underflow floating-point exception is raised. 
+.RE + +.SH USAGE +.sp +.LP +An application wanting to check for exceptions should call +\fBfeclearexcept\fR(\fBFE_ALL_EXCEPT\fR) before calling these functions. On +return, if \fBfetestexcept\fR(\fBFE_INVALID\fR | \fBFE_DIVBYZERO\fR | +\fBFE_OVERFLOW\fR | \fBFE_UNDERFLOW\fR) is non-zero, an exception has been +raised. An application should either examine the return value or check the +floating point exception flags to detect exceptions. +.sp +.LP +An application can also set \fBerrno\fR to 0 before calling \fBnextafter()\fR. +On return, if \fBerrno\fR is non-zero, an error has occurred. The +\fBnextafterf()\fR, \fBnextafterl()\fR, \fBnexttoward()\fR, +\fBnexttowardf()\fR, and \fBnexttowardl()\fR functions do not set \fBerrno\fR. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBfeclearexcept\fR(3M), \fBfetestexcept\fR(3M), \fBmath.h\fR(3HEAD), +\fBattributes\fR(5), \fBstandards\fR(5) diff --git a/usr/src/man/man3m/pow.3m b/usr/src/man/man3m/pow.3m new file mode 100644 index 0000000000..78f49df142 --- /dev/null +++ b/usr/src/man/man3m/pow.3m @@ -0,0 +1,244 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Copyright (c) 1992, X/Open Company Limited. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation.
In the following statement, the phrase "this text" refers to portions of the system documentation. Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH pow 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +pow, powf, powl \- power function +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... 
] +#include <math.h> + +\fBdouble\fR \fBpow\fR(\fBdouble\fR x, \fBdouble\fR y); +.fi + +.LP +.nf +\fBfloat\fR \fBpowf\fR(\fBfloat\fR x, \fBfloat\fR y); +.fi + +.LP +.nf +\fBlong double\fR \fBpowl\fR(\fBlong double\fR x, \fBlong double\fR y); +.fi + +.LP +.nf +cc [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... ] +#include <math.h> + +\fBdouble\fR \fBpow\fR(\fBdouble\fR x, \fBdouble\fR y); +.fi + +.LP +.nf +\fBfloat\fR \fBpowf\fR(\fBfloat\fR x, \fBfloat\fR y); +.fi + +.LP +.nf +\fBlong double\fR \fBpowl\fR(\fBlong double\fR x, \fBlong double\fR y); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions compute the value of \fIx\fR raised to the power \fIy,\fR +\fIx\fR^\fIy\fR. If \fIx\fR is negative, \fIy\fR must be an integer value. +.SH RETURN VALUES +.sp +.LP +Upon successful completion, these functions return the value of \fIx\fR raised +to the power \fIy\fR. +.sp +.LP +For finite values of \fIx\fR < 0, and finite non-integer values of \fIy\fR, a +domain error occurs and either a NaN (if representable), or an +implementation-defined value is returned. +.sp +.LP +If the correct value would cause overflow, a range error occurs and +\fBpow()\fR, \fBpowf()\fR, and \fBpowl()\fR return \fBHUGE_VAL\fR, +\fBHUGE_VALF\fR, and \fBHUGE_VALL\fR, respectively. +.sp +.LP +If \fIx\fR or \fIy\fR is a NaN, a NaN is returned unless: +.RS +4 +.TP +.ie t \(bu +.el o +If \fIx\fR is +1 and \fIy\fR is NaN and the application was compiled with the +\fBc99\fR compiler driver and is therefore SUSv3-conforming (see +\fBstandards\fR(5)), 1.0 is returned. +.RE +.RS +4 +.TP +.ie t \(bu +.el o +For any value of \fIx\fR (including NaN), if \fIy\fR is +0, 1.0 is returned. +.RE +.sp +.LP +For any odd integer value of \fIy\fR > 0, if \fIx\fR is \(+-0, \(+-0 is +returned. +.sp +.LP +For \fIy\fR > 0 and not an odd integer, if \fIx\fR is \(+-0, +0 is returned.
+.sp +.LP +If \fIx\fR is \(+-1 and \fIy\fR is \(+-Inf, and the application was compiled +with the \fBcc\fR compiler driver, NaN is returned. If, however, the +application was compiled with the \fBc99\fR compiler driver and is therefore +SUSv3-conforming (see \fBstandards\fR(5)), 1.0 is returned. +.sp +.LP +For |\fIx\fR| < 1, if \fIy\fR is \(miInf, +Inf is returned. +.sp +.LP +For |\fIx\fR| > 1, if \fIy\fR is \(miInf, +0 is returned. +.sp +.LP +For |\fIx\fR| < 1, if \fIy\fR is +Inf, +0 is returned. +.sp +.LP +For |\fIx\fR| > 1, if \fIy\fR is +Inf, +Inf is returned. +.sp +.LP +For \fIy\fR an odd integer < 0, if \fIx\fR is \(miInf, \(mi0 is returned. +.sp +.LP +For \fIy\fR < 0 and not an odd integer, if \fIx\fR is \(miInf, +0 is returned. +.sp +.LP +For \fIy\fR an odd integer > 0, if \fIx\fR is \(miInf, \(miInf is returned. +.sp +.LP +For \fIy\fR > 0 and not an odd integer, if \fIx\fR is \(miInf, +Inf is +returned. +.sp +.LP +For \fIy\fR < 0, if \fIx\fR is +Inf, +0 is returned. +.sp +.LP +For \fIy\fR > 0, if \fIx\fR is +Inf, +Inf is returned. +.sp +.LP +For \fIy\fR an odd integer < 0, if \fIx\fR is \(+-0, a pole error occurs and +\(+-\fBHUGE_VAL\fR, \(+-\fBHUGE_VALF\fR, and \(+-\fBHUGE_VALL\fR are returned +for \fBpow()\fR, \fBpowf()\fR, and \fBpowl()\fR, respectively. +.sp +.LP +For \fIy\fR < 0 and not an odd integer, if \fIx\fR is \(+-0, a pole error +occurs and \fBHUGE_VAL\fR, \fBHUGE_VALF\fR, and \fBHUGE_VALL\fR are returned +for \fBpow()\fR, \fBpowf()\fR, and \fBpowl()\fR, respectively. +.sp +.LP +For exceptional cases, \fBmatherr\fR(3M) tabulates the values to be returned by +\fBpow()\fR as specified by SVID3 and XPG3. +.SH ERRORS +.sp +.LP +These functions will fail if: +.sp +.ne 2 +.mk +.na +\fBDomain Error\fR +.ad +.RS 16n +.rt +The value of \fIx\fR is negative and \fIy\fR is a finite non-integer. +.sp +If the integer expression (\fBmath_errhandling\fR & \fBMATH_ERREXCEPT\fR) is +non-zero, the invalid floating-point exception is raised.
+.sp +The \fBpow()\fR function sets \fBerrno\fR to \fBEDOM\fR if the value of \fIx\fR +is negative and \fIy\fR is non-integral. +.RE + +.sp +.ne 2 +.mk +.na +\fBPole Error\fR +.ad +.RS 16n +.rt +The value of \fIx\fR is 0 and \fIy\fR is negative. +.sp +If the integer expression (\fBmath_errhandling\fR & \fBMATH_ERREXCEPT\fR) is +non-zero, the divide-by-zero floating-point exception is raised. +.RE + +.sp +.ne 2 +.mk +.na +\fBRange Error\fR +.ad +.RS 16n +.rt +The result overflows. +.sp +If the integer expression (\fBmath_errhandling\fR & \fBMATH_ERREXCEPT\fR) is +non-zero, the overflow floating-point exception is raised. +.sp +The \fBpow()\fR function sets \fBerrno\fR to \fBERANGE\fR if the value to be +returned would cause overflow. +.RE + +.SH USAGE +.sp +.LP +An application wanting to check for exceptions should call +\fBfeclearexcept\fR(\fBFE_ALL_EXCEPT\fR) before calling these functions. On +return, if \fBfetestexcept\fR(\fBFE_INVALID\fR | \fBFE_DIVBYZERO\fR | +\fBFE_OVERFLOW\fR | \fBFE_UNDERFLOW\fR) is non-zero, an exception has been +raised. An application should either examine the return value or check the +floating point exception flags to detect exceptions. +.sp +.LP +An application can also set \fBerrno\fR to 0 before calling \fBpow()\fR. On +return, if \fBerrno\fR is non-zero, an error has occurred. The \fBpowf()\fR and +\fBpowl()\fR functions do not set \fBerrno\fR. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +.
+ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBexp\fR(3M), \fBfeclearexcept\fR(3M), \fBfetestexcept\fR(3M), +\fBisnan\fR(3M), \fBmath.h\fR(3HEAD), \fBmatherr\fR(3M), \fBattributes\fR(5), +\fBstandards\fR(5) +.SH NOTES +.sp +.LP +Prior to Solaris 2.6, there was a conflict between the \fBpow()\fR function in +this library and the \fBpow()\fR function in the \fBlibmp\fR library. This +conflict was resolved by prepending \fBmp_\fR to all functions in the +\fBlibmp\fR library. See \fBmp\fR(3MP) for more information. diff --git a/usr/src/man/man3m/remainder.3m b/usr/src/man/man3m/remainder.3m new file mode 100644 index 0000000000..567ebd81a9 --- /dev/null +++ b/usr/src/man/man3m/remainder.3m @@ -0,0 +1,108 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Copyright (c) 1992, X/Open Company Limited. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. 
In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH remainder 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +remainder, remainderf, remainderl \- remainder function +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... ] +#include <math.h> + +\fBdouble\fR \fBremainder\fR(\fBdouble\fR \fIx\fR, \fBdouble\fR \fIy\fR); +.fi + +.LP +.nf +\fBfloat\fR \fBremainderf\fR(\fBfloat\fR \fIx\fR, \fBfloat\fR \fIy\fR); +.fi + +.LP +.nf +\fBlong double\fR \fBremainderl\fR(\fBlong double\fR \fIx\fR, \fBlong double\fR \fIy\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions return the floating point remainder \fIr\fR = \fIx\fR \(mi +\fIn\fR\fIy\fR when \fIy\fR is non-zero. The value \fIn\fR is the integral +value nearest the exact value \fIx\fR/\fIy\fR. When |\fIn\fR \(mi +\fIx\fR/\fIy\fR\|| = \(12, the value \fIn\fR is chosen to be even. 
+.sp +.LP +The behavior of \fBremainder()\fR is independent of the rounding mode. +.SH RETURN VALUES +.sp +.LP +Upon successful completion, these functions return the floating point remainder +\fIr\fR = \fIx\fR \(mi \fIn\fR\fIy\fR when \fIy\fR is non-zero. +.sp +.LP +If \fIx\fR or \fIy\fR is NaN, a NaN is returned. +.sp +.LP +If \fIx\fR is infinite or \fIy\fR is 0 and the other is non-NaN, a domain error +occurs and a NaN is returned. +.SH ERRORS +.sp +.LP +These functions will fail if: +.sp +.ne 2 +.mk +.na +\fBDomain Error\fR +.ad +.RS 16n +.rt +The \fIx\fR argument is \(+-Inf, or the \fIy\fR argument is \(+-0 and the other +argument is non-NaN. +.sp +If the integer expression (\fBmath_errhandling\fR & \fBMATH_ERREXCEPT\fR) is +non-zero, then the invalid floating-point exception is raised. +.sp +The \fBremainder()\fR function sets \fBerrno\fR to \fBEDOM\fR if \fIy\fR +argument is 0 or the \fIx\fR argument is positive or negative infinity. +.RE + +.SH USAGE +.sp +.LP +An application wanting to check for error situations can set \fBerrno\fR to 0 +before calling \fBremainder()\fR. On return, if \fBerrno\fR is non-zero, an +error has occurred. The \fBremainderf()\fR and \fBremainderl()\fR functions do +not set \fBerrno\fR. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBabs\fR(3C), \fBdiv\fR(3C), \fBfeclearexcept\fR(3M), \fBfetestexcept\fR(3M), +\fBattributes\fR(5), \fBstandards\fR(5) diff --git a/usr/src/man/man3m/remquo.3m b/usr/src/man/man3m/remquo.3m new file mode 100644 index 0000000000..cc2e10ee28 --- /dev/null +++ b/usr/src/man/man3m/remquo.3m @@ -0,0 +1,105 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. 
+.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH remquo 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +remquo, remquof, remquol \- remainder functions +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... ] +#include <math.h> + +\fBdouble\fR \fBremquo\fR(\fBdouble\fR \fIx\fR, \fBdouble\fR \fIy\fR, \fBint *\fR\fIquo\fR); +.fi + +.LP +.nf +\fBfloat\fR \fBremquof\fR(\fBfloat\fR \fIx\fR, \fBfloat\fR \fIy\fR, \fBint *\fR\fIquo\fR); +.fi + +.LP +.nf +\fBlong double\fR \fBremquol\fR(\fBlong double\fR \fIx\fR, \fBlong double\fR \fIy\fR, \fBint *\fR\fIquo\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +The \fBremquo()\fR, \fBremquof()\fR, and \fBremquol()\fR functions compute the +same remainder as the \fBremainder()\fR, \fBremainderf()\fR, and +\fBremainderl()\fR functions, respectively. See \fBremainder\fR(3M). In the +object pointed to by \fIquo\fR, they store a value whose sign is the sign of +\fIx\fR/\fIy\fR and whose magnitude is congruent modulo 2^\fIn\fR to the +magnitude of the integral quotient of \fIx\fR/\fIy\fR, where \fIn\fR is an +integer greater than or equal to 3. +.SH RETURN VALUES +.sp +.LP +These functions return \fIx\fR REM \fIy\fR. +.sp +.LP +If \fIx\fR or \fIy\fR is NaN, a NaN is returned. +.sp +.LP +If \fIx\fR is \(+-Inf or \fIy\fR is 0 and the other argument is non-NaN, a +domain error occurs and a NaN is returned. +.SH ERRORS +.sp +.LP +These functions will fail if: +.sp +.ne 2 +.mk +.na +\fBDomain Error\fR +.ad +.RS 16n +.rt +The \fIx\fR argument is Inf or the \fIy\fR argument is 0 and the other argument +is non-NaN. +.sp +If the integer expression (\fBmath_errhandling\fR & \fBMATH_ERREXCEPT\fR) is +non-zero, then the invalid floating-point exception is raised. 
+.RE + +.SH USAGE +.sp +.LP +An application wanting to check for exceptions should call +\fBfeclearexcept\fR(\fBFE_ALL_EXCEPT\fR) before calling these functions. On +return, if \fBfetestexcept\fR(\fBFE_INVALID\fR | \fBFE_DIVBYZERO\fR | +\fBFE_OVERFLOW\fR | \fBFE_UNDERFLOW\fR) is non-zero, an exception has been +raised. An application should either examine the return value or check the +floating point exception flags to detect exceptions. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBfeclearexcept\fR(3M), \fBfetestexcept\fR(3M), \fBmath.h\fR(3HEAD), +\fBremainder\fR(3M), \fBattributes\fR(5), \fBstandards\fR(5) diff --git a/usr/src/man/man3m/rint.3m b/usr/src/man/man3m/rint.3m new file mode 100644 index 0000000000..f21200513d --- /dev/null +++ b/usr/src/man/man3m/rint.3m @@ -0,0 +1,85 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Copyright (c) 1992, X/Open Company Limited. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. 
Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH rint 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +rint, rintf, rintl \- round-to-nearest integral value +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... 
] +#include <math.h> + +\fBdouble\fR \fBrint\fR(\fBdouble\fR \fIx\fR); +.fi + +.LP +.nf +\fBfloat\fR \fBrintf\fR(\fBfloat\fR \fIx\fR); +.fi + +.LP +.nf +\fBlong double\fR \fBrintl\fR(\fBlong double\fR \fIx\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions return the integral value (represented as a \fBdouble\fR) +nearest \fIx\fR in the direction of the current rounding mode. +.sp +.LP +If the current rounding mode rounds toward negative infinity, \fBrint()\fR is +equivalent to \fBfloor\fR(3M). If the current rounding mode rounds toward +positive infinity, \fBrint()\fR is equivalent to \fBceil\fR(3M). +.sp +.LP +These functions differ from the \fBnearbyint\fR(3M), \fBnearbyintf()\fR, and +\fBnearbyintl()\fR functions only in that they might raise the inexact +floating-point exception if the result differs in value from the argument. +.SH RETURN VALUES +.sp +.LP +Upon successful completion, these functions return the integer (represented as +a double precision number) nearest \fIx\fR in the direction of the current +rounding mode. +.sp +.LP +If \fIx\fR is NaN, a NaN is returned. +.sp +.LP +If \fIx\fR is \(+-0 or \(+-Inf, \fIx\fR is returned. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBabs\fR(3C), \fBceil\fR(3M), \fBfeclearexcept\fR(3M), \fBfetestexcept\fR(3M), +\fBfloor\fR(3M), \fBisnan\fR(3M), \fBmath.h\fR(3HEAD), \fBnearbyint\fR(3M), +\fBattributes\fR(5), \fBstandards\fR(5) diff --git a/usr/src/man/man3m/round.3m b/usr/src/man/man3m/round.3m new file mode 100644 index 0000000000..8d2b246aa4 --- /dev/null +++ b/usr/src/man/man3m/round.3m @@ -0,0 +1,73 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. 
+.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH round 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +round, roundf, roundl \- round to nearest integer value in floating-point +format +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... ] +#include <math.h> + +\fBdouble\fR \fBround\fR(\fBdouble\fR \fIx\fR); +.fi + +.LP +.nf +\fBfloat\fR \fBroundf\fR(\fBfloat\fR \fIx\fR); +.fi + +.LP +.nf +\fBlong double\fR \fBroundl\fR(\fBlong double\fR \fIx\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions round their argument to the nearest integer value in +floating-point format, rounding halfway cases away from 0, regardless of the +current rounding direction. +.SH RETURN VALUES +.sp +.LP +Upon successful completion, these functions return the rounded integer value. +.sp +.LP +If \fIx\fR is NaN, a NaN is returned. +.sp +.LP +If \fIx\fR is \(+-0 or \(+-Inf, \fIx\fR is returned. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBfeclearexcept\fR(3M), \fBfetestexcept\fR(3M), \fBmath.h\fR(3HEAD), +\fBattributes\fR(5), \fBstandards\fR(5) diff --git a/usr/src/man/man3m/scalb.3m b/usr/src/man/man3m/scalb.3m new file mode 100644 index 0000000000..f38893a5dc --- /dev/null +++ b/usr/src/man/man3m/scalb.3m @@ -0,0 +1,144 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Copyright (c) 1992, X/Open Company Limited. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. 
gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH scalb 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +scalb, scalbf, scalbl \- load exponent of a radix-independent floating-point +number +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... ] +#include <math.h> + +\fBdouble\fR \fBscalb\fR(\fBdouble\fR \fIx\fR, \fBdouble\fR \fIn\fR); +.fi + +.LP +.nf +\fBfloat\fR \fBscalbf\fR(\fBfloat\fR \fIx\fR, \fBfloat\fR \fIn\fR); +.fi + +.LP +.nf +\fBlong double\fR \fBscalbl\fR(\fBlong double\fR \fIx\fR, \fBlong double\fR \fIn\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions compute x * \fIr\fR^n, where \fIr\fR is the radix of the +machine's floating point arithmetic. When \fIr\fR is 2, \fBscalb()\fR is +equivalent to \fBldexp\fR(3M). The value of \fIr\fR is \fBFLT_RADIX\fR which is +defined in <\fBfloat.h\fR>. +.SH RETURN VALUES +.sp +.LP +Upon successful completion, the \fBscalb()\fR function returns \fIx\fR * +\fIr\fR^n. +.sp +.LP +If \fIx\fR or \fIn\fR is NaN, a NaN is returned. +.sp +.LP +If \fIn\fR is 0, \fIx\fR is returned. +.sp +.LP +If \fIx\fR is \(+-Inf and \fIn\fR is not \(miInf, \fIx\fR is returned. +.sp +.LP +If \fIx\fR is \(+-0 and \fIn\fR is not +Inf, \fIx\fR is returned. +.sp +.LP +If \fIx\fR is \(+-0 and \fIn\fR is +Inf, a domain error occurs and a NaN is +returned. +.sp +.LP +If \fIx\fR is \(+-Inf and \fIn\fR is \(miInf, a domain error occurs and a NaN +is returned. +.sp +.LP +If the result would cause an overflow, a range error occurs and +\(+-\fBHUGE_VAL\fR (according to the sign of \fIx\fR) is returned. +.sp +.LP +For exceptional cases, \fBmatherr\fR(3M) tabulates the values to be returned by +\fBscalb()\fR as specified by SVID3 and XPG3. See \fBstandards\fR(5). 
+.SH ERRORS +.sp +.LP +These functions will fail if: +.sp +.ne 2 +.mk +.na +\fBDomain Error\fR +.ad +.RS 16n +.rt +If \fIx\fR is 0 and \fIn\fR is +Inf, or \fIx\fR is Inf and \fIn\fR is \(miInf. +.sp +If the integer expression (\fBmath_errhandling\fR & \fBMATH_ERREXCEPT\fR) is +non-zero, then the invalid floating-point exception is raised. +.RE + +.sp +.ne 2 +.mk +.na +\fBRange Error\fR +.ad +.RS 16n +.rt +The result would overflow. +.sp +If the integer expression (\fBmath_errhandling\fR & \fBMATH_ERREXCEPT\fR) is +non-zero, then the overflow floating-point exception is raised. +.RE + +.SH USAGE +.sp +.LP +An application wanting to check for exceptions should call +\fBfeclearexcept\fR(\fBFE_ALL_EXCEPT\fR) before calling these functions. On +return, if \fBfetestexcept\fR(\fBFE_INVALID\fR | \fBFE_DIVBYZERO\fR | +\fBFE_OVERFLOW\fR | \fBFE_UNDERFLOW\fR) is non-zero, an exception has been +raised. An application should either examine the return value or check the +floating point exception flags to detect exceptions. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilitySee below. +_ +MT-LevelMT-Safe +.TE + +.sp +.LP +The \fBscalb()\fR function is Standard. The \fBscalbf()\fR and \fBscalbl()\fR +functions are Stable. +.SH SEE ALSO +.sp +.LP +\fBfeclearexcept\fR(3M), \fBfetestexcept\fR(3M), \fBilogb\fR(3M), +\fBldexp\fR(3M), \fBlogb\fR(3M), \fBmath.h\fR(3HEAD), \fBmatherr\fR(3M), +\fBscalbln\fR(3M), \fBattributes\fR(5), \fBstandards\fR(5) diff --git a/usr/src/man/man3m/scalbln.3m b/usr/src/man/man3m/scalbln.3m new file mode 100644 index 0000000000..a04b34abdd --- /dev/null +++ b/usr/src/man/man3m/scalbln.3m @@ -0,0 +1,124 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. 
+.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH scalbln 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +scalbln, scalblnf, scalblnl, scalbn, scalbnf, scalbnl \- compute exponent using +FLT_RADIX +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... ] +#include <math.h> + +\fBdouble\fR \fBscalbln\fR(\fBdouble\fR \fIx\fR, \fBlong\fR \fIn\fR); +.fi + +.LP +.nf +\fBfloat\fR \fBscalblnf\fR(\fBfloat\fR \fIx\fR, \fBlong\fR \fIn\fR); +.fi + +.LP +.nf +\fBlong double\fR \fBscalblnl\fR(\fBlong double\fR \fIx\fR, \fBlong\fR \fIn\fR); +.fi + +.LP +.nf +\fBdouble\fR \fBscalbn\fR(\fBdouble\fR \fIx\fR, \fBint\fR \fIn\fR); +.fi + +.LP +.nf +\fBfloat\fR \fBscalbnf\fR(\fBfloat\fR \fIx\fR, \fBint\fR \fIn\fR); +.fi + +.LP +.nf +\fBlong double\fR \fBscalbnl\fR(\fBlong double\fR \fIx\fR, \fBint\fR \fIn\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions compute \fIx\fR * \fBFLT_RADIX\fR^n efficiently, not normally +by computing \fBFLT_RADIX\fR^n explicitly. +.SH RETURN VALUES +.sp +.LP +Upon successful completion, these functions return \fIx\fR * +\fBFLT_RADIX\fR^\fIn\fR. +.sp +.LP +If the result would cause overflow, a range error occurs and these functions +return \(+-\fBHUGE_VAL\fR, \(+-\fBHUGE_VALF\fR, and \(+-\fBHUGE_VALL\fR +(according to the sign of \fIx\fR) as appropriate for the return type of the +function. +.sp +.LP +If \fIx\fR is NaN, a NaN is returned. +.sp +.LP +If \fIx\fR is \(+-0 or \(+-Inf, \fIx\fR is returned. +.sp +.LP +If \fIn\fR is 0, \fIx\fR is returned. +.SH ERRORS +.sp +.LP +These functions will fail if: +.sp +.ne 2 +.mk +.na +\fBRange Error\fR +.ad +.RS 15n +.rt +The result overflows. +.sp +If the integer expression (\fBmath_errhandling\fR & \fBMATH_ERREXCEPT\fR) is +non-zero, then the overflow floating-point exception is raised. 
+.RE + +.SH USAGE +.sp +.LP +An application wanting to check for exceptions should call +\fBfeclearexcept\fR(\fBFE_ALL_EXCEPT\fR) before calling these functions. On +return, if \fBfetestexcept\fR(\fBFE_INVALID\fR | \fBFE_DIVBYZERO\fR | +\fBFE_OVERFLOW\fR | \fBFE_UNDERFLOW\fR) is non-zero, an exception has been +raised. An application should either examine the return value or check the +floating point exception flags to detect exceptions. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBfeclearexcept\fR(3M), \fBfetestexcept\fR(3M), \fBmath.h\fR(3HEAD), +\fBscalb\fR(3M), \fBattributes\fR(5), \fBstandards\fR(5) diff --git a/usr/src/man/man3m/signbit.3m b/usr/src/man/man3m/signbit.3m new file mode 100644 index 0000000000..4720eaf6c1 --- /dev/null +++ b/usr/src/man/man3m/signbit.3m @@ -0,0 +1,62 @@ +'\" te +.\" Copyright (c) 2001, The IEEE and The Open Group. All Rights Reserved. Portions Copyright (c) 2003, Sun Microsystems, Inc. All Rights Reserved. +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. 
Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH signbit 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +signbit \- test sign +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... ] +#include <math.h> + +\fBint\fR \fBsignbit\fR(\fBreal-floating\fR \fIx\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +The \fBsignbit()\fR macro determines whether the sign of its argument value is +negative. NaNs, zeros, and infinities have a sign bit. 
+.SH RETURN VALUES +.sp +.LP +The \fBsignbit()\fR macro returns a non-zero value if and only if the sign of +its argument value is negative. +.SH ERRORS +.sp +.LP +No errors are defined. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBfpclassify\fR(3M), \fBisfinite\fR(3M), \fBisinf\fR(3M), \fBisnan\fR(3M), +\fBisnormal\fR(3M), \fBmath.h\fR(3HEAD), \fBattributes\fR(5), +\fBstandards\fR(5) diff --git a/usr/src/man/man3m/significand.3m b/usr/src/man/man3m/significand.3m new file mode 100644 index 0000000000..5cfb566589 --- /dev/null +++ b/usr/src/man/man3m/significand.3m @@ -0,0 +1,71 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. 
In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH significand 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +significand, significandf, significandl \- significand function +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... ] +#include <math.h> + +\fBdouble\fR \fBsignificand\fR(\fBdouble\fR \fIx\fR); +.fi + +.LP +.nf +\fBfloat\fR \fBsignificandf\fR(\fBfloat\fR \fIx\fR); +.fi + +.LP +.nf +\fBlong double\fR \fBsignificandl\fR(\fBlong double\fR \fIx\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +If \fIx\fR equals \fIsig \fR* 2^\fIn\fR with \fI1\fR\(<= \fIsig\fR < \fI2\fR, +then these functions return \fIsig\fR. +.SH RETURN VALUES +.sp +.LP +Upon successful completion, these functions return \fIsig\fR. +.sp +.LP +If \fIx\fR is either 0, \(+-Inf or NaN, \fIx\fR is returned. +.SH ERRORS +.sp +.LP +No errors are defined. 
+.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStable +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBlogb\fR(3M), \fBscalb\fR(3M), \fBattributes\fR(5) diff --git a/usr/src/man/man3m/sin.3m b/usr/src/man/man3m/sin.3m new file mode 100644 index 0000000000..938a8da3e4 --- /dev/null +++ b/usr/src/man/man3m/sin.3m @@ -0,0 +1,101 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Copyright (c) 1992, X/Open Company Limited. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. 
+.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH sin 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +sin, sinf, sinl \- sine function +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... ] +#include <math.h> + +\fBdouble\fR \fBsin\fR(\fBdouble\fR \fIx\fR); +.fi + +.LP +.nf +\fBfloat\fR \fBsinf\fR(\fBfloat\fR \fIx\fR); +.fi + +.LP +.nf +\fBlong double\fR \fBsinl\fR(\fBlong double\fR \fIx\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions compute the sine of their argument \fIx\fR, measured in radians. +.SH RETURN VALUES +.sp +.LP +Upon successful completion, these functions return the sine of \fIx\fR. +.sp +.LP +If \fIx\fR is NaN, a NaN is returned. +.sp +.LP +If \fIx\fR is \(+-0, \fIx\fR is returned. +.sp +.LP +If \fIx\fR is \(+-Inf, a domain error occurs and a NaN is returned. +.SH ERRORS +.sp +.LP +These functions will fail if: +.sp +.ne 2 +.mk +.na +\fBDomain Error\fR +.ad +.RS 16n +.rt +The \fIx\fR argument is \(+-Inf. +.sp +If the integer expression (\fBmath_errhandling\fR & \fBMATH_ERREXCEPT\fR) is +non-zero, then the invalid floating-point exception is raised. 
+.RE + +.SH USAGE +.sp +.LP +An application wanting to check for exceptions should call +\fBfeclearexcept\fR(\fBFE_ALL_EXCEPT\fR) before calling these functions. On +return, if \fBfetestexcept\fR(\fBFE_INVALID\fR | \fBFE_DIVBYZERO\fR | +\fBFE_OVERFLOW\fR | \fBFE_UNDERFLOW\fR) is non-zero, an exception has been +raised. An application should either examine the return value or check the +floating point exception flags to detect exceptions. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBasin\fR(3M), \fBfeclearexcept\fR(3M), \fBfetestexcept\fR(3M), +\fBisnan\fR(3M), \fBmath.h\fR(3HEAD), \fBattributes\fR(5), \fBstandards\fR(5) diff --git a/usr/src/man/man3m/sincos.3m b/usr/src/man/man3m/sincos.3m new file mode 100644 index 0000000000..00ad44b957 --- /dev/null +++ b/usr/src/man/man3m/sincos.3m @@ -0,0 +1,60 @@ +'\" te +.\" Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH sincos 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +sincos, sincosf, sincosl \- combined sine and cosine function +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... ] +#include <math.h> + +\fBvoid\fR \fBsincos\fR(\fBdouble\fR \fIx\fR, \fBdouble *\fR\fIs\fR, \fBdouble *\fR\fIc\fR); +.fi + +.LP +.nf +\fBvoid\fR \fBsincosf\fR(\fBfloat\fR \fIx\fR, \fBfloat *\fR\fIs\fR, \fBfloat *\fR\fIc\fR); +.fi + +.LP +.nf +\fBvoid\fR \fBsincosl\fR(\fBlong double\fR \fIx\fR, \fBlong double *\fR\fIs\fR, \fBlong double *\fR\fIc\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions compute the sine and cosine of the first argument \fIx\fR, +measured in radians. +.SH RETURN VALUES +.sp +.LP +Upon successful completion, these functions return the sine of \fIx\fR in +*\fIs\fR and cosine of \fIx\fR in *\fIc\fR. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStable +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBcos\fR(3M), \fBsin\fR(3M), \fBmath.h\fR(3HEAD), \fBattributes\fR(5) diff --git a/usr/src/man/man3m/sinh.3m b/usr/src/man/man3m/sinh.3m new file mode 100644 index 0000000000..fb63ffc2c4 --- /dev/null +++ b/usr/src/man/man3m/sinh.3m @@ -0,0 +1,117 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Copyright (c) 1992, X/Open Company Limited. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. 
gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH sinh 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +sinh, sinhf, sinhl \- hyperbolic sine function +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... ] +#include <math.h> + +\fBdouble\fR \fBsinh\fR(\fBdouble\fR \fIx\fR); +.fi + +.LP +.nf +\fBfloat\fR \fBsinhf\fR(\fBfloat\fR \fIx\fR); +.fi + +.LP +.nf +\fBlong double\fR \fBsinhl\fR(\fBlong double\fR \fIx\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions compute the hyperbolic sine of \fIx\fR. +.SH RETURN VALUES +.sp +.LP +Upon successful completion, these functions return the hyperbolic sine of +\fIx\fR. +.sp +.LP +If the result would cause an overflow, a range error occurs and +\(+-\fBHUGE_VAL\fR, \(+-\fBHUGE_VALF\fR, and \(+-\fBHUGE_VALL\fR (with the same +sign as \fIx\fR) is returned as appropriate for the type of the function. +.sp +.LP +If \fIx\fR is NaN, a NaN is returned. +.sp +.LP +If \fIx\fR is \(+-0 or \(+-Inf, \fIx\fR is returned. +.sp +.LP +For exceptional cases, \fBmatherr\fR(3M) tabulates the values to be returned by +\fBsinh()\fR as specified by SVID3 and XPG3. +.SH ERRORS +.sp +.LP +These functions will fail if: +.sp +.ne 2 +.mk +.na +\fBRange Error\fR +.ad +.RS 15n +.rt +The result would cause an overflow. +.sp +If the integer expression (\fBmath_errhandling\fR & \fBMATH_ERREXCEPT\fR) is +non-zero, the overflow floating-point exception is raised. +.sp +The \fBsinh()\fR function sets \fBerrno\fR to \fBERANGE\fR if the result would +cause an overflow. +.RE + +.SH USAGE +.sp +.LP +An application wanting to check for exceptions should call +\fBfeclearexcept\fR(\fBFE_ALL_EXCEPT\fR) before calling these functions. 
On +return, if \fBfetestexcept\fR(\fBFE_INVALID\fR | \fBFE_DIVBYZERO\fR | +\fBFE_OVERFLOW\fR | \fBFE_UNDERFLOW\fR) is non-zero, an exception has been +raised. An application should either examine the return value or check the +floating point exception flags to detect exceptions. +.sp +.LP +An application can also set \fBerrno\fR to 0 before calling \fBsinh()\fR. On +return, if \fBerrno\fR is non-zero, an error has occurred. The \fBsinhf()\fR +and \fBsinhl()\fR functions do not set \fBerrno\fR. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBasinh\fR(3M), \fBcosh\fR(3M), \fBfeclearexcept\fR(3M), +\fBfetestexcept\fR(3M), \fBisnan\fR(3M), \fBmath.h\fR(3HEAD), +\fBmatherr\fR(3M), \fBtanh\fR(3M), \fBattributes\fR(5), \fBstandards\fR(5) diff --git a/usr/src/man/man3m/sqrt.3m b/usr/src/man/man3m/sqrt.3m new file mode 100644 index 0000000000..0bdea2d4e1 --- /dev/null +++ b/usr/src/man/man3m/sqrt.3m @@ -0,0 +1,111 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Copyright (c) 1992, X/Open Company Limited. All Rights Reserved. +.\" Copyright (c) 1985 Regents of the University of California. All rights reserved. The Berkeley software License Agreement specifies the terms and conditions for redistribution. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. 
+.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.TH sqrt 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +sqrt, sqrtf, sqrtl \- square root function +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... ] +#include <math.h> + +\fBdouble\fR \fBsqrt\fR(\fBdouble\fR \fIx\fR); +.fi + +.LP +.nf +\fBfloat\fR \fBsqrtf\fR(\fBfloat\fR \fIx\fR); +.fi + +.LP +.nf +\fBlong double\fR \fBsqrtl\fR(\fBlong double\fR \fIx\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions compute the square root of their argument \fIx\fR. +.SH RETURN VALUES +.sp +.LP +Upon successful completion, these functions return the square root of \fIx\fR. +.sp +.LP +For finite values of \fIx\fR < \(mi0, a domain error occurs and either a NaN +(if supported) or an implementation-defined value is returned. +.sp +.LP +If \fIx\fR is NaN, a NaN is returned. +.sp +.LP +If \fIx\fR is \(+-0 or +Inf, \fIx\fR is returned. +.sp +.LP +If \fIx\fR is \(miInf, a domain error occurs and a NaN is returned. 
+.SH ERRORS +.sp +.LP +These functions will fail if: +.sp +.ne 2 +.mk +.na +\fBDomain Error\fR +.ad +.RS 16n +.rt +The finite value of \fIx\fR is < \(mi0 or \fIx\fR is \(miInf. +.sp +If the integer expression (\fBmath_errhandling\fR & \fBMATH_ERREXCEPT\fR) is +non-zero, the invalid floating-point exception is raised. +.sp +The \fBsqrt()\fR function sets \fBerrno\fR to \fBEDOM\fR if the value of +\fIx\fR is negative. +.RE + +.SH USAGE +.sp +.LP +An application wanting to check for exceptions should call +\fBfeclearexcept\fR(\fBFE_ALL_EXCEPT\fR) before calling these functions. On +return, if \fBfetestexcept\fR(\fBFE_INVALID\fR | \fBFE_DIVBYZERO\fR | +\fBFE_OVERFLOW\fR | \fBFE_UNDERFLOW\fR) is non-zero, an exception has been +raised. An application should either examine the return value or check the +floating point exception flags to detect exceptions. +.sp +.LP +An application can also set \fBerrno\fR to 0 before calling \fBsqrt()\fR. On +return, if \fBerrno\fR is non-zero, an error has occurred. The \fBsqrtf()\fR +and \fBsqrtl()\fR functions do not set \fBerrno\fR. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBfeclearexcept\fR(3M), \fBfetestexcept\fR(3M), \fBisnan\fR(3M), +\fBmath.h\fR(3HEAD), \fBattributes\fR(5), \fBstandards\fR(5) diff --git a/usr/src/man/man3m/tan.3m b/usr/src/man/man3m/tan.3m new file mode 100644 index 0000000000..ec4abcac2a --- /dev/null +++ b/usr/src/man/man3m/tan.3m @@ -0,0 +1,106 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Copyright (c) 1992, X/Open Company Limited. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. 
gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH tan 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +tan, tanf, tanl \- tangent function +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... ] +#include <math.h> + +\fBdouble\fR \fBtan\fR(\fBdouble\fR \fIx\fR); +.fi + +.LP +.nf +\fBfloat\fR \fBtanf\fR(\fBfloat\fR \fIx\fR); +.fi + +.LP +.nf +\fBlong double\fR \fBtanl\fR(\fBlong double\fR \fIx\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions compute the tangent of their argument \fIx\fR, measured in +radians. +.SH RETURN VALUES +.sp +.LP +Upon successful completion, these functions return the tangent of \fIx\fR. +.sp +.LP +If \fIx\fR is NaN, a NaN is returned. +.sp +.LP +If \fIx\fR is \(+-0, \fIx\fR is returned. +.sp +.LP +If \fIx\fR is \(+-Inf, a domain error occurs and a NaN is returned. +.SH ERRORS +.sp +.LP +These functions will fail if: +.sp +.ne 2 +.mk +.na +\fBDomain Error\fR +.ad +.RS 16n +.rt +The value of \fIx\fR is \(+-Inf. +.sp +If the integer expression (\fBmath_errhandling\fR & \fBMATH_ERREXCEPT\fR) is +non-zero, the invalid floating-point exception is raised. +.RE + +.SH USAGE +.sp +.LP +There are no known floating-point representations such that for a normal +argument, \fBtan\fR(\fIx\fR) is either overflow or underflow. +.sp +.LP +An application wanting to check for exceptions should call +\fBfeclearexcept\fR(\fBFE_ALL_EXCEPT\fR) before calling these functions. On +return, if \fBfetestexcept\fR(\fBFE_INVALID\fR | \fBFE_DIVBYZERO\fR | +\fBFE_OVERFLOW\fR | \fBFE_UNDERFLOW\fR) is non-zero, an exception has been +raised. An application should either examine the return value or check the +floating point exception flags to detect exceptions. 
+.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBatan\fR(3M), \fBfeclearexcept\fR(3M), \fBfetestexcept\fR(3M), +\fBisnan\fR(3M), \fBmath.h\fR(3HEAD), \fBattributes\fR(5), \fBstandards\fR(5) diff --git a/usr/src/man/man3m/tanh.3m b/usr/src/man/man3m/tanh.3m new file mode 100644 index 0000000000..d82cb1d437 --- /dev/null +++ b/usr/src/man/man3m/tanh.3m @@ -0,0 +1,79 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Copyright (c) 1992, X/Open Company Limited. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. 
The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH tanh 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +tanh, tanhf, tanhl \- hyperbolic tangent function +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... ] +#include <math.h> + +\fBdouble\fR \fBtanh\fR(\fBdouble\fR \fIx\fR); +.fi + +.LP +.nf +\fBfloat\fR \fBtanhf\fR(\fBfloat\fR \fIx\fR); +.fi + +.LP +.nf +\fBlong double\fR \fBtanhl\fR(\fBlong double\fR \fIx\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions compute the hyperbolic tangent of their argument \fIx\fR. +.SH RETURN VALUES +.sp +.LP +Upon successful completion, these functions return the hyperbolic tangent of +\fIx\fR. +.sp +.LP +If \fIx\fR is NaN, a NaN is returned. +.sp +.LP +If \fIx\fR is \(+-0, \fIx\fR is returned. +.sp +.LP +If \fIx\fR is \(+-Inf, \(+-1 is returned. +.SH ERRORS +.sp +.LP +No errors are defined. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. 
+ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBatanh\fR(3M), \fBisnan\fR(3M), \fBmath.h\fR(3HEAD), \fBtan\fR(3M), +\fBattributes\fR(5), \fBstandards\fR(5) diff --git a/usr/src/man/man3m/tgamma.3m b/usr/src/man/man3m/tgamma.3m new file mode 100644 index 0000000000..86b6ca38f5 --- /dev/null +++ b/usr/src/man/man3m/tgamma.3m @@ -0,0 +1,142 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). 
You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH tgamma 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +tgamma, tgammaf, tgammal \- compute gamma function +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... ] +#include <math.h> + +\fBdouble\fR \fBtgamma\fR(\fBdouble\fR \fIx\fR); +.fi + +.LP +.nf +\fBfloat\fR \fBtgammaf\fR(\fBfloat\fR \fIx\fR); +.fi + +.LP +.nf +\fBlong double\fR \fBtgammal\fR(\fBlong double\fR \fIx\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions compute the \fBgamma()\fR function of \fIx\fR. +.SH RETURN VALUES +.sp +.LP +Upon successful completion, these functions return \fBgamma\fR(\fIx\fR). +.sp +.LP +If \fIx\fR is a negative integer, a domain error occurs and a NaN is returned. +.sp +.LP +If the correct value would cause overflow, a range error occurs and +\fBtgamma()\fR, \fBtgammaf()\fR, and \fBtgammal()\fR return the value of the +macro \(+-\fBHUGE_VAL\fR, \(+-\fBHUGE_VALF\fR, or \(+-\fBHUGE_VALL\fR, +respectively. +.sp +.LP +If \fIx\fR is NaN, a NaN is returned. +.sp +.LP +If \fIx\fR is +Inf, \fIx\fR is returned. +.sp +.LP +If \fIx\fR is \(+-0, a pole error occurs and \fBtgamma()\fR, \fBtgammaf()\fR, and +\fBtgammal()\fR return \(+-\fBHUGE_VAL\fR, \(+-\fBHUGE_VALF\fR, and +\(+-\fBHUGE_VALL\fR, respectively. +.sp +.LP +If \fIx\fR is \(miInf, a domain error occurs and a NaN is returned.
+.SH ERRORS +.sp +.LP +These functions will fail if: +.sp +.ne 2 +.mk +.na +\fBDomain Error\fR +.ad +.RS 16n +.rt +The value of \fIx\fR is a negative integer or \fIx\fR is \(miInf. +.sp +If the integer expression (\fBmath_errhandling\fR & \fBMATH_ERREXCEPT\fR) is +non-zero, then the invalid floating-point exception is raised. +.RE + +.sp +.ne 2 +.mk +.na +\fBPole Error\fR +.ad +.RS 16n +.rt +The value of \fIx\fR is zero. +.sp +If the integer expression (\fBmath_errhandling\fR & \fBMATH_ERREXCEPT\fR) is +non-zero, then the divide-by-zero floating-point exception is raised. +.RE + +.sp +.ne 2 +.mk +.na +\fBRange Error\fR +.ad +.RS 16n +.rt +The value overflows. +.sp +If the integer expression (\fBmath_errhandling\fR & \fBMATH_ERREXCEPT\fR) is +non-zero, then the overflow floating-point exception is raised. +.RE + +.SH USAGE +.sp +.LP +An application wanting to check for exceptions should call +\fBfeclearexcept\fR(\fBFE_ALL_EXCEPT\fR) before calling these functions. On +return, if \fBfetestexcept\fR(\fBFE_INVALID\fR | \fBFE_DIVBYZERO\fR | +\fBFE_OVERFLOW\fR | \fBFE_UNDERFLOW\fR) is non-zero, an exception has been +raised. An application should either examine the return value or check the +floating point exception flags to detect exceptions. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBfeclearexcept\fR(3M), \fBfetestexcept\fR(3M), \fBlgamma\fR(3M), +\fBmath.h\fR(3HEAD), \fBattributes\fR(5), \fBstandards\fR(5) diff --git a/usr/src/man/man3m/trunc.3m b/usr/src/man/man3m/trunc.3m new file mode 100644 index 0000000000..ce03d545e5 --- /dev/null +++ b/usr/src/man/man3m/trunc.3m @@ -0,0 +1,74 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. 
+.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH trunc 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +trunc, truncf, truncl \- round to truncated integer value +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... ] +#include <math.h> + +\fBdouble\fR \fBtrunc\fR(\fBdouble\fR \fIx\fR); +.fi + +.LP +.nf +\fBfloat\fR \fBtruncf\fR(\fBfloat\fR \fIx\fR); +.fi + +.LP +.nf +\fBlong double\fR \fBtruncl\fR(\fBlong double\fR \fIx\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions round their argument to the integer value, in floating format, +nearest to but no larger in magnitude than the argument. +.SH RETURN VALUES +.sp +.LP +Upon successful completion, these functions return the truncated integer value. +.sp +.LP +If \fIx\fR is NaN, a NaN is returned. +.sp +.LP +If \fIx\fR is \(+-0 or \(+-Inf, \fIx\fR is returned. +.SH ERRORS +.sp +.LP +No errors are defined. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityStandard +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBmath.h\fR(3HEAD), \fBattributes\fR(5), \fBstandards\fR(5) diff --git a/usr/src/man/man3m/y0.3m b/usr/src/man/man3m/y0.3m new file mode 100644 index 0000000000..3e8a66ab71 --- /dev/null +++ b/usr/src/man/man3m/y0.3m @@ -0,0 +1,131 @@ +'\" te +.\" Copyright (c) 2001, the Institute of Electrical and Electronics Engineers, Inc. and The Open Group. All Rights Reserved. +.\" Copyright (c) 1992, X/Open Company Limited. All Rights Reserved. +.\" Portions Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. +.\" Sun Microsystems, Inc. 
gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at +.\" http://www.opengroup.org/bookstore/. +.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html. +.\" This notice shall appear on any product containing this material. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH y0 3M "12 Jul 2006" "SunOS 5.11" "Mathematical Library Functions" +.SH NAME +y0, y0f, y0l, y1, y1f, y1l, yn, ynf, ynl \- Bessel functions of the second kind +.SH SYNOPSIS +.LP +.nf +c99 [ \fIflag\fR... ] \fIfile\fR... \fB-lm\fR [ \fIlibrary\fR... ] +#include <math.h> + +\fBdouble\fR \fBy0\fR(\fBdouble\fR \fIx\fR); +.fi + +.LP +.nf +\fBfloat\fR \fBy0f\fR(\fBfloat\fR \fIx\fR); +.fi + +.LP +.nf +\fBlong double\fR \fBy0l\fR(\fBlong double\fR \fIx\fR); +.fi + +.LP +.nf +\fBdouble\fR \fBy1\fR(\fBdouble\fR \fIx\fR); +.fi + +.LP +.nf +\fBfloat\fR \fBy1f\fR(\fBfloat\fR \fIx\fR); +.fi + +.LP +.nf +\fBlong double\fR \fBy1l\fR(\fBlong double\fR \fIx\fR); +.fi + +.LP +.nf +\fBdouble\fR \fByn\fR(\fBint\fR \fIn\fR, \fBdouble\fR \fIx\fR); +.fi + +.LP +.nf +\fBfloat\fR \fBynf\fR(\fBint\fR \fIn\fR, \fBfloat\fR \fIx\fR); +.fi + +.LP +.nf +\fBlong double\fR \fBynl\fR(\fBint\fR \fIn\fR, \fBlong double\fR \fIx\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions compute Bessel functions of \fIx\fR of the second kind of +orders 0, 1 and \fIn\fR, respectively. +.SH RETURN VALUES +.sp +.LP +Upon successful completion, these functions return the relevant Bessel value of +\fIx\fR of the second kind. +.sp +.LP +If \fIx\fR is NaN, a NaN is returned. +.sp +.LP +If \fIx\fR is negative, \(mi\fBHUGE_VAL\fR or NaN is returned. +.sp +.LP +If \fIx\fR is 0.0, \(mi\fBHUGE_VAL\fR is returned. +.sp +.LP +If the correct result would cause overflow, \(mi\fBHUGE_VAL\fR is returned. +.sp +.LP +For exceptional cases, \fBmatherr\fR(3M) tabulates the values to be returned as +specified by SVID3 and XPG3. +.SH ERRORS +.sp +.LP +No errors are returned. +.SH USAGE +.sp +.LP +An application wanting to check for exceptions should call +\fBfeclearexcept\fR(\fBFE_ALL_EXCEPT\fR) before calling these functions. 
On +return, if \fBfetestexcept\fR(\fBFE_INVALID\fR | \fBFE_DIVBYZERO\fR | +\fBFE_OVERFLOW\fR | \fBFE_UNDERFLOW\fR) is non-zero, an exception has been +raised. An application should either examine the return value or check the +floating point exception flags to detect exceptions. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilitySee below. +_ +MT-LevelMT-Safe +.TE + +.sp +.LP +The \fBy0()\fR, \fBy1()\fR, and \fByn()\fR functions are Standard. The +\fBy0f()\fR, \fBy0l()\fR, \fBy1f()\fR, \fBy1l()\fR, \fBynf()\fR, and +\fBynl()\fR functions are Stable. +.SH SEE ALSO +.sp +.LP +\fBisnan\fR(3M), \fBfeclearexcept\fR(3M), \fBfetestexcept\fR(3M), \fBj0\fR(3M), +\fBmath.h\fR(3HEAD), \fBmatherr\fR(3M), \fBattributes\fR(5), \fBstandards\fR(5) diff --git a/usr/src/man/man3mvec/Makefile b/usr/src/man/man3mvec/Makefile new file mode 100644 index 0000000000..98dd68cc66 --- /dev/null +++ b/usr/src/man/man3mvec/Makefile @@ -0,0 +1,42 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet +# at http://www.illumos.org/license/CDDL. 
+# + +# Copyright (c) 2012, Igor Kozhukhov <ikozhukhov@gmail.com> + +include ../../Makefile.master + +MANSECT = 3mvec + +MANFILES = vatan2_.3mvec \ + vatan_.3mvec \ + vcos_.3mvec \ + vcospi_.3mvec \ + vexp_.3mvec \ + vhypot_.3mvec \ + vlog_.3mvec \ + vpow_.3mvec \ + vrhypot_.3mvec \ + vrsqrt_.3mvec \ + vsin_.3mvec \ + vsincos_.3mvec \ + vsincospi_.3mvec \ + vsinpi_.3mvec \ + vsqrt_.3mvec \ + vz_abs_.3mvec \ + vz_exp_.3mvec \ + vz_log_.3mvec \ + vz_pow_.3mvec + +.KEEP_STATE: + +include ../Makefile.man + +install: $(ROOTMANFILES) diff --git a/usr/src/man/man3mvec/vatan2_.3mvec b/usr/src/man/man3mvec/vatan2_.3mvec new file mode 100644 index 0000000000..d809b54992 --- /dev/null +++ b/usr/src/man/man3mvec/vatan2_.3mvec @@ -0,0 +1,103 @@ +'\" te +.\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH vatan2_ 3MVEC "14 Dec 2007" "SunOS 5.11" "Vector Math Library Functions" +.SH NAME +vatan2_, vatan2f_ \- vector atan2 functions +.SH SYNOPSIS +.LP +.nf +cc [ \fIflag\fR\&.\|.\|. ] \fIfile\fR\&.\|.\|. \fB-lmvec\fR [ \fIlibrary\fR\&.\|.\|. 
] + +\fBvoid\fR \fBvatan2_\fR(\fBint *\fR\fIn\fR, \fBdouble * restrict\fR \fIy\fR, \fBint *\fR\fIstridey\fR, + \fBdouble * restrict\fR \fIx\fR, \fBint *\fR\fIstridex\fR, \fBdouble * restrict\fR \fIz\fR, + \fBint *\fR\fIstridez\fR); +.fi + +.LP +.nf +\fBvoid\fR \fBvatan2f_\fR(\fBint *\fR\fIn\fR, \fBfloat * restrict\fR \fIy\fR, \fBint *\fR\fIstridey\fR, + \fBfloat * restrict\fR \fIx\fR, \fBint *\fR\fIstridex\fR, \fBfloat * restrict\fR \fIz\fR, + \fBint *\fR\fIstridez\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions evaluate the function \fBatan2\fR(\fIy\fR, \fIx\fR) for an +entire vector of values at once. The first parameter specifies the number of +values to compute. Subsequent parameters specify the argument and result +vectors. Each vector is described by a pointer to the first element and a +stride, which is the increment between successive elements. +.sp +.LP +Specifically, \fBvatan2_\fR(\fIn,\fR \fIy\fR, \fIsy\fR, \fIx\fR, \fIsx\fR, +\fIz\fR, \fIsz\fR) computes \fIz\fR[\fIi\fR * *\fIsz\fR] = +\fBatan2\fR(\fIy\fR[\fIi\fR * *\fIsy\fR], \fIx\fR[\fIi\fR * *\fIsx\fR]) for +each \fIi\fR = 0, 1, ..., *\fIn\fR - 1. The \fBvatan2f_()\fR function performs +the same computation for single precision data. +.sp +.LP +These functions are not guaranteed to deliver results that are identical to the +results of the \fBatan2\fR(3M) functions given the same arguments. +Non-exceptional results, however, are accurate to within a unit in the last +place. +.SH USAGE +.sp +.LP +The element count *\fIn\fR must be greater than zero. The strides for the +argument and result arrays can be arbitrary integers, but the arrays themselves +must not be the same or overlap. A zero stride effectively collapses an entire +vector into a single element. 
A negative stride causes a vector to be accessed +in descending memory order, but note that the corresponding pointer must still +point to the first element of the vector to be used; if the stride is negative, +this will be the highest-addressed element in memory. This convention differs +from the Level 1 BLAS, in which array parameters always refer to the +lowest-addressed element in memory even when negative increments are used. +.sp +.LP +These functions assume that the default round-to-nearest rounding direction +mode is in effect. On x86, these functions also assume that the default +round-to-64-bit rounding precision mode is in effect. The result of calling a +vector function with a non-default rounding mode in effect is undefined. +.sp +.LP +These functions handle special cases and exceptions in the same way as the +\fBatan2()\fR functions when \fBc99\fR \fBMATH_ERREXCEPT\fR conventions are in +effect. See \fBatan2\fR(3M) for the results for special cases. +.sp +.LP +An application wanting to check for exceptions should call +\fBfeclearexcept\fR(\fBFE_ALL_EXCEPT\fR) before calling these functions. On +return, if \fBfetestexcept\fR(\fBFE_INVALID\fR | \fBFE_DIVBYZERO\fR | +\fBFE_OVERFLOW\fR | \fBFE_UNDERFLOW\fR) is non-zero, an exception has been +raised. The application can then examine the result or argument vectors for +exceptional values. Some vector functions can raise the inexact exception even +if all elements of the argument array are such that the numerical results are +exact. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +.
+ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityCommitted +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBatan2\fR(3M), \fBfeclearexcept\fR(3M), \fBfetestexcept\fR(3M), +\fBattributes\fR(5) diff --git a/usr/src/man/man3mvec/vatan_.3mvec b/usr/src/man/man3mvec/vatan_.3mvec new file mode 100644 index 0000000000..c7eab0a2c0 --- /dev/null +++ b/usr/src/man/man3mvec/vatan_.3mvec @@ -0,0 +1,100 @@ +'\" te +.\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH vatan_ 3MVEC "14 Dec 2007" "SunOS 5.11" "Vector Math Library Functions" +.SH NAME +vatan_, vatanf_ \- vector arctangent functions +.SH SYNOPSIS +.LP +.nf +cc [ \fIflag\fR\&.\|.\|. ] \fIfile\fR\&.\|.\|. \fB-lmvec\fR [ \fIlibrary\fR\&.\|.\|. ] + +\fBvoid\fR \fBvatan_\fR(\fBint *\fR\fIn\fR,\fBdouble * restrict\fR \fIx\fR, \fBint *\fR\fIstridex\fR, + \fBdouble * restrict\fR \fIy\fR, \fBint *\fR\fIstridey\fR); +.fi + +.LP +.nf +\fBvoid\fR \fBvatanf_\fR(\fBint *\fR\fIn\fR, \fBfloat * restrict\fR \fIx\fR, \fBint *\fR\fIstridex\fR, + \fBfloat * restrict\fR \fIy\fR, \fBint *\fR\fIstridey\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions evaluate the function \fBatan\fR(\fIx\fR) for an entire vector +of values at once. 
The first parameter specifies the number of values to +compute. Subsequent parameters specify the argument and result vectors. Each +vector is described by a pointer to the first element and a stride, which is +the increment between successive elements. +.sp +.LP +Specifically, \fBvatan_\fR(\fIn\fR, \fIx\fR, \fIsx\fR, \fIy\fR, \fIsy\fR) +computes \fIy\fR[\fIi\fR * *\fIsy\fR] = \fBatan\fR(\fIx\fR[\fIi\fR * +*\fIsx\fR]) for each \fIi\fR = 0, 1, ..., *\fIn\fR - 1. The \fBvatanf_()\fR +function performs the same computation for single precision data. +.sp +.LP +These functions are not guaranteed to deliver results that are identical to the +results of the \fBatan\fR(3M) functions given the same arguments. +Non-exceptional results, however, are accurate to within a unit in the last +place. +.SH USAGE +.sp +.LP +The element count *\fIn\fR must be greater than zero. The strides for the +argument and result arrays can be arbitrary integers, but the arrays themselves +must not be the same or overlap. A zero stride effectively collapses an entire +vector into a single element. A negative stride causes a vector to be accessed +in descending memory order, but note that the corresponding pointer must still +point to the first element of the vector to be used; if the stride is negative, +this will be the highest-addressed element in memory. This convention differs +from the Level 1 BLAS, in which array parameters always refer to the +lowest-addressed element in memory even when negative increments are used. +.sp +.LP +These functions assume that the default round-to-nearest rounding direction +mode is in effect. On x86, these functions also assume that the default +round-to-64-bit rounding precision mode is in effect. The result of calling a +vector function with a non-default rounding mode in effect is undefined. 
+.sp +.LP +These functions handle special cases and exceptions in the same way as the +\fBatan()\fR functions when \fBc99\fR \fBMATHERREXCEPT\fR conventions are in +effect. See \fBatan\fR(3M) for the results for special cases. +.sp +.LP +An application wanting to check for exceptions should call +\fBfeclearexcept\fR(\fBFE_ALL_EXCEPT\fR) before calling these functions. On +return, if \fBfetestexcept\fR(\fBFE_INVALID\fR | \fBFE_DIVBYZERO\fR | +\fBFE_OVERFLOW\fR | \fBFE_UNDERFLOW\fR) is non-zero, an exception has been +raised. The application can then examine the result or argument vectors for +exceptional values. Some vector functions can raise the inexact exception even +if all elements of the argument array are such that the numerical results are +exact. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityCommitted +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBatan\fR(3M), \fBfeclearexcept\fR(3M), \fBfetestexcept\fR(3M), +\fBattributes\fR(5) diff --git a/usr/src/man/man3mvec/vcos_.3mvec b/usr/src/man/man3mvec/vcos_.3mvec new file mode 100644 index 0000000000..146d707c65 --- /dev/null +++ b/usr/src/man/man3mvec/vcos_.3mvec @@ -0,0 +1,100 @@ +'\" te +.\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH vcos_ 3MVEC "14 Dec 2007" "SunOS 5.11" "Vector Math Library Functions" +.SH NAME +vcos_, vcosf_ \- vector cosine functions +.SH SYNOPSIS +.LP +.nf +cc [ \fIflag\fR\&.\|.\|. ] \fIfile\fR\&.\|.\|. \fB-lmvec\fR [ \fIlibrary\fR\&.\|.\|. ] + +\fBvoid\fR \fBvcos_\fR(\fBint *\fR\fIn\fR, \fBdouble * restrict\fR \fIx\fR, \fBint *\fR\fIstridex\fR, + \fBdouble * restrict\fR \fIy\fR, \fBint *\fR\fIstridey\fR); +.fi + +.LP +.nf +\fBvoid\fR \fBvcosf_\fR(\fBint *\fR\fIn\fR, \fBfloat * restrict\fR \fIx\fR, \fBint *\fR\fIstridex\fR, + \fBfloat * restrict\fR \fIy\fR, \fBint *\fR\fIstridey\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions evaluate the function \fBcos\fR(\fIx\fR) for an entire vector +of values at once. The first parameter specifies the number of values to +compute. Subsequent parameters specify the argument and result vectors. Each +vector is described by a pointer to the first element and a stride, which is +the increment between successive elements. +.sp +.LP +Specifically, \fBvcos_\fR(\fIn\fR, \fIx\fR, \fIsx\fR, \fIy\fR, \fIsy\fR) +computes \fIy\fR[\fIi\fR * *\fIsy\fR] = \fBcos\fR(\fIx\fR[\fIi\fR * *\fIsx\fR]) +for each \fIi\fR = 0, 1, ..., *\fIn\fR - 1. The \fBvcosf_()\fR function +performs the same computation for single precision data. +.sp +.LP +These functions are not guaranteed to deliver results that are identical to the +results of the \fBcos\fR(3M) functions given the same arguments. +Non-exceptional results, however, are accurate to within a unit in the last +place. +.SH USAGE +.sp +.LP +The element count *\fIn\fR must be greater than zero. The strides for the +argument and result arrays can be arbitrary integers, but the arrays themselves +must not be the same or overlap. A zero stride effectively collapses an entire +vector into a single element. 
A negative stride causes a vector to be accessed +in descending memory order, but note that the corresponding pointer must still +point to the first element of the vector to be used; if the stride is negative, +this will be the highest-addressed element in memory. This convention differs +from the Level 1 BLAS, in which array parameters always refer to the +lowest-addressed element in memory even when negative increments are used. +.sp +.LP +These functions assume that the default round-to-nearest rounding direction +mode is in effect. On x86, these functions also assume that the default +round-to-64-bit rounding precision mode is in effect. The result of calling a +vector function with a non-default rounding mode in effect is undefined. +.sp +.LP +These functions handle special cases and exceptions in the same way as the +\fBcos()\fR functions when \fBc99\fR \fBMATHERREXCEPT\fR conventions are in +effect. See \fBcos\fR(3M) for the results for special cases. +.sp +.LP +An application wanting to check for exceptions should call +\fBfeclearexcept\fR(\fBFE_ALL_EXCEPT\fR) before calling these functions. On +return, if \fBfetestexcept\fR(\fBFE_INVALID\fR | \fBFE_DIVBYZERO\fR | +\fBFE_OVERFLOW\fR | \fBFE_UNDERFLOW\fR) is non-zero, an exception has been +raised. The application can then examine the result or argument vectors for +exceptional values. Some vector functions can raise the inexact exception even +if all elements of the argument array are such that the numerical results are +exact. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. 
+ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityCommitted +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBcos\fR(3M), \fBfeclearexcept\fR(3M), \fBfetestexcept\fR(3M), +\fBattributes\fR(5) diff --git a/usr/src/man/man3mvec/vcospi_.3mvec b/usr/src/man/man3mvec/vcospi_.3mvec new file mode 100644 index 0000000000..8334301d65 --- /dev/null +++ b/usr/src/man/man3mvec/vcospi_.3mvec @@ -0,0 +1,111 @@ +'\" te +.\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH vcospi_ 3MVEC "14 Dec 2007" "SunOS 5.11" "Vector Math Library Functions" +.SH NAME +vcospi_, vcospif_ \- vector cospi functions +.SH SYNOPSIS +.LP +.nf +cc [ \fIflag\fR\&.\|.\|. ] \fIfile\fR\&.\|.\|. \fB-lmvec\fR [ \fIlibrary\fR\&.\|.\|. 
] + +\fBvoid\fR \fBvcospi_\fR(\fBint *\fR\fIn\fR, \fBdouble * restrict\fR \fIx\fR, \fBint *\fR\fIstridex\fR, + \fBdouble * restrict\fR \fIy\fR, \fBint *\fR\fIstridey\fR); +.fi + +.LP +.nf +\fBvoid\fR \fBvcospif_\fR(\fBint *\fR\fIn\fR, \fBfloat * restrict\fR \fIx\fR, \fBint *\fR\fIstridex\fR, + \fBfloat * restrict\fR \fIy\fR, \fBint *\fR\fIstridey\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions evaluate the function \fBcospi\fR(\fIx\fR), defined by +\fBcospi\fR(\fIx\fR) = \fBcos\fR(\c +.if n pi\c +.if t \(*p +\c + * \fIx\fR), for an entire vector of values at once. The first parameter +specifies the number of values to compute. Subsequent parameters specify the +argument and result vectors. Each vector is described by a pointer to the first +element and a stride, which is the increment between successive elements. +.sp +.LP +Specifically, \fBvcospi_\fR(\fIn\fR, \fIx\fR, \fIsx\fR, \fIy\fR, \fIsy\fR) +computes \fIy\fR[\fIi\fR * *\fIsy\fR] = \fBcospi\fR(\fIx\fR[\fIi\fR * +*\fIsx\fR]) for each \fIi\fR = 0, 1, ..., *\fIn\fR - 1. The \fBvcospif_()\fR +function performs the same computation for single precision data. +.sp +.LP +Non-exceptional results are accurate to within a unit in the last place. +.SH USAGE +.sp +.LP +The element count *\fIn\fR must be greater than zero. The strides for the +argument and result arrays can be arbitrary integers, but the arrays themselves +must not be the same or overlap. A zero stride effectively collapses an entire +vector into a single element. A negative stride causes a vector to be accessed +in descending memory order, but note that the corresponding pointer must still +point to the first element of the vector to be used; if the stride is negative, +this will be the highest-addressed element in memory. This convention differs +from the Level 1 BLAS, in which array parameters always refer to the +lowest-addressed element in memory even when negative increments are used.
+.sp +.LP +These functions assume that the default round-to-nearest rounding direction +mode is in effect. On x86, these functions also assume that the default +round-to-64-bit rounding precision mode is in effect. The result of calling a +vector function with a non-default rounding mode in effect is undefined. +.sp +.LP +These functions handle special cases and exceptions in the spirit of IEEE 754. +In particular, +.RS +4 +.TP +.ie t \(bu +.el o +\fBcospi\fR(NaN) is NaN, +.RE +.RS +4 +.TP +.ie t \(bu +.el o +\fBcospi\fR(\(+-Inf) is NaN, and an invalid operation exception is raised. +.RE +.sp +.LP +An application wanting to check for exceptions should call +\fBfeclearexcept\fR(\fBFE_ALL_EXCEPT\fR) before calling these functions. On +return, if \fBfetestexcept\fR(\fBFE_INVALID\fR | \fBFE_DIVBYZERO\fR | +\fBFE_OVERFLOW\fR | \fBFE_UNDERFLOW\fR) is non-zero, an exception has been +raised. The application can then examine the result or argument vectors for +exceptional values. Some vector functions can raise the inexact exception even +if all elements of the argument array are such that the numerical results are +exact. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityCommitted +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBfeclearexcept\fR(3M), \fBfetestexcept\fR(3M), \fBattributes\fR(5) diff --git a/usr/src/man/man3mvec/vexp_.3mvec b/usr/src/man/man3mvec/vexp_.3mvec new file mode 100644 index 0000000000..146c25c259 --- /dev/null +++ b/usr/src/man/man3mvec/vexp_.3mvec @@ -0,0 +1,102 @@ +'\" te +.\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. 
+.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH vexp_ 3MVEC "14 Dec 2007" "SunOS 5.11" "Vector Math Library Functions" +.SH NAME +vexp_, vexpf_ \- vector exponential functions +.SH SYNOPSIS +.LP +.nf +cc [ \fIflag\fR\&.\|.\|. ] \fIfile\fR\&.\|.\|. \fB-lmvec\fR [ \fIlibrary\fR\&.\|.\|. ] + +\fBvoid\fR \fBvexp_\fR(\fBint *\fR\fIn\fR, \fBdouble * restrict\fR \fIx\fR, \fBint *\fR\fIstridex\fR, + \fBdouble * restrict\fR \fIy\fR, \fBint *\fR\fIstridey\fR); +.fi + +.LP +.nf +\fBvoid\fR \fBvexpf_\fR(\fBint *\fR\fIn\fR, \fBfloat * restrict\fR \fIx\fR, \fBint *\fR\fIstridex\fR, + \fBfloat * restrict\fR \fIy\fR, \fBint *\fR\fIstridey\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions evaluate the function \fBexp\fR(\fIx\fR) for an entire vector +of values at once. The first parameter specifies the number of values to +compute. Subsequent parameters specify the argument and result vectors. Each +vector is described by a pointer to the first element and a stride, which is +the increment between successive elements. +.sp +.LP +Specifically, \fBvexp_\fR(\fIn\fR, \fIx\fR, \fIsx\fR, \fIy\fR, \fIsy\fR) +computes \fIy\fR[\fIi\fR * *\fIsy\fR] = \fBexp\fR(\fIx\fR[\fIi\fR * *\fIsx\fR]) +for each \fIi\fR = 0, 1, ..., *\fIn\fR - 1. The \fBvexpf_()\fR function +performs the same computation for single precision data. +.sp +.LP +These functions are not guaranteed to deliver results that are identical to the +results of the \fBexp\fR(3M) functions given the same arguments. 
+Non-exceptional results, however, are accurate to within a unit in the last +place. +.SH USAGE +.sp +.LP +The element count *\fIn\fR must be greater than zero. The strides for the +argument and result arrays can be arbitrary integers, but the arrays themselves +must not be the same or overlap. A zero stride effectively collapses an entire +vector into a single element. A negative stride causes a vector to be accessed +in descending memory order, but note that the corresponding pointer must still +point to the first element of the vector to be used; if the stride is negative, +this will be the highest-addressed element in memory. This convention differs +from the Level 1 BLAS, in which array parameters always refer to the +lowest-addressed element in memory even when negative increments are used. +.sp +.LP +These functions assume that the default round-to-nearest rounding direction +mode is in effect. On x86, these functions also assume that the default +round-to-64-bit rounding precision mode is in effect. The result of calling a +vector function with a non-default rounding mode in effect is undefined. +.sp +.LP +On SPARC, the \fBvexpf_()\fR function delivers +0 rather than a subnormal +result for arguments in the range -103.2789 <= \fIx\fR <= -87.3365. Otherwise, +these functions handle special cases and exceptions in the same way as the +\fBexp()\fR functions when \fBc99\fR \fBMATHERREXCEPT\fR conventions are in +effect. See \fBexp\fR(3M) for the results for special cases. +.sp +.LP +An application wanting to check for exceptions should call +\fBfeclearexcept\fR(\fBFE_ALL_EXCEPT\fR) before calling these functions. On +return, if \fBfetestexcept\fR(\fBFE_INVALID\fR | \fBFE_DIVBYZERO\fR | +\fBFE_OVERFLOW\fR | \fBFE_UNDERFLOW\fR) is non-zero, an exception has been +raised. The application can then examine the result or argument vectors for +exceptional values. 
Some vector functions can raise the inexact exception even +if all elements of the argument array are such that the numerical results are +exact. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityCommitted +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBexp\fR(3M), \fBfeclearexcept\fR(3M), \fBfetestexcept\fR(3M), +\fBattributes\fR(5) diff --git a/usr/src/man/man3mvec/vhypot_.3mvec b/usr/src/man/man3mvec/vhypot_.3mvec new file mode 100644 index 0000000000..9916c46746 --- /dev/null +++ b/usr/src/man/man3mvec/vhypot_.3mvec @@ -0,0 +1,103 @@ +'\" te +.\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH vhypot_ 3MVEC "14 Dec 2007" "SunOS 5.11" "Vector Math Library Functions" +.SH NAME +vhypot_, vhypotf_ \- vector hypotenuse functions +.SH SYNOPSIS +.LP +.nf +cc [ \fIflag\fR\&.\|.\|. ] \fIfile\fR\&.\|.\|. \fB-lmvec\fR [ \fIlibrary\fR\&.\|.\|. 
] + +\fBvoid\fR \fBvhypot_\fR(\fBint *\fR\fIn\fR, \fBdouble * restrict\fR \fIx\fR, \fBint *\fR\fIstridex\fR, + \fBdouble * restrict\fR \fIy\fR, \fBint *\fR\fIstridey\fR, \fBdouble * restrict\fR \fIz\fR, + \fBint *\fR\fIstridez\fR); +.fi + +.LP +.nf +\fBvoid\fR \fBvhypotf_\fR(\fBint *\fR\fIn\fR, \fBfloat * restrict\fR \fIx\fR, \fBint *\fR\fIstridex\fR, + \fBfloat * restrict\fR \fIy\fR, \fBint *\fR\fIstridey\fR, \fBfloat * restrict\fR \fIz\fR, + \fBint *\fR\fIstridez\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions evaluate the function \fBhypot\fR(\fIx\fR, \fIy\fR) for an +entire vector of values at once. The first parameter specifies the number of +values to compute. Subsequent parameters specify the argument and result +vectors. Each vector is described by a pointer to the first element and a +stride, which is the increment between successive elements. +.sp +.LP +Specifically, \fBvhypot_\fR(\fIn\fR, \fIx\fR, \fIsx\fR, \fIy\fR, \fIsy\fR, +\fIz\fR, \fIsz\fR) computes \fIz\fR[\fIi\fR * *\fIsz\fR] = +\fBhypot\fR(\fIx\fR[\fIi\fR * *\fIsx\fR], \fIy\fR[\fIi\fR * *\fIsy\fR]) for +each \fIi\fR = 0, 1, ..., *\fIn\fR - 1. The \fBvhypotf_()\fR function performs +the same computation for single precision data. +.sp +.LP +These functions are not guaranteed to deliver results that are identical to the +results of the \fBhypot\fR(3M) functions given the same arguments. +Non-exceptional results, however, are accurate to within a unit in the last +place. +.SH USAGE +.sp +.LP +The element count *\fIn\fR must be greater than zero. The strides for the +argument and result arrays can be arbitrary integers, but the arrays themselves +must not be the same or overlap. A zero stride effectively collapses an entire +vector into a single element. 
A negative stride causes a vector to be accessed +in descending memory order, but note that the corresponding pointer must still +point to the first element of the vector to be used; if the stride is negative, +this will be the highest-addressed element in memory. This convention differs +from the Level 1 BLAS, in which array parameters always refer to the +lowest-addressed element in memory even when negative increments are used. +.sp +.LP +These functions assume that the default round-to-nearest rounding direction +mode is in effect. On x86, these functions also assume that the default +round-to-64-bit rounding precision mode is in effect. The result of calling a +vector function with a non-default rounding mode in effect is undefined. +.sp +.LP +These functions handle special cases and exceptions in the same way as the +\fBhypot()\fR functions when \fBc99\fR \fBMATHERREXCEPT\fR conventions are in +effect. See \fBhypot\fR(3M) for the results for special cases. +.sp +.LP +An application wanting to check for exceptions should call +\fBfeclearexcept\fR(\fBFE_ALL_EXCEPT\fR) before calling these functions. On +return, if \fBfetestexcept\fR(\fBFE_INVALID\fR | \fBFE_DIVBYZERO\fR | +\fBFE_OVERFLOW\fR | \fBFE_UNDERFLOW\fR) is non-zero, an exception has been +raised. The application can then examine the result or argument vectors for +exceptional values. Some vector functions can raise the inexact exception even +if all elements of the argument array are such that the numerical results are +exact. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. 
+ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityCommitted +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBhypot\fR(3M), \fBfeclearexcept\fR(3M), \fBfetestexcept\fR(3M), +\fBattributes\fR(5) diff --git a/usr/src/man/man3mvec/vlog_.3mvec b/usr/src/man/man3mvec/vlog_.3mvec new file mode 100644 index 0000000000..84522e98e0 --- /dev/null +++ b/usr/src/man/man3mvec/vlog_.3mvec @@ -0,0 +1,100 @@ +'\" te +.\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH vlog_ 3MVEC "14 Dec 2007" "SunOS 5.11" "Vector Math Library Functions" +.SH NAME +vlog_, vlogf_ \- vector logarithm functions +.SH SYNOPSIS +.LP +.nf +cc [ \fIflag\fR\&.\|.\|. ] \fIfile\fR\&.\|.\|. \fB-lmvec\fR [ \fIlibrary\fR\&.\|.\|. ] + +\fBvoid\fR \fBvlog_\fR(\fBint *\fR\fIn\fR, \fBdouble * restrict\fR \fIx\fR, \fBint *\fR\fIstridex\fR, + \fBdouble * restrict\fR \fIy\fR, \fBint *\fR\fIstridey\fR); +.fi + +.LP +.nf +\fBvoid\fR \fBvlogf_\fR(\fBint *\fR\fIn\fR, \fBfloat * restrict\fR \fIx\fR, \fBint *\fR\fIstridex\fR, + \fBfloat * restrict\fR \fIy\fR, \fBint *\fR\fIstridey\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions evaluate the function \fBlog\fR(\fIx\fR) for an entire vector +of values at once. 
The first parameter specifies the number of values to +compute. Subsequent parameters specify the argument and result vectors. Each +vector is described by a pointer to the first element and a stride, which is +the increment between successive elements. +.sp +.LP +Specifically, \fBvlog_\fR(\fIn\fR, \fIx\fR, \fIsx\fR, \fIy\fR, \fIsy\fR) +computes \fIy\fR[\fIi\fR * *\fIsy\fR] = \fBlog\fR(\fIx\fR[\fIi\fR * *\fIsx\fR]) +for each \fIi\fR = 0, 1, ..., *\fIn\fR - 1. The \fBvlogf_()\fR function +performs the same computation for single precision data. +.sp +.LP +These functions are not guaranteed to deliver results that are identical to the +results of the \fBlog\fR(3M) functions given the same arguments. +Non-exceptional results, however, are accurate to within a unit in the last +place. +.SH USAGE +.sp +.LP +The element count *\fIn\fR must be greater than zero. The strides for the +argument and result arrays can be arbitrary integers, but the arrays themselves +must not be the same or overlap. A zero stride effectively collapses an entire +vector into a single element. A negative stride causes a vector to be accessed +in descending memory order, but note that the corresponding pointer must still +point to the first element of the vector to be used; if the stride is negative, +this will be the highest-addressed element in memory. This convention differs +from the Level 1 BLAS, in which array parameters always refer to the +lowest-addressed element in memory even when negative increments are used. +.sp +.LP +These functions assume that the default round-to-nearest rounding direction +mode is in effect. On x86, these functions also assume that the default +round-to-64-bit rounding precision mode is in effect. The result of calling a +vector function with a non-default rounding mode in effect is undefined. +.sp +.LP +These functions handle special cases and exceptions in the same way as the +\fBlog()\fR functions when \fBc99\fR \fBMATHERREXCEPT\fR conventions are in +effect. 
See \fBlog\fR(3M) for the results for special cases. +.sp +.LP +An application wanting to check for exceptions should call +\fBfeclearexcept\fR(\fBFE_ALL_EXCEPT\fR) before calling these functions. On +return, if \fBfetestexcept\fR(\fBFE_INVALID\fR | \fBFE_DIVBYZERO\fR | +\fBFE_OVERFLOW\fR | \fBFE_UNDERFLOW\fR) is non-zero, an exception has been +raised. The application can then examine the result or argument vectors for +exceptional values. Some vector functions can raise the inexact exception even +if all elements of the argument array are such that the numerical results are +exact. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityCommitted +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBlog\fR(3M), \fBfeclearexcept\fR(3M), \fBfetestexcept\fR(3M), +\fBattributes\fR(5) diff --git a/usr/src/man/man3mvec/vpow_.3mvec b/usr/src/man/man3mvec/vpow_.3mvec new file mode 100644 index 0000000000..f7fec2ea6e --- /dev/null +++ b/usr/src/man/man3mvec/vpow_.3mvec @@ -0,0 +1,105 @@ +'\" te +.\" Copyright (c) 2009, Sun Microsystems, Inc. All Rights Reserved. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH vpow_ 3MVEC "16 Jan 2009" "SunOS 5.11" "Vector Math Library Functions" +.SH NAME +vpow_, vpowf_ \- vector power functions +.SH SYNOPSIS +.LP +.nf +cc [ \fIflag\fR\&.\|.\|. ] \fIfile\fR\&.\|.\|. \fB-lmvec\fR [ \fIlibrary\fR\&.\|.\|. ] + +\fBvoid\fR \fBvpow_\fR(\fBint *\fR\fIn\fR, \fBdouble * restrict\fR \fIx\fR, \fBint *\fR\fIstridex\fR, + \fBdouble * restrict\fR \fIy\fR, \fBint *\fR\fIstridey\fR, \fBdouble * restrict\fR \fIz\fR, + \fBint *\fR\fIstridez\fR); +.fi + +.LP +.nf +\fBvoid\fR \fBvpowf_\fR(\fBint *\fR\fIn\fR, \fBfloat * restrict\fR \fIx\fR, \fBint *\fR\fIstridex\fR, + \fBfloat * restrict\fR \fIy\fR, \fBint *\fR\fIstridey\fR, \fBfloat * restrict\fR \fIz\fR, + \fBint *\fR\fIstridez\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions evaluate the function \fBpow\fR(\fIx\fR, \fIy\fR) for an entire +vector of values at once. The first parameter specifies the number of values to +compute. Subsequent parameters specify the argument and result vectors. Each +vector is described by a pointer to the first element and a stride, which is +the increment between successive elements. +.sp +.LP +Specifically, \fBvpow_\fR(\fIn\fR, \fIx\fR, \fIsx\fR, \fIy\fR, \fIsy\fR, +\fIz\fR, \fIsz\fR) computes \fIz\fR[\fIi\fR * *\fIsz\fR] = +\fBpow\fR(\fIx\fR[\fIi\fR * *\fIsx\fR], \fIy\fR[\fIi\fR * *\fIsy\fR]) for each +\fIi\fR = 0, 1, ..., *\fIn\fR - 1. The \fBvpowf_()\fR function performs the +same computation for single precision data. +.sp +.LP +These functions are not guaranteed to deliver results that are identical to the +results of the \fBpow\fR(3M) functions given the same arguments. +Non-exceptional results, however, are accurate to within a unit in the last +place. +.SH USAGE +.sp +.LP +The element count *\fIn\fR must be greater than zero. 
The strides for the +argument and result arrays can be arbitrary integers, but the arrays themselves +must not be the same or overlap. A zero stride effectively collapses an entire +vector into a single element. A negative stride causes a vector to be accessed +in descending memory order, but note that the corresponding pointer must still +point to the first element of the vector to be used; if the stride is negative, +this will be the highest-addressed element in memory. This convention differs +from the Level 1 BLAS, in which array parameters always refer to the +lowest-addressed element in memory even when negative increments are used. +.sp +.LP +These functions assume that the default round-to-nearest rounding direction +mode is in effect. On x86, these functions also assume that the default +round-to-64-bit rounding precision mode is in effect. The result of calling a +vector function with a non-default rounding mode in effect is undefined. +.sp +.LP +The results of these functions for special cases and exceptions match those of +the \fBpow()\fR functions when the latter are used in a program compiled with +the \fBcc\fR compiler driver (that is, not SUSv3-conforming) and the expression +(\fBmath_errhandling\fR & \fBMATH_ERREXCEPT\fR) is non-zero. These functions do +not set \fBerrno\fR. See \fBpow\fR(3M) for the results for special cases. +.sp +.LP +An application wanting to check for exceptions should call +\fBfeclearexcept\fR(\fBFE_ALL_EXCEPT\fR) before calling these functions. On +return, if \fBfetestexcept\fR(\fBFE_INVALID\fR | \fBFE_DIVBYZERO\fR | +\fBFE_OVERFLOW\fR | \fBFE_UNDERFLOW\fR) is non-zero, an exception has been +raised. The application can then examine the result or argument vectors for +exceptional values. Some vector functions can raise the inexact exception even +if all elements of the argument array are such that the numerical results are +exact.
+.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityCommitted +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBpow\fR(3M), \fBfeclearexcept\fR(3M), \fBfetestexcept\fR(3M), +\fBattributes\fR(5) diff --git a/usr/src/man/man3mvec/vrhypot_.3mvec b/usr/src/man/man3mvec/vrhypot_.3mvec new file mode 100644 index 0000000000..b0f7a9f84a --- /dev/null +++ b/usr/src/man/man3mvec/vrhypot_.3mvec @@ -0,0 +1,124 @@ +'\" te +.\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH vrhypot_ 3MVEC "14 Dec 2007" "SunOS 5.11" "Vector Math Library Functions" +.SH NAME +vrhypot_, vrhypotf_ \- vector reciprocal hypotenuse functions +.SH SYNOPSIS +.LP +.nf +cc [ \fIflag\fR\&.\|.\|. ] \fIfile\fR\&.\|.\|. \fB-lmvec\fR [ \fIlibrary\fR\&.\|.\|. 
] + +\fBvoid\fR \fBvrhypot_\fR(\fBint *\fR\fIn\fR, \fBdouble * restrict\fR \fIx\fR, \fBint *\fR\fIstridex\fR, + \fBdouble * restrict\fR \fIy\fR, \fBint *\fR\fIstridey\fR, \fBdouble * restrict\fR \fIz\fR, + \fBint *\fR\fIstridez\fR); +.fi + +.LP +.nf +\fBvoid\fR \fBvrhypotf_\fR(\fBint *\fR\fIn\fR, \fBfloat * restrict\fR \fIx\fR, \fBint *\fR\fIstridex\fR, + \fBfloat * restrict\fR \fIy\fR, \fBint *\fR\fIstridey\fR, \fBfloat * restrict\fR \fIz\fR, + \fBint *\fR\fIstridez\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions evaluate the function \fBrhypot\fR(\fIx\fR, \fIy\fR), defined +by \fBrhypot\fR(\fIx\fR, \fIy\fR) = 1 / \fBhypot\fR(\fIx\fR, \fIy\fR), for an +entire vector of values at once. The first parameter specifies the number of +values to compute. Subsequent parameters specify the argument and result +vectors. Each vector is described by a pointer to the first element and a +stride, which is the increment between successive elements. +.sp +.LP +Specifically, \fBvrhypot_\fR(\fIn\fR, \fIx\fR, \fIsx\fR, \fIy\fR, \fIsy\fR, +\fIz\fR, \fIsz\fR) computes \fIz\fR[\fIi\fR * *\fIsz\fR] = +\fBrhypot\fR(\fIx\fR[\fIi\fR * *\fIsx\fR], \fIy\fR[\fIi\fR * *\fIsy\fR]) for +each \fIi\fR = 0, 1, ..., *\fIn\fR - 1. The \fBvrhypotf_()\fR function +performs the same computation for single precision data. +.sp +.LP +These functions are not guaranteed to deliver results that are identical to the +results of evaluating 1.0 / \fBhypot\fR(\fIx\fR, \fIy\fR) given the same +arguments. Non-exceptional results, however, are accurate to within a unit in +the last place. +.SH USAGE +.sp +.LP +The element count *\fIn\fR must be greater than zero. The strides for the +argument and result arrays can be arbitrary integers, but the arrays themselves +must not be the same or overlap. A zero stride effectively collapses an entire +vector into a single element. 
A negative stride causes a vector to be accessed +in descending memory order, but note that the corresponding pointer must still +point to the first element of the vector to be used; if the stride is negative, +this will be the highest-addressed element in memory. This convention differs +from the Level 1 BLAS, in which array parameters always refer to the +lowest-addressed element in memory even when negative increments are used. +.sp +.LP +These functions assume that the default round-to-nearest rounding direction +mode is in effect. On x86, these functions also assume that the default +round-to-64-bit rounding precision mode is in effect. The result of calling a +vector function with a non-default rounding mode in effect is undefined. +.sp +.LP +These functions handle special cases and exceptions in the spirit of IEEE 754. +In particular, +.RS +4 +.TP +.ie t \(bu +.el o +if \fIx\fR or \fIy\fR is \(+-Inf, \fBrhypot\fR(\fIx\fR, \fIy\fR) is +0, even if the +other of \fIx\fR or \fIy\fR is NaN, +.RE +.RS +4 +.TP +.ie t \(bu +.el o +if \fIx\fR or \fIy\fR is NaN and neither is infinite, \fBrhypot\fR(\fIx\fR, \fIy\fR) +is NaN, +.RE +.RS +4 +.TP +.ie t \(bu +.el o +if \fIx\fR and \fIy\fR are both zero, \fBrhypot\fR(\fIx\fR, \fIy\fR) is +Inf, and +a division-by-zero exception is raised. +.RE +.sp +.LP +An application wanting to check for exceptions should call +\fBfeclearexcept\fR(\fBFE_ALL_EXCEPT\fR) before calling these functions. On +return, if \fBfetestexcept\fR(\fBFE_INVALID\fR | \fBFE_DIVBYZERO\fR | +\fBFE_OVERFLOW\fR | \fBFE_UNDERFLOW\fR) is non-zero, an exception has been +raised. The application can then examine the result or argument vectors for +exceptional values. Some vector functions can raise the inexact exception even +if all elements of the argument array are such that the numerical results are +exact. 
+.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityCommitted +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBhypot\fR(3M), \fBfeclearexcept\fR(3M), \fBfetestexcept\fR(3M), +\fBattributes\fR(5) diff --git a/usr/src/man/man3mvec/vrsqrt_.3mvec b/usr/src/man/man3mvec/vrsqrt_.3mvec new file mode 100644 index 0000000000..b8c6e7a7e3 --- /dev/null +++ b/usr/src/man/man3mvec/vrsqrt_.3mvec @@ -0,0 +1,125 @@ +'\" te +.\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH vrsqrt_ 3MVEC "14 Dec 2007" "SunOS 5.11" "Vector Math Library Functions" +.SH NAME +vrsqrt_, vrsqrtf_ \- vector reciprocal square root functions +.SH SYNOPSIS +.LP +.nf +cc [ \fIflag\fR\&.\|.\|. ] \fIfile\fR\&.\|.\|. \fB-lmvec\fR [ \fIlibrary\fR\&.\|.\|. 
] + +\fBvoid\fR \fBvrsqrt_\fR(\fBint *\fR\fIn\fR, \fBdouble * restrict\fR \fIx\fR, \fBint *\fR\fIstridex\fR, + \fBdouble * restrict\fR \fIy\fR, \fBint *\fR\fIstridey\fR); +.fi + +.LP +.nf +\fBvoid\fR \fBvrsqrtf_\fR(\fBint *\fR\fIn\fR, \fBfloat * restrict\fR \fIx\fR, \fBint *\fR\fIstridex\fR, + \fBfloat * restrict\fR \fIy\fR, \fBint *\fR\fIstridey\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions evaluate the function \fBrsqrt\fR(\fIx\fR), defined by +\fBrsqrt\fR(\fIx\fR) = 1 / \fBsqrt\fR(\fIx\fR), for an entire vector of values +at once. The first parameter specifies the number of values to compute. +Subsequent parameters specify the argument and result vectors. Each vector is +described by a pointer to the first element and a stride, which is the +increment between successive elements. +.sp +.LP +Specifically, \fBvrsqrt_\fR(\fIn\fR, \fIx\fR, \fIsx\fR, \fIy\fR, \fIsy\fR) +computes \fIy\fR[\fIi\fR * *\fIsy\fR] = \fBrsqrt\fR(\fIx\fR[\fIi\fR * +*\fIsx\fR]) for each \fIi\fR = 0, 1, ..., *\fIn\fR - 1. The \fBvrsqrtf_()\fR +function performs the same computation for single precision data. +.sp +.LP +These functions are not guaranteed to deliver results that are identical to the +results of evaluating 1.0 / \fBsqrt\fR(\fIx\fR) given the same arguments. +Non-exceptional results, however, are accurate to within a unit in the last +place. +.SH USAGE +.sp +.LP +The element count *\fIn\fR must be greater than zero. The strides for the +argument and result arrays can be arbitrary integers, but the arrays themselves +must not be the same or overlap. A zero stride effectively collapses an entire +vector into a single element. A negative stride causes a vector to be accessed +in descending memory order, but note that the corresponding pointer must still +point to the first element of the vector to be used; if the stride is negative, +this will be the highest-addressed element in memory. 
This convention differs +from the Level 1 BLAS, in which array parameters always refer to the +lowest-addressed element in memory even when negative increments are used. +.sp +.LP +These functions assume that the default round-to-nearest rounding direction +mode is in effect. On x86, these functions also assume that the default +round-to-64-bit rounding precision mode is in effect. The result of calling a +vector function with a non-default rounding mode in effect is undefined. +.sp +.LP +These functions handle special cases and exceptions in the spirit of IEEE 754. +In particular, +.RS +4 +.TP +.ie t \(bu +.el o +if \fIx\fR < 0, \fBrsqrt\fR(\fIx\fR) is NaN, and an invalid operation exception +is raised, +.RE +.RS +4 +.TP +.ie t \(bu +.el o +\fBrsqrt\fR(NaN) is NaN, +.RE +.RS +4 +.TP +.ie t \(bu +.el o +\fBrsqrt\fR(+Inf) is +0, +.RE +.RS +4 +.TP +.ie t \(bu +.el o +\fBrsqrt\fR(\(+-0) is \(+-Inf, and a division-by-zero exception is raised. +.RE +.sp +.LP +An application wanting to check for exceptions should call +\fBfeclearexcept\fR(\fBFE_ALL_EXCEPT\fR) before calling these functions. On +return, if \fBfetestexcept\fR(\fBFE_INVALID\fR | \fBFE_DIVBYZERO\fR | +\fBFE_OVERFLOW\fR | \fBFE_UNDERFLOW\fR) is non-zero, an exception has been +raised. The application can then examine the result or argument vectors for +exceptional values. Some vector functions can raise the inexact exception even +if all elements of the argument array are such that the numerical results are +exact. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. 
+ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityCommitted +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBsqrt\fR(3M), \fBfeclearexcept\fR(3M), \fBfetestexcept\fR(3M), +\fBattributes\fR(5) diff --git a/usr/src/man/man3mvec/vsin_.3mvec b/usr/src/man/man3mvec/vsin_.3mvec new file mode 100644 index 0000000000..f141e7e3e7 --- /dev/null +++ b/usr/src/man/man3mvec/vsin_.3mvec @@ -0,0 +1,100 @@ +'\" te +.\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH vsin_ 3MVEC "14 Dec 2007" "SunOS 5.11" "Vector Math Library Functions" +.SH NAME +vsin_, vsinf_ \- vector sine functions +.SH SYNOPSIS +.LP +.nf +cc [ \fIflag\fR\&.\|.\|. ] \fIfile\fR\&.\|.\|. \fB-lmvec\fR [ \fIlibrary\fR\&.\|.\|. ] + +\fBvoid\fR \fBvsin_\fR(\fBint *\fR\fIn\fR, \fBdouble * restrict\fR \fIx\fR, \fBint *\fR\fIstridex\fR, + \fBdouble * restrict\fR \fIy\fR, \fBint *\fR\fIstridey\fR); +.fi + +.LP +.nf +\fBvoid\fR \fBvsinf_\fR(\fBint *\fR\fIn\fR, \fBfloat * restrict\fR \fIx\fR, \fBint *\fR\fIstridex\fR, + \fBfloat * restrict\fR \fIy\fR, \fBint *\fR\fIstridey\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions evaluate the function \fBsin\fR(\fIx\fR) for an entire vector +of values at once. 
The first parameter specifies the number of values to +compute. Subsequent parameters specify the argument and result vectors. Each +vector is described by a pointer to the first element and a stride, which is +the increment between successive elements. +.sp +.LP +Specifically, \fBvsin_\fR(\fIn\fR, \fIx\fR, \fIsx\fR, \fIy\fR, \fIsy\fR) +computes \fIy\fR[\fIi\fR * *\fIsy\fR] = \fBsin\fR(\fIx\fR[\fIi\fR * *\fIsx\fR]) +for each \fIi\fR = 0, 1, ..., *\fIn\fR - 1. The \fBvsinf_()\fR function +performs the same computation for single precision data. +.sp +.LP +These functions are not guaranteed to deliver results that are identical to the +results of the \fBsin\fR(3M) functions given the same arguments. +Non-exceptional results, however, are accurate to within a unit in the last +place. +.SH USAGE +.sp +.LP +The element count *\fIn\fR must be greater than zero. The strides for the +argument and result arrays can be arbitrary integers, but the arrays themselves +must not be the same or overlap. A zero stride effectively collapses an entire +vector into a single element. A negative stride causes a vector to be accessed +in descending memory order, but note that the corresponding pointer must still +point to the first element of the vector to be used; if the stride is negative, +this will be the highest-addressed element in memory. This convention differs +from the Level 1 BLAS, in which array parameters always refer to the +lowest-addressed element in memory even when negative increments are used. +.sp +.LP +These functions assume that the default round-to-nearest rounding direction +mode is in effect. On x86, these functions also assume that the default +round-to-64-bit rounding precision mode is in effect. The result of calling a +vector function with a non-default rounding mode in effect is undefined. +.sp +.LP +These functions handle special cases and exceptions in the same way as the +\fBsin()\fR functions when \fBc99\fR \fBMATH_ERREXCEPT\fR conventions are in +effect. 
See \fBsin\fR(3M) for the results for special cases. +.sp +.LP +An application wanting to check for exceptions should call +\fBfeclearexcept\fR(\fBFE_ALL_EXCEPT\fR) before calling these functions. On +return, if \fBfetestexcept\fR(\fBFE_INVALID\fR | \fBFE_DIVBYZERO\fR | +\fBFE_OVERFLOW\fR | \fBFE_UNDERFLOW\fR) is non-zero, an exception has been +raised. The application can then examine the result or argument vectors for +exceptional values. Some vector functions can raise the inexact exception even +if all elements of the argument array are such that the numerical results are +exact. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityCommitted +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBsin\fR(3M), \fBfeclearexcept\fR(3M), \fBfetestexcept\fR(3M), +\fBattributes\fR(5) diff --git a/usr/src/man/man3mvec/vsincos_.3mvec b/usr/src/man/man3mvec/vsincos_.3mvec new file mode 100644 index 0000000000..afbb8f4096 --- /dev/null +++ b/usr/src/man/man3mvec/vsincos_.3mvec @@ -0,0 +1,105 @@ +'\" te +.\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH vsincos_ 3MVEC "14 Dec 2007" "SunOS 5.11" "Vector Math Library Functions" +.SH NAME +vsincos_, vsincosf_ \- vector sincos functions +.SH SYNOPSIS +.LP +.nf +cc [ \fIflag\fR\&.\|.\|. ] \fIfile\fR\&.\|.\|. \fB-lmvec\fR [ \fIlibrary\fR\&.\|.\|. ] + +\fBvoid\fR \fBvsincos_\fR(\fBint *\fR\fIn\fR, \fBdouble * restrict\fR \fIx\fR, \fBint *\fR\fIstridex\fR, + \fBdouble * restrict\fR \fIs\fR, \fBint *\fR\fIstrides\fR, \fBdouble * restrict\fR \fIc\fR, + \fBint *\fR\fIstridec\fR); +.fi + +.LP +.nf +\fBvoid\fR \fBvsincosf_\fR(\fBint *\fR\fIn\fR, \fBfloat * restrict\fR \fIx\fR, \fBint *\fR\fIstridex\fR, + \fBfloat * restrict\fR \fIs\fR, \fBint *\fR\fIstrides\fR, \fBfloat * restrict\fR \fIc\fR, + \fBint *\fR\fIstridec\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions evaluate both \fBsin\fR(\fIx\fR) and \fBcos\fR(\fIx\fR) for an +entire vector of values at once. The first parameter specifies the number of +values to compute. Subsequent parameters specify the argument and result +vectors. Each vector is described by a pointer to the first element and a +stride, which is the increment between successive elements. +.sp +.LP +Specifically, \fBvsincos_\fR(\fIn\fR, \fIx\fR, \fIsx\fR, \fIs\fR, \fIss\fR, +\fIc\fR, \fIsc\fR) simultaneously computes \fIs\fR[\fIi\fR * *\fIss\fR] = +\fBsin\fR(\fIx\fR[\fIi\fR * *\fIsx\fR]) and \fIc\fR[\fIi\fR * *\fIsc\fR] = +\fBcos\fR(\fIx\fR[\fIi\fR * *\fIsx\fR]) for each \fIi\fR = 0, 1, ..., *\fIn\fR +- 1. The \fBvsincosf_()\fR function performs the same computation for single +precision data. +.sp +.LP +These functions are not guaranteed to deliver results that are identical to the +results of the \fBsincos\fR(3M) functions given the same arguments. +Non-exceptional results, however, are accurate to within a unit in the last +place. 
+.SH USAGE +.sp +.LP +The element count *\fIn\fR must be greater than zero. The strides for the +argument and result arrays can be arbitrary integers, but the arrays themselves +must not be the same or overlap. A zero stride effectively collapses an entire +vector into a single element. A negative stride causes a vector to be accessed +in descending memory order, but note that the corresponding pointer must still +point to the first element of the vector to be used; if the stride is negative, +this will be the highest-addressed element in memory. This convention differs +from the Level 1 BLAS, in which array parameters always refer to the +lowest-addressed element in memory even when negative increments are used. +.sp +.LP +These functions assume that the default round-to-nearest rounding direction +mode is in effect. On x86, these functions also assume that the default +round-to-64-bit rounding precision mode is in effect. The result of calling a +vector function with a non-default rounding mode in effect is undefined. +.sp +.LP +These functions handle special cases and exceptions in the same way as the +\fBsin()\fR and \fBcos()\fR functions when \fBc99\fR \fBMATH_ERREXCEPT\fR +conventions are in effect. See \fBsin\fR(3M) and \fBcos\fR(3M) for the results +for special cases. +.sp +.LP +An application wanting to check for exceptions should call +\fBfeclearexcept\fR(\fBFE_ALL_EXCEPT\fR) before calling these functions. On +return, if \fBfetestexcept\fR(\fBFE_INVALID\fR | \fBFE_DIVBYZERO\fR | +\fBFE_OVERFLOW\fR | \fBFE_UNDERFLOW\fR) is non-zero, an exception has been +raised. The application can then examine the result or argument vectors for +exceptional values. Some vector functions can raise the inexact exception even +if all elements of the argument array are such that the numerical results are +exact. 
+.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityCommitted +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBcos\fR(3M), \fBsin\fR(3M), \fBsincos\fR(3M), \fBfeclearexcept\fR(3M), +\fBfetestexcept\fR(3M), \fBattributes\fR(5) diff --git a/usr/src/man/man3mvec/vsincospi_.3mvec b/usr/src/man/man3mvec/vsincospi_.3mvec new file mode 100644 index 0000000000..109e365254 --- /dev/null +++ b/usr/src/man/man3mvec/vsincospi_.3mvec @@ -0,0 +1,126 @@ +'\" te +.\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH vsincospi_ 3MVEC "14 Dec 2007" "SunOS 5.11" "Vector Math Library Functions" +.SH NAME +vsincospi_, vsincospif_ \- vector sincospi functions +.SH SYNOPSIS +.LP +.nf +cc [ \fIflag\fR\&.\|.\|. ] \fIfile\fR\&.\|.\|. \fB-lmvec\fR [ \fIlibrary\fR\&.\|.\|. 
] + +\fBvoid\fR \fBvsincospi_\fR(\fBint *\fR\fIn\fR, \fBdouble * restrict\fR \fIx\fR, \fBint *\fR\fIstridex\fR, + \fBdouble * restrict\fR \fIs\fR, \fBint *\fR\fIstrides\fR, \fBdouble * restrict\fR \fIc\fR, + \fBint *\fR\fIstridec\fR); +.fi + +.LP +.nf +\fBvoid\fR \fBvsincospif_\fR(\fBint *\fR\fIn\fR, \fBfloat * restrict\fR \fIx\fR, \fBint *\fR\fIstridex\fR, + \fBfloat * restrict\fR \fIs\fR, \fBint *\fR\fIstrides\fR, \fBfloat * restrict\fR \fIc\fR, + \fBint *\fR\fIstridec\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions evaluate both \fBsinpi\fR(\fIx\fR) and \fBcospi\fR(\fIx\fR), +defined by \fBsinpi\fR(\fIx\fR) = \fBsin\fR(\c +.if n pi\c +.if t \(*p +\c + * \fIx\fR) and \fBcospi\fR(\fIx\fR) = \fBcos\fR(\c +.if n pi\c +.if t \(*p +\c + * \fIx\fR), for an entire vector of values at once. The first parameter +specifies the number of values to compute. Subsequent parameters specify the +argument and result vectors. Each vector is described by a pointer to the first +element and a stride, which is the increment between successive elements. +.sp +.LP +Specifically, \fBvsincospi_\fR(\fIn\fR, \fIx\fR, \fIsx\fR, \fIs\fR, \fIss\fR, +\fIc\fR, \fIsc\fR) simultaneously computes \fIs\fR[\fIi\fR * *\fIss\fR] = +\fBsinpi\fR(\fIx\fR[\fIi\fR * *\fIsx\fR]) and \fIc\fR[\fIi\fR * *\fIsc\fR] = +\fBcospi\fR(\fIx\fR[\fIi\fR * *\fIsx\fR]) for each \fIi\fR = 0, 1, ..., +*\fIn\fR - 1. The \fBvsincospif_()\fR function performs the same computation for +single precision data. +.sp +.LP +Non-exceptional results are accurate to within a unit in the last place. +.SH USAGE +.sp +.LP +The element count *\fIn\fR must be greater than zero. The strides for the +argument and result arrays can be arbitrary integers, but the arrays themselves +must not be the same or overlap. A zero stride effectively collapses an entire +vector into a single element. 
A negative stride causes a vector to be accessed +in descending memory order, but note that the corresponding pointer must still +point to the first element of the vector to be used; if the stride is negative, +this will be the highest-addressed element in memory. This convention differs +from the Level 1 BLAS, in which array parameters always refer to the +lowest-addressed element in memory even when negative increments are used. +.sp +.LP +These functions assume that the default round-to-nearest rounding direction +mode is in effect. On x86, these functions also assume that the default +round-to-64-bit rounding precision mode is in effect. The result of calling a +vector function with a non-default rounding mode in effect is undefined. +.sp +.LP +These functions handle special cases and exceptions in the spirit of IEEE 754. +In particular, +.RS +4 +.TP +.ie t \(bu +.el o +\fBsinpi\fR(NaN), \fBcospi\fR(NaN) are NaN, +.RE +.RS +4 +.TP +.ie t \(bu +.el o +\fBsinpi\fR(\(+-0) is \(+-0, +.RE +.RS +4 +.TP +.ie t \(bu +.el o +\fBsinpi\fR(\(+-Inf), \fBcospi\fR(\(+-Inf) are NaN, and an invalid operation +exception is raised. +.RE +.sp +.LP +An application wanting to check for exceptions should call +\fBfeclearexcept\fR(\fBFE_ALL_EXCEPT\fR) before calling these functions. On +return, if \fBfetestexcept\fR(\fBFE_INVALID\fR | \fBFE_DIVBYZERO\fR | +\fBFE_OVERFLOW\fR | \fBFE_UNDERFLOW\fR) is non-zero, an exception has been +raised. The application can then examine the result or argument vectors for +exceptional values. Some vector functions can raise the inexact exception even +if all elements of the argument array are such that the numerical results are +exact. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. 
+ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityCommitted +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBfeclearexcept\fR(3M), \fBfetestexcept\fR(3M), \fBattributes\fR(5) diff --git a/usr/src/man/man3mvec/vsinpi_.3mvec b/usr/src/man/man3mvec/vsinpi_.3mvec new file mode 100644 index 0000000000..1e7262508d --- /dev/null +++ b/usr/src/man/man3mvec/vsinpi_.3mvec @@ -0,0 +1,117 @@ +'\" te +.\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH vsinpi_ 3MVEC "14 Dec 2007" "SunOS 5.11" "Vector Math Library Functions" +.SH NAME +vsinpi_, vsinpif_ \- vector sinpi functions +.SH SYNOPSIS +.LP +.nf +cc [ \fIflag\fR\&.\|.\|. ] \fIfile\fR\&.\|.\|. \fB-lmvec\fR [ \fIlibrary\fR\&.\|.\|. 
] + +\fBvoid\fR \fBvsinpi_\fR(\fBint *\fR\fIn\fR, \fBdouble * restrict\fR \fIx\fR, \fBint *\fR\fIstridex\fR, + \fBdouble * restrict\fR \fIy\fR, \fBint *\fR\fIstridey\fR); +.fi + +.LP +.nf +\fBvoid\fR \fBvsinpif_\fR(\fBint *\fR\fIn\fR, \fBfloat * restrict\fR \fIx\fR, \fBint *\fR\fIstridex\fR, + \fBfloat * restrict\fR \fIy\fR, \fBint *\fR\fIstridey\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions evaluate the function \fBsinpi\fR(\fIx\fR), defined by +\fBsinpi\fR(\fIx\fR) = \fBsin\fR(\c +.if n pi\c +.if t \(*p +\c + * \fIx\fR), for an entire vector of values at once. The first parameter +specifies the number of values to compute. Subsequent parameters specify the +argument and result vectors. Each vector is described by a pointer to the first +element and a stride, which is the increment between successive elements. +.sp +.LP +Specifically, \fBvsinpi_\fR(\fIn\fR, \fIx\fR, \fIsx\fR, \fIy\fR, \fIsy\fR) +computes \fIy\fR[\fIi\fR * *\fIsy\fR] = \fBsinpi\fR(\fIx\fR[\fIi\fR * +*\fIsx\fR]) for each \fIi\fR = 0, 1, ..., *\fIn\fR - 1. The \fBvsinpif_()\fR +function performs the same computation for single precision data. +.sp +.LP +Non-exceptional results are accurate to within a unit in the last place. +.SH USAGE +.sp +.LP +The element count *\fIn\fR must be greater than zero. The strides for the +argument and result arrays can be arbitrary integers, but the arrays themselves +must not be the same or overlap. A zero stride effectively collapses an entire +vector into a single element. A negative stride causes a vector to be accessed +in descending memory order, but note that the corresponding pointer must still +point to the first element of the vector to be used; if the stride is negative, +this will be the highest-addressed element in memory. This convention differs +from the Level 1 BLAS, in which array parameters always refer to the +lowest-addressed element in memory even when negative increments are used. 
+.sp +.LP +These functions assume that the default round-to-nearest rounding direction +mode is in effect. On x86, these functions also assume that the default +round-to-64-bit rounding precision mode is in effect. The result of calling a +vector function with a non-default rounding mode in effect is undefined. +.sp +.LP +These functions handle special cases and exceptions in the spirit of IEEE 754. +In particular, +.RS +4 +.TP +.ie t \(bu +.el o +\fBsinpi\fR(NaN) is NaN, +.RE +.RS +4 +.TP +.ie t \(bu +.el o +\fBsinpi\fR(\(+-0) is \(+-0, +.RE +.RS +4 +.TP +.ie t \(bu +.el o +\fBsinpi\fR(\(+-Inf) is NaN, and an invalid operation exception is raised. +.RE +.sp +.LP +An application wanting to check for exceptions should call +\fBfeclearexcept\fR(\fBFE_ALL_EXCEPT\fR) before calling these functions. On +return, if \fBfetestexcept\fR(\fBFE_INVALID\fR | \fBFE_DIVBYZERO\fR | +\fBFE_OVERFLOW\fR | \fBFE_UNDERFLOW\fR) is non-zero, an exception has been +raised. The application can then examine the result or argument vectors for +exceptional values. Some vector functions can raise the inexact exception even +if all elements of the argument array are such that the numerical results are +exact. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityCommitted +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBfeclearexcept\fR(3M), \fBfetestexcept\fR(3M), \fBattributes\fR(5) diff --git a/usr/src/man/man3mvec/vsqrt_.3mvec b/usr/src/man/man3mvec/vsqrt_.3mvec new file mode 100644 index 0000000000..d27b5e8e74 --- /dev/null +++ b/usr/src/man/man3mvec/vsqrt_.3mvec @@ -0,0 +1,99 @@ +'\" te +.\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). 
You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH vsqrt_ 3MVEC "14 Dec 2007" "SunOS 5.11" "Vector Math Library Functions" +.SH NAME +vsqrt_, vsqrtf_ \- vector square root functions +.SH SYNOPSIS +.LP +.nf +cc [ \fIflag\fR\&.\|.\|. ] \fIfile\fR\&.\|.\|. \fB-lmvec\fR [ \fIlibrary\fR\&.\|.\|. ] + +\fBvoid\fR \fBvsqrt_\fR(\fBint *\fR\fIn\fR, \fBdouble * restrict\fR \fIx\fR, \fBint *\fR\fIstridex\fR, + \fBdouble * restrict\fR \fIy\fR, \fBint *\fR\fIstridey\fR); +.fi + +.LP +.nf +\fBvoid\fR \fBvsqrtf_\fR(\fBint *\fR\fIn\fR, \fBfloat * restrict\fR \fIx\fR, \fBint *\fR\fIstridex\fR, + \fBfloat * restrict\fR \fIy\fR, \fBint *\fR\fIstridey\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions evaluate the function \fBsqrt\fR(\fIx\fR) for an entire vector +of values at once. The first parameter specifies the number of values to +compute. Subsequent parameters specify the argument and result vectors. Each +vector is described by a pointer to the first element and a stride, which is +the increment between successive elements. +.sp +.LP +Specifically, \fBvsqrt_\fR(\fIn\fR, \fIx\fR, \fIsx\fR, \fIy\fR, \fIsy\fR) +computes \fIy\fR[\fIi\fR * *\fIsy\fR] = \fBsqrt\fR(\fIx\fR[\fIi\fR * +*\fIsx\fR]) for each \fIi\fR = 0, 1, ..., *\fIn\fR - 1. The \fBvsqrtf_()\fR +function performs the same computation for single precision data. 
+.sp +.LP +Unlike their scalar counterparts, these functions do not always deliver +correctly rounded results. However, the error in each non-exceptional result is +less than one unit in the last place. +.SH USAGE +.sp +.LP +The element count *\fIn\fR must be greater than zero. The strides for the +argument and result arrays can be arbitrary integers, but the arrays themselves +must not be the same or overlap. A zero stride effectively collapses an entire +vector into a single element. A negative stride causes a vector to be accessed +in descending memory order, but note that the corresponding pointer must still +point to the first element of the vector to be used; if the stride is negative, +this will be the highest-addressed element in memory. This convention differs +from the Level 1 BLAS, in which array parameters always refer to the +lowest-addressed element in memory even when negative increments are used. +.sp +.LP +These functions assume that the default round-to-nearest rounding direction +mode is in effect. On x86, these functions also assume that the default +round-to-64-bit rounding precision mode is in effect. The result of calling a +vector function with a non-default rounding mode in effect is undefined. +.sp +.LP +These functions handle special cases and exceptions in the same way as the +\fBsqrt()\fR functions when \fBc99\fR \fBMATH_ERREXCEPT\fR conventions are in +effect. See \fBsqrt\fR(3M) for the results for special cases. +.sp +.LP +An application wanting to check for exceptions should call +\fBfeclearexcept\fR(\fBFE_ALL_EXCEPT\fR) before calling these functions. On +return, if \fBfetestexcept\fR(\fBFE_INVALID\fR | \fBFE_DIVBYZERO\fR | +\fBFE_OVERFLOW\fR | \fBFE_UNDERFLOW\fR) is non-zero, an exception has been +raised. The application can then examine the result or argument vectors for +exceptional values. 
Some vector functions can raise the inexact exception even +if all elements of the argument array are such that the numerical results are +exact. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityCommitted +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBsqrt\fR(3M), \fBfeclearexcept\fR(3M), \fBfetestexcept\fR(3M), +\fBattributes\fR(5) diff --git a/usr/src/man/man3mvec/vz_abs_.3mvec b/usr/src/man/man3mvec/vz_abs_.3mvec new file mode 100644 index 0000000000..7464b6ea8d --- /dev/null +++ b/usr/src/man/man3mvec/vz_abs_.3mvec @@ -0,0 +1,99 @@ +'\" te +.\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH vz_abs_ 3MVEC "14 Dec 2007" "SunOS 5.11" "Vector Math Library Functions" +.SH NAME +vz_abs_, vc_abs_ \- vector complex absolute value functions +.SH SYNOPSIS +.LP +.nf +cc [ \fIflag\fR\&.\|.\|. ] \fIfile\fR\&.\|.\|. \fB-lmvec\fR [ \fIlibrary\fR\&.\|.\|. 
] + +\fBvoid\fR \fBvz_abs_\fR(\fBint *\fR\fIn\fR, \fBdouble complex * restrict\fR \fIz\fR, + \fBint *\fR\fIstridez\fR, \fBdouble * restrict\fR \fIy\fR, \fBint *\fR\fIstridey\fR); +.fi + +.LP +.nf +\fBvoid\fR \fBvc_abs_\fR(\fBint *\fR\fIn\fR, \fBfloat complex * restrict\fR \fIz\fR, + \fBint *\fR\fIstridez\fR, \fBfloat * restrict\fR \fIy\fR, \fBint *\fR\fIstridey\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions compute the magnitude (or modulus) | \fIz\fR | for an entire +vector of values at once. The first parameter specifies the number of values to +compute. Subsequent parameters specify the argument and result vectors. Each +vector is described by a pointer to the first element and a stride, which is +the increment between successive elements. +.sp +.LP +Specifically, \fBvz_abs_\fR(\fIn,\fR \fIz\fR, \fIsz\fR, \fIy\fR, \fIsy\fR) +computes \fIy\fR[\fIi\fR * *\fIsy\fR] = | \fIz\fR[\fIi\fR * *\fIsz\fR] | for +each \fIi\fR = 0, 1, ..., *\fIn\fR - 1. The \fBvc_abs_()\fR function performs +the same computation for single precision data. +.sp +.LP +These functions are not guaranteed to deliver results that are identical to the +results of the \fBcabs\fR(3M) functions given the same arguments. +Non-exceptional results, however, are accurate to within a unit in the last +place. +.SH USAGE +.sp +.LP +The element count *\fIn\fR must be greater than zero. The strides for the +argument and result arrays can be arbitrary integers, but the arrays themselves +must not be the same or overlap. A zero stride effectively collapses an entire +vector into a single element. A negative stride causes a vector to be accessed +in descending memory order, but note that the corresponding pointer must still +point to the first element of the vector to be used; if the stride is negative, +this will be the highest-addressed element in memory. 
This convention differs +from the Level 1 BLAS, in which array parameters always refer to the +lowest-addressed element in memory even when negative increments are used. +.sp +.LP +These functions assume that the default round-to-nearest rounding direction +mode is in effect. On x86, these functions also assume that the default +round-to-64-bit rounding precision mode is in effect. The result of calling a +vector function with a non-default rounding mode in effect is undefined. +.sp +.LP +These functions handle special cases and exceptions in the spirit of IEEE 754. +See \fBcabs\fR(3M) for the results for special cases. +.sp +.LP +An application wanting to check for exceptions should call +\fBfeclearexcept\fR(\fBFE_ALL_EXCEPT\fR) before calling these functions. On +return, if \fBfetestexcept\fR(\fBFE_INVALID\fR | \fBFE_DIVBYZERO\fR | +\fBFE_OVERFLOW\fR | \fBFE_UNDERFLOW\fR) is non-zero, an exception has been +raised. The application can then examine the result or argument vectors for +exceptional values. Some vector functions can raise the inexact exception even +if all elements of the argument array are such that the numerical results are +exact. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityCommitted +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBcabs\fR(3M), \fBfeclearexcept\fR(3M), \fBfetestexcept\fR(3M), +\fBattributes\fR(5) diff --git a/usr/src/man/man3mvec/vz_exp_.3mvec b/usr/src/man/man3mvec/vz_exp_.3mvec new file mode 100644 index 0000000000..b860dd3722 --- /dev/null +++ b/usr/src/man/man3mvec/vz_exp_.3mvec @@ -0,0 +1,94 @@ +'\" te +.\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). 
You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH vz_exp_ 3MVEC "14 Dec 2007" "SunOS 5.11" "Vector Math Library Functions" +.SH NAME +vz_exp_, vc_exp_ \- vector complex exponential functions +.SH SYNOPSIS +.LP +.nf +cc [ \fIflag\fR\&.\|.\|. ] \fIfile\fR\&.\|.\|. \fB-lmvec\fR [ \fIlibrary\fR\&.\|.\|. ] + +\fBvoid\fR \fBvz_exp_\fR(\fBint *\fR\fIn\fR, \fBdouble complex * restrict\fR \fIz\fR, + \fBint *\fR\fIstridez\fR, \fBdouble complex * restrict\fR \fIw\fR, \fBint *\fR\fIstridew\fR, + \fBdouble *\fR \fItmp\fR); +.fi + +.LP +.nf +\fBvoid\fR \fBvc_exp_\fR(\fBint *\fR\fIn\fR, \fBfloat complex * restrict\fR \fIz\fR, + \fBint *\fR\fIstridez\fR, \fBfloat complex * restrict\fR \fIw\fR, \fBint *\fR\fIstridew\fR, + \fBfloat *\fR \fItmp\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions evaluate the complex function \fBexp\fR(\fIz\fR) for an entire +vector of values at once. The first parameter specifies the number of values to +compute. Subsequent parameters specify the argument and result vectors. Each +vector is described by a pointer to the first element and a stride, which is +the increment between successive elements. The last argument is a pointer to +scratch storage; this storage must be large enough to hold *\fIn\fR consecutive +values of the real type corresponding to the complex type of the argument and +result. 
+.sp +.LP +Specifically, \fBvz_exp_\fR(\fIn\fR, \fIz\fR, \fIsz\fR, \fIw\fR, \fIsw\fR, +\fItmp\fR) computes \fIw\fR[\fIi\fR * *\fIsw\fR] = \fBexp\fR(\fIz\fR[\fIi\fR * +*\fIsz\fR]) for each \fIi\fR = 0, 1, ..., *\fIn\fR - 1. The \fBvc_exp_()\fR +function performs the same computation for single precision data. +.sp +.LP +These functions are not guaranteed to deliver results that are identical to the +results of the \fBcexp\fR(3M) functions given the same arguments. +.SH USAGE +.sp +.LP +The element count *\fIn\fR must be greater than zero. The strides for the +argument and result arrays can be arbitrary integers, but the arrays themselves +must not be the same or overlap. A zero stride effectively collapses an entire +vector into a single element. A negative stride causes a vector to be accessed +in descending memory order, but note that the corresponding pointer must still +point to the first element of the vector to be used; if the stride is negative, +this will be the highest-addressed element in memory. This convention differs +from the Level 1 BLAS, in which array parameters always refer to the +lowest-addressed element in memory even when negative increments are used. +.sp +.LP +These functions assume that the default round-to-nearest rounding direction +mode is in effect. On x86, these functions also assume that the default +round-to-64-bit rounding precision mode is in effect. The result of calling a +vector function with a non-default rounding mode in effect is undefined. +.sp +.LP +Unlike the c99 \fBcexp\fR(3M) functions, the vector complex exponential +functions make no attempt to handle special cases and exceptions; they simply +use textbook formulas to compute a complex exponential in terms of real +elementary functions. As a result, these functions can raise different +exceptions and/or deliver different results from \fBcexp()\fR. 
+.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityCommitted +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBcexp\fR(3M), \fBattributes\fR(5) diff --git a/usr/src/man/man3mvec/vz_log_.3mvec b/usr/src/man/man3mvec/vz_log_.3mvec new file mode 100644 index 0000000000..e40b293565 --- /dev/null +++ b/usr/src/man/man3mvec/vz_log_.3mvec @@ -0,0 +1,89 @@ +'\" te +.\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH vz_log_ 3MVEC "14 Dec 2007" "SunOS 5.11" "Vector Math Library Functions" +.SH NAME +vz_log_, vc_log_ \- vector complex logarithm functions +.SH SYNOPSIS +.LP +.nf +cc [ \fIflag\fR\&.\|.\|. ] \fIfile\fR\&.\|.\|. \fB-lmvec\fR [ \fIlibrary\fR\&.\|.\|. 
] + +\fBvoid\fR \fBvz_log_\fR(\fBint *\fR\fIn\fR, \fBdouble complex * restrict\fR \fIz\fR, + \fBint *\fR\fIstridez\fR, \fBdouble complex * restrict\fR \fIw\fR, \fBint *\fR\fIstridew\fR); +.fi + +.LP +.nf +\fBvoid\fR \fBvc_log_\fR(\fBint *\fR\fIn\fR, \fBfloat complex * restrict\fR \fIz\fR, + \fBint *\fR\fIstridez\fR, \fBfloat complex * restrict\fR \fIw\fR, \fBint *\fR\fIstridew\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions evaluate the complex function \fBlog\fR(\fIz\fR) for an entire +vector of values at once. The first parameter specifies the number of values to +compute. Subsequent parameters specify the argument and result vectors. Each +vector is described by a pointer to the first element and a stride, which is +the increment between successive elements. +.sp +.LP +Specifically, \fBvz_log_\fR(\fIn\fR, \fIz\fR, \fIsz\fR, \fIw\fR, \fIsw\fR) +computes \fIw\fR[\fIi\fR * *\fIsw\fR] = \fBlog\fR(\fIz\fR[\fIi\fR * *\fIsz\fR]) +for each \fIi\fR = 0, 1, ..., *\fIn\fR - 1. The \fBvc_log_()\fR function +performs the same computation for single precision data. +.sp +.LP +These functions are not guaranteed to deliver results that are identical to the +results of the \fBclog\fR(3M) functions given the same arguments. +.SH USAGE +.sp +.LP +The element count *\fIn\fR must be greater than zero. The strides for the +argument and result arrays can be arbitrary integers, but the arrays themselves +must not be the same or overlap. A zero stride effectively collapses an entire +vector into a single element. A negative stride causes a vector to be accessed +in descending memory order, but note that the corresponding pointer must still +point to the first element of the vector to be used; if the stride is negative, +this will be the highest-addressed element in memory. This convention differs +from the Level 1 BLAS, in which array parameters always refer to the +lowest-addressed element in memory even when negative increments are used. 
+.sp +.LP +These functions assume that the default round-to-nearest rounding direction +mode is in effect. On x86, these functions also assume that the default +round-to-64-bit rounding precision mode is in effect. The result of calling a +vector function with a non-default rounding mode in effect is undefined. +.sp +.LP +Unlike the c99 \fBclog\fR(3M) functions, the vector complex logarithm +functions make no attempt to handle special cases and exceptions; they simply +use textbook formulas to compute a complex logarithm in terms of real +elementary functions. As a result, these functions can raise different +exceptions and/or deliver different results from \fBclog()\fR. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. +ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityCommitted +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBclog\fR(3M), \fBattributes\fR(5) diff --git a/usr/src/man/man3mvec/vz_pow_.3mvec b/usr/src/man/man3mvec/vz_pow_.3mvec new file mode 100644 index 0000000000..7ee7a0e847 --- /dev/null +++ b/usr/src/man/man3mvec/vz_pow_.3mvec @@ -0,0 +1,97 @@ +'\" te +.\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved. +.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. +.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] +.TH vz_pow_ 3MVEC "14 Dec 2007" "SunOS 5.11" "Vector Math Library Functions" +.SH NAME +vz_pow_, vc_pow_ \- vector complex power functions +.SH SYNOPSIS +.LP +.nf +cc [ \fIflag\fR\&.\|.\|. ] \fIfile\fR\&.\|.\|. \fB-lmvec\fR [ \fIlibrary\fR\&.\|.\|. ] + +\fBvoid\fR \fBvz_pow_\fR(\fBint *\fR\fIn\fR, \fBdouble complex * restrict\fR \fIz\fR, + \fBint *\fR\fIstridez\fR, \fBdouble complex * restrict\fR \fIw\fR, \fBint *\fR\fIstridew\fR, + \fBdouble complex * restrict\fR \fIu\fR, \fBint *\fR\fIstrideu\fR, + \fBdouble *\fR \fItmp\fR); +.fi + +.LP +.nf +\fBvoid\fR \fBvc_pow_\fR(\fBint *\fR\fIn\fR, \fBfloat complex * restrict\fR \fIz\fR, + \fBint *\fR\fIstridez\fR, \fBfloat complex * restrict\fR \fIw\fR, \fBint *\fR\fIstridew\fR, + \fBfloat complex * restrict\fR \fIu\fR, \fBint *\fR\fIstrideu\fR, + \fBfloat *\fR \fItmp\fR); +.fi + +.SH DESCRIPTION +.sp +.LP +These functions evaluate the complex function \fIz\fR^\fIw\fR for an entire +vector of values at once. The first parameter specifies the number of values to +compute. Subsequent parameters specify the argument and result vectors. Each +vector is described by a pointer to the first element and a stride, which is +the increment between successive elements. The last argument is a pointer to +scratch storage; this storage must be large enough to hold 3 * *\fIn\fR +consecutive values of the real type corresponding to the complex type of the +argument and result. +.sp +.LP +Specifically, \fBvz_pow_\fR(\fIn\fR, \fIz\fR, \fIsz\fR, \fIw\fR, \fIsw\fR, +\fIu\fR, \fIsu\fR, \fItmp\fR) computes \fIu\fR[\fIi\fR * *\fIsu\fR] = +(\fIz\fR[\fIi\fR * *\fIsz\fR])^(\fIw\fR[\fIi\fR * *\fIsw\fR]) for each \fIi\fR += 0, 1, ..., *\fIn\fR - 1. The \fBvc_pow_()\fR function performs the same +computation for single precision data. 
+.sp +.LP +These functions are not guaranteed to deliver results that are identical to the +results of the \fBcpow\fR(3M) functions given the same arguments. +.SH USAGE +.sp +.LP +The element count *\fIn\fR must be greater than zero. The strides for the +argument and result arrays can be arbitrary integers, but the arrays themselves +must not be the same or overlap. A zero stride effectively collapses an entire +vector into a single element. A negative stride causes a vector to be accessed +in descending memory order, but note that the corresponding pointer must still +point to the first element of the vector to be used; if the stride is negative, +this will be the highest-addressed element in memory. This convention differs +from the Level 1 BLAS, in which array parameters always refer to the +lowest-addressed element in memory even when negative increments are used. +.sp +.LP +These functions assume that the default round-to-nearest rounding direction +mode is in effect. On x86, these functions also assume that the default +round-to-64-bit rounding precision mode is in effect. The result of calling a +vector function with a non-default rounding mode in effect is undefined. +.sp +.LP +Unlike the c99 \fBcpow\fR(3M) functions, the vector complex power +functions make no attempt to handle special cases and exceptions; they simply +use textbook formulas to compute a complex power in terms of real +elementary functions. As a result, these functions can raise different +exceptions and/or deliver different results from \fBcpow()\fR. +.SH ATTRIBUTES +.sp +.LP +See \fBattributes\fR(5) for descriptions of the following attributes: +.sp + +.sp +.TS +tab() box; +cw(2.75i) |cw(2.75i) +lw(2.75i) |lw(2.75i) +. 
+ATTRIBUTE TYPEATTRIBUTE VALUE +_ +Interface StabilityCommitted +_ +MT-LevelMT-Safe +.TE + +.SH SEE ALSO +.sp +.LP +\fBcpow\fR(3M), \fBattributes\fR(5) diff --git a/usr/src/pkg/manifests/SUNWlibm.mf b/usr/src/pkg/manifests/SUNWlibm.mf new file mode 100644 index 0000000000..1d03babb0b --- /dev/null +++ b/usr/src/pkg/manifests/SUNWlibm.mf @@ -0,0 +1,29 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2011 Nexenta Systems, Inc. All rights reserved. +# + +set name=pkg.fmri value=pkg:/SUNWlibm@0.5.11,5.11-0.132 +set name=pkg.description \ + value="Math & Microtasking Library Headers & Lint Files" +# license license=SUNWlibm.copyright +# license license=SUNWlibmr.copyright +set name=pkg.renamed value=true +# set name=pkg.renamed value=true +set name=pkg.summary value="Math & Microtasking Library Headers & Lint Files" +set name=info.classification \ + value=org.opensolaris.category.2008:System/Libraries +# set name=org.opensolaris.consolidation value=sunpro +set name=variant.arch value=$(ARCH) +set name=variant.opensolaris.zone value=global value=nonglobal +depend fmri=pkg:/system/library/math/header-math@$(PKGVERS) type=require diff --git a/usr/src/pkg/manifests/SUNWlibms.mf b/usr/src/pkg/manifests/SUNWlibms.mf new file mode 100644 index 0000000000..5a2e2db695 --- /dev/null +++ b/usr/src/pkg/manifests/SUNWlibms.mf @@ -0,0 +1,28 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. 
+# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2011 Nexenta Systems, Inc. All rights reserved. +# + +set name=pkg.fmri value=pkg:/SUNWlibms@0.5.11,5.11-0.132 +set name=pkg.description value="Math & Microtasking Libraries" +# license license=SUNWlibms.copyright +# license license=SUNWlibmsr.copyright +set name=pkg.renamed value=true +# set name=pkg.renamed value=true +set name=pkg.summary value="Math & Microtasking Libraries" +set name=description value="Math & Microtasking Libraries" +set name=info.classification \ + value=org.opensolaris.category.2008:System/Libraries +set name=variant.arch value=$(ARCH) +set name=variant.opensolaris.zone value=global value=nonglobal +depend fmri=pkg:/system/library/math@$(PKGVERS) type=require diff --git a/usr/src/pkg/manifests/consolidation-sunpro-sunpro-incorporation.mf b/usr/src/pkg/manifests/consolidation-sunpro-sunpro-incorporation.mf new file mode 100644 index 0000000000..9c9afc945d --- /dev/null +++ b/usr/src/pkg/manifests/consolidation-sunpro-sunpro-incorporation.mf @@ -0,0 +1,18 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet +# at http://www.illumos.org/license/CDDL. +# + +# Copyright 2011, Richard Lowe. 
+ +set name=pkg.fmri \ + value=pkg:/consolidation/sunpro/sunpro-incorporation@$(PKGVERS) +# Don't incorporate, as we were one and that would get deeply confusing +set name=org.opensolaris.noincorp value=true +set name=variant.arch value=$(ARCH) diff --git a/usr/src/pkg/manifests/system-library-math-header-math.mf b/usr/src/pkg/manifests/system-library-math-header-math.mf new file mode 100644 index 0000000000..7150742d1b --- /dev/null +++ b/usr/src/pkg/manifests/system-library-math-header-math.mf @@ -0,0 +1,25 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2012, Igor Kozhukhov <ikozhukhov@gmail.com>. All rights reserved. +# + +# all data have been moved to system/library/math +# +set name=pkg.fmri value=pkg:/system/library/math/header-math@$(PKGVERS) +set name=pkg.description value="Math Library Headers & Lint Files" +set name=pkg.renamed value=true +set name=pkg.summary value="Math Library Headers & Lint Files" +set name=info.classification \ + value=org.opensolaris.category.2008:System/Libraries +set name=variant.arch value=$(ARCH) +depend fmri=pkg:/system/library/math@$(PKGVERS) type=require diff --git a/usr/src/pkg/manifests/system-library-math.man3m.inc b/usr/src/pkg/manifests/system-library-math.man3m.inc new file mode 100644 index 0000000000..c5d5bea532 --- /dev/null +++ b/usr/src/pkg/manifests/system-library-math.man3m.inc @@ -0,0 +1,120 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. 
+# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet +# at http://www.illumos.org/license/CDDL. +# + +# Copyright (c) 2012, Igor Kozhukhov <ikozhukhov@gmail.com> + +file path=usr/share/man/man3m/acos.3m +file path=usr/share/man/man3m/acosh.3m +file path=usr/share/man/man3m/asin.3m +file path=usr/share/man/man3m/asinh.3m +file path=usr/share/man/man3m/atan.3m +file path=usr/share/man/man3m/atan2.3m +file path=usr/share/man/man3m/atanh.3m +file path=usr/share/man/man3m/cabs.3m +file path=usr/share/man/man3m/cacos.3m +file path=usr/share/man/man3m/cacosh.3m +file path=usr/share/man/man3m/carg.3m +file path=usr/share/man/man3m/casin.3m +file path=usr/share/man/man3m/casinh.3m +file path=usr/share/man/man3m/catan.3m +file path=usr/share/man/man3m/catanh.3m +file path=usr/share/man/man3m/cbrt.3m +file path=usr/share/man/man3m/ccos.3m +file path=usr/share/man/man3m/ccosh.3m +file path=usr/share/man/man3m/ceil.3m +file path=usr/share/man/man3m/cexp.3m +file path=usr/share/man/man3m/cimag.3m +file path=usr/share/man/man3m/clog.3m +file path=usr/share/man/man3m/conj.3m +file path=usr/share/man/man3m/copysign.3m +file path=usr/share/man/man3m/cos.3m +file path=usr/share/man/man3m/cosh.3m +file path=usr/share/man/man3m/cpow.3m +file path=usr/share/man/man3m/cproj.3m +file path=usr/share/man/man3m/creal.3m +file path=usr/share/man/man3m/csin.3m +file path=usr/share/man/man3m/csinh.3m +file path=usr/share/man/man3m/csqrt.3m +file path=usr/share/man/man3m/ctan.3m +file path=usr/share/man/man3m/ctanh.3m +file path=usr/share/man/man3m/erf.3m +file path=usr/share/man/man3m/erfc.3m +file path=usr/share/man/man3m/exp.3m +file path=usr/share/man/man3m/exp2.3m +file path=usr/share/man/man3m/expm1.3m +file path=usr/share/man/man3m/fabs.3m +file path=usr/share/man/man3m/fdim.3m +file path=usr/share/man/man3m/feclearexcept.3m +file path=usr/share/man/man3m/fegetenv.3m +file 
path=usr/share/man/man3m/fegetexceptflag.3m +file path=usr/share/man/man3m/fegetround.3m +file path=usr/share/man/man3m/feholdexcept.3m +file path=usr/share/man/man3m/feraiseexcept.3m +file path=usr/share/man/man3m/fesetprec.3m +file path=usr/share/man/man3m/fetestexcept.3m +file path=usr/share/man/man3m/feupdateenv.3m +file path=usr/share/man/man3m/fex_merge_flags.3m +file path=usr/share/man/man3m/fex_set_handling.3m +file path=usr/share/man/man3m/fex_set_log.3m +file path=usr/share/man/man3m/floor.3m +file path=usr/share/man/man3m/fma.3m +file path=usr/share/man/man3m/fmax.3m +file path=usr/share/man/man3m/fmin.3m +file path=usr/share/man/man3m/fmod.3m +file path=usr/share/man/man3m/fpclassify.3m +file path=usr/share/man/man3m/frexp.3m +file path=usr/share/man/man3m/hypot.3m +file path=usr/share/man/man3m/ilogb.3m +file path=usr/share/man/man3m/isfinite.3m +file path=usr/share/man/man3m/isgreater.3m +file path=usr/share/man/man3m/isgreaterequal.3m +file path=usr/share/man/man3m/isinf.3m +file path=usr/share/man/man3m/isless.3m +file path=usr/share/man/man3m/islessequal.3m +file path=usr/share/man/man3m/islessgreater.3m +file path=usr/share/man/man3m/isnan.3m +file path=usr/share/man/man3m/isnormal.3m +file path=usr/share/man/man3m/isunordered.3m +file path=usr/share/man/man3m/j0.3m +file path=usr/share/man/man3m/ldexp.3m +file path=usr/share/man/man3m/lgamma.3m +file path=usr/share/man/man3m/llrint.3m +file path=usr/share/man/man3m/llround.3m +file path=usr/share/man/man3m/log.3m +file path=usr/share/man/man3m/log10.3m +file path=usr/share/man/man3m/log1p.3m +file path=usr/share/man/man3m/log2.3m +file path=usr/share/man/man3m/logb.3m +file path=usr/share/man/man3m/lrint.3m +file path=usr/share/man/man3m/lround.3m +file path=usr/share/man/man3m/matherr.3m +file path=usr/share/man/man3m/modf.3m +file path=usr/share/man/man3m/nan.3m +file path=usr/share/man/man3m/nearbyint.3m +file path=usr/share/man/man3m/nextafter.3m +file path=usr/share/man/man3m/pow.3m +file 
path=usr/share/man/man3m/remainder.3m +file path=usr/share/man/man3m/remquo.3m +file path=usr/share/man/man3m/rint.3m +file path=usr/share/man/man3m/round.3m +file path=usr/share/man/man3m/scalb.3m +file path=usr/share/man/man3m/scalbln.3m +file path=usr/share/man/man3m/signbit.3m +file path=usr/share/man/man3m/significand.3m +file path=usr/share/man/man3m/sin.3m +file path=usr/share/man/man3m/sincos.3m +file path=usr/share/man/man3m/sinh.3m +file path=usr/share/man/man3m/sqrt.3m +file path=usr/share/man/man3m/tan.3m +file path=usr/share/man/man3m/tanh.3m +file path=usr/share/man/man3m/tgamma.3m +file path=usr/share/man/man3m/trunc.3m +file path=usr/share/man/man3m/y0.3m diff --git a/usr/src/pkg/manifests/system-library-math.man3mvec.inc b/usr/src/pkg/manifests/system-library-math.man3mvec.inc new file mode 100644 index 0000000000..9ce55bcab3 --- /dev/null +++ b/usr/src/pkg/manifests/system-library-math.man3mvec.inc @@ -0,0 +1,32 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet +# at http://www.illumos.org/license/CDDL. 
+# + +# Copyright (c) 2012, Igor Kozhukhov <ikozhukhov@gmail.com> + +file path=usr/share/man/man3mvec/vatan2_.3mvec +file path=usr/share/man/man3mvec/vatan_.3mvec +file path=usr/share/man/man3mvec/vcos_.3mvec +file path=usr/share/man/man3mvec/vcospi_.3mvec +file path=usr/share/man/man3mvec/vexp_.3mvec +file path=usr/share/man/man3mvec/vhypot_.3mvec +file path=usr/share/man/man3mvec/vlog_.3mvec +file path=usr/share/man/man3mvec/vpow_.3mvec +file path=usr/share/man/man3mvec/vrhypot_.3mvec +file path=usr/share/man/man3mvec/vrsqrt_.3mvec +file path=usr/share/man/man3mvec/vsin_.3mvec +file path=usr/share/man/man3mvec/vsincos_.3mvec +file path=usr/share/man/man3mvec/vsincospi_.3mvec +file path=usr/share/man/man3mvec/vsinpi_.3mvec +file path=usr/share/man/man3mvec/vsqrt_.3mvec +file path=usr/share/man/man3mvec/vz_abs_.3mvec +file path=usr/share/man/man3mvec/vz_exp_.3mvec +file path=usr/share/man/man3mvec/vz_log_.3mvec +file path=usr/share/man/man3mvec/vz_pow_.3mvec diff --git a/usr/src/pkg/manifests/system-library-math.mf b/usr/src/pkg/manifests/system-library-math.mf new file mode 100644 index 0000000000..e05221bdd1 --- /dev/null +++ b/usr/src/pkg/manifests/system-library-math.mf @@ -0,0 +1,92 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2012, Igor Kozhukhov <ikozhukhov@gmail.com>. All rights reserved. 
+# + +<include system-library-math.man3m.inc> +<include system-library-math.man3mvec.inc> +set name=pkg.fmri value=pkg:/system/library/math@$(PKGVERS) +set name=pkg.description value="Math Libraries" +set name=pkg.summary value="Math Libraries" +set name=description value="Math Libraries" +set name=info.classification \ + value=org.opensolaris.category.2008:System/Libraries +set name=variant.arch value=$(ARCH) +set name=variant.opensolaris.zone value=global value=nonglobal +dir path=lib +dir path=lib/$(ARCH64) +$(sparc_ONLY)dir path=lib/cpu +$(sparc_ONLY)dir path=lib/cpu/sparcv8plus+vis +$(sparc_ONLY)dir path=lib/cpu/sparcv9+vis +$(sparc_ONLY)dir path=lib/cpu/sparcv9+vis/$(ARCH64) +$(sparc_ONLY)dir path=lib/cpu/sparcv9+vis2 +$(sparc_ONLY)dir path=lib/cpu/sparcv9+vis2/$(ARCH64) +$(i386_ONLY)dir path=lib/libmvec +dir path=usr group=sys +dir path=usr/include +dir path=usr/include/iso +dir path=usr/include/sys +dir path=usr/lib +dir path=usr/lib/$(ARCH64) +dir path=usr/share/man/man3m +dir path=usr/share/man/man3mvec +file path=lib/$(ARCH64)/libm.so.1 +file path=lib/$(ARCH64)/libm.so.2 +file path=lib/$(ARCH64)/libmvec.so.1 +file path=lib/$(ARCH64)/llib-lm.ln +$(sparc_ONLY)file path=lib/cpu/sparcv8plus+vis/libmvec_isa.so.1 +$(sparc_ONLY)file path=lib/cpu/sparcv9+vis/$(ARCH64)/libmvec_isa.so.1 +$(sparc_ONLY)file path=lib/cpu/sparcv9+vis2/$(ARCH64)/libmvec_isa.so.1 +$(sparc_ONLY)file path=lib/cpu/sparcv9+vis2/libmvec_isa.so.1 +file path=lib/libm.so.1 +file path=lib/libm.so.2 +file path=lib/libmvec.so.1 +$(i386_ONLY)file path=lib/libmvec/libmvec_hwcap1.so.1 +file path=lib/llib-lm +file path=lib/llib-lm.ln +file path=usr/include/complex.h +file path=usr/include/fenv.h +file path=usr/include/floatingpoint.h +file path=usr/include/iso/math_c99.h +file path=usr/include/iso/math_iso.h +file path=usr/include/math.h +file path=usr/include/sys/ieeefp.h +file path=usr/include/tgmath.h +legacy pkg=SUNWlibms desc="Math Libraries (Usr)" \ + hotline="Please contact your local service 
provider" \ + name="Math Libraries (Usr)" vendor="Sun Microsystems, Inc." \ + version=5.11,REV=2009.08.04 +legacy pkg=SUNWlibmsr desc="Math Libraries (Root)" \ + hotline="Please contact your local service provider" \ + name="Math Libraries (Root)" vendor="Sun Microsystems, Inc." \ + version=5.11,REV=2009.08.04 +link path=lib/$(ARCH64)/libm.so target=libm.so.2 +link path=lib/$(ARCH64)/libmvec.so target=libmvec.so.1 +link path=lib/libm.so target=libm.so.2 +link path=lib/libmvec.so target=libmvec.so.1 +link path=usr/lib/$(ARCH64)/libm.so target=../../../lib/$(ARCH64)/libm.so.2 +link path=usr/lib/$(ARCH64)/libm.so.1 target=../../../lib/$(ARCH64)/libm.so.1 +link path=usr/lib/$(ARCH64)/libm.so.2 target=../../../lib/$(ARCH64)/libm.so.2 +link path=usr/lib/$(ARCH64)/libmvec.so \ + target=../../../lib/$(ARCH64)/libmvec.so.1 +link path=usr/lib/$(ARCH64)/libmvec.so.1 \ + target=../../../lib/$(ARCH64)/libmvec.so.1 +link path=usr/lib/$(ARCH64)/llib-lm.ln \ + target=../../../lib/$(ARCH64)/llib-lm.ln +link path=usr/lib/libm.so target=../../lib/libm.so.2 +link path=usr/lib/libm.so.1 target=../../lib/libm.so.1 +link path=usr/lib/libm.so.2 target=../../lib/libm.so.2 +link path=usr/lib/libmvec.so target=../../lib/libmvec.so.1 +link path=usr/lib/libmvec.so.1 target=../../lib/libmvec.so.1 +link path=usr/lib/llib-lm target=../../lib/llib-lm +link path=usr/lib/llib-lm.ln target=../../lib/llib-lm.ln diff --git a/usr/src/tools/aw/aw.c b/usr/src/tools/aw/aw.c index 36891b1a9c..ba7d663187 100644 --- a/usr/src/tools/aw/aw.c +++ b/usr/src/tools/aw/aw.c @@ -571,8 +571,9 @@ main(int argc, char *argv[]) * preprocessor, if present, or to gas if not. 
*/ filename = arg; - if (arglen > 2 && - strcmp(arg + arglen - 2, ".s") == 0) { + if ((arglen > 2) && + ((strcmp(arg + arglen - 2, ".s") == 0) || + (strcmp(arg + arglen - 2, ".S") == 0))) { /* * Though 'as' allows multiple assembler * files to be processed in one invocation diff --git a/usr/src/tools/cw/cw.c b/usr/src/tools/cw/cw.c index ded39c12ff..3bc250dc0c 100644 --- a/usr/src/tools/cw/cw.c +++ b/usr/src/tools/cw/cw.c @@ -20,6 +20,9 @@ */ /* + * Copyright 2011, Richard Lowe. + */ +/* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -33,7 +36,7 @@ */ /* If you modify this file, you must increment CW_VERSION */ -#define CW_VERSION "1.29" +#define CW_VERSION "1.30" /* * -# Verbose mode @@ -392,6 +395,8 @@ static const xarch_table_t xtbl[] = { { "amd64", (SS11|M64), { "-m64", "-mtune=opteron" } }, { "386", SS11, { "-march=i386" } }, { "pentium_pro", SS11, { "-march=pentiumpro" } }, + { "sse", SS11, { "-msse", "-mfpmath=sse" } }, + { "sse2", SS11, { "-msse2", "-mfpmath=sse" } }, #elif defined(__sparc) { "generic", (SS11|M32), { "-m32", "-mcpu=v8" } }, { "generic64", (SS11|M64), { "-m64", "-mcpu=v9" } }, diff --git a/usr/src/uts/common/sys/ccompile.h b/usr/src/uts/common/sys/ccompile.h index 690bb7afb7..bb4dca54f9 100644 --- a/usr/src/uts/common/sys/ccompile.h +++ b/usr/src/uts/common/sys/ccompile.h @@ -139,6 +139,7 @@ extern "C" { #define __RETURNS_TWICE __sun_attr__((__returns_twice__)) #define __CONST __sun_attr__((__const__)) #define __PURE __sun_attr__((__pure__)) +#define __GNU_UNUSED __attribute__((__unused__)) #ifdef __cplusplus } |